From 976bfa1969c9cd2b7b0795b0592a3bf2e6f4b747 Mon Sep 17 00:00:00 2001 From: Mr-Wiseguy Date: Thu, 21 Mar 2024 02:46:08 -0400 Subject: [PATCH] Reduced input latency and improved gyro by moving input polling to main game thread and moving frame wait to after DL submission, fixed bluetooth dualsense gyro --- patches/input.c | 4 +- patches/input_latency.c | 147 ++++++++++++++++++++++++++++++++++++ patches/patches.h | 1 + patches/patches.ld | 2 +- patches/syms.ld | 3 + src/main/main.cpp | 2 + src/recomp/cont.cpp | 19 +++++ src/recomp/recomp.cpp | 2 +- ultramodern/events.cpp | 1 + ultramodern/ultramodern.hpp | 1 + 10 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 patches/input_latency.c diff --git a/patches/input.c b/patches/input.c index 4b4a07f..2710340 100644 --- a/patches/input.c +++ b/patches/input.c @@ -53,7 +53,7 @@ s32 func_80847190(PlayState* play, Player* this, s32 arg2) { temp3 = ((play->state.input[0].rel.stick_y >= 0) ? 1 : -1) * (s32)((1.0f - Math_CosS(play->state.input[0].rel.stick_y * 0xC8)) * 1500.0f); - this->actor.focus.rot.x += temp3 + (s32)((target_gyro_x - applied_gyro_x) * -3.0f); + this->actor.focus.rot.x += temp3 + (s32)((target_gyro_x - applied_gyro_x) * -1.5f); applied_gyro_x = target_gyro_x; if (this->stateFlags1 & PLAYER_STATE1_800000) { @@ -66,7 +66,7 @@ s32 func_80847190(PlayState* play, Player* this, s32 arg2) { var_s0 = this->actor.focus.rot.y - this->actor.shape.rot.y; temp3 = ((play->state.input[0].rel.stick_x >= 0) ? 
1 : -1) * (s32)((1.0f - Math_CosS(play->state.input[0].rel.stick_x * 0xC8)) * -1500.0f); - var_s0 += temp3 + (s32)((target_gyro_y - applied_gyro_y) * 3.0f); + var_s0 += temp3 + (s32)((target_gyro_y - applied_gyro_y) * 1.5f); applied_gyro_y = target_gyro_y; this->actor.focus.rot.y = CLAMP(var_s0, -0x4AAA, 0x4AAA) + this->actor.shape.rot.y; diff --git a/patches/input_latency.c b/patches/input_latency.c new file mode 100644 index 0000000..fb9dbf6 --- /dev/null +++ b/patches/input_latency.c @@ -0,0 +1,147 @@ +#include "patches.h" +#include "sys_cfb.h" +#include "buffers.h" +#include "fault.h" + +void recomp_set_current_frame_poll_id(); +void PadMgr_HandleRetrace(void); +void PadMgr_LockPadData(void); +void PadMgr_UnlockPadData(void); + +void PadMgr_ThreadEntry() { + // @recomp Controller polling was moved to the main thread, so there's nothing to do here. +} + +// @recomp Patched to do the actual input polling. +void PadMgr_GetInput(Input* inputs, s32 gameRequest) { + // @recomp Do an actual poll if gameRequest is true. + if (gameRequest) { + PadMgr_HandleRetrace(); + // @recomp Tag the current frame's input polling id for latency tracking. + recomp_set_current_frame_poll_id(); + } + PadMgr_LockPadData(); + PadMgr_GetInputNoLock(inputs, gameRequest); + PadMgr_UnlockPadData(); +} + +// @recomp Just call PadMgr_GetInput. +void PadMgr_GetInput2(Input* inputs, s32 gameRequest) { + PadMgr_GetInput(inputs, gameRequest); +} + +extern CfbInfo sGraphCfbInfos[3]; + +// @recomp Immediately sends the graphics task instead of queueing it in the scheduler. +void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) { + static s32 retryCount = 10; + static s32 cfbIdx = 0; + OSTask_t* task = &gfxCtx->task.list.t; + OSScTask* scTask = &gfxCtx->task; + OSTimer timer; + OSMesg msg; + CfbInfo* cfb; + + // @recomp Disable the wait here so that it can be moved after task submission for minimizing latency. 
+// retry: +// osSetTimer(&timer, OS_USEC_TO_CYCLES(3 * 1000 * 1000), 0, &gfxCtx->queue, (OSMesg)666); +// osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK); +// osStopTimer(&timer); + +// if (msg == (OSMesg)666) { +// osSyncPrintf("GRAPH SP TIMEOUT\n"); +// if (retryCount >= 0) { +// retryCount--; +// Sched_SendGfxCancelMsg(&gSchedContext); +// goto retry; +// } else { +// // graph.c: No more! die! +// osSyncPrintf("graph.c:もうダメ!死ぬ!\n"); +// Fault_AddHungupAndCrashImpl("RCP is HUNG UP!!", "Oh! MY GOD!!"); +// } +// } + + gfxCtx->masterList = gGfxMasterDL; + if (gfxCtx->callback != NULL) { + gfxCtx->callback(gfxCtx, gfxCtx->callbackArg); + } + + task->type = M_GFXTASK; + task->flags = OS_SC_DRAM_DLIST; + task->ucodeBoot = SysUcode_GetUCodeBoot(); + task->ucodeBootSize = SysUcode_GetUCodeBootSize(); + task->ucode = SysUcode_GetUCode(); + task->ucodeData = SysUcode_GetUCodeData(); + task->ucodeSize = SP_UCODE_SIZE; + task->ucodeDataSize = SP_UCODE_DATA_SIZE; + task->dramStack = (u64*)gGfxSPTaskStack; + task->dramStackSize = sizeof(gGfxSPTaskStack); + task->outputBuff = gGfxSPTaskOutputBufferPtr; + task->outputBuffSize = gGfxSPTaskOutputBufferEnd; + task->dataPtr = (u64*)gGfxMasterDL; + task->dataSize = 0; + task->yieldDataPtr = (u64*)gGfxSPTaskYieldBuffer; + task->yieldDataSize = sizeof(gGfxSPTaskYieldBuffer); + + scTask->next = NULL; + scTask->flags = OS_SC_RCP_MASK | OS_SC_SWAPBUFFER | OS_SC_LAST_TASK; + + if (SREG(33) & 1) { + SREG(33) &= ~1; + scTask->flags &= ~OS_SC_SWAPBUFFER; + gfxCtx->framebufferIndex--; + } + + scTask->msgQ = &gfxCtx->queue; + scTask->msg = NULL; + + { s32 pad; } + + + cfb = &sGraphCfbInfos[cfbIdx]; + cfbIdx = (cfbIdx + 1) % ARRAY_COUNT(sGraphCfbInfos); + + cfb->fb1 = gfxCtx->curFrameBuffer; + cfb->swapBuffer = gfxCtx->curFrameBuffer; + + if (gfxCtx->updateViMode) { + gfxCtx->updateViMode = false; + cfb->viMode = gfxCtx->viMode; + cfb->features = gfxCtx->viConfigFeatures; + cfb->xScale = gfxCtx->xScale; + cfb->yScale = gfxCtx->yScale; + } else 
{ + cfb->viMode = NULL; + } + cfb->unk_10 = 0; + cfb->updateRate = gameState->framerateDivisor; + + scTask->framebuffer = cfb; + + while (gfxCtx->queue.validCount != 0) { + osRecvMesg(&gfxCtx->queue, NULL, OS_MESG_NOBLOCK); + } + + gfxCtx->schedMsgQ = &gSchedContext.cmdQ; + osSendMesg(&gSchedContext.cmdQ, scTask, OS_MESG_BLOCK); + Sched_SendEntryMsg(&gSchedContext); + + // @recomp Manually wait the required number of VI periods after submitting the task + // so that the next frame doesn't need to wait before submitting its task. + static IrqMgrClient irq_client = {0}; + static OSMesgQueue vi_queue = {0}; + static OSMesg vi_buf[8] = {0}; + static bool created = false; + + // Create the message queue and install the VI irq manager + if (!created) { + created = true; + osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf)); + extern IrqMgr gIrqMgr; + IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue); + } + + for (int i = 0; i < cfb->updateRate; i++) { + osRecvMesg(&vi_queue, NULL, OS_MESG_BLOCK); + } +} diff --git a/patches/patches.h b/patches/patches.h index 7906932..2d9b051 100644 --- a/patches/patches.h +++ b/patches/patches.h @@ -2,6 +2,7 @@ #define __PATCHES_H__ // TODO fix renaming symbols in patch recompilation +#define osCreateMesgQueue osCreateMesgQueue_recomp #define osRecvMesg osRecvMesg_recomp #define osSendMesg osSendMesg_recomp #define sinf __sinf_recomp diff --git a/patches/patches.ld b/patches/patches.ld index ce543b3..6734bab 100644 --- a/patches/patches.ld +++ b/patches/patches.ld @@ -1,4 +1,4 @@ -RAMBASE = 0x80800100; /* Used to hold any new symbols */ +RAMBASE = 0x80801000; /* Used to hold any new symbols */ EXTRA_RAM_SIZE = 0x01000000; /* Amount of extra ram allocated by recomp */ MEMORY { diff --git a/patches/syms.ld b/patches/syms.ld index dc46b22..662bba8 100644 --- a/patches/syms.ld +++ b/patches/syms.ld @@ -21,3 +21,6 @@ recomp_get_bgm_volume = 0x8F000030; recomp_get_low_health_beeps_enabled = 0x8F000034; __sinf_recomp = 0x8F000038; 
__cosf_recomp = 0x8F00003C; +osCreateMesgQueue_recomp = 0x8F000048; +recomp_set_current_frame_poll_id = 0x8F00004C; + diff --git a/src/main/main.cpp b/src/main/main.cpp index f5c862a..3e365fb 100644 --- a/src/main/main.cpp +++ b/src/main/main.cpp @@ -42,6 +42,8 @@ void exit_error(const char* str, Ts ...args) { ultramodern::gfx_callbacks_t::gfx_data_t create_gfx() { SDL_SetHint(SDL_HINT_WINDOWS_DPI_AWARENESS, "permonitorv2"); SDL_SetHint(SDL_HINT_GAMECONTROLLER_USE_BUTTON_LABELS, "0"); + SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS4_RUMBLE, "1"); + SDL_SetHint(SDL_HINT_JOYSTICK_HIDAPI_PS5_RUMBLE, "1"); if (SDL_Init(SDL_INIT_VIDEO | SDL_INIT_GAMECONTROLLER) > 0) { exit_error("Failed to initialize SDL2: %s\n", SDL_GetError()); } diff --git a/src/recomp/cont.cpp b/src/recomp/cont.cpp index 473964e..cfbac86 100644 --- a/src/recomp/cont.cpp +++ b/src/recomp/cont.cpp @@ -3,6 +3,24 @@ static ultramodern::input_callbacks_t input_callbacks; +constexpr size_t num_poll_ids = 8; +std::chrono::system_clock::time_point input_poll_times[num_poll_ids]; +s32 cur_poll_id = 0; +s32 cur_frame_poll_id = 0; + +void update_poll_time() { + cur_poll_id = (cur_poll_id + 1) % num_poll_ids; + input_poll_times[cur_poll_id] = std::chrono::system_clock::now(); +} + +extern "C" void recomp_set_current_frame_poll_id(uint8_t* rdram, recomp_context* ctx) { + cur_frame_poll_id = cur_poll_id; +} + +void ultramodern::measure_input_latency() { + // printf("Delta: %ld micros\n", std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now() - input_poll_times[cur_frame_poll_id])); +} + void set_input_callbacks(const ultramodern::input_callbacks_t& callbacks) { input_callbacks = callbacks; } @@ -36,6 +54,7 @@ extern "C" void osContStartReadData_recomp(uint8_t* rdram, recomp_context* ctx) if (input_callbacks.poll_input) { input_callbacks.poll_input(); } + update_poll_time(); ultramodern::send_si_message(); } diff --git a/src/recomp/recomp.cpp b/src/recomp/recomp.cpp index df40c86..666857b 100644 --- 
a/src/recomp/recomp.cpp +++ b/src/recomp/recomp.cpp @@ -330,7 +330,7 @@ void init(uint8_t* rdram, recomp_context* ctx) { recomp::do_rom_read(rdram, entrypoint, 0x10001000, 0x100000); // Read in any extra data from patches - read_patch_data(rdram, (gpr)(s32)0x80800100); + read_patch_data(rdram, (gpr)(s32)0x80801000); // Set up stack pointer ctx->r29 = 0xFFFFFFFF803FFFF0u; diff --git a/ultramodern/events.cpp b/ultramodern/events.cpp index 3cad0b0..ed7da59 100644 --- a/ultramodern/events.cpp +++ b/ultramodern/events.cpp @@ -316,6 +316,7 @@ void gfx_thread_func(uint8_t* rdram, std::atomic_flag* thread_ready, ultramodern RT64EnableInstantPresent(application); enabled_instant_present = true; } + ultramodern::measure_input_latency(); // Tell the game that the RSP completed instantly. This will allow it to queue other task types, but it won't // start another graphics task until the RDP is also complete. Games usually preserve the RSP inputs until the RDP // is finished as well, so sending this early shouldn't be an issue in most cases. diff --git a/ultramodern/ultramodern.hpp b/ultramodern/ultramodern.hpp index 1d5b8af..2ed3367 100644 --- a/ultramodern/ultramodern.hpp +++ b/ultramodern/ultramodern.hpp @@ -96,6 +96,7 @@ std::chrono::system_clock::time_point get_start(); std::chrono::system_clock::duration time_since_start(); void get_window_size(uint32_t& width, uint32_t& height); uint32_t get_target_framerate(uint32_t original); +void measure_input_latency(); // Audio void init_audio();