Fixed crash caused by input latency reduction patch

Mr-Wiseguy 2024-03-30 20:00:02 -04:00
parent 1caf81e1cf
commit 1651298a2c
5 changed files with 94 additions and 25 deletions

View File

@@ -2,6 +2,7 @@
#include "sys_cfb.h"
#include "buffers.h"
#include "fault.h"
#include "z64speed_meter.h"
void recomp_set_current_frame_poll_id();
void PadMgr_HandleRetrace(void);
@@ -31,6 +32,10 @@ void PadMgr_GetInput2(Input* inputs, s32 gameRequest) {
}
extern CfbInfo sGraphCfbInfos[3];
u32 recomp_time_us();
void recomp_measure_latency();
OSMesgQueue *rdp_queue_ptr = NULL;
// @recomp Immediately sends the graphics task instead of queueing it in the scheduler.
void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
@@ -41,11 +46,28 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
OSTimer timer;
OSMesg msg;
CfbInfo* cfb;
// @recomp Additional static members for extra scheduling purposes.
static IrqMgrClient irq_client = {0};
static OSMesgQueue vi_queue = {0};
static OSMesg vi_buf[8] = {0};
static OSMesgQueue rdp_queue = {0};
static OSMesg rdp_mesg = NULL;
static bool created = false;
if (!created) {
created = true;
osCreateMesgQueue(&vi_queue, vi_buf, ARRAY_COUNT(vi_buf));
osCreateMesgQueue(&rdp_queue, &rdp_mesg, 1);
extern IrqMgr gIrqMgr;
IrqMgr_AddClient(&gIrqMgr, &irq_client, &vi_queue);
}
// @recomp Disable the wait here so that it can be moved after task submission for minimizing latency.
// retry:
// osSetTimer(&timer, OS_USEC_TO_CYCLES(3 * 1000 * 1000), 0, &gfxCtx->queue, (OSMesg)666);
u32 count_before = recomp_time_us();
osRecvMesg(&gfxCtx->queue, &msg, OS_MESG_BLOCK);
u32 count_after = recomp_time_us();
// osStopTimer(&timer);
// if (msg == (OSMesg)666) {
@@ -97,7 +119,6 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
{ s32 pad; }
cfb = &sGraphCfbInfos[cfbIdx];
cfbIdx = (cfbIdx + 1) % ARRAY_COUNT(sGraphCfbInfos);
@@ -122,26 +143,66 @@ void Graph_TaskSet00(GraphicsContext* gfxCtx, GameState* gameState) {
osRecvMesg(&gfxCtx->queue, NULL, OS_MESG_NOBLOCK);
}
// @recomp Set up the dedicated RDP complete message queue pointer for the scheduler.
rdp_queue_ptr = &rdp_queue;
gfxCtx->schedMsgQ = &gSchedContext.cmdQ;
recomp_measure_latency();
osSendMesg(&gSchedContext.cmdQ, scTask, OS_MESG_BLOCK);
Sched_SendEntryMsg(&gSchedContext);
// @recomp Wait for the RDP complete message to mitigate waiting between creating the next task and submitting it.
osRecvMesg(&rdp_queue, NULL, OS_MESG_BLOCK);
rdp_queue_ptr = NULL;
// @recomp Manually wait the required number of VI periods after submitting the task
// so that the next frame doesn't need to wait before submitting its task.
u32 vi_count_before = recomp_time_us();
for (int i = 0; i < cfb->updateRate; i++) {
osRecvMesg(&vi_queue, NULL, OS_MESG_BLOCK);
}
// @recomp Flush any excess VI messages that came in.
while (osRecvMesg(&vi_queue, NULL, OS_MESG_NOBLOCK) == 0) {
;
}
u32 vi_count_after = recomp_time_us();
// recomp_printf("recv wait added %d us\nVI wait: %d us\n", count_after - count_before, vi_count_after - vi_count_before);
}
extern OSTime sRDPStartTime;
// @recomp Patched to send a message to the dedicated RDP complete message queue.
void Sched_HandleRDPDone(SchedContext* sched) {
OSScTask* curRDP;
OSScTask* nextRSP = NULL;
OSScTask* nextRDP = NULL;
s32 state;
if (sched->curRDPTask == NULL) {
osSyncPrintf("__scHandleRDP:sc->curRDPTask == NULL\n");
return;
}
// Log run time
gRDPTimeAcc = osGetTime() - sRDPStartTime;
// Mark task done
curRDP = sched->curRDPTask;
sched->curRDPTask = NULL;
curRDP->state &= ~OS_SC_DP;
// @recomp Send a message to the dedicated RDP complete message queue if it's currently set up.
if (rdp_queue_ptr) {
osSendMesg(rdp_queue_ptr, NULL, OS_MESG_BLOCK);
}
Sched_NotifyDone(sched, curRDP);
// Schedule and run next task
state = ((sched->curRSPTask == NULL) << 1) | (sched->curRDPTask == NULL);
if (Sched_Schedule(sched, &nextRSP, &nextRDP, state) != state) {
Sched_RunTask(sched, nextRSP, nextRDP);
}
}
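
The change above boils down to a one-slot completion handshake between the game thread and the scheduler: Graph_TaskSet00 publishes a pointer to its private RDP-done queue, submits the task, blocks until Sched_HandleRDPDone posts to that queue, then clears the pointer so the scheduler only signals while a frame is actually waiting. Below is a minimal sketch of that pattern using the standard libultra message-queue calls; the names init_rdp_done_queue, wait_for_rdp_done, and signal_rdp_done are illustrative only and not part of the patch.

// Illustrative sketch of the one-slot RDP-done handshake, not the actual patch code.
#include <ultra64.h>

static OSMesgQueue rdp_done_queue;        // one-entry queue, created once
static OSMesg rdp_done_slot;              // backing storage for the single slot
static OSMesgQueue* rdp_done_ptr = NULL;  // non-NULL only while a frame is waiting

// Game side: call once before the first frame.
void init_rdp_done_queue(void) {
    osCreateMesgQueue(&rdp_done_queue, &rdp_done_slot, 1);
}

// Game side: publish the queue, submit the task, then block until the
// scheduler reports RDP completion, and finally unpublish the queue.
void wait_for_rdp_done(void) {
    rdp_done_ptr = &rdp_done_queue;
    // ... task submission (osSendMesg to the scheduler command queue) goes here ...
    osRecvMesg(&rdp_done_queue, NULL, OS_MESG_BLOCK);
    rdp_done_ptr = NULL;
}

// Scheduler side: called from the RDP-done handler; only signals if a frame is waiting.
void signal_rdp_done(void) {
    if (rdp_done_ptr != NULL) {
        osSendMesg(rdp_done_ptr, NULL, OS_MESG_BLOCK);
    }
}

The NULL check plus clearing the pointer right after the wait means an RDP completion that arrives while no frame is blocked is simply ignored instead of piling up in the queue.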

View File

@@ -23,4 +23,5 @@ __sinf_recomp = 0x8F000038;
__cosf_recomp = 0x8F00003C;
osCreateMesgQueue_recomp = 0x8F000048;
recomp_set_current_frame_poll_id = 0x8F00004C;
recomp_time_us = 0x8F000050;
recomp_measure_latency = 0x8F000054;

View File

@@ -77,3 +77,7 @@ extern "C" void recomp_get_bgm_volume(uint8_t* rdram, recomp_context* ctx) {
extern "C" void recomp_get_low_health_beeps_enabled(uint8_t* rdram, recomp_context* ctx) {
_return(ctx, static_cast<u32>(recomp::get_low_health_beeps_enabled()));
}
extern "C" void recomp_time_us(uint8_t* rdram, recomp_context* ctx) {
_return(ctx, static_cast<u32>(std::chrono::duration_cast<std::chrono::microseconds>(ultramodern::time_since_start()).count()));
}
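
Note that recomp_time_us truncates the elapsed time to a u32, so the counter wraps roughly every 71.6 minutes (2^32 microseconds). The patch only ever uses it for deltas such as count_after - count_before, and unsigned subtraction keeps those deltas correct across a single wrap. A small standalone illustration of that property (plain C, independent of the recomp runtime):

#include <stdint.h>
#include <stdio.h>

// Unsigned subtraction is performed modulo 2^32, so a delta that straddles
// the wrap point still comes out right as long as it spans less than ~71.6 minutes.
int main(void) {
    uint32_t before   = 0xFFFFFFF0u;     // just before the counter wraps
    uint32_t after    = 0x00000010u;     // shortly after the wrap
    uint32_t delta_us = after - before;  // 0x20 == 32 microseconds
    printf("delta: %u us\n", delta_us);
    return 0;
}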

View File

@@ -3,22 +3,22 @@
static ultramodern::input_callbacks_t input_callbacks;
std::chrono::system_clock::time_point input_poll_time;
void update_poll_time() {
input_poll_time = std::chrono::system_clock::now();
}
extern "C" void recomp_set_current_frame_poll_id(uint8_t* rdram, recomp_context* ctx) {
// TODO reimplement the system for tagging polls with IDs to handle games with multithreaded input polling.
}
extern "C" void recomp_measure_latency(uint8_t* rdram, recomp_context* ctx) {
ultramodern::measure_input_latency();
}
void ultramodern::measure_input_latency() {
// printf("Delta: %ld micros\n", std::chrono::duration_cast<std::chrono::microseconds>(std::chrono::system_clock::now() - input_poll_time));
}
void set_input_callbacks(const ultramodern::input_callbacks_t& callbacks) {

View File

@@ -315,14 +315,17 @@ void gfx_thread_func(uint8_t* rdram, std::atomic_flag* thread_ready, ultramodern
RT64EnableInstantPresent(application);
enabled_instant_present = true;
}
ultramodern::measure_input_latency();
// Tell the game that the RSP completed instantly. This will allow it to queue other task types, but it won't
// start another graphics task until the RDP is also complete. Games usually preserve the RSP inputs until the RDP
// is finished as well, so sending this early shouldn't be an issue in most cases.
// If this causes issues then the logic can be replaced with responding to yield requests.
sp_complete();
auto rt64_start = std::chrono::system_clock::now();
RT64SendDL(rdram, &task_action->task);
auto rt64_end = std::chrono::system_clock::now();
dp_complete();
// printf("RT64 ProcessDList time: %d us\n", static_cast<u32>(std::chrono::duration_cast<std::chrono::microseconds>(rt64_end - rt64_start).count()));
}
else if (const auto* swap_action = std::get_if<SwapBuffersAction>(&action)) {
events_context.vi.current_buffer = events_context.vi.next_buffer;