diff --git a/src/timer/SDL_timer.c b/src/timer/SDL_timer.c index 2d6ad8b02..b47117e52 100644 --- a/src/timer/SDL_timer.c +++ b/src/timer/SDL_timer.c @@ -664,20 +664,71 @@ void SDL_DelayNS(Uint64 ns) void SDL_DelayPrecise(Uint64 ns) { Uint64 current_value = SDL_GetTicksNS(); - Uint64 target_value = current_value + ns; + const Uint64 target_value = current_value + ns; - // Sleep for a short number of cycles - // We'll use 1 ms as a scheduling timeslice, it's a good value for modern operating systems - const int SCHEDULING_TIMESLICE_NS = 1 * SDL_NS_PER_MS; - while (current_value < target_value) { - Uint64 remaining_ns = (target_value - current_value); - if (remaining_ns > (SCHEDULING_TIMESLICE_NS + SDL_NS_PER_US)) { - // Sleep for a short time, less than the scheduling timeslice - SDL_SYS_DelayNS(SCHEDULING_TIMESLICE_NS - SDL_NS_PER_US); - } else { - // Spin for any remaining time - SDL_CPUPauseInstruction(); + // Sleep for a short number of cycles when real sleeps are desired. + // We'll use 1 ms, it's the minimum guaranteed to produce real sleeps across + // all platforms. + const Uint64 SHORT_SLEEP_NS = 1 * SDL_NS_PER_MS; + + // Try to sleep short of target_value. If for some crazy reason + // a particular platform sleeps for less than 1 ms when 1 ms was requested, + // that's fine, the code below can cope with that, but in practice no + // platforms behave that way. + Uint64 max_sleep_ns = SHORT_SLEEP_NS; + while (current_value + max_sleep_ns < target_value) { + // Sleep for a short time + SDL_SYS_DelayNS(SHORT_SLEEP_NS); + + const Uint64 now = SDL_GetTicksNS(); + const Uint64 next_sleep_ns = (now - current_value); + if (next_sleep_ns > max_sleep_ns) { + max_sleep_ns = next_sleep_ns; } + current_value = now; + } + + // Do a shorter sleep of the remaining time here, less the max overshoot in + // the first loop. Due to maintaining max_sleep_ns as + // greater-than-or-equal-to-1 ms, we can always subtract off 1 ms to get + // the duration overshot beyond a 1 ms sleep request; if the system never + // overshot, great, it's zero duration. By choosing the max overshoot + // amount, we're likely to not overshoot here. If the sleep here ends up + // functioning like SDL_DelayNS(0) internally, that's fine, we just don't + // get to do a more-precise-than-1 ms-resolution sleep to undershoot by a + // small amount on the current system, but SDL_DelayNS(0) does at least + // introduce a small, yielding delay on many platforms, better than an + // unyielding busyloop. + // + // Note that we'll always do at least one sleep in this function, so the + // minimum resolution will be that of SDL_SYS_DelayNS() + if (current_value < target_value && (target_value - current_value) > (max_sleep_ns - SHORT_SLEEP_NS)) { + const Uint64 delay_ns = (target_value - current_value) - (max_sleep_ns - SHORT_SLEEP_NS); + SDL_SYS_DelayNS(delay_ns); + current_value = SDL_GetTicksNS(); + } + + // We've likely undershot target_value at this point by a pretty small + // amount, but maybe not. The footgun case if not handled here is where + // we've undershot by a large amount, like several ms, but still smaller + // than the amount max_sleep_ns overshot by; in such a situation, the above + // shorter-sleep block didn't do any delay, the if-block wasn't entered. + // Also, maybe the shorter-sleep undershot by several ms, so we still don't + // want to spin a lot then. In such a case, we accept the possibility of + // overshooting to not spin much, or if overshot here, not at all, keeping + // CPU/power usage down in any case. Due to scheduler sloppiness, it's + // entirely possible to end up undershooting/overshooting here by much less + // than 1 ms even if the current system's sleep function is only 1 + // ms-resolution, as SDL_GetTicksNS() generally is better resolution than 1 + // ms on the systems SDL supports. + while (current_value + SHORT_SLEEP_NS < target_value) { + SDL_SYS_DelayNS(SHORT_SLEEP_NS); + current_value = SDL_GetTicksNS(); + } + + // Spin for any remaining time + while (current_value < target_value) { + SDL_CPUPauseInstruction(); current_value = SDL_GetTicksNS(); } } diff --git a/test/testtimer.c b/test/testtimer.c index 9523cd0dd..bdf7e908f 100644 --- a/test/testtimer.c +++ b/test/testtimer.c @@ -213,7 +213,7 @@ int main(int argc, char *argv[]) Uint64 total_overslept = 0; start = SDL_GetTicksNS(); - SDL_DelayNS(1); + SDL_DelayPrecise(1); now = SDL_GetTicksNS(); actual_delay = (now - start); SDL_Log("Minimum precise delay: %" SDL_PRIu64 " ns\n", actual_delay); @@ -221,7 +221,7 @@ int main(int argc, char *argv[]) SDL_Log("Timing 100 frames at 60 FPS\n"); for (i = 0; i < 100; ++i) { start = SDL_GetTicksNS(); - SDL_DelayNS(desired_delay); + SDL_DelayPrecise(desired_delay); now = SDL_GetTicksNS(); actual_delay = (now - start); if (actual_delay > desired_delay) {