1
- // Hardware Performance Counter delay routines, by 8bitbubsy
1
+ /*
2
+ ** Hardware Performance Counter delay routines
3
+ */
2
4
3
5
#ifdef _WIN32
4
6
#define WIN32_MEAN_AND_LEAN
11
13
#include <stdbool.h>
12
14
#include "pt2_hpc.h"
13
15
14
/* Width of the fractional part of the 64:63 fixed-point end-time accumulator.
** The stored fractions are always below 2^63 (the 64-bit fraction is shifted
** right by one, and the accumulator is masked with FRAC_MASK after a carry),
** so adding two of them can never wrap a uint64_t before the carry into the
** integer part has been detected.
*/
#define FRAC_BITS 63
#define FRAC_SCALE (1ULL << FRAC_BITS)
#define FRAC_MASK (FRAC_SCALE-1)
18
19
19
20
hpcFreq_t hpcFreq ;
20
21
21
22
#ifdef _WIN32 // Windows usleep() implementation
22
23
24
+ #define STATUS_SUCCESS 0
25
+
26
+ static bool canAdjustTimerResolution ;
27
+
23
28
static NTSTATUS (__stdcall * NtDelayExecution )(BOOL Alertable , PLARGE_INTEGER DelayInterval );
24
29
static NTSTATUS (__stdcall * NtQueryTimerResolution )(PULONG MinimumResolution , PULONG MaximumResolution , PULONG ActualResolution );
25
30
static NTSTATUS (__stdcall * NtSetTimerResolution )(ULONG DesiredResolution , BOOLEAN SetResolution , PULONG CurrentResolution );
@@ -30,26 +35,27 @@ static void usleepGood(int32_t usec)
30
35
{
31
36
LARGE_INTEGER delayInterval ;
32
37
33
- // NtDelayExecution() delays in 100ns-units, and negative value = delay from current time
38
+ // NtDelayExecution() delays in 100ns-units, and a negative value means to delay from current time
34
39
usec *= -10 ;
35
40
36
- delayInterval .HighPart = 0xFFFFFFFF ;
41
+ delayInterval .HighPart = 0xFFFFFFFF ; // negative 64-bit value, we only set the lower dword
37
42
delayInterval .LowPart = usec ;
38
43
NtDelayExecution (false, & delayInterval );
39
44
}
40
45
41
/* Fallback if no NtDelayExecution().
** Sleep() works in whole milliseconds, so the request is rounded to the
** nearest millisecond. Negative requests are treated as zero.
*/
static void usleepPoor(int32_t usec)
{
	// a negative count would otherwise be converted to a huge unsigned millisecond value
	if (usec < 0)
		usec = 0;

	// round in unsigned arithmetic so that values near INT32_MAX can't overflow
	Sleep(((uint32_t)usec + 500) / 1000);
}
45
50
46
51
static void windowsSetupUsleep (void )
47
52
{
48
53
NtDelayExecution = (NTSTATUS (__stdcall * )(BOOL , PLARGE_INTEGER ))GetProcAddress (GetModuleHandle ("ntdll.dll" ), "NtDelayExecution" );
54
+ usleep = (NtDelayExecution != NULL ) ? usleepGood : usleepPoor ;
55
+
49
56
NtQueryTimerResolution = (NTSTATUS (__stdcall * )(PULONG , PULONG , PULONG ))GetProcAddress (GetModuleHandle ("ntdll.dll" ), "NtQueryTimerResolution" );
50
57
NtSetTimerResolution = (NTSTATUS (__stdcall * )(ULONG , BOOLEAN , PULONG ))GetProcAddress (GetModuleHandle ("ntdll.dll" ), "NtSetTimerResolution" );
51
-
52
- usleep = (NtDelayExecution != NULL ) ? usleepGood : usleepWeak ;
58
+ canAdjustTimerResolution = (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL );
53
59
}
54
60
#endif
55
61
@@ -59,50 +65,69 @@ void hpc_Init(void)
59
65
windowsSetupUsleep ();
60
66
#endif
61
67
hpcFreq .freq64 = SDL_GetPerformanceFrequency ();
62
- hpcFreq .dFreq = (double )hpcFreq .freq64 ;
63
- hpcFreq .dFreqMulMicro = (1000.0 * 1000.0 ) / hpcFreq .dFreq ;
68
+
69
+ double dFreq = (double )hpcFreq .freq64 ;
70
+
71
+ hpcFreq .dFreqMulMs = 1000.0 / dFreq ;
72
+ hpcFreq .dFreqMulMicro = (1000.0 * 1000.0 ) / dFreq ;
64
73
}
65
74
66
/* Returns the fractional part of (dividend / divisor) as an unsigned 0.64
** fixed-point number, i.e. floor(frac * 2^64).
**
** Returns 0 when the division is exact or when the divisor is zero.
** A dividend smaller than the divisor is a pure fraction and is handled too.
*/
static uint64_t getFrac64FromU64DivU32(uint64_t dividend, uint32_t divisor)
{
	if (divisor == 0)
		return 0;

	/* Only the remainder contributes to the fraction. It is smaller than the
	** 32-bit divisor, so shifting it left by 32 bits below can't overflow.
	*/
	const uint64_t rem = dividend % divisor;
	if (rem == 0)
		return 0;

	// long division, producing 32 fraction bits per step
	const uint64_t numHi = rem << 32;
	const uint32_t resultHi = (uint32_t)(numHi / divisor);

	const uint64_t numLo = (numHi % divisor) << 32;
	const uint32_t resultLo = (uint32_t)(numLo / divisor);

	return ((uint64_t)resultHi << 32) | resultLo;
}
78
94
79
- void hpc_ResetEndTime (hpc_t * hpc )
95
+ void hpc_SetDurationInHz (hpc_t * hpc , uint32_t hz )
80
96
{
81
- hpc -> endTime64Int = SDL_GetPerformanceCounter () + hpc -> duration64Int ;
82
- hpc -> endTime64Frac = hpc -> duration64Frac ;
97
+ // set 64:63fp value
98
+ hpc -> durationInt = hpcFreq .freq64 / hz ;
99
+ hpc -> durationFrac = getFrac64FromU64DivU32 (hpcFreq .freq64 , hz ) >> 1 ;
100
+
101
+ hpc -> resetFrame = hz * 3600 ; // reset counters every hour
102
+
83
103
}
84
104
85
- void hpc_Wait (hpc_t * hpc )
105
+ void hpc_ResetCounters (hpc_t * hpc )
86
106
{
87
- #ifdef _WIN32 // set resolution to 0.5ms (safest minium) - this is confirmed to improve NtDelayExecution() and Sleep()
88
- ULONG originalTimerResolution , minRes , maxRes , curRes ;
107
+ hpc -> endTimeInt = SDL_GetPerformanceCounter () + hpc -> durationInt ;
108
+ hpc -> endTimeFrac = hpc -> durationFrac ;
109
+ }
89
110
90
- if (NtQueryTimerResolution != NULL && NtSetTimerResolution != NULL )
111
+ void hpc_Wait (hpc_t * hpc )
112
+ {
113
+ #ifdef _WIN32
114
+ /* Make sure resolution is set to 0.5ms (safest minimum) - this is confirmed to improve
115
+ ** NtDelayExecution() and Sleep(). This will only be changed when needed, not per frame.
116
+ */
117
+ ULONG curRes , minRes , maxRes , junk ;
118
+ if (canAdjustTimerResolution && NtQueryTimerResolution (& minRes , & maxRes , & curRes ) == STATUS_SUCCESS )
91
119
{
92
- if (!NtQueryTimerResolution (& minRes , & maxRes , & originalTimerResolution ))
93
- {
94
- if (originalTimerResolution != 5000 && maxRes <= 5000 )
95
- NtSetTimerResolution (5000 , TRUE, & curRes ); // set to 0.5ms (safest minimum)
96
- }
120
+ if (curRes != 5000 && maxRes <= 5000 )
121
+ NtSetTimerResolution (5000 , TRUE, & junk ); // 0.5ms
97
122
}
98
123
#endif
99
124
100
125
const uint64_t currTime64 = SDL_GetPerformanceCounter ();
101
- if (currTime64 < hpc -> endTime64Int )
126
+ if (currTime64 < hpc -> endTimeInt )
102
127
{
103
- uint64_t timeLeft64 = hpc -> endTime64Int - currTime64 ;
128
+ uint64_t timeLeft64 = hpc -> endTimeInt - currTime64 ;
104
129
105
- // limit (and cast to) int32_t for fast SSE2 SIMD usage
130
+ // convert to int32_t for fast SSE2 SIMD usage later on
106
131
if (timeLeft64 > INT32_MAX )
107
132
timeLeft64 = INT32_MAX ;
108
133
@@ -115,12 +140,25 @@ void hpc_Wait(hpc_t *hpc)
115
140
116
141
// set next end time
117
142
118
- hpc -> endTime64Int += hpc -> duration64Int ;
143
+ hpc -> endTimeInt += hpc -> durationInt ;
144
+
145
+ // handle fractional part
146
+ hpc -> endTimeFrac += hpc -> durationFrac ;
147
+ if (hpc -> endTimeFrac >= FRAC_SCALE )
148
+ {
149
+ hpc -> endTimeFrac &= FRAC_MASK ;
150
+ hpc -> endTimeInt ++ ;
151
+ }
119
152
120
- hpc -> endTime64Frac += hpc -> duration64Frac ;
121
- if (hpc -> endTime64Frac >= FRAC_SCALE )
153
+ /* The counter ("endTimeInt") can accumulate major errors after a couple of hours,
154
+ ** since each frame is not happening at perfect intervals.
155
+ ** To fix this, reset the counter's int & frac once every hour. We should only get
156
+ ** up to one frame of stutter while they are resetting, then it's back to normal.
157
+ */
158
+ hpc -> frameCounter ++ ;
159
+ if (hpc -> frameCounter >= hpc -> resetFrame )
122
160
{
123
- hpc -> endTime64Frac &= FRAC_MASK ;
124
- hpc -> endTime64Int ++ ;
161
+ hpc -> frameCounter = 0 ;
162
+ hpc_ResetCounters ( hpc ) ;
125
163
}
126
164
}
0 commit comments