Skip to content

Commit 8f72eec

Browse files
disean authored and binarymaster committed
[HALPC98] Implement TSC calibration
CORE-17977
1 parent c943223 commit 8f72eec

File tree

4 files changed

+258
-15
lines changed

4 files changed

+258
-15
lines changed

hal/halx86/pc98.cmake

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -34,6 +34,7 @@ list(APPEND HAL_PC98_SOURCE
3434
pic/processor.c)
3535

3636
list(APPEND HAL_PC98_ASM_SOURCE
37+
pc98/delay.S
3738
generic/trap.S
3839
pic/pic.S)
3940

hal/halx86/pc98/delay.S

Lines changed: 125 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,125 @@
1+
/*
2+
* PROJECT: NEC PC-98 series HAL
3+
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4+
* PURPOSE: Busy-wait loop implementation
5+
* COPYRIGHT: Copyright 2026 Dmitry Borisov <di.sean@protonmail.com>
6+
*/
7+
8+
/* INCLUDES ******************************************************************/
9+
10+
#include <asm.inc>
11+
#include <ks386.inc>
12+
13+
#include "delay.h"
14+
15+
/* GLOBALS *******************************************************************/
16+
17+
#define PIC1_CONTROL_PORT HEX(00)
18+
#define PIC1_DATA_PORT HEX(02)
19+
#define PIC2_CONTROL_PORT HEX(08)
20+
#define PIC2_DATA_PORT HEX(0A)
21+
22+
#define PIC_EOI HEX(20)
23+
24+
#define MSR_RDTSC HEX(10)
25+
26+
/* FUNCTIONS *****************************************************************/
27+
28+
.code
29+
30+
#ifdef _USE_ML
31+
INIT_ASM SEGMENT PARA PUBLIC USE32 READ WRITE EXECUTE DISCARD
32+
#endif
33+
34+
/*
 * VOID __cdecl HalpTscCalibrationISR(VOID)
 *
 * Interrupt handler installed on the timer vector while the TSC is calibrated.
 * Every invocation reads the TSC, then acts on _TscCalibrationPhase:
 *   phase 0                - no sample is stored; the TSC is rewritten to 0
 *   phase 1..NUM_SAMPLES   - the TSC value goes into array slot (phase - 1)
 *   phase > NUM_SAMPLES    - nothing is stored and the phase stops advancing
 * All registers touched (eax, ecx, edx) are saved and restored, and an EOI
 * is written to PIC1_CONTROL_PORT before returning with IRETD.
 */
PUBLIC _HalpTscCalibrationISR
_HalpTscCalibrationISR:
    push edx
    push ecx
    push eax

    /* Take the timestamp up front, before any other work delays it */
    rdtsc

    /* ecx = phase counter, edx:eax = timestamp just taken */
    mov ecx, dword ptr ds:[_TscCalibrationPhase]

    /* Phase 0 is the very first interrupt; it arrives at an arbitrary time, so skip it */
    test ecx, ecx
    jz .TscIsrResetCounter

    /* Every slot is already filled: just acknowledge the interrupt */
    cmp ecx, NUM_SAMPLES
    ja .TscIsrAcknowledge

    /* Slot index is (phase - 1), hence the "- 8" byte bias on the 8-byte entries */
    mov dword ptr ds:[ecx*8 + _TscCalibrationArray + 4 - 8], edx
    mov dword ptr ds:[ecx*8 + _TscCalibrationArray + 0 - 8], eax
    jmp .TscIsrNextPhase

.TscIsrResetCounter:
    /* Write edx:eax = 0 to the TSC MSR so sampling starts from zero */
    xor edx, edx
    xor eax, eax
    mov ecx, MSR_RDTSC
    wrmsr

.TscIsrNextPhase:
    inc dword ptr ds:[_TscCalibrationPhase]

.TscIsrAcknowledge:
    /* Acknowledge the IRQ at the PIC */
    mov al, PIC_EOI
    out PIC1_CONTROL_PORT, al

    pop eax
    pop ecx
    pop edx
    iretd

.align 8

/* NUM_SAMPLES 64-bit TSC readings, filled in by the handler above */
PUBLIC _TscCalibrationArray
_TscCalibrationArray:
    .space NUM_SAMPLES * 8

/* Number of calibration interrupts handled so far (stops at NUM_SAMPLES + 1) */
PUBLIC _TscCalibrationPhase
_TscCalibrationPhase:
    .long 0
88+
#ifdef _USE_ML
89+
INIT_ASM ENDS
90+
#endif
91+
92+
/*
 * VOID NTAPI KeStallExecutionProcessor(_In_ ULONG MicroSeconds)
 *
 * Busy-waits by polling the TSC.
 * In:    [esp + 4] on entry = MicroSeconds (stdcall, callee pops 4 bytes)
 * Out:   nothing
 * Clobb: eax, ecx, edx, flags (ebx is saved and restored)
 *
 * The deadline is start + MicroSeconds * fs:[KPCR_STALL_SCALE_FACTOR],
 * computed and compared as a 64-bit unsigned value.
 */
PUBLIC _KeStallExecutionProcessor@4
_KeStallExecutionProcessor@4:
    push ebx

    /*
     * Serialize so that RDTSC is not executed out of order.
     * Keep "push ebx; xor eax, eax; cpuid" exactly as the first instructions:
     * HAL replaces the xor/cpuid pair with a no-op on older processors.
     */
    xor eax, eax
    cpuid

    /* edx:eax = starting timestamp */
    rdtsc

    /* Park the start time in ebx:ecx, freeing edx:eax for the multiply */
    mov ebx, edx
    mov ecx, eax

    /* edx:eax = scale factor * MicroSeconds (argument sits above saved ebx and the return address) */
    mov eax, fs:[KPCR_STALL_SCALE_FACTOR]
    mul dword ptr [esp + 8]

    /* ebx:ecx = deadline */
    add ecx, eax
    adc ebx, edx

    /* Spin while the current timestamp is below the deadline */
.StallSpin:
    rdtsc
    cmp eax, ecx        /* low halves: sets the borrow for the high-half subtract */
    mov eax, edx        /* MOV leaves the flags untouched */
    sbb eax, ebx
    jb .StallSpin

    pop ebx
    ret 4
125+
END

hal/halx86/pc98/delay.c

Lines changed: 124 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,9 @@
11
/*
22
* PROJECT: NEC PC-98 series HAL
33
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4-
* PURPOSE: Delay routines
5-
* COPYRIGHT: Copyright 2020 Dmitry Borisov (di.sean@protonmail.com)
4+
* PURPOSE: TSC calibration for the busy-wait loop routine
5+
* COPYRIGHT: Copyright 2011 Timo Kreuzer <timo.kreuzer@reactos.org>
6+
* Copyright 2026 Dmitry Borisov <di.sean@protonmail.com>
67
*/
78

89
/* INCLUDES ******************************************************************/
@@ -12,29 +13,137 @@
1213
#define NDEBUG
1314
#include <debug.h>
1415

15-
/* PRIVATE FUNCTIONS *********************************************************/
16+
#include "delay.h"
1617

18+
/* GLOBALS *******************************************************************/
19+
20+
#define SAMPLE_FREQUENCY 1024 // 0.977 ms
21+
22+
VOID
23+
__cdecl
24+
HalpTscCalibrationISR(VOID);
25+
26+
extern volatile ULONG TscCalibrationPhase;
27+
extern ULONG64 TscCalibrationArray[NUM_SAMPLES];
28+
29+
/* FUNCTIONS *****************************************************************/
30+
31+
static
1732
CODE_SEG("INIT")
1833
VOID
19-
NTAPI
20-
HalpCalibrateStallExecution(VOID)
34+
HalpPrepareStallExecution(VOID)
2135
{
22-
/* FIXME */
23-
NOTHING;
36+
PUCHAR Instruction = (PUCHAR)((ULONG_PTR)KeStallExecutionProcessor + 1);
37+
PKPRCB Prcb = KeGetCurrentPrcb();
38+
39+
/* xor eax, eax; cpuid */
40+
ASSERT((Instruction[1] == 0xC0) && // The byte [0] has different encodings
41+
(Instruction[2] == 0x0F) &&
42+
(Instruction[3] == 0xA2));
43+
44+
/*
45+
* Starting with the Pentium Pro processor it is necessary to force
46+
* the in-order execution of the RDTSC instruction using a serializing instruction.
47+
* For more details, please refer to Section 3.1 of
48+
* Intel "Using the RDTSC Instruction for Performance Monitoring".
49+
*
50+
* Patch the KeStallExecutionProcessor function to remove the serializing instruction
51+
* for the Pentium and Pentium MMX processors.
52+
*/
53+
if ((Prcb->CpuType < 6) && !strcmp(Prcb->VendorString, "GenuineIntel"))
54+
{
55+
/* Replace "xor eax, eax; cpuid" with "lea esi, [esi+0]" */
56+
Instruction[0] = 0x8D;
57+
Instruction[1] = 0x74;
58+
Instruction[2] = 0x26;
59+
Instruction[3] = 0x00;
60+
61+
KeSweepICache(Instruction, 4);
62+
}
2463
}
2564

26-
/* PUBLIC FUNCTIONS **********************************************************/
65+
static
66+
CODE_SEG("INIT")
67+
ULONG64
68+
HalpDoLinearRegression(
69+
_In_ ULONG XMax,
70+
_In_reads_(XMax + 1) const ULONG64* ArrayY)
71+
{
72+
ULONG X, SumXX;
73+
ULONG64 SumXY;
74+
75+
/* Calculate the sum of the squares of X */
76+
SumXX = (XMax * (XMax + 1) * (2 * XMax + 1)) / 6;
77+
78+
/* Calculate the sum of the differences to the first value weighted by X */
79+
for (SumXY = 0, X = 1; X <= XMax; X++)
80+
{
81+
SumXY += X * (ArrayY[X] - ArrayY[0]);
82+
}
83+
84+
/* Account for sample frequency */
85+
SumXY *= SAMPLE_FREQUENCY;
2786

28-
#ifndef _MINIHAL_
87+
/* Return the quotient of the sums */
88+
return (SumXY + (SumXX / 2)) / SumXX;
89+
}
90+
91+
/**
 * @brief
 * Measures the TSC rate against the PIT and stores the result as the
 * stall scale factor of the current processor.
 *
 * Steps: temporarily take over the timer interrupt vector with
 * HalpTscCalibrationISR, program PIT channel 0 to fire SAMPLE_FREQUENCY times
 * per second, wait until the ISR has collected NUM_SAMPLES TSC readings,
 * restore the previous handler, and derive the TSC ticks per microsecond.
 *
 * Bugchecks with HAL_INITIALIZATION_FAILED when the CPU has no RDTSC support.
 * The caller's EFLAGS are restored on exit.
 */
CODE_SEG("INIT")
VOID
NTAPI
HalpCalibrateStallExecution(VOID)
{
    ULONG_PTR Flags;
    PVOID PreviousHandler;
    TIMER_CONTROL_PORT_REGISTER TimerControl;
    ULONG TimerFrequency;
    USHORT Period;
    ULONG64 CpuClockFrequency;

    /* Check if the CPU supports RDTSC */
    if (!(KeGetCurrentPrcb()->FeatureBits & KF_RDTSC))
    {
        KeBugCheck(HAL_INITIALIZATION_FAILED);
    }

    /* Keep interrupts off while the vector and the PIT are being reprogrammed */
    Flags = __readeflags();
    _disable();

    /*
     * Save the handler of the very same vector that gets replaced below,
     * so that the restore at the end puts back what was really there.
     */
    PreviousHandler = KeQueryInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ);
    KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, HalpTscCalibrationISR);

    /* Program the PIT for binary mode */
    TimerControl.BcdMode = FALSE;
    TimerControl.OperatingMode = PitOperatingMode2;
    TimerControl.Channel = PitChannel0;
    TimerControl.AccessMode = PitAccessModeLowHigh;

    /*
     * Pick the PIT input clock.
     * NOTE(review): bit 5 of port 0x42 presumably reports the system clock
     * family on PC-98 hardware - confirm and replace with named constants.
     */
    if (__inbyte(0x42) & 0x20)
        TimerFrequency = TIMER_FREQUENCY_1;
    else
        TimerFrequency = TIMER_FREQUENCY_2;

    /* Reload value for SAMPLE_FREQUENCY interrupts per second, rounded to nearest */
    Period = (TimerFrequency + (SAMPLE_FREQUENCY / 2)) / SAMPLE_FREQUENCY;

    /* Control word first, then the reload value as low byte / high byte */
    __outbyte(TIMER_CONTROL_PORT, TimerControl.Bits);
    __outbyte(TIMER_CHANNEL0_DATA_PORT, Period & 0xFF);
    __outbyte(TIMER_CHANNEL0_DATA_PORT, Period >> 8);

    HalEnableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL, Latched);

    /* Collect the sample data: the ISR advances the phase once per interrupt */
    _enable();
    while (TscCalibrationPhase != (NUM_SAMPLES + 1))
        NOTHING;
    _disable();

    /* Put the timer vector back the way it was found */
    HalDisableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL);
    KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, PreviousHandler);

    /* Calculate an average, using simplified linear regression */
    CpuClockFrequency = HalpDoLinearRegression(NUM_SAMPLES - 1, TscCalibrationArray);

    /* Ticks per second -> ticks per microsecond */
    KeGetPcr()->StallScaleFactor = (ULONG)(CpuClockFrequency / 1000000);

    HalpPrepareStallExecution();

    __writeeflags(Flags);
}
40-
#endif

hal/halx86/pc98/delay.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,8 @@
1+
/*
2+
* PROJECT: NEC PC-98 series HAL
3+
* LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later)
4+
* PURPOSE: TSC calibration definitions
5+
* COPYRIGHT: Copyright 2026 Dmitry Borisov <di.sean@protonmail.com>
6+
*/
7+
8+
#define NUM_SAMPLES 4

0 commit comments

Comments
 (0)