|
1 | 1 | /* |
2 | 2 | * PROJECT: NEC PC-98 series HAL |
3 | 3 | * LICENSE: GPL-2.0-or-later (https://spdx.org/licenses/GPL-2.0-or-later) |
4 | | - * PURPOSE: Delay routines |
5 | | - * COPYRIGHT: Copyright 2020 Dmitry Borisov (di.sean@protonmail.com) |
| 4 | + * PURPOSE: TSC calibration for the busy-wait loop routine |
| 5 | + * COPYRIGHT: Copyright 2011 Timo Kreuzer <timo.kreuzer@reactos.org> |
| 6 | + * Copyright 2026 Dmitry Borisov <di.sean@protonmail.com> |
6 | 7 | */ |
7 | 8 |
|
8 | 9 | /* INCLUDES ******************************************************************/ |
|
12 | 13 | #define NDEBUG |
13 | 14 | #include <debug.h> |
14 | 15 |
|
15 | | -/* PRIVATE FUNCTIONS *********************************************************/ |
| 16 | +#include "delay.h" |
16 | 17 |
|
| 18 | +/* GLOBALS *******************************************************************/ |
| 19 | + |
| 20 | +#define SAMPLE_FREQUENCY 1024 // 0.977 ms |
| 21 | + |
| 22 | +VOID |
| 23 | +__cdecl |
| 24 | +HalpTscCalibrationISR(VOID); |
| 25 | + |
| 26 | +extern volatile ULONG TscCalibrationPhase; |
| 27 | +extern ULONG64 TscCalibrationArray[NUM_SAMPLES]; |
| 28 | + |
| 29 | +/* FUNCTIONS *****************************************************************/ |
| 30 | + |
| 31 | +static |
17 | 32 | CODE_SEG("INIT") |
18 | 33 | VOID |
19 | | -NTAPI |
20 | | -HalpCalibrateStallExecution(VOID) |
| 34 | +HalpPrepareStallExecution(VOID) |
21 | 35 | { |
22 | | - /* FIXME */ |
23 | | - NOTHING; |
| 36 | + PUCHAR Instruction = (PUCHAR)((ULONG_PTR)KeStallExecutionProcessor + 1); |
| 37 | + PKPRCB Prcb = KeGetCurrentPrcb(); |
| 38 | + |
| 39 | + /* xor eax, eax; cpuid */ |
| 40 | + ASSERT((Instruction[1] == 0xC0) && // The byte [0] has different encodings |
| 41 | + (Instruction[2] == 0x0F) && |
| 42 | + (Instruction[3] == 0xA2)); |
| 43 | + |
| 44 | + /* |
| 45 | + * Starting with the Pentium Pro processor it is necessary to force |
| 46 | + * the in-order execution of the RDTSC instruction using a serializing instruction. |
| 47 | + * For more details, please refer to Section 3.1 of |
| 48 | + * Intel "Using the RDTSC Instruction for Performance Monitoring". |
| 49 | + * |
| 50 | + * Patch the KeStallExecutionProcessor function to remove the serializing instruction |
| 51 | + * for the Pentium and Pentium MMX processors. |
| 52 | + */ |
| 53 | + if ((Prcb->CpuType < 6) && !strcmp(Prcb->VendorString, "GenuineIntel")) |
| 54 | + { |
| 55 | + /* Replace "xor eax, eax; cpuid" with "lea esi, [esi+0]" */ |
| 56 | + Instruction[0] = 0x8D; |
| 57 | + Instruction[1] = 0x74; |
| 58 | + Instruction[2] = 0x26; |
| 59 | + Instruction[3] = 0x00; |
| 60 | + |
| 61 | + KeSweepICache(Instruction, 4); |
| 62 | + } |
24 | 63 | } |
25 | 64 |
|
26 | | -/* PUBLIC FUNCTIONS **********************************************************/ |
| 65 | +static |
| 66 | +CODE_SEG("INIT") |
| 67 | +ULONG64 |
| 68 | +HalpDoLinearRegression( |
| 69 | + _In_ ULONG XMax, |
| 70 | + _In_reads_(XMax + 1) const ULONG64* ArrayY) |
| 71 | +{ |
| 72 | + ULONG X, SumXX; |
| 73 | + ULONG64 SumXY; |
| 74 | + |
| 75 | + /* Calculate the sum of the squares of X */ |
| 76 | + SumXX = (XMax * (XMax + 1) * (2 * XMax + 1)) / 6; |
| 77 | + |
| 78 | + /* Calculate the sum of the differences to the first value weighted by X */ |
| 79 | + for (SumXY = 0, X = 1; X <= XMax; X++) |
| 80 | + { |
| 81 | + SumXY += X * (ArrayY[X] - ArrayY[0]); |
| 82 | + } |
| 83 | + |
| 84 | + /* Account for sample frequency */ |
| 85 | + SumXY *= SAMPLE_FREQUENCY; |
27 | 86 |
|
28 | | -#ifndef _MINIHAL_ |
| 87 | + /* Return the quotient of the sums */ |
| 88 | + return (SumXY + (SumXX / 2)) / SumXX; |
| 89 | +} |
| 90 | + |
| 91 | +CODE_SEG("INIT") |
29 | 92 | VOID |
30 | 93 | NTAPI |
31 | | -KeStallExecutionProcessor( |
32 | | - _In_ ULONG MicroSeconds) |
| 94 | +HalpCalibrateStallExecution(VOID) |
33 | 95 | { |
34 | | - while (MicroSeconds--) |
| 96 | + ULONG_PTR Flags; |
| 97 | + PVOID PreviousHandler; |
| 98 | + TIMER_CONTROL_PORT_REGISTER TimerControl; |
| 99 | + ULONG TimerFrequency; |
| 100 | + USHORT Period; |
| 101 | + ULONG64 CpuClockFrequency; |
| 102 | + |
| 103 | + /* Check if the CPU supports RDTSC */ |
| 104 | + if (!(KeGetCurrentPrcb()->FeatureBits & KF_RDTSC)) |
35 | 105 | { |
36 | | - /* FIXME: Use stall factor */ |
37 | | - WRITE_PORT_UCHAR((PUCHAR)CPU_IO_o_ARTIC_DELAY, 0); |
| 106 | + KeBugCheck(HAL_INITIALIZATION_FAILED); |
38 | 107 | } |
| 108 | + |
| 109 | + Flags = __readeflags(); |
| 110 | + _disable(); |
| 111 | + |
| 112 | + PreviousHandler = KeQueryInterruptHandler(PIC_TIMER_IRQ); |
| 113 | + KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, HalpTscCalibrationISR); |
| 114 | + |
| 115 | + /* Program the PIT for binary mode */ |
| 116 | + TimerControl.BcdMode = FALSE; |
| 117 | + TimerControl.OperatingMode = PitOperatingMode2; |
| 118 | + TimerControl.Channel = PitChannel0; |
| 119 | + TimerControl.AccessMode = PitAccessModeLowHigh; |
| 120 | + |
| 121 | + if (__inbyte(0x42) & 0x20) |
| 122 | + TimerFrequency = TIMER_FREQUENCY_1; |
| 123 | + else |
| 124 | + TimerFrequency = TIMER_FREQUENCY_2; |
| 125 | + Period = (TimerFrequency + (SAMPLE_FREQUENCY / 2)) / SAMPLE_FREQUENCY; |
| 126 | + |
| 127 | + __outbyte(TIMER_CONTROL_PORT, TimerControl.Bits); |
| 128 | + __outbyte(TIMER_CHANNEL0_DATA_PORT, Period & 0xFF); |
| 129 | + __outbyte(TIMER_CHANNEL0_DATA_PORT, Period >> 8); |
| 130 | + |
| 131 | + HalEnableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL, Latched); |
| 132 | + |
| 133 | + /* Collect the sample data */ |
| 134 | + _enable(); |
| 135 | + while (TscCalibrationPhase != (NUM_SAMPLES + 1)) |
| 136 | + NOTHING; |
| 137 | + _disable(); |
| 138 | + |
| 139 | + HalDisableSystemInterrupt(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, CLOCK2_LEVEL); |
| 140 | + KeRegisterInterruptHandler(PRIMARY_VECTOR_BASE + PIC_TIMER_IRQ, PreviousHandler); |
| 141 | + |
| 142 | + /* Calculate an average, using simplified linear regression */ |
| 143 | + CpuClockFrequency = HalpDoLinearRegression(NUM_SAMPLES - 1, TscCalibrationArray); |
| 144 | + KeGetPcr()->StallScaleFactor = (ULONG)(CpuClockFrequency / 1000000); |
| 145 | + |
| 146 | + HalpPrepareStallExecution(); |
| 147 | + |
| 148 | + __writeeflags(Flags); |
39 | 149 | } |
40 | | -#endif |
|
0 commit comments