diff --git a/Platform.cpp b/Platform.cpp index a49255d7..9453e5a0 100644 --- a/Platform.cpp +++ b/Platform.cpp @@ -1,6 +1,7 @@ #include "Platform.h" - +#include #include +#include // for lrint #include long getenvlong(const char *name, long minval, long defval, long maxval) @@ -18,6 +19,47 @@ long getenvlong(const char *name, long minval, long defval, long maxval) return l; } +struct StampedRdtsc +{ + const uint64_t ticks, ns; + + StampedRdtsc() : ticks(timer_start()), ns(timeofday()) { } + + bool GoodDelta(const StampedRdtsc& t) const { + constexpr uint64_t ms20 = 20*1000*1000; // 20ms, two ticks of HZ=100 + constexpr uint64_t mln2 = 2*1000*1000; // 20ms @ 100 MHz = 2M ticks + const uint64_t dtick = timer_sub(ticks, t.ticks); + return mln2 <= dtick && dtick != timer_inf && ms20 <= (ns - t.ns); + } + + unsigned int FreqMHzSince(const StampedRdtsc& t) const { + return lrint(double(timer_sub(ticks, t.ticks)) / (ns - t.ns) * 1e9 / 1e6); + } +}; + +static unsigned int CpuFreqMHz; +static std::unique_ptr<StampedRdtsc> BaseTS; + +void SampleCpuFreq ( void ) +{ + if (!BaseTS) { + if (CpuFreqMHz) + return; + else + BaseTS.reset(new StampedRdtsc()); + } + StampedRdtsc now; + if (now.GoodDelta(*BaseTS)) + CpuFreqMHz = now.FreqMHzSince(*BaseTS); +} + +unsigned int GetCpuFreqMHz( void ) +{ + if (BaseTS) + BaseTS.reset(); + return CpuFreqMHz; +} + #if defined(_WIN32) #include @@ -35,6 +77,20 @@ void SetThreadAffinity ( std::thread &t, int cpu ) } #endif +uint64_t timeofday(void) +{ + // TODO: availability macros?... 
+ // GetTickCount ~ Windows 2000+ + // GetTickCount64 ~ Vista+, Server 2008+ + // QueryUnbiasedInterruptTime ~ Windows 7+, Server 2008 R2 + // see https://learn.microsoft.com/en-us/windows/win32/winprog/using-the-windows-headers + unsigned long long ns100; + if (QueryUnbiasedInterruptTime(&ns100)) + return ns100 * 100; + return GetTickCount64() * 1000000; + // return GetTickCount() * 1000000; +} + #else #include diff --git a/Platform.h b/Platform.h index 10440fa8..acd7ffd9 100644 --- a/Platform.h +++ b/Platform.h @@ -21,6 +21,8 @@ void SetThreadAffinity ( std::thread &t, int cpu ); # endif #endif void SetAffinity ( int cpu ); +void SampleCpuFreq(void); +unsigned int GetCpuFreqMHz(); long getenvlong(const char *name, long minval, long defval, long maxval); // That's not UINT64_MAX as it's converted to int64_t sometimes. @@ -84,6 +86,7 @@ static inline uint64_t timer_sub(uint64_t a, uint64_t b) #pragma intrinsic(__rdtsc) // Read Time Stamp Counter #define timer_counts_ns() (false) +uint64_t timeofday(void); #define rdtsc() __rdtsc() #define timer_start() __rdtsc() #define timer_end() __rdtsc() diff --git a/SpeedTest.cpp b/SpeedTest.cpp index 2d8b0eeb..9b06639f 100644 --- a/SpeedTest.cpp +++ b/SpeedTest.cpp @@ -250,12 +250,16 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block } else { + SampleCpuFreq(); t = (double)timehash(hash,block,blocksize,itrial); } if(t > 0) times.push_back(t); } + if (blocksize > TIMEHASH_SMALL_LEN_MAX) + SampleCpuFreq(); + //---------- std::sort(times.begin(),times.end()); @@ -280,18 +284,20 @@ void BulkSpeedTest ( pfHash hash, uint32_t seed ) volatile double warmup_cycles = SpeedTest(hash,seed,trials,blocksize,0); + const double MHz2MiBps = (1000.0 * 1000.0) / (1024.0 * 1024.0); for(int align = 7; align >= 0; align--) { double cycles = SpeedTest(hash,seed,trials,blocksize,align); double bestbpc = double(blocksize)/cycles; - - double bestbps = (bestbpc * 3000000000.0 / 1048576.0); - printf("Alignment %2d - 
%6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps); + const unsigned cpuMHz = GetCpuFreqMHz(); + double bestbps = (bestbpc * cpuMHz * MHz2MiBps); + printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ %u MHz\n",align,bestbpc,bestbps,cpuMHz); sumbpc += bestbpc; } + const unsigned cpuMHz = GetCpuFreqMHz(); sumbpc = sumbpc / 8.0; - printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",sumbpc,(sumbpc * 3000000000.0 / 1048576.0)); + printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ %u MHz\n",sumbpc,sumbpc*cpuMHz*MHz2MiBps,cpuMHz); fflush(NULL); }