Skip to content

Commit

Permalink
Report CPU frequency as measured by rdtsc()
Browse files Browse the repository at this point in the history
It makes timer accounting issues like rurban#241 more visible
  • Loading branch information
darkk committed Oct 3, 2024
1 parent 1d85820 commit a1a7fa1
Show file tree
Hide file tree
Showing 3 changed files with 68 additions and 5 deletions.
56 changes: 55 additions & 1 deletion Platform.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include "Platform.h"

#include <memory>
#include <stdio.h>
#include <math.h> // for lrint
#include <assert.h>

long getenvlong(const char *name, long minval, long defval, long maxval)
Expand All @@ -18,6 +19,47 @@ long getenvlong(const char *name, long minval, long defval, long maxval)
return l;
}

// A paired reading of the tick counter (timer_start()) and the wall clock
// (timeofday(), in nanoseconds), both captured at construction time.
// Two such samples give an estimate of the counter frequency.
struct StampedRdtsc
{
  const uint64_t ticks, ns;

  // `ticks` is declared first, so the counter is read before the wall clock.
  StampedRdtsc() : ticks(timer_start()), ns(timeofday()) { }

  // Returns true when the interval from the earlier sample `t` to *this is
  // long enough, on both clocks, to yield a meaningful frequency estimate.
  bool GoodDelta(const StampedRdtsc& t) const {
    constexpr uint64_t ms20 = 20*1000*1000; // 20ms, two ticks of HZ=100
    constexpr uint64_t mln2 = 2*1000*1000;  // 20ms @ 100 MHz = 2M ticks
    // The wall clock may step backwards; the unsigned difference would then
    // wrap around to a huge value and wrongly pass the minimum-duration check.
    if (ns < t.ns)
      return false;
    const uint64_t dtick = timer_sub(ticks, t.ticks);
    return mln2 <= dtick && dtick != timer_inf && ms20 <= (ns - t.ns);
  }

  // Returns the tick rate between the earlier sample `t` and *this, rounded
  // to the nearest MHz, or 0 when the interval cannot be measured (no
  // wall-clock time elapsed, the wall clock went backwards, or timer_sub()
  // reported timer_inf).
  unsigned int FreqMHzSince(const StampedRdtsc& t) const {
    if (ns <= t.ns)
      return 0; // avoids division by zero and unsigned wrap-around
    const uint64_t dtick = timer_sub(ticks, t.ticks);
    if (dtick == timer_inf)
      return 0;
    // ticks per nanosecond * 1e9 = Hz; Hz / 1e6 = MHz
    const double mhz = static_cast<double>(dtick) / (ns - t.ns) * 1e9 / 1e6;
    return static_cast<unsigned int>(lrint(mhz));
  }
};

// Latest frequency estimate in MHz; stays 0 until SampleCpuFreq() stores a value.
static unsigned int CpuFreqMHz;
// Baseline sample that SampleCpuFreq() measures against; created by
// SampleCpuFreq() and released by GetCpuFreqMHz().
static std::unique_ptr<StampedRdtsc> BaseTS;

void SampleCpuFreq ( void )
{
if (!BaseTS) {
if (CpuFreqMHz)
return;
else
BaseTS.reset(new StampedRdtsc());
}
StampedRdtsc now;
if (now.GoodDelta(*BaseTS))
CpuFreqMHz = now.FreqMHzSince(*BaseTS);
}

// Returns the current frequency estimate in MHz (0 if none has been stored)
// and drops the baseline sample, ending the measurement in progress.
unsigned int GetCpuFreqMHz( void )
{
  // reset() on an empty unique_ptr is a no-op, so no emptiness check is needed.
  BaseTS.reset();
  return CpuFreqMHz;
}

#if defined(_WIN32)

#include <windows.h>
Expand All @@ -35,6 +77,18 @@ void SetThreadAffinity ( std::thread &t, int cpu )
}
#endif

// Returns a timestamp in nanoseconds (Windows implementation).
//
// API availability:
//   GetTickCount               ~ Windows 2000+
//   GetTickCount64             ~ Vista+, Server 2008+
//   QueryUnbiasedInterruptTime ~ Windows 7+, Server 2008 R2
// see https://learn.microsoft.com/en-us/windows/win32/winprog/using-the-windows-headers
uint64_t timeofday(void)
{
  unsigned long long interrupt100ns;
  const bool haveInterruptTime = QueryUnbiasedInterruptTime(&interrupt100ns);

  // Fallback: millisecond tick count scaled to nanoseconds.
  if (!haveInterruptTime)
    return GetTickCount64() * 1000000U;

  // Interrupt time is reported in 100 ns units.
  return interrupt100ns * 100u;
}

#else

#include <sched.h>
Expand Down
3 changes: 3 additions & 0 deletions Platform.h
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@ void SetThreadAffinity ( std::thread &t, int cpu );
# endif
#endif
void SetAffinity ( int cpu );
void SampleCpuFreq(void);
unsigned int GetCpuFreqMHz();
long getenvlong(const char *name, long minval, long defval, long maxval);

// That's not UINT64_MAX as it's converted to int64_t sometimes.
Expand Down Expand Up @@ -84,6 +86,7 @@ static inline uint64_t timer_sub(uint64_t a, uint64_t b)
#pragma intrinsic(__rdtsc)
// Read Time Stamp Counter
#define timer_counts_ns() (false)
uint64_t timeofday(void);
#define rdtsc() __rdtsc()
#define timer_start() __rdtsc()
#define timer_end() __rdtsc()
Expand Down
14 changes: 10 additions & 4 deletions SpeedTest.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -250,12 +250,16 @@ double SpeedTest ( pfHash hash, uint32_t seed, const int trials, const int block
}
else
{
SampleCpuFreq();
t = (double)timehash(hash,block,blocksize,itrial);
}

if(t > 0) times.push_back(t);
}

if (blocksize > TIMEHASH_SMALL_LEN_MAX)
SampleCpuFreq();

//----------

std::sort(times.begin(),times.end());
Expand All @@ -280,18 +284,20 @@ void BulkSpeedTest ( pfHash hash, uint32_t seed )

volatile double warmup_cycles = SpeedTest(hash,seed,trials,blocksize,0);

const double MHz2MiBps = (1000.0 * 1000.0) / (1024.0 * 1024.0);
for(int align = 7; align >= 0; align--)
{
double cycles = SpeedTest(hash,seed,trials,blocksize,align);

double bestbpc = double(blocksize)/cycles;

double bestbps = (bestbpc * 3000000000.0 / 1048576.0);
printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",align,bestbpc,bestbps);
const unsigned cpuMHz = GetCpuFreqMHz();
double bestbps = (bestbpc * cpuMHz * MHz2MiBps);
printf("Alignment %2d - %6.3f bytes/cycle - %7.2f MiB/sec @ %u MHz\n",align,bestbpc,bestbps,cpuMHz);
sumbpc += bestbpc;
}
const unsigned cpuMHz = GetCpuFreqMHz();
sumbpc = sumbpc / 8.0;
printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ 3 ghz\n",sumbpc,(sumbpc * 3000000000.0 / 1048576.0));
printf("Average - %6.3f bytes/cycle - %7.2f MiB/sec @ %u MHz\n",sumbpc,sumbpc*cpuMHz*MHz2MiBps,cpuMHz);
fflush(NULL);
}

Expand Down

0 comments on commit a1a7fa1

Please sign in to comment.