This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
/*
 * Background on the x86 time-stamp counter and the RDTSC instruction:
 *   https://en.wikipedia.org/wiki/Time_Stamp_Counter
 *   https://ru.wikipedia.org/wiki/Rdtsc
 */
#include <stdint.h>
#include <stdio.h>
typedef unsigned long long uint64;

/*
 * Print eleven consecutive readings of the x86 time-stamp counter,
 * one decimal value per line.
 *
 * Requires an x86 target and a compiler that understands GCC-style
 * extended inline assembly.
 */
int main(void) {
    for (int i = 0; i <= 10; i++) {
        unsigned int lo, hi;

        /* RDTSC delivers the 64-bit counter split across EDX (high) and EAX (low). */
        __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi));

        /* Widen before shifting so the high half is not shifted out of a 32-bit value. */
        uint64 val = ((uint64)lo) | (((uint64)hi) << 32);
        printf("%llu \n", val);
    }
    return 0;
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
// Source: http://developers.redhat.com/blog/2016/03/11/practical-micro-benchmarking-with-ltrace-and-sched/
/* One drawback of the RDTSC instruction is that the CPU is allowed to reorder
   it relative to other instructions, which causes noise in our results. Fortunately,
   Intel has provided an RDTSCP instruction that's more deterministic. We'll pair
   that with a CPUID instruction, which is a serializing instruction and therefore
   acts as a barrier, resulting in this: */
/*
 * Read the time-stamp counter at the START of a measured region.
 *
 * CPUID is issued first and RDTSC second, so the counter is sampled only
 * after the serializing CPUID has retired.
 *
 * Returns the 64-bit counter (EDX:EAX) as an int64_t.
 * x86-64 only; relies on GCC-style extended inline assembly.
 */
static __inline__ int64_t rdtsc_s(void)
{
    /* CPUID takes its leaf selector in EAX; give it a defined value (leaf 0)
       instead of whatever the register happened to hold. */
    unsigned leaf = 0;
    unsigned lo, hi;

    /* The "memory" clobbers keep the compiler from moving loads/stores of the
       measured code across the timing instructions. */
    __asm__ __volatile__("cpuid" : "+a" (leaf) : : "%rbx", "%rcx", "%rdx", "memory");
    __asm__ __volatile__("rdtsc" : "=a" (lo), "=d" (hi) : : "memory");

    /* Use a fixed 64-bit type: shifting an `unsigned long` by 32 is undefined
       on targets where that type is only 32 bits wide. */
    return (int64_t)(((uint64_t)hi << 32) | (uint64_t)lo);
}
/*
 * Read the time-stamp counter at the END of a measured region.
 *
 * RDTSCP is issued first and CPUID second, so the serializing CPUID sits
 * between the counter read and whatever code follows the call.
 *
 * Returns the 64-bit counter (EDX:EAX) as an int64_t.
 * x86-64 only; relies on GCC-style extended inline assembly and a CPU that
 * implements RDTSCP.
 */
static __inline__ int64_t rdtsc_e(void)
{
    /* CPUID takes its leaf selector in EAX; give it a defined value (leaf 0). */
    unsigned leaf = 0;
    unsigned lo, hi;

    /* RDTSCP also writes ECX (IA32_TSC_AUX), so RCX must be listed as
       clobbered or the compiler may keep a live value there.  The "memory"
       clobbers keep the compiler from moving loads/stores of the measured
       code across the timing instructions. */
    __asm__ __volatile__("rdtscp" : "=a" (lo), "=d" (hi) : : "%rcx", "memory");
    __asm__ __volatile__("cpuid" : "+a" (leaf) : : "%rbx", "%rcx", "%rdx", "memory");

    /* Use a fixed 64-bit type: shifting an `unsigned long` by 32 is undefined
       on targets where that type is only 32 bits wide. */
    return (int64_t)(((uint64_t)hi << 32) | (uint64_t)lo);
}
. . . | |
clocks_before = rdtsc_s (); | |
p = malloc (i); /* Test goes here */ | |
clocks_after = rdtsc_e (); | |
clocks_per_malloc = clocks_after - clocks_before; | |
// Reduce measurement noise: let the OS use CPU #0 and keep the other cores isolated.
// Kernel boot options:
//   linux . . . isolcpus=1,2,3,4,5,6,7
// Check the CPU affinity of the current shell:
//   taskset -p $$
// Interrupt affinity (mask 1 routes every IRQ to CPU #0):
//   cd /proc/irq
//   for i in */smp_affinity; do echo 1 > $i; done