This program displays the number of actual CPU core cycles executed by the current core (using the related PMC, which I believe is UNHALTED_CORE_CYCLES):
#include <unistd.h>
#include <cstdio>
/*
 * Reads one performance-monitoring counter with the RDPMC instruction and
 * prints its 64-bit value.
 *
 * The selector placed in ECX is (1 << 30) + 1. NOTE(review): on Intel this is
 * understood to address fixed-function counter 1 (unhalted core cycles);
 * other vendors use different selector values, and an invalid selector or
 * disabled user-space RDPMC makes the instruction fault.
 *
 * Returns 0 after printing the counter value.
 */
int main(int argc, char* argv[]){
    (void)argc;
    (void)argv;

    /* Counter selector passed to RDPMC in ECX. */
    const unsigned int selector = (1U << 30) + 1;

    /* RDPMC delivers the low half in EAX and the high half in EDX. */
    unsigned int lo, hi;
    __asm__ volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (selector));

    /*
     * Widen before shifting: shifting a 32-bit operand by 32 is undefined,
     * and `unsigned long` is only 32 bits wide on ILP32 targets.
     */
    const unsigned long long result = ((unsigned long long)hi << 32) | lo;

    printf("Current cycles : %llu\n", result);
    return 0;
}
It works well on Intel processors, but fails with a "Segmentation fault" on AMD ones (7001 and 7002). My first guess was to find a new value for `c`
that corresponds to the AMD CPU_CLOCKS_UNHALTED event (0x76), but I have had no success so far.
echo 2 | sudo tee /sys/devices/cpu/rdpmc # enable RDPMC always, not just when a perf event is open
The counter number is wrong: AMD uses different RDPMC values than Intel. Depending on the processor, multiple events are directly supported through `rdpmc`;
please refer to this AMD manual for further information (section `rdpmc`).
The core cycle number should be 0 in your case.
This code works for me to count PERF_COUNT_HW_INSTRUCTIONS
#include <asm/unistd.h>
#include <inttypes.h>
#include <linux/perf_event.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <unistd.h>
/*
 * Issues the perf_event_open system call directly through syscall().
 *
 * All five arguments are forwarded unchanged. The raw result is narrowed to
 * int before being returned as long, so callers see the same value an int
 * file descriptor (or -1 on failure) would have.
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    const int fd = (int)syscall(__NR_perf_event_open, hw_event, pid, cpu,
                                group_fd, flags);

    return fd;
}
/*
 * rdpmc(counter, low, high) - execute the RDPMC instruction.
 *
 * counter: value loaded into the C register (ECX) as the instruction's input.
 * low:     lvalue that receives the A register (EAX) after the instruction.
 * high:    lvalue that receives the D register (EDX) after the instruction.
 *
 * The asm is marked volatile so the compiler neither removes nor merges
 * repeated reads. The macro expands to a single asm expression; callers
 * combine low/high into a wider value themselves.
 */
#define rdpmc(counter, low, high) \
__asm__ __volatile__("rdpmc" \
: "=a"(low), "=d"(high) \
: "c"(counter))
/*
 * Opens a PERF_COUNT_HW_INSTRUCTIONS event for the calling thread and, 50
 * times, prints how far the counter advances between two back-to-back RDPMC
 * reads separated by an LFENCE.
 *
 * The hardware counter to read is not hard-coded: the kernel publishes it in
 * the `index` field of the event's mmap'd perf_event_mmap_page (RDPMC
 * selector + 1, or 0 when the event is not currently on a counter). Using
 * that value avoids reading an unrelated counter when the kernel schedules
 * the event somewhere other than counter 1, and works for both the AMD and
 * the Intel selector encodings.
 *
 * Returns EXIT_SUCCESS when all 50 samples were taken, EXIT_FAILURE on any
 * setup, ioctl, or counter-availability error.
 */
int main(void) {
    struct perf_event_attr pe;
    volatile struct perf_event_mmap_page *pc;
    uint64_t values1, values2;
    unsigned int idx, low, high;
    long page_size;
    int fd, i;
    int status = EXIT_FAILURE;

    memset(&pe, 0, sizeof(pe));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_INSTRUCTIONS;
    pe.disabled = 1;        /* started explicitly inside the loop */
    pe.exclude_kernel = 0;
    pe.exclude_hv = 0;
    pe.exclude_idle = 0;

    /* pid 0 / cpu -1: this thread, on whichever CPU it runs. */
    fd = perf_event_open(&pe, 0, -1, -1, 0);
    if (fd == -1) {
        fprintf(stderr, "Error opening leader %llx\n",
                (unsigned long long)pe.config);
        exit(EXIT_FAILURE);
    }

    /* Map only the metadata page; no ring-buffer data pages are needed. */
    page_size = sysconf(_SC_PAGESIZE);
    if (page_size <= 0) {
        perror("sysconf(_SC_PAGESIZE)");
        close(fd);
        exit(EXIT_FAILURE);
    }
    pc = mmap(NULL, (size_t)page_size, PROT_READ, MAP_SHARED, fd, 0);
    if (pc == MAP_FAILED) {
        perror("mmap");
        close(fd);
        exit(EXIT_FAILURE);
    }

    for (i = 1; i <= 50; i++) {
        if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1 ||
            ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
            perror("ioctl(reset/enable)");
            goto out;
        }

        /* Re-read every iteration: the kernel may move the event. */
        idx = pc->index;
        if (!pc->cap_user_rdpmc || idx == 0) {
            fprintf(stderr, "RDPMC is not available for this event\n");
            ioctl(fd, PERF_EVENT_IOC_DISABLE, 0);
            goto out;
        }

        rdpmc(idx - 1, low, high);
        values1 = ((uint64_t)high << 32) | low;
        /* Keep the two counter reads from overlapping. */
        __asm__ __volatile__("lfence" : : : "memory");
        rdpmc(idx - 1, low, high);
        values2 = ((uint64_t)high << 32) | low;

        if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
            perror("ioctl(disable)");
            goto out;
        }
        printf(" %" PRIu64 "\n", values2 - values1);
    }
    status = EXIT_SUCCESS;

out:
    munmap((void *)pc, (size_t)page_size);
    close(fd);
    return status;
}
Tested on Ryzen 7950X