Tags: c, performancecounter, perf, cpu-cycles

Counting CPU cycles with `perf_event` in C yields a different value than `perf`


I am trying to count the CPU cycles of a single process with a short C program. A minimal working example (MWE) is cpucycles.c, shown below.

cpucycles.c (heavily based on the man page example)

#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <asm/unistd.h>

/*
 * Invoke the perf_event_open system call through syscall(), forwarding all
 * five arguments unchanged.
 *
 * Returns whatever the system call returned, after passing through an int.
 */
static long
perf_event_open(struct perf_event_attr *hw_event, pid_t pid,
                int cpu, int group_fd, unsigned long flags)
{
    /* The raw result is narrowed to int before being widened back to long. */
    const int result = (int)syscall(__NR_perf_event_open, hw_event, pid,
                                    cpu, group_fd, flags);

    return result;
}

/*
 * Count the CPU cycles consumed by process `pid` during a window of roughly
 * `microseconds` microseconds (the window is a usleep() between enabling and
 * disabling the counter).
 *
 * The counter is configured with exclude_kernel and exclude_hv set, so only
 * user-space cycles are counted. For a target that spends most of its time
 * inside system calls the result is therefore much smaller than a count that
 * includes kernel cycles.
 *
 * Returns the cycle count, or -1 if the counter could not be opened,
 * reset, enabled, disabled, or read.
 */
long long
cpu_cycles(pid_t pid, unsigned int microseconds)
{
    struct perf_event_attr pe;
    long long count = -1;   /* stays -1 unless the final read succeeds */
    long long value = 0;
    int fd;

    memset(&pe, 0, sizeof(pe));
    pe.type = PERF_TYPE_HARDWARE;
    pe.size = sizeof(pe);
    pe.config = PERF_COUNT_HW_CPU_CYCLES;
    pe.disabled = 1;        /* start stopped; enabled explicitly below */
    pe.exclude_kernel = 1;  /* user-space cycles only */
    pe.exclude_hv = 1;

    fd = perf_event_open(&pe, pid, -1, -1, 0);
    if (fd < 0) {
        return -1;
    }

    if (ioctl(fd, PERF_EVENT_IOC_RESET, 0) == -1 ||
        ioctl(fd, PERF_EVENT_IOC_ENABLE, 0) == -1) {
        goto out;
    }

    usleep(microseconds);

    if (ioctl(fd, PERF_EVENT_IOC_DISABLE, 0) == -1) {
        goto out;
    }

    /* Only trust the value if the whole counter was read. */
    if (read(fd, &value, sizeof(value)) == (ssize_t)sizeof(value)) {
        count = value;
    }

out:
    close(fd);
    return count;
}

int main(int argc, char **argv)
{
    printf("CPU cycles: %lld\n", cpu_cycles(atoi(argv[1]), atoi(argv[2])));
    return 0;
}

Next, I compile it, set the perf_event access rights, start a process with full CPU utilization, and count its CPU cycles with both perf and my cpucycles program.

$ gcc -o cpucycles cpucycles.c
$ echo 1 | sudo tee /proc/sys/kernel/perf_event_paranoid
$ cat /dev/urandom > /dev/null &
[1] 3214
$ perf stat -e cycles -p 3214 -x, sleep 1
3072358388,,cycles,1000577415,100,00,,,,
$ ./cpucycles 3214 1000000
CPU cycles: 287953

Obviously, only the `3072358388` CPU cycles reported by `perf` are plausible for my 3 GHz CPU. Why is my `cpucycles` returning such ridiculously small values?


Solution

  • You're excluding the kernel from your profiling by setting pe.exclude_kernel = 1.

    I just verified that by just setting that flag to 0, I get large numbers, and setting it to 1 I get small numbers.

    cat /dev/urandom > /dev/null will spend pretty much all of its CPU time inside the kernel. The userland part is just a read into a buffer and a write from that buffer, while all the heavy lifting in this case is done by the kernel.