c linux intel perf

perf_event_open - how to monitor multiple events


Does anyone know how to set up the perf_event_attr struct so that the PMU monitors multiple events (of different types) via perf_event_open()?

For example, perf record -e cycles,faults ls uses two different event types (PERF_TYPE_HARDWARE and PERF_TYPE_SOFTWARE), but in the example on the perf_event_open man page, perf_event_attr.type can only be assigned a single value.

Any suggestions would be appreciated, thanks!

20170208 Update: Thanks to @gudok for pointing me in the right direction, but the results seem abnormal. The demo program is below (intended to measure the whole system's CPU cycles and cache misses):

#define _GNU_SOURCE
#include <stdlib.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <string.h>
#include <sys/ioctl.h>
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <asm/unistd.h>
#include <errno.h>
#include <stdint.h>
#include <inttypes.h>
#include <time.h>

/*
 * Layout of the data returned by read() on a group leader opened with
 * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID.
 */
struct read_format {
  uint64_t nr;        /* number of entries in values[] */
  struct {
    uint64_t value;   /* current count of one event */
    uint64_t id;      /* id of that event, as reported by PERF_EVENT_IOC_ID */
  } values[];         /* flexible array member: nr entries follow */
};

/*
 * Opens two hardware counters as one event group (CPU cycles as the group
 * leader, cache misses as a member) and prints both values once per second,
 * forever.
 *
 * NOTE(review): with pid = 0 and cpu = -1, perf_event_open() measures only
 * the calling thread, on whichever CPU it runs -- not the whole system.
 * Since this thread spends nearly all of its time asleep in nanosleep() and
 * kernel-mode events are excluded, very small counts are to be expected.
 * System-wide counting needs pid = -1 and one event group per CPU (cpu >= 0).
 *
 * Returns EXIT_FAILURE if any perf call fails; otherwise it never returns.
 */
int main(int argc, char* argv[]) {
  struct perf_event_attr pea;
  int fd1, fd2;
  uint64_t id1, id2;
  /* Zero-initialised so that a counter missing from the read still prints a
     defined value. */
  uint64_t val1 = 0, val2 = 0;
  /* uint64_t storage keeps the buffer suitably aligned for read_format. */
  uint64_t buf[4096 / sizeof(uint64_t)];
  struct read_format* rf = (struct read_format*) buf;
  uint64_t i;
  struct timespec delay, remaining;

  (void) argc;
  (void) argv;

  delay.tv_sec = 1;
  delay.tv_nsec = 0;

  /* Group leader: user-space CPU cycles. */
  memset(&pea, 0, sizeof(struct perf_event_attr));
  pea.type = PERF_TYPE_HARDWARE;
  pea.size = sizeof(struct perf_event_attr);
  pea.config = PERF_COUNT_HW_CPU_CYCLES;
  pea.disabled = 1;
  pea.exclude_kernel = 1;
  pea.exclude_hv = 1;
  pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
  fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
  if (fd1 == -1) {
    perror("perf_event_open(cpu cycles)");
    return EXIT_FAILURE;
  }
  if (ioctl(fd1, PERF_EVENT_IOC_ID, &id1) == -1) {
    perror("ioctl(PERF_EVENT_IOC_ID)");
    return EXIT_FAILURE;
  }

  /* Group member: user-space cache misses, attached to fd1's group. */
  memset(&pea, 0, sizeof(struct perf_event_attr));
  pea.type = PERF_TYPE_HARDWARE;
  pea.size = sizeof(struct perf_event_attr);
  pea.config = PERF_COUNT_HW_CACHE_MISSES;
  pea.disabled = 1;
  pea.exclude_kernel = 1;
  pea.exclude_hv = 1;
  pea.precise_ip = 2;  // want to use PEBS
  pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
  fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1 /*!!!*/, 0);
  if (fd2 == -1) {
    perror("perf_event_open(cache misses)");
    return EXIT_FAILURE;
  }
  if (ioctl(fd2, PERF_EVENT_IOC_ID, &id2) == -1) {
    perror("ioctl(PERF_EVENT_IOC_ID)");
    return EXIT_FAILURE;
  }

  /* PERF_IOC_FLAG_GROUP applies the request to every event in the group. */
  if (ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
    perror("ioctl(PERF_EVENT_IOC_RESET)");
    return EXIT_FAILURE;
  }
  if (ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
    perror("ioctl(PERF_EVENT_IOC_ENABLE)");
    return EXIT_FAILURE;
  }

  while (1) {
    nanosleep(&delay, &remaining);

    /* The counters keep running; each read returns the running totals. */
    if (read(fd1, buf, sizeof(buf)) == -1) {
      perror("read");
      return EXIT_FAILURE;
    }
    for (i = 0; i < rf->nr; i++) {
      if (rf->values[i].id == id1) {
        val1 = rf->values[i].value;
      } else if (rf->values[i].id == id2) {
        val2 = rf->values[i].value;
      }
    }

    printf("cpu cycles: %"PRIu64"\n", val1);
    printf("cache misses: %"PRIu64"\n", val2);

  }

  return 0;
}

And the output is:

cpu cycles: 120   // Just have about 120 CPU cycles in a second
cache misses: 0   // and doesn't have any cache miss?
cpu cycles: 233
cache misses: 0
cpu cycles: 352
cache misses: 0
cpu cycles: 455
cache misses: 0
cpu cycles: 562
cache misses: 0
cpu cycles: 673
cache misses: 0
cpu cycles: 794
cache misses: 0
cpu cycles: 907
cache misses: 0
cpu cycles: 1011
cache misses: 0
cpu cycles: 1129
cache misses: 3
cpu cycles: 1269
cache misses: 4
cpu cycles: 1423

Solution

  • That's a bit tricky.

    We create the first counter as usual. Additionally, we pass PERF_FORMAT_GROUP and PERF_FORMAT_ID to be able to work with multiple counters simultaneously. This counter will be our group leader.

    struct perf_event_attr pea;
    int fd1, fd2;
    uint64_t id1, id2; 
    
    /* Describe the group leader: a hardware CPU-cycle counter. */
    memset(&pea, 0, sizeof(struct perf_event_attr));
    pea.type = PERF_TYPE_HARDWARE;
    pea.size = sizeof(struct perf_event_attr);
    pea.config = PERF_COUNT_HW_CPU_CYCLES;
    pea.disabled = 1;       /* counter starts out disabled */
    pea.exclude_kernel = 1; /* do not count events in kernel space */
    pea.exclude_hv = 1;     /* do not count events in the hypervisor */
    /* read() returns every counter of the group, each tagged with its id. */
    pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    /* pid = 0: calling thread; cpu = -1: any CPU; group_fd = -1: new group
       leader; flags = 0. */
    fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
    

    Next, we retrieve identifier for the first counter:

    /* Stores the event id of fd1 in id1 (a 64-bit unsigned integer). */
    ioctl(fd1, PERF_EVENT_IOC_ID, &id1);
    

    The second counter (and all further counters) is created in the same fashion with only one exception: we pass the fd1 value as the group leader argument:

    /* Describe the second event: a software page-fault counter. */
    memset(&pea, 0, sizeof(struct perf_event_attr));
    pea.type = PERF_TYPE_SOFTWARE;
    pea.size = sizeof(struct perf_event_attr);
    pea.config = PERF_COUNT_SW_PAGE_FAULTS;
    pea.disabled = 1;
    pea.exclude_kernel = 1;
    pea.exclude_hv = 1;
    pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
    /* Passing fd1 as group_fd makes this event a member of fd1's group. */
    fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1, 0); // <-- here
    /* Fetch this event's id so its value can be found in the group read. */
    ioctl(fd2, PERF_EVENT_IOC_ID, &id2);
    

    Next we need to declare a data structure to read multiple counters at once. You have to declare a different set of fields depending on which flags you pass to perf_event_open. The manual page lists all possible fields. In our case, we passed the PERF_FORMAT_ID flag, which adds the id field. This will allow us to distinguish between different counters.

    /*
     * Layout of the data returned by read() on a group leader opened with
     * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID.
     */
    struct read_format {
        uint64_t nr;            /* number of entries in values[] */
        struct {
            uint64_t value;     /* current count of one event */
            uint64_t id;        /* id of that event (PERF_EVENT_IOC_ID) */
        } values[/*2*/];        /* flexible array member: nr entries follow */
    };
    

    Now we call standard profiling ioctls:

    /* PERF_IOC_FLAG_GROUP applies each request to every event in the group. */
    ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP);   /* zero the counts */
    ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP);  /* start counting */
    do_something();                                          /* code under measurement */
    ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP); /* stop counting */
    

    Finally, we read the counters from the group leader's file descriptor. Both counters are returned in a single read_format structure that we declared:

    char buf[4096];
    struct read_format* rf = (struct read_format*) buf;
    uint64_t val1, val2;
    
    read(fd1, buf, sizeof(buf));
    for (i = 0; i < rf->nr; i++) {
      if (rf->values[i].id == id1) {
        val1 = rf->values[i].value;
      } else if (rf->values[i].id == id2) {
        val2 = rf->values[i].value;
      }
    }
    printf("cpu cycles: %"PRIu64"\n", val1);
    printf("page faults: %"PRIu64"\n", val2);
    

    Below is the full program listing:

    #define _GNU_SOURCE
    #include <stdlib.h>
    #include <stdio.h>
    #include <unistd.h>
    #include <sys/syscall.h>
    #include <string.h>
    #include <sys/ioctl.h>
    #include <linux/perf_event.h>
    #include <linux/hw_breakpoint.h>
    #include <asm/unistd.h>
    #include <errno.h>
    #include <stdint.h>
    #include <inttypes.h>
    
    /*
     * Layout of the data returned by read() on a group leader opened with
     * read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID.
     */
    struct read_format {
      uint64_t nr;        /* number of entries in values[] */
      struct {
        uint64_t value;   /* current count of one event */
        uint64_t id;      /* id of that event, as reported by PERF_EVENT_IOC_ID */
      } values[];         /* flexible array member: nr entries follow */
    };
    
    /*
     * Workload to be measured: allocates 100 MiB and writes every byte of it,
     * so that each page of the allocation gets touched.
     *
     * If the allocation fails, an error is reported on stderr and the function
     * returns without doing any work.
     */
    void do_something(void) {
      const size_t len = 100 * 1024 * 1024;
      /* volatile: the stores are the whole point of this function, so an
         optimizing compiler must not be allowed to discard them. */
      volatile char* ptr;
      size_t i;
    
      ptr = malloc(len);
      if (ptr == NULL) {
        perror("malloc");
        return;
      }
      for (i = 0; i < len; i++) {
        ptr[i] = (char) (i & 0xff); // pagefault
      }
      free((void*) ptr);
    }
    
    /*
     * Counts user-space CPU cycles and page faults of the calling thread while
     * do_something() runs, using one event group (cycles is the group leader),
     * then prints both totals.
     *
     * Returns 0 on success, EXIT_FAILURE if any perf call fails.
     */
    int main(int argc, char* argv[]) {
      struct perf_event_attr pea;
      int fd1, fd2;
      uint64_t id1, id2;
      /* Zero-initialised so that a counter missing from the read still prints
         a defined value. */
      uint64_t val1 = 0, val2 = 0;
      /* uint64_t storage keeps the buffer suitably aligned for read_format. */
      uint64_t buf[4096 / sizeof(uint64_t)];
      struct read_format* rf = (struct read_format*) buf;
      uint64_t i;
    
      (void) argc;
      (void) argv;
    
      /* Group leader: hardware CPU-cycle counter. */
      memset(&pea, 0, sizeof(struct perf_event_attr));
      pea.type = PERF_TYPE_HARDWARE;
      pea.size = sizeof(struct perf_event_attr);
      pea.config = PERF_COUNT_HW_CPU_CYCLES;
      pea.disabled = 1;
      pea.exclude_kernel = 1;
      pea.exclude_hv = 1;
      pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
      fd1 = syscall(__NR_perf_event_open, &pea, 0, -1, -1, 0);
      if (fd1 == -1) {
        perror("perf_event_open(cpu cycles)");
        return EXIT_FAILURE;
      }
      if (ioctl(fd1, PERF_EVENT_IOC_ID, &id1) == -1) {
        perror("ioctl(PERF_EVENT_IOC_ID)");
        return EXIT_FAILURE;
      }
    
      /* Group member: software page-fault counter, attached to fd1's group. */
      memset(&pea, 0, sizeof(struct perf_event_attr));
      pea.type = PERF_TYPE_SOFTWARE;
      pea.size = sizeof(struct perf_event_attr);
      pea.config = PERF_COUNT_SW_PAGE_FAULTS;
      pea.disabled = 1;
      pea.exclude_kernel = 1;
      pea.exclude_hv = 1;
      pea.read_format = PERF_FORMAT_GROUP | PERF_FORMAT_ID;
      fd2 = syscall(__NR_perf_event_open, &pea, 0, -1, fd1 /*!!!*/, 0);
      if (fd2 == -1) {
        perror("perf_event_open(page faults)");
        return EXIT_FAILURE;
      }
      if (ioctl(fd2, PERF_EVENT_IOC_ID, &id2) == -1) {
        perror("ioctl(PERF_EVENT_IOC_ID)");
        return EXIT_FAILURE;
      }
    
      /* PERF_IOC_FLAG_GROUP applies each request to every event in the group. */
      if (ioctl(fd1, PERF_EVENT_IOC_RESET, PERF_IOC_FLAG_GROUP) == -1) {
        perror("ioctl(PERF_EVENT_IOC_RESET)");
        return EXIT_FAILURE;
      }
      if (ioctl(fd1, PERF_EVENT_IOC_ENABLE, PERF_IOC_FLAG_GROUP) == -1) {
        perror("ioctl(PERF_EVENT_IOC_ENABLE)");
        return EXIT_FAILURE;
      }
      do_something();
      if (ioctl(fd1, PERF_EVENT_IOC_DISABLE, PERF_IOC_FLAG_GROUP) == -1) {
        perror("ioctl(PERF_EVENT_IOC_DISABLE)");
        return EXIT_FAILURE;
      }
    
      /* One read on the group leader returns the counts of the whole group. */
      if (read(fd1, buf, sizeof(buf)) == -1) {
        perror("read");
        return EXIT_FAILURE;
      }
      for (i = 0; i < rf->nr; i++) {
        if (rf->values[i].id == id1) {
          val1 = rf->values[i].value;
        } else if (rf->values[i].id == id2) {
          val2 = rf->values[i].value;
        }
      }
    
      printf("cpu cycles: %"PRIu64"\n", val1);
      printf("page faults: %"PRIu64"\n", val2);
    
      close(fd2);
      close(fd1);
    
      return 0;
    }