Tags: linux, linux-kernel, kernel-module, perf, apple-silicon

Reading performance counters on Apple Silicon in a Kernel Module


I have an M2 Apple machine with Asahi Linux running on it. I'm trying to write a kernel module, and in the module I want to measure the number of cycles spent running a set of assembly instructions. I know that the default ARM performance registers don't work on M1 or M2, and that Apple has implemented its own PMC registers.

The problem with directly reading the PMC0 register (asm equivalent: MRS X0, PMC0) is that it first has to be initialized and enabled, and events have to be defined, via Apple's performance monitor control register (PMCR0), which is a lot of work to include in my own module.

I have seen the Asahi team's work on perf here, which works fine. I'm not sure whether I can actually use perf in my case.

So my question is: is it possible to use the perf tool to measure the cycles spent in some part of a kernel module? I'd also very much appreciate any other ideas.


Solution

  • Here's how I used the already-implemented code from Asahi to set up the performance counters on M1 in my own kernel module:

    /*
     * setup_counter() - configure, enable and start one Apple PMC counter.
     * @pmc_counter: hardware counter index; only 0..7 is supported here.
     *
     * Performs three register updates, each followed by an isb():
     *   1. configure: OR the counter's bit from the PMCR1_COUNT_A64_EL0_0_7
     *      and PMCR1_COUNT_A64_EL1_0_7 fields into PMCR1;
     *   2. enable: overwrite PMCR0 with 0xff (the PMCR0_CNT_ENABLE_0_7 bits);
     *   3. start: clear PMCR0_IACT and set the PMCR0_IMODE field to
     *      PMCR0_IMODE_FIQ.
     *
     * The per-counter bits for counters 8 and 9 live in separate *_8_9
     * fields, so adding the index to the *_0_7 field offset is only valid
     * for 0..7.  Any other index (including negative values, which would
     * produce an invalid shift) is ignored and no register is touched.
     *
     * NOTE: step 2 is a plain write, not a read-modify-write: it sets all
     * eight enable bits regardless of @pmc_counter and clears every other
     * PMCR0 bit.  This is kept as-is to preserve existing behaviour.
     */
    void setup_counter(int pmc_counter)
    {
        u64 val, user_bit, kernel_bit;

        /* Reject indices that would land outside the *_0_7 bit fields. */
        if (pmc_counter < 0 || pmc_counter > 7)
            return;

        /* configure */
        user_bit = BIT(get_bit_offset(pmc_counter, PMCR1_COUNT_A64_EL0_0_7));
        kernel_bit = BIT(get_bit_offset(pmc_counter, PMCR1_COUNT_A64_EL1_0_7));

        val = read_sysreg_s(SYS_IMP_APL_PMCR1_EL1);
        val |= user_bit | kernel_bit;
        write_sysreg_s(val, SYS_IMP_APL_PMCR1_EL1);
        isb();

        /* enable: use the named PMCR0 encoding instead of a raw sys_reg() */
        write_sysreg_s(0xff, SYS_IMP_APL_PMCR0_EL1);
        isb();

        /* start */
        val = read_sysreg_s(SYS_IMP_APL_PMCR0_EL1);
        val &= ~(PMCR0_IMODE | PMCR0_IACT);
        val |= FIELD_PREP(PMCR0_IMODE, PMCR0_IMODE_FIQ);
        write_sysreg_s(val, SYS_IMP_APL_PMCR0_EL1);
        isb();
    }
    

    And here's the header file:

    #ifndef __TIMING_h
    #define __TIMING_h
    
    #include <linux/bits.h>
    #include <asm/sysreg.h>
    
    /*
     * Counters: sys_reg() encodings for the ten counter registers
     * PMC0..PMC9.  All entries share the first three arguments (3, 2, 15)
     * and the last one (0); only the fourth argument differs.
     *
     * The fourth argument is NOT simply the counter number: PMC0..PMC7 use
     * 0..7, but PMC8 and PMC9 use 9 and 10 (the value 8 is skipped).
     */
    #define SYS_IMP_APL_PMC0_EL1    sys_reg(3, 2, 15, 0, 0)
    #define SYS_IMP_APL_PMC1_EL1    sys_reg(3, 2, 15, 1, 0)
    #define SYS_IMP_APL_PMC2_EL1    sys_reg(3, 2, 15, 2, 0)
    #define SYS_IMP_APL_PMC3_EL1    sys_reg(3, 2, 15, 3, 0)
    #define SYS_IMP_APL_PMC4_EL1    sys_reg(3, 2, 15, 4, 0)
    #define SYS_IMP_APL_PMC5_EL1    sys_reg(3, 2, 15, 5, 0)
    #define SYS_IMP_APL_PMC6_EL1    sys_reg(3, 2, 15, 6, 0)
    #define SYS_IMP_APL_PMC7_EL1    sys_reg(3, 2, 15, 7, 0)
    #define SYS_IMP_APL_PMC8_EL1    sys_reg(3, 2, 15, 9, 0)
    #define SYS_IMP_APL_PMC9_EL1    sys_reg(3, 2, 15, 10, 0)
    
    /*
     * Core PMC control register (PMCR0) and its bit fields.
     *
     * The masks below fix only the bit positions; the meaning of each
     * field is taken from its name (assumption, not shown in this file):
     *   bits  7:0   PMCR0_CNT_ENABLE_0_7  - 8 bits, presumably one per
     *                                       counter 0..7
     *   bits 10:8   PMCR0_IMODE           - holds one PMCR0_IMODE_* value
     *   bit  11     PMCR0_IACT
     *   bits 19:12  PMCR0_PMI_ENABLE_0_7
     *   bits 20, 21, 22, 30               - single-bit flags
     *   bits 33:32  PMCR0_CNT_ENABLE_8_9  - counters 8..9 have their own
     *                                       field, not contiguous with 0..7
     *   bits 45:44  PMCR0_PMI_ENABLE_8_9
     *
     * Several masks reach above bit 31, so values for this register must
     * be handled as 64-bit quantities.
     */
    #define SYS_IMP_APL_PMCR0_EL1   sys_reg(3, 1, 15, 0, 0)
    #define PMCR0_CNT_ENABLE_0_7    GENMASK(7, 0)
    #define PMCR0_IMODE     GENMASK(10, 8)
    /*
     * Unshifted values for the PMCR0_IMODE field; they are not masks and
     * must be moved into position, e.g. FIELD_PREP(PMCR0_IMODE, value).
     */
    #define PMCR0_IMODE_OFF     0
    #define PMCR0_IMODE_PMI     1
    #define PMCR0_IMODE_AIC     2
    #define PMCR0_IMODE_HALT    3
    #define PMCR0_IMODE_FIQ     4
    #define PMCR0_IACT      BIT(11)
    #define PMCR0_PMI_ENABLE_0_7    GENMASK(19, 12)
    #define PMCR0_STOP_CNT_ON_PMI   BIT(20)
    #define PMCR0_CNT_GLOB_L2C_EVT  BIT(21)
    #define PMCR0_DEFER_PMI_TO_ERET BIT(22)
    #define PMCR0_ALLOW_CNT_EN_EL0  BIT(30)
    #define PMCR0_CNT_ENABLE_8_9    GENMASK(33, 32)
    #define PMCR0_PMI_ENABLE_8_9    GENMASK(45, 44)
    
    /*
     * PMCR1 and its bit fields.
     *
     * Two pairs of fields, one pair for counters 0..7 and one for counters
     * 8..9.  Judging by the names, the EL0 fields select counting while
     * executing at EL0 and the EL1 fields while executing at EL1
     * (assumption from naming - confirm against hardware documentation).
     *   bits 15:8   EL0, counters 0..7
     *   bits 23:16  EL1, counters 0..7
     *   bits 41:40  EL0, counters 8..9
     *   bits 49:48  EL1, counters 8..9
     * A counter's bit inside a field is found with get_bit_offset(); for
     * the *_8_9 fields the index within the field is the counter number
     * minus 8, since each of those fields is only two bits wide.
     */
    #define SYS_IMP_APL_PMCR1_EL1   sys_reg(3, 1, 15, 1, 0)
    #define PMCR1_COUNT_A64_EL0_0_7 GENMASK(15, 8)
    #define PMCR1_COUNT_A64_EL1_0_7 GENMASK(23, 16)
    #define PMCR1_COUNT_A64_EL0_8_9 GENMASK(41, 40)
    #define PMCR1_COUNT_A64_EL1_8_9 GENMASK(49, 48)
    
    /*
     * Encodings for PMCR2..PMCR4.  They continue the PMCR0/PMCR1 numbering
     * (fourth sys_reg() argument 2, 3, 4).  No bit fields are defined for
     * them in this header.
     */
    #define SYS_IMP_APL_PMCR2_EL1   sys_reg(3, 1, 15, 2, 0)
    #define SYS_IMP_APL_PMCR3_EL1   sys_reg(3, 1, 15, 3, 0)
    #define SYS_IMP_APL_PMCR4_EL1   sys_reg(3, 1, 15, 4, 0)
    
    /*
     * PMESR0: four 8-bit fields in bits 31:0, named for counters 2..5.
     * Presumably each field selects the event counted by that counter
     * (inferred from the EVT_CNT_n naming - confirm).  No field is defined
     * here for counters 0 and 1.
     */
    #define SYS_IMP_APL_PMESR0_EL1  sys_reg(3, 1, 15, 5, 0)
    #define PMESR0_EVT_CNT_2    GENMASK(7, 0)
    #define PMESR0_EVT_CNT_3    GENMASK(15, 8)
    #define PMESR0_EVT_CNT_4    GENMASK(23, 16)
    #define PMESR0_EVT_CNT_5    GENMASK(31, 24)
    
    /*
     * PMESR1: same layout as PMESR0, with the four 8-bit fields named for
     * counters 6..9.
     */
    #define SYS_IMP_APL_PMESR1_EL1  sys_reg(3, 1, 15, 6, 0)
    #define PMESR1_EVT_CNT_6    GENMASK(7, 0)
    #define PMESR1_EVT_CNT_7    GENMASK(15, 8)
    #define PMESR1_EVT_CNT_8    GENMASK(23, 16)
    #define PMESR1_EVT_CNT_9    GENMASK(31, 24)
    
    /*
     * PMSR: PMSR_OVERFLOW covers bits 9:0, i.e. ten bits - the same number
     * as the counters PMC0..PMC9.  Presumably one overflow flag per
     * counter (inferred from the name and width - confirm).
     */
    #define SYS_IMP_APL_PMSR_EL1    sys_reg(3, 1, 15, 13, 0)
    #define PMSR_OVERFLOW       GENMASK(9, 0)
    
    /*
     * get_bit_offset() - absolute bit number of entry @index in field @mask.
     *
     * Adds @index to the position of the lowest set bit of @mask, as found
     * by __ffs().  @index is not checked against the width of @mask; the
     * caller must keep it inside the field.
     */
    #define get_bit_offset(index, mask) ((index) + __ffs(mask))
    
    /*
     * setup_counter() - configure, enable and start counter @pmc_counter.
     * Defined in the accompanying .c file.
     */
    extern void setup_counter(int pmc_counter);

    /*
     * Counter accessors: each expands to a read_sysreg_s() of PMC0 / PMC1.
     * The names suggest PMC0 counts clock cycles and PMC1 counts
     * instructions (assumption - the event programming is not shown here).
     *
     * The expansions deliberately carry no trailing semicolon so the
     * macros can be used inside larger expressions, e.g.
     *     delta = read_clk_counter() - start;
     * The caller supplies the terminating ';' as for any function call.
     */
    #define read_clk_counter() read_sysreg_s(SYS_IMP_APL_PMC0_EL1)
    #define read_instruction_counter() read_sysreg_s(SYS_IMP_APL_PMC1_EL1)
    
    #endif /* __TIMING_h */