linux kernel-module device-tree memset petalinux

SError kernel panic when memset_io'ing on device-tree memory area in Petalinux kernel module


My platform: Zynq UltraScale+ MPSoC (ZynqMP); PetaLinux 2020.2

Build system: Ubuntu 18.04

I'm writing a Kernel module which registers itself as a platform_device with compatible string 'erika' with the PetaLinux Kernel on boot.

I specified two memory areas for this device in the system-user.dtsi file: a larger one (16M) that holds the data exchanged between PetaLinux on the APU and a bare-metal implementation on the RPU, and a smaller one (4k) that holds the signals for an interrupt shared between the kernel module on the APU and the bare-metal application on the RPU.

Inside the module's probe()-function, I map both memory regions with:

/*
 * Fragment of the module's probe() function (question version): look up the
 * two memory resources of the platform device and map both of them.
 * `nic`, `dev` and `pdev` are defined outside this snippet.
 */

/* Get shared memory for the device (memory resource index 0) */
r_mem_shm = platform_get_resource(nic->pdev, IORESOURCE_MEM, 0);
if (!r_mem_shm) {
   dev_err(dev, "invalid address\n");
   return -ENODEV;
}

/* Get IPI register memory for the device (memory resource index 1) */
r_mem_ipi = platform_get_resource(nic->pdev, IORESOURCE_MEM, 1);
if (!r_mem_ipi) {
   dev_err(dev, "invalid address\n");
   return -ENODEV;
}

/*
 * Map the shared-memory resource with devm_memremap() using MEMREMAP_WT.
 * The length is the inclusive span of the resource (end - start + 1).
 */
nic->base_addr_shm = devm_memremap(&pdev->dev, r_mem_shm->start, r_mem_shm->end - r_mem_shm->start + 1, MEMREMAP_WT);
if (IS_ERR(nic->base_addr_shm)) {
    /* NOTE(review): unlike the messages above, this format string has no trailing newline. */
    dev_err(dev, "Could not map shared memory at 0x%08llx",
        (uint64_t __force)r_mem_shm->start);
    return PTR_ERR(nic->base_addr_shm);
}

/*
 * The IPI resource is mapped exactly like the shared memory above, i.e. with
 * devm_memremap() and MEMREMAP_WT.
 * NOTE(review): the resource is described as IPI *register* memory; the
 * author's later fix maps it with devm_ioremap_resource() instead.
 */
nic->base_addr_ipi = devm_memremap(&pdev->dev, r_mem_ipi->start, r_mem_ipi->end - r_mem_ipi->start + 1, MEMREMAP_WT);
if (IS_ERR(nic->base_addr_ipi)) {
    /* NOTE(review): no trailing newline in this format string either. */
    dev_err(dev, "Could not map IPI memory at 0x%08llx",
        (uint64_t __force)r_mem_ipi->start);
    return PTR_ERR(nic->base_addr_ipi);
}

This succeeds. Later in the function (after successfully grabbing the IRQ), I zero out these two memory regions to have a clean starting state:

/*
 * Fragment of the module's probe() function (question version): attach the
 * driver context to the platform device, cache the bounds of both regions,
 * then zero both mappings.
 */
platform_set_drvdata(nic->pdev, nic);

/* Cache start, end and inclusive size (end - start + 1) of each resource. */
nic->shm_start = r_mem_shm->start;
nic->shm_end = r_mem_shm->end;
nic->shm_size = nic->shm_end - nic->shm_start + 1;
nic->ipi_start = r_mem_ipi->start;
nic->ipi_end = r_mem_ipi->end;
nic->ipi_size = nic->ipi_end - nic->ipi_start + 1;

/* Clear shared memory & IPI memory */
/*
 * NOTE(review): shm_size and ipi_size are printed with %d; their declared
 * types are not visible in this snippet -- confirm the specifier matches.
 */
dev_info(dev, "before memset_io(shm....), nic->shm_size = %d", nic->shm_size);
memset_io(nic->base_addr_shm, 0, nic->shm_size);
dev_info(dev, "before memset_io(ipi....), nic->ipi_size = %d", nic->ipi_size);
/*
 * This is the call during which the posted log reports the SError
 * (pc : __memset_io, lr : erika_probe).
 */
memset_io(nic->base_addr_ipi, 0, nic->ipi_size);
/* This message does not appear in the posted log of the failing run. */
printk(KERN_NOTICE "after memset_io(nic->base_addr_ipi)");

Judging by my debug prints, zeroing out the memory region at nic->base_addr_shm succeeds, whereas the second memset_io call causes a kernel panic:

[    5.234907] erika: loading out-of-tree module taints kernel.
[    5.241470] <1>Hello world from erika module.
[    5.246068] erika 3ed80000.shm: Device Tree Probing
[    5.261139] erika 3ed80000.shm: erika shared memory at 0x3ed80000 mapped to 0xffff800015000000 with size 0x01000000
[    5.275169] erika 3ed80000.shm: erika IPI memory at 0xff340000 mapped to 0xffff80001006d000 with size 0x00001000
[    5.275577] zynqmp_r5_remoteproc zynqmp-rpu: RPU core_conf: split
[    5.285399] erika 3ed80000.shm: before memset_io(shm....), nic->shm_size = 16777216
[    5.293642]  r5@0: no mailboxes.
[    5.302430] remoteproc remoteproc0: r5@0 is available
[    5.319927] erika 3ed80000.shm: before memset_io(ipi....), nic->ipi_size = 4096
[    5.327430] SError Interrupt on CPU2, code 0xbf000002 -- SError
[    5.327434] CPU: 2 PID: 371 Comm: udevd Tainted: G           O      5.4.0-xilinx-v2020.2 #1
[    5.327435] Hardware name: xlnx,zynqmp (DT)
[    5.327437] pstate: 80000005 (Nzcv daif -PAN -UAO)
[    5.327438] pc : __memset_io+0x68/0x98
[    5.327440] lr : erika_probe+0x258/0x3bc [erika]
[    5.327441] sp : ffff800012d6b940
[    5.327443] x29: ffff800012d6b950 x28: 0000000000000100
[    5.327446] x27: ffff80001013f510 x26: 000000000000002d
[    5.327450] x25: ffff000877a98380 x24: ffff00087ab0d800
[    5.327453] x23: ffff800008c513f8 x22: ffff00087aaa7000
[    5.327456] x21: 0000000000000000 x20: ffff00087ab0d810
[    5.327459] x19: ffff00087aaa77c0 x18: 0000000000000010
[    5.327462] x17: 000000000f1828b4 x16: 00000000a67c5c83
[    5.327465] x15: ffff00087a04b2e8 x14: ffffffffffffffff
[    5.327469] x13: ffff800092d6b5b7 x12: ffff800012d6b5bf
[    5.327472] x11: ffff8000110f5000 x10: 0000000000000000
[    5.327475] x9 : ffff800011193000 x8 : 0000000000000152
[    5.327478] x7 : 0000000000000006 x6 : ffff8000111930f2
[    5.327481] x5 : 0000000000000003 x4 : 0000000000000000
[    5.327484] x3 : 0000000000000000 x2 : 0000000000001000
[    5.327488] x1 : ffff80001006e000 x0 : ffff80001006d0c8
[    5.327492] Kernel panic - not syncing: Asynchronous SError Interrupt
[    5.327494] CPU: 2 PID: 371 Comm: udevd Tainted: G           O      5.4.0-xilinx-v2020.2 #1
[    5.327496] Hardware name: xlnx,zynqmp (DT)
[    5.327497] Call trace:
[    5.327498]  dump_backtrace+0x0/0x140
[    5.327500]  show_stack+0x14/0x20
[    5.327501]  dump_stack+0xac/0xd0
[    5.327502]  panic+0x140/0x30c
[    5.327504]  __stack_chk_fail+0x0/0x18
[    5.327505]  arm64_serror_panic+0x74/0x80
[    5.327506]  do_serror+0x114/0x118
[    5.327508]  el1_error+0x84/0xf8
[    5.327509]  __memset_io+0x68/0x98
[    5.327510]  platform_drv_probe+0x50/0xa0
[    5.327511]  really_probe+0xd8/0x2f8
[    5.327513]  driver_probe_device+0x54/0xe8
[    5.327514]  device_driver_attach+0x6c/0x78
[    5.327516]  __driver_attach+0x54/0xd0
[    5.327517]  bus_for_each_dev+0x6c/0xc0
[    5.327518]  driver_attach+0x20/0x28
[    5.327520]  bus_add_driver+0x148/0x1e0
[    5.327521]  driver_register+0x60/0x110
[    5.327522]  __platform_driver_register+0x44/0x50
[    5.327524]  erika_init+0x28/0x1000 [erika]
[    5.327525]  do_one_initcall+0x50/0x190
[    5.327527]  do_init_module+0x50/0x1f0
[    5.327528]  load_module+0x1ca4/0x2218
[    5.327530]  __do_sys_finit_module+0xd0/0xe8
[    5.327531]  __arm64_sys_finit_module+0x1c/0x28
[    5.327533]  el0_svc_common.constprop.0+0x68/0x160
[    5.327534]  el0_svc_handler+0x6c/0x88
[    5.327535]  el0_svc+0x8/0xc
[    5.327548] SMP: stopping secondary CPUs
[    5.327549] Kernel Offset: disabled
[    5.327551] CPU features: 0x0002,20002004
[    5.327552] Memory Limit: none

Solution

  • I think I solved the issue (also thanks to 0andriy's comment):

    My mistake was that I treated the IPI register space

    0x0 0xff340000 0x0 0x1000
    

    as raw memory instead of what it actually is: the location of certain very specific registers for IPI communication using Channel 7 (see Table 13-3 in https://www.xilinx.com/support/documentation/user_guides/ug1085-zynq-ultrascale-trm.pdf). You should not (and obviously cannot, without breaking [Peta]Linux) simply clear this area to 0.

    I have now changed the code as follows and it seems to work (at least the system now boots properly; I have yet to check with my colleagues whether we can actually do IPI communication now):

        /*
         * Fragment of the module's probe() function (fixed version): the
         * shared memory is still mapped as ordinary memory, the IPI region
         * is mapped as device registers, and only the shared memory is
         * cleared. `nic`, `dev` and `pdev` are defined outside this snippet.
         */

        /* Get shared memory for the device (memory resource index 0) */
        r_mem_shm = platform_get_resource(nic->pdev, IORESOURCE_MEM, 0);
        if (!r_mem_shm) {
            dev_err(dev, "invalid address\n");
            return -ENODEV;
        }
    
        /* Get IPI register memory for the device (memory resource index 1) */
        r_mem_ipi = platform_get_resource(nic->pdev, IORESOURCE_MEM, 1);
        if (!r_mem_ipi) {
            dev_err(dev, "invalid address\n");
            return -ENODEV;
        }
    
        /*
         * Map the shared memory with devm_memremap(), now using MEMREMAP_WB.
         * The length is the inclusive span of the resource (end - start + 1).
         */
        nic->base_addr_shm = devm_memremap(&pdev->dev, r_mem_shm->start, r_mem_shm->end - r_mem_shm->start + 1, MEMREMAP_WB);
        if (IS_ERR(nic->base_addr_shm)) {
            /* %pR prints the whole struct resource instead of a casted start address. */
            dev_err(dev, "Could not map shared memory at %pR", r_mem_shm);
            return PTR_ERR(nic->base_addr_shm);
        }
    
        /*
         * The IPI region holds hardware registers, so it is mapped with
         * devm_ioremap_resource() rather than devm_memremap().
         */
        nic->base_addr_ipi = devm_ioremap_resource(&pdev->dev, r_mem_ipi);
        if (IS_ERR(nic->base_addr_ipi)) {
            dev_err(dev, "Could not map IPI memory at %pR", r_mem_ipi);
            return PTR_ERR(nic->base_addr_ipi);
        }
    
        /*
         * Clear shared memory only; the IPI register region is deliberately
         * no longer zeroed. Plain memset() is used on the devm_memremap()
         * mapping.
         * NOTE(review): assumes nic->shm_size was assigned before this point;
         * the assignment is not shown in this snippet -- confirm.
         */
        memset(nic->base_addr_shm, 0, nic->shm_size);