I am currently stuck on upgrading the Linux kernel from 4.19 to 5.15.
In kernel/arch/.../our_pci.c, my former coworker left an override of pcibios_window_alignment()
so that the prefetchable memory windows of certain buses are aligned to 0x8000000 (128 MB), for hot-plug equipment.
On kernel 4.19 this resulted in a layout like the one below.
# cat /proc/iomem
...
50000000-50ffffff : axi-pcie@50000000
60000000-7fffffff : axi-pcie@50000000
60000000-7fffffff : PCI Bus 0000:01
60000000-7bffffff : PCI Bus 0000:02
60000000-67ffffff : PCI Bus 0000:04 <- These 3, are size of 0x8000000
68000000-6fffffff : PCI Bus 0000:05 <-
70000000-77ffffff : PCI Bus 0000:06 <-
78000000-781fffff : PCI Bus 0000:03
78200000-783fffff : PCI Bus 0000:07
7c000000-7cefffff : PCI Bus 0000:02
7c000000-7c2fffff : PCI Bus 0000:03
7c000000-7c0000ff : 0000:03:00.0
7c300000-7c4fffff : PCI Bus 0000:04
7c500000-7c6fffff : PCI Bus 0000:05
7c700000-7c8fffff : PCI Bus 0000:06
7c900000-7cafffff : PCI Bus 0000:07
...
On kernel 5.15 the output is much shorter, and dmesg | grep pci
reports "failed to assign" for these resources.
~ # cat /proc/iomem
...
50000000-50ffffff : 50000000.axi-pcie axi-pcie@50000000
60000000-7fffffff : axi-pcie@50000000
... <- No more things related to pcie
After a rescan with echo 1 > /sys/bus/pci/rescan
the PCI entries finally show up in /proc/iomem,
but the layout from address 0x7c000000 onward differs slightly from what kernel 4.19 produced.
~ # cat /proc/iomem
...
50000000-50ffffff : 50000000.axi-pcie axi-pcie@50000000
60000000-7fffffff : axi-pcie@50000000
60000000-7fffffff : PCI Bus 0000:01
60000000-7bffffff : PCI Bus 0000:02
60000000-67ffffff : PCI Bus 0000:04
68000000-6fffffff : PCI Bus 0000:05
70000000-77ffffff : PCI Bus 0000:06
78000000-781fffff : PCI Bus 0000:03
78200000-783fffff : PCI Bus 0000:07
7c000000-7cefffff : PCI Bus 0000:02
7c000000-7c1fffff : PCI Bus 0000:03 <- Should be 0000:03:00.0 below this line...
7c200000-7c3fffff : PCI Bus 0000:04 <- Sizes are not same as kernel 4.19's
7c400000-7c5fffff : PCI Bus 0000:05
7c600000-7c7fffff : PCI Bus 0000:06
7c800000-7c9fffff : PCI Bus 0000:07
...
~ # cat /sys/bus/pci/devices/0000:03:00.0/resource
0x0000000000000000 0x0000000000000000 0x0000000000000000 <- ALL ZERO
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
Then, when I try to load the driver kernel module, pci_resource_start(...)
returns 0. It seems the resources are not initialized properly.
I then tried removing the strong definition of pcibios_window_alignment()
so that the kernel falls back to its default and handles resource assignment on its own.
~ # cat /proc/iomem
...
50000000-50ffffff : 50000000.axi-pcie axi-pcie@50000000
60000000-7fffffff : axi-pcie@50000000
60000000-613fffff : PCI Bus 0000:01
60000000-609fffff : PCI Bus 0000:02
60000000-601fffff : PCI Bus 0000:03
60000000-600000ff : 0000:03:00.0 <- What was missing when I rescan, appeared here now.
60200000-603fffff : PCI Bus 0000:04
60400000-605fffff : PCI Bus 0000:05
60600000-607fffff : PCI Bus 0000:06
60800000-609fffff : PCI Bus 0000:07
60a00000-613fffff : PCI Bus 0000:02
60a00000-60bfffff : PCI Bus 0000:03
60c00000-60dfffff : PCI Bus 0000:04
60e00000-60ffffff : PCI Bus 0000:05
61000000-611fffff : PCI Bus 0000:06
61200000-613fffff : PCI Bus 0000:07
...
The driver that causes an "Internal error: Oops" when it fails now loads successfully, but the other drivers fail. :(
Increasing the memory range in the device tree to 1 GB results in
~ # cat /proc/iomem
...
50000000-50ffffff : 50000000.axi-pcie axi-pcie@50000000
60000000-9fffffff : axi-pcie@50000000
60000000-87ffffff : PCI Bus 0000:01
60000000-7fffffff : PCI Bus 0000:02
60000000-67ffffff : PCI Bus 0000:04
68000000-6fffffff : PCI Bus 0000:05
70000000-77ffffff : PCI Bus 0000:06
78000000-781fffff : PCI Bus 0000:03
78200000-783fffff : PCI Bus 0000:07
80000000-809fffff : PCI Bus 0000:02 <- Jumps straight to 0x80000000. Why does it skip 0x7c000000?
80000000-801fffff : PCI Bus 0000:03
80000000-800000ff : 0000:03:00.0
80200000-803fffff : PCI Bus 0000:04
80400000-805fffff : PCI Bus 0000:05
80600000-807fffff : PCI Bus 0000:06
80800000-809fffff : PCI Bus 0000:07
...
~ # cat /sys/bus/pci/devices/0000:03:00.0/resource
0x0000000080000000 0x00000000800000ff 0x0000000000040200
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
0x0000000000000000 0x0000000000000000 0x0000000000000000
...
Then the driver got a read error. I also tried various pci=...
boot arguments, but none of them helped.
Should I now look at DECLARE_PCI_FIXUP_HEADER(...)
or DECLARE_PCI_FIXUP_FINAL(...)
to force the resource assignment?
I just found out that the Linux kernel PCI host drivers have also CHANGED. For example, in pcie-xilinx.c
the probe function changed from
static int xilinx_pcie_probe(struct platform_device *pdev)
{
...
err = pci_scan_root_bus_bridge(bridge);
if (err < 0)
goto error;
bus = bridge->bus;
pci_assign_unassigned_bus_resources(bus);
list_for_each_entry(child, &bus->children, node)
pcie_bus_configure_settings(child);
pci_bus_add_devices(bus);
return 0;
...
}
to
static int xilinx_pcie_probe(struct platform_device *pdev)
{
...
err = pci_host_probe(bridge);
if (err)
xilinx_free_msi_domains(port);
return err;
...
}
And pci_host_probe()
in turn does the following:
...
if (pci_has_flag(PCI_PROBE_ONLY)) {
pci_bus_claim_resources(bus);
} else {
pci_bus_size_bridges(bus);
pci_bus_assign_resources(bus);
list_for_each_entry(child, &bus->children, node)
pcie_bus_configure_settings(child);
}
pci_bus_add_devices(bus);
return 0;
...
There IS a difference between pci_bus_assign_resources(...)
and pci_assign_unassigned_bus_resources(...),
which turned out to be the root cause of the alignment differing from kernel 4.19.
In particular,
void pci_bus_assign_resources(const struct pci_bus *bus)
{
__pci_bus_assign_resources(bus, NULL, NULL);
}
It calls __pci_bus_assign_resources(...)
with realloc_head = NULL,
while pci_assign_unassigned_bus_resources(...)
passes a valid realloc_head = &add_list:
void pci_assign_unassigned_bus_resources(struct pci_bus *bus)
{
struct pci_dev *dev;
/* List of resources that want additional resources */
LIST_HEAD(add_list);
down_read(&pci_bus_sem);
for_each_pci_bridge(dev, bus)
if (pci_has_subordinate(dev))
__pci_bus_size_bridges(dev->subordinate, &add_list);
up_read(&pci_bus_sem);
__pci_bus_assign_resources(bus, &add_list, NULL);
BUG_ON(!list_empty(&add_list));
}
It also hands &add_list to __pci_bus_size_bridges(...) as realloc_head.
Because realloc_head is non-NULL in that case, min_size = 0
is passed to pbus_size_mem(...):
void __pci_bus_size_bridges(struct pci_bus *bus, struct list_head *realloc_head)
{
...
ret = pbus_size_mem(bus, prefmask, prefmask,
prefmask, prefmask,
realloc_head ? 0 : additional_mmio_pref_size,
additional_mmio_pref_size, realloc_head);
...
}
So my solution is to add
pci_add_flags(PCI_PROBE_ONLY);
to my code in kernel/arch/.../my_pcie.c
so that pci_host_probe() skips the new resource assignment, and then to run echo 1 > /sys/bus/pci/rescan
to trigger resource assignment afterwards.
This results in
~ # cat /proc/iomem
...
50000000-50ffffff : 50000000.axi-pcie axi-pcie@50000000
60000000-7fffffff : axi-pcie@50000000
60000000-7bffffff : PCI Bus 0000:01
60000000-7bffffff : PCI Bus 0000:02
60000000-67ffffff : PCI Bus 0000:04 <- The alignment is the same as v4.19
68000000-6fffffff : PCI Bus 0000:05 <-
70000000-77ffffff : PCI Bus 0000:06 <-
78000000-781fffff : PCI Bus 0000:03
78200000-783fffff : PCI Bus 0000:07
7c000000-7c9fffff : PCI Bus 0000:01
7c000000-7c9fffff : PCI Bus 0000:02
7c000000-7c1fffff : PCI Bus 0000:03
7c000000-7c0000ff : 0000:03:00.0 <- Got this resource assignment! GREAT!
7c200000-7c3fffff : PCI Bus 0000:04
7c400000-7c5fffff : PCI Bus 0000:05
7c600000-7c7fffff : PCI Bus 0000:06
7c800000-7c9fffff : PCI Bus 0000:07
...
I hope this helps other people who are struggling with PCIe alignment problems during a kernel upgrade.