I'm writing bare-metal firmware for a virtual RISC-V SoC. The code below communicates with a physical device on the SoC and uses constant data from the my_data array, which sits in ROM. The linker script defines __global_pointer$ as the start of this constant data array. My assumption is that the linker will use the gp register for relaxation to produce code-size-efficient code.
void main(void);
/*
 * Reset entry point.
 *
 * Clears the integer registers, loads sp (x2) from __stack_start$ and
 * gp (x3) from __global_pointer$, calls main(), and restarts from the top
 * if main() ever returns.
 *
 * The whole body is one basic asm statement: GCC only supports basic asm
 * inside a naked function, so neither a C-level call to main() nor extended
 * asm (such as an empty asm with a "memory" clobber) may be relied upon here.
 * Keeping everything in a single statement also guarantees that nothing is
 * emitted between ".option push" and ".option pop".
 */
__attribute__((naked, noreturn)) void _start(void) {
    __asm__ __volatile__ (
        /* Relaxation must be off while gp is loaded; otherwise the linker
           may rewrite the load of gp into a gp-relative (self-referencing)
           form. */
        ".option push\n\t"
        ".option norelax\n\t"
        "add x1, x0, x0\n\t"
        "la x2, __stack_start$\n\t"
        "la x3, __global_pointer$\n\t"
        "add x4, x0, x0\n\t"
        // ... rest of registers
        "add x31, x0, x0\n\t"
        ".option pop\n\t"
        /* Relaxation is enabled again from here on. */
        "call main\n\t"
        "j _start"
    );
}
// Register file of the PHY device: 0x1000 volatile unsigned int registers.
// The aligned(0x8000) attribute is applied to the phy_t typedef.
typedef struct {
volatile unsigned int regs [0x1000];
} phy_t __attribute__ ((aligned (0x8000)));
// Declaration only: the address of __phy$ is supplied by the linker script
// (symbol __phy$ in the .phy output section).
extern phy_t __phy$;
// Constant words that main() writes to the PHY registers.
const unsigned int my_data[3] = {
0x12345678u,
0x23456789u,
0x3456789Au,
};
// Writes my_data[0..2] to PHY registers 0..2, in that order.
// Only the destination (regs[]) is volatile; my_data is a plain const array,
// so the compiler may replace the loads with immediate constants.
void main() {
__phy$.regs[0x0000] = my_data[0];
__phy$.regs[0x0001] = my_data[1];
__phy$.regs[0x0002] = my_data[2];
}
/* Linker script for the firmware: 32-bit little-endian RISC-V ELF output. */
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv", "elf32-littleriscv")
OUTPUT_ARCH(riscv)
/* Address map: PHY register window at 0x0, RAM at 0x10000, ROM at 0x20000. */
MEMORY {
PHY ( W ) : ORIGIN = 0x00000000, LENGTH = 0x00008000
RAM (AWL) : ORIGIN = 0x00010000, LENGTH = 0x00010000
ROM (XR ) : ORIGIN = 0x00020000, LENGTH = 0x00010000
}
SECTIONS {
/* Code, placed in ROM. */
.text : ALIGN(4) {
PROVIDE(__text_start__$ = .);
*(.text .text.*)
PROVIDE(__text_end__$ = .);
} > ROM
/* Read-only data (including small read-only data), placed in ROM after .text. */
.rodata : ALIGN(4) {
PROVIDE(__rodata_start__$ = .);
/* gp is defined at the very start of .rodata, so only the non-negative
   half of the signed 12-bit gp-relative offset range covers this section.
   NOTE(review): a PROVIDE()d __global_pointer$ is reportedly ignored by
   linker relaxation - confirm against the binutils version in use. */
PROVIDE(__global_pointer$ = .);
*(.rodata .rodata.*)
*(.constdata .constdata.*)
*(.srodata .srodata.*)
PROVIDE(__rodata_end__$ = .);
} > ROM
/* Initialised data: runs from RAM, load image stored in ROM (AT>ROM).
   __data_source__$ is the load address, __data_start__$/__data_end__$ the
   run-time bounds. */
.data : ALIGN(4) {
PROVIDE(__data_source__$ = LOADADDR(.data));
PROVIDE(__data_start__$ = .);
*(.data_begin .data_begin.*)
*(.data .data.*)
*(.data_end .data_end.*)
*(.sdata .sdata.*)
PROVIDE(__data_end__$ = .);
} > RAM AT>ROM
/* Stack: spans from the current RAM location up to the end of RAM.
   __stack_start$ (end of RAM) is the value _start loads into sp. */
.stack (NOLOAD) : ALIGN(4) {
PROVIDE(__stack_end$ = .);
. = ORIGIN(RAM) + LENGTH(RAM);
PROVIDE(__stack_start$ = .);
} > RAM
/* No contents; only defines __phy$ at the start of the PHY region. */
.phy (NOLOAD) : ALIGN(16) {
PROVIDE(__phy$ = .);
} > PHY
}
# Target ISA/ABI, size optimisation and the medlow code model.
# "-Xlinker --emit-relocs" is forwarded to the linker so that firmware.elf
# keeps its relocation records (they show up in the -r listing below).
CFLAGS=-march=rv32ic -mabi=ilp32 -Os -mcmodel=medlow -Xlinker --emit-relocs
LDFLAGS=-nostdlib -static -ffreestanding
# Compiles and links firmware.c with firmware.ld in a single gcc invocation,
# then writes a disassembly with relocations (-d -r) and numeric register
# names (-M numeric) to firmware.lst.
all:
riscv-unknown-elf-gcc $(CFLAGS) $(LDFLAGS) -o firmware.elf -x c firmware.c -T firmware.ld
riscv-unknown-elf-objdump -M numeric -d -r firmware.elf > firmware.lst
However, the listing contains the data inlined as lui+addi instruction pairs instead.
The map output shows the my_data array in the expected section, .srodata; putting it in the .sdata section via __attribute__((section(".sdata"))) doesn't help either.
I have tried options -msmall-data-limit=512, -fno-pie, -Xlinker --relax-gp, -Xlinker gpsize=512 on GCC 15.1.0.
Is there a way to force relaxation via the GP register?
00020020 <main>:
20020: 12345737 lui x14,0x12345
20024: 67870713 addi x14,x14,1656 # 12345678 <my_data+0x12325630>
20024: R_RISCV_NONE *ABS*+0x4
20024: R_RISCV_RELAX *ABS*
20028: 00e02023 sw x14,0(x0) # 0 <__phy$>
20028: R_RISCV_GPREL_S __phy$
20028: R_RISCV_RELAX *ABS*
2002c: 23456737 lui x14,0x23456
20030: 00000793 li x15,0
20030: R_RISCV_GPREL_I __phy$
20030: R_RISCV_RELAX *ABS*
20034: 78970713 addi x14,x14,1929 # 23456789 <my_data+0x23436741>
20038: c3d8 sw x14,4(x15)
2003a: 34568737 lui x14,0x34568
2003e: 89a70713 addi x14,x14,-1894 # 3456789a <my_data+0x34547852>
20042: c798 sw x14,8(x15)
20044: 8082 ret
...
.rodata 0x0000000000020048 0xc
[!provide] PROVIDE (__rodata_start__$ = .)
0x0000000000020048 PROVIDE (__global_pointer$ = .)
*(.rodata .rodata.*)
*(.constdata .constdata.*)
*(.srodata .srodata.*)
.srodata 0x0000000000020048 0xc
0x0000000000020048 my_data
*(.sdata .sdata.*)
[!provide] PROVIDE (__rodata_end__$ = .)
As pointed out by Andrey Turkin, using volatile forces the compiler to not inline the constants anymore and produce memory accesses instead. The updated code is as follows:
// Register file of the PHY device: 0x1000 volatile unsigned int registers.
// The aligned(0x8000) attribute is applied to the phy_t typedef.
typedef struct {
volatile unsigned int regs [0x1000];
} phy_t __attribute__ ((aligned (0x8000)));
// Declaration only: the address of __phy$ is supplied by the linker script
// (symbol __phy$ in the .phy output section).
extern phy_t __phy$;
// Constant words that main() copies to the PHY registers.
const unsigned int my_data[3] = {
0x12345678u,
0x23456789u,
0x3456789Au,
};
// Writes my_data[0..2] to PHY registers 0..2, in that order.
// Each source word is read through a volatile-qualified lvalue, so the
// compiler has to emit a real load from my_data instead of folding the value
// into an immediate. The cast keeps `const` so that the qualifier of my_data
// is not discarded along the way.
void main(void) {
    __phy$.regs[0x0000] = ((const volatile unsigned int *) my_data)[0];
    __phy$.regs[0x0001] = ((const volatile unsigned int *) my_data)[1];
    __phy$.regs[0x0002] = ((const volatile unsigned int *) my_data)[2];
}
Another issue is the use of a weak symbol via the `PROVIDE` in the linker script. The fixed linker script is as follows:
/* Linker script for the firmware: 32-bit little-endian RISC-V ELF output. */
OUTPUT_FORMAT("elf32-littleriscv", "elf32-littleriscv", "elf32-littleriscv")
OUTPUT_ARCH(riscv)
/* Address map: PHY register window at 0x0, RAM at 0x10000, ROM at 0x20000. */
MEMORY {
PHY ( W ) : ORIGIN = 0x00000000, LENGTH = 0x00008000
RAM (AWL) : ORIGIN = 0x00010000, LENGTH = 0x00010000
ROM (XR ) : ORIGIN = 0x00020000, LENGTH = 0x00010000
}
SECTIONS {
/* Code, placed in ROM. */
.text : ALIGN(4) {
PROVIDE(__text_start__$ = .);
*(.text .text.*)
PROVIDE(__text_end__$ = .);
} > ROM
/* Read-only data (including small read-only data), placed in ROM after .text. */
.rodata : ALIGN(4) {
PROVIDE(__rodata_start__$ = .);
*(.rodata .rodata.*)
*(.constdata .constdata.*)
*(.srodata .srodata.*)
PROVIDE(__rodata_end__$ = .);
/*
- Do not use PROVIDE(__global_pointer$ = ...) here.
Otherwise, __global_pointer$ will be ignored during relaxation.
- The 0x800 is the magnitude of the most negative offset of the signed
12 bit immediate (range -0x800 .. +0x7FF) encoded in the RISC-V
instructions; biasing gp by it moves the start of .rodata towards the
low end of the gp-relative window instead of its middle.
- The 0x38 is the safety margin used during relaxation
where no relaxation is performed near the gp address.
NOTE(review): this value is the author's observation and is not derived
here - confirm against the binutils version in use.
*/
__global_pointer$ = __rodata_start__$ + 0x800 - 0x38;
} > ROM
/* Initialised data: runs from RAM, load image stored in ROM (AT>ROM).
   __data_source__$ is the load address, __data_start__$/__data_end__$ the
   run-time bounds. */
.data : ALIGN(4) {
PROVIDE(__data_source__$ = LOADADDR(.data));
PROVIDE(__data_start__$ = .);
*(.data_begin .data_begin.*)
*(.data .data.*)
*(.data_end .data_end.*)
*(.sdata .sdata.*)
PROVIDE(__data_end__$ = .);
} > RAM AT>ROM
/* Stack: spans from the current RAM location up to the end of RAM.
   __stack_start$ (end of RAM) is the value _start loads into sp. */
.stack (NOLOAD) : ALIGN(4) {
PROVIDE(__stack_end$ = .);
. = ORIGIN(RAM) + LENGTH(RAM);
PROVIDE(__stack_start$ = .);
} > RAM
/* No contents; only defines __phy$ at the start of the PHY region. */
.phy (NOLOAD) : ALIGN(16) {
PROVIDE(__phy$ = .);
} > PHY
}
This now produces relaxable relocations (object code shown before linking) that allow the linker to use the ZERO and GP registers for addressing:
main:
addi sp,sp,-16
sw ra,12(sp)
sw s0,8(sp)
addi s0,sp,16
lui a5,0x0
R_RISCV_HI20 my_data
R_RISCV_RELAX *ABS*
mv a5,a5
R_RISCV_LO12_I my_data
R_RISCV_RELAX *ABS*
lw a4,0(a5)
lui a5,0x0
R_RISCV_HI20 __phy$
R_RISCV_RELAX *ABS*
sw a4,0(a5) # 0 <main>
R_RISCV_LO12_S __phy$
R_RISCV_RELAX *ABS*
lui a5,0x0
R_RISCV_HI20 my_data+0x4
R_RISCV_RELAX *ABS*+0x4
addi a5,a5,4 # 4 <main+0x4>
R_RISCV_LO12_I my_data+0x4
R_RISCV_RELAX *ABS*+0x4
lw a4,0(a5)
...
sw a4,8(a5)
li a5,0
mv a0,a5
lw ra,12(sp)
lw s0,8(sp)
addi sp,sp,16
ret
The resulting instructions after linking are not fully optimal, but now make use of the GP register as below:
main:
addi sp,sp,-16
sw ra,12(sp)
sw s0,8(sp)
addi s0,sp,16
addi a5,gp,56 # 20038 <my_data>
lw a4,0(a5)
sw a4,0(zero) # 0 <__phy$>
addi a5,gp,60 # 2003c <my_data+0x4>
lw a4,0(a5)
li a5,0
sw a4,4(a5)
addi a5,gp,64 # 20040 <my_data+0x8>
lw a4,0(a5)
li a5,0
sw a4,8(a5)
li a5,0
mv a0,a5
lw ra,12(sp)
lw s0,8(sp)
addi sp,sp,16
ret