cassemblycortex-mthumblpc

ARM-C Inter-working


I am trying out a simple program for ARM-C inter-working. Here is the code:

 #include<stdio.h>
#include<stdlib.h>

int Double(int a);
extern int Start(void);
/* Entry point: calls Start() (declared extern above, implemented in the assembly file) and prints the value it returns. */
int main(){

int result=0;                 /* receives the value returned by Start() */
printf("in C main\n");
result=Start();               /* control passes to the assembly routine here */
printf("result=%d\n",result);
return 0;
}

/*
 * Prints its argument and returns twice its value.
 * The assembly routine Start reaches this function with `bl Double`.
 */
int Double(int a)
{
  printf("inside double func_argument_value=%d\n",a);

  return (a*2);
}

The assembly file goes as-

@ Start: assembly routine that C calls as `int Start(void)`.
@ It loads 42 into r0, calls the C function Double, and returns to its caller.
.syntax unified
    .cpu cortex-m3
    .thumb

    .align
    .global Start
    .global Double
    .thumb_func        @ marks the next label (Start) as a Thumb function



Start:
      mov r10,lr       @ keep the return address in r10 across the call below
                       @ NOTE(review): r10 is overwritten here without its old value being saved
      mov r0,#42       @ argument for Double
      bl Double        @ bl replaces lr with the address of the following instruction
      mov lr,r10       @ put the saved return address back into lr
      mov r2,r0        @ copy Double's result into r2 (r0 still holds it as well)
      mov pc,lr        @ return to the caller

During debugging on an LPC1769 (Embedded Artists board), I get a HardFault on the statement "result=Start()". I am trying to do ARM-C internetworking here. While the above statement (result=Start()) is executing, the lr value is 0x0000029F, which is where the faulting instruction is, and the pc value is 0x0000029E. This is how I got the address of the faulting instruction into r1:

/* Loads the word stored 24 bytes above the main stack pointer into r1 (used here to locate the faulting instruction). */
__asm("mrs r0,MSP\n"          /* r0 = main stack pointer */
"isb\n"
"ldr r1,[r0,#24]\n");         /* r1 = word at [MSP + 24]; presumably the stacked PC of the exception frame -- confirm */

Can anybody please explain where I am going wrong? Any solution is appreciated. Thank you in advance.

I am a beginner in cortex-m3 & am using the NXP LPCXpresso IDE powered by Code_Red. Here is the disassembly of my code.

      IntDefaultHandler:
00000269:   push {r7} 
0000026b:   add r7, sp, #0
0000026d:   b.n 0x26c <IntDefaultHandler+4>
0000026f:   nop 
00000271:   mov r3, lr
00000273:   mov.w r0, #42 ; 0x2a
00000277:   bl 0x2c0 <Double>
0000027b:   mov lr, r3
0000027d:   mov r2, r0
0000027f:   mov pc, lr



main:
00000281:   push {r7, lr}
00000283:   sub sp, #8
00000285:   add r7, sp, #0
00000287:   mov.w r3, #0
0000028b:   str r3, [r7, #4]
0000028d:   movw r3, #11212 ; 0x2bcc
00000291:   movt r3, #0
00000295:   mov r0, r3
00000297:   bl 0xd64 <printf>
0000029b:   bl 0x270 <Start>
0000029f:   mov r3, r0
000002a1:   str r3, [r7, #4]
000002a3:   movw r3, #11224 ; 0x2bd8
000002a7:   movt r3, #0
000002ab:   mov r0, r3
000002ad:   ldr r1, [r7, #4]
000002af:   bl 0xd64 <printf>
000002b3:   mov.w r3, #0
000002b7:   mov r0, r3
000002b9:   add.w r7, r7, #8
000002bd:   mov sp, r7
000002bf:   pop {r7, pc}

Double:
000002c0:   push {r7, lr}
000002c2:   sub sp, #8
000002c4:   add r7, sp, #0
000002c6:   str r0, [r7, #4]
000002c8:   movw r3, #11236 ; 0x2be4
000002cc:   movt r3, #0
000002d0:   mov r0, r3
000002d2:   ldr r1, [r7, #4]
000002d4:   bl 0xd64 <printf>
000002d8:   ldr r3, [r7, #4]
000002da:   mov.w r3, r3, lsl #1
000002de:   mov r0, r3
000002e0:   add.w r7, r7, #8
000002e4:   mov sp, r7
000002e6:   pop {r7, pc}

As per your advice Dwelch, I have changed the r10 to r3.


Solution

  • I assume you mean interworking not internetworking? The LPC1769 is a cortex-m3 which is thumb/thumb2 only so it doesnt support arm instructions so there is no interworking available for that platform. Nevertheless, playing with the compiler to see what goes on:

    Get the compiler to do it for you first, then try it yourself in asm...

    start.s

    @ Thumb entry stub: loads the address of hello() and branches to it with bx.
    .thumb
    .globl _start
    _start:
        ldr r0,=hello      @ r0 = address of hello, fetched from a literal word placed after the code
        mov lr,pc          @ hand-made return address: lr = pc
        bx r0              @ branch to hello; bx selects ARM or Thumb from the low bit of r0
    hang : b hang          @ spin forever
    

    hello.c

    /* two() lives in two.c. */
    extern unsigned int two ( unsigned int );
    /* Returns two(h) + 7. */
    unsigned int hello ( unsigned int h )
    {
        return(two(h)+7);
    }
    

    two.c

    /* Returns t + 5. The Makefile compiles this file with -mthumb. */
    unsigned int two ( unsigned int t )
    {
        return(t+5);
    }
    

    Makefile

    # Builds hello.elf from the three sources (linked at 0x1000) and writes its disassembly to hello.list.
    # start.s and two.c are built with -mthumb; hello.c is built without it.
    hello.list : start.s hello.c two.c
        arm-none-eabi-as -mthumb start.s -o start.o
        arm-none-eabi-gcc -c -O2 hello.c -o hello.o
        arm-none-eabi-gcc -c -O2 -mthumb two.c -o two.o
        arm-none-eabi-ld -Ttext=0x1000 start.o hello.o two.o -o hello.elf
        arm-none-eabi-objdump -D hello.elf > hello.list
    
    # Removes the object files, the ELF image and the listing.
    clean :
        rm -f *.o
        rm -f *.elf
        rm -f *.list
    

    produces hello.list

    Disassembly of section .text:
    
    00001000 <_start>:
        1000:   4801        ldr r0, [pc, #4]    ; (1008 <hang+0x2>)
        1002:   46fe        mov lr, pc
        1004:   4700        bx  r0
    
    00001006 <hang>:
        1006:   e7fe        b.n 1006 <hang>
        1008:   0000100c    andeq   r1, r0, ip
    
    0000100c <hello>:
        100c:   e92d4008    push    {r3, lr}
        1010:   eb000004    bl  1028 <__two_from_arm>
        1014:   e8bd4008    pop {r3, lr}
        1018:   e2800007    add r0, r0, #7
        101c:   e12fff1e    bx  lr
    
    00001020 <two>:
        1020:   3005        adds    r0, #5
        1022:   4770        bx  lr
        1024:   0000        movs    r0, r0
        ...
    
    00001028 <__two_from_arm>:
        1028:   e59fc000    ldr ip, [pc]    ; 1030 <__two_from_arm+0x8>
        102c:   e12fff1c    bx  ip
        1030:   00001021    andeq   r1, r0, r1, lsr #32
        1034:   00000000    andeq   r0, r0, r0
    

    hello.o disassembled by itself:

    00000000 <hello>:
       0:   e92d4008    push    {r3, lr}
       4:   ebfffffe    bl  0 <two>
       8:   e8bd4008    pop {r3, lr}
       c:   e2800007    add r0, r0, #7
      10:   e12fff1e    bx  lr
    

    The compiler uses bl, assuming/hoping it will be calling arm from arm. But it is not (two() was built as thumb), so the linker put a trampoline in there.

    0000100c <hello>:
        100c:   e92d4008    push    {r3, lr}
        1010:   eb000004    bl  1028 <__two_from_arm>
        1014:   e8bd4008    pop {r3, lr}
        1018:   e2800007    add r0, r0, #7
        101c:   e12fff1e    bx  lr
    
    
    00001028 <__two_from_arm>:
        1028:   e59fc000    ldr ip, [pc]    ; 1030 <__two_from_arm+0x8>
        102c:   e12fff1c    bx  ip
        1030:   00001021    andeq   r1, r0, r1, lsr #32
        1034:   00000000    andeq   r0, r0, r0
    

    the bl to __two_from_arm is an arm mode to arm mode branch link. the address of the destination function (two) with the lsbit set, which tells bx to switch to thumb mode, is loaded into the disposable register ip (r12?) then the bx ip happens switching modes. the branch link had setup the return address in lr, which was an arm mode address no doubt (lsbit zero).

    00001020 <two>:
        1020:   3005        adds    r0, #5
        1022:   4770        bx  lr
        1024:   0000        movs    r0, r0
    

    The two() function does its thing and returns. Note that you have to use bx lr, not mov pc,lr, when interworking. Basically, mov pc,lr is an okay habit only if you are running an ARMv4 without the T or an ARMv5 without the T. On anything ARMv4T or newer (ARMv5T or newer), use bx lr to return from a function unless you have a special reason not to. (Avoid using pop {pc} as well, for the same reason, unless you really need to save that instruction and are not interworking.) Now, being on a cortex-m3, which is thumb+thumb2 only, you can't interwork, so you can use mov pc,lr and pop {pc}; but the code is not portable, and it is not a good habit, as that habit will bite you when you switch back to arm programming.

    So since hello was in arm mode when it used bl which is what set the link register, the bx in two_from_arm does not touch the link register, so when two() returns with a bx lr it is returning to arm mode after the bl __two_from_arm line in the hello() function.

    Also note the extra 0x0000 after the thumb function, this was to align the program on a word boundary so that the following arm code was aligned...

    to see how the compiler does thumb to arm change two as follows

    /* Declaration only; three() is defined in hello.c for this experiment. */
    unsigned int three ( unsigned int );
    /* Returns three(t) + 5. */
    unsigned int two ( unsigned int t )
    {
        return(three(t)+5);
    }
    

    and put that function in hello.c

    extern unsigned int two ( unsigned int );
    /* Returns two(h) + 7. */
    unsigned int hello ( unsigned int h )
    {
        return(two(h)+7);
    }
    
    /* Returns t + 3. Placed in hello.c, so it is built the same way as hello(). */
    unsigned int three ( unsigned int t )
    {
        return(t+3);
    }
    

    and now we get another trampoline

    00001028 <two>:
        1028:   b508        push    {r3, lr}
        102a:   f000 f80b   bl  1044 <__three_from_thumb>
        102e:   3005        adds    r0, #5
        1030:   bc08        pop {r3}
        1032:   bc02        pop {r1}
        1034:   4708        bx  r1
        1036:   46c0        nop         ; (mov r8, r8)
    ...
    00001044 <__three_from_thumb>:
        1044:   4778        bx  pc
        1046:   46c0        nop         ; (mov r8, r8)
        1048:   eafffff4    b   1020 <three>
        104c:   00000000    andeq   r0, r0, r0
    

    Now this is a very cool trampoline. the bl to three_from_thumb is in thumb mode and the link register is set to return to the two() function with the lsbit set no doubt to indicate to return to thumb mode.

    The trampoline starts with a bx pc, pc is set to two instructions ahead and the pc internally always has the lsbit clear so a bx pc will always take you to arm mode if not already in arm mode, and in either mode two instructions ahead. Two instructions ahead of the bx pc is an arm instruction that branches (not branch link!) to the three function, completing the trampoline.

    Notice how I wrote the call to hello() in the first place

    @ Same call sequence as in start.s, repeated for discussion.
    _start:
            ldr r0,=hello      @ r0 = address of hello
            mov lr,pc          @ hand-made return address: lr = pc
            bx r0              @ branch to hello; bx selects ARM or Thumb from the low bit of r0
        hang : b hang          @ spin forever
    

    that actually wont work will it? It will get you from arm to thumb but not from thumb to arm. I will leave that as an exercise for the reader.

    If you change start.s to this

    @ Thumb entry stub that calls hello() with a plain bl.
    .thumb
    
    .globl _start
    _start:
        bl hello           @ call hello; bl sets lr to the return address
    hang : b hang          @ spin forever once hello returns
    

    the linker takes care of us:

    00001000 <_start>:
        1000:   f000 f820   bl  1044 <__hello_from_thumb>
    
    00001004 <hang>:
        1004:   e7fe        b.n 1004 <hang>
        ...
    
    00001044 <__hello_from_thumb>:
        1044:   4778        bx  pc
        1046:   46c0        nop         ; (mov r8, r8)
        1048:   eaffffee    b   1008 <hello>
    

    I would and do always disassemble programs like these to make sure the compiler and linker resolved these issues. Also note that for example __hello_from_thumb can be used from any thumb function, if I call hello from several places, some arm, some thumb, and hello was compiled for arm, then the arm calls would call hello directly (if they can reach it) and all the thumb calls would share the same hello_from_thumb (if they can reach it).

    The compiler in these examples was assuming code that stays in the same mode (simple branch link) and the linker added the interworking code...

    If you really meant inter-networking and not interworking, then please describe what that is and I will delete this answer.

    EDIT:

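    You were using a register to preserve lr during the call to Double. That will not work as written: r0-r3 and r12 may be overwritten by the function you call, and r4-r11 belong to your caller, so you would have to save their old contents somewhere first anyway. You need to use memory, and the easiest is the stack. See how the compiler does it: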

    00001008 <hello>:
        1008:   e92d4008    push    {r3, lr}
        100c:   eb000009    bl  1038 <__two_from_arm>
        1010:   e8bd4008    pop {r3, lr}
        1014:   e2800007    add r0, r0, #7
        1018:   e12fff1e    bx  lr
    

    r3 is pushed likely to align the stack on a 64 bit boundary (makes it faster). the thing to notice is the link register is preserved on the stack, but the pop does not pop to pc because this is not an ARMv4 build, so a bx is needed to return from the function. Because this is arm mode we can pop to lr and simply bx lr.

    For thumb you can only push r0-r7 and lr directly, and pop r0-r7 and pc directly. You don't want to pop to pc, because that only works if you are staying in the same mode (thumb or arm). This is fine for a cortex-m, or fine if you know what all of your callers are, but in general it is bad. So:

    00001024 <two>:
        1024:   b508        push    {r3, lr}
        1026:   f000 f811   bl  104c <__three_from_thumb>
        102a:   3005        adds    r0, #5
        102c:   bc08        pop {r3}
        102e:   bc02        pop {r1}
        1030:   4708        bx  r1
    

    Same deal: r3 is used as a dummy register to keep the stack aligned for performance (I used the default build for gcc 4.8.0, which is likely for a platform with a 64 bit axi bus; specifying the architecture might remove that extra register). Because we cannot pop to pc here, there are two pops: one to get rid of the dummy value on the stack, and the other to put the return address in a register so that they can bx to it to return. (I assume the pops are separate because r3 was chosen as the dummy, so r1 and r3 would be out of order in a single pop; they could have chosen r2 and saved an instruction.)

    Your Start function does not conform to the ABI and as a result when you mix it in with such large libraries as a printf call, no doubt you will crash. If you didnt it was dumb luck. Your assembly listing of main shows that neither r4 nor r10 were used and assuming main() is not called other than the bootstrap, then that is why you got away with either r4 or r10.

    If this really is an LPC1769 then this whole discussion is irrelevant, as it does not support ARM and does not support interworking (interworking = mixing of ARM mode code and thumb mode code). Your problem was unrelated to interworking; you are not interworking (note the pop {pc} at the end of the functions). Your problem was likely related to your assembly code.

    EDIT2:

    Changing the makefile to specify the cortex-m

    00001008 <hello>:
        1008:   b508        push    {r3, lr}
        100a:   f000 f805   bl  1018 <two>
        100e:   3007        adds    r0, #7
        1010:   bd08        pop {r3, pc}
        1012:   46c0        nop         ; (mov r8, r8)
    
    00001014 <three>:
        1014:   3003        adds    r0, #3
        1016:   4770        bx  lr
    
    00001018 <two>:
        1018:   b508        push    {r3, lr}
        101a:   f7ff fffb   bl  1014 <three>
        101e:   3005        adds    r0, #5
        1020:   bd08        pop {r3, pc}
        1022:   46c0        nop         ; (mov r8, r8)
    

    first and foremost it is all thumb since there is no arm mode on a cortex-m, second the bx is not needed for function returns (Because there are no arm/thumb mode changes). So pop {pc} will work.

    it is curious that the dummy register is still used on a push, I tried an arm7tdmi/armv4t build and it still did that, so there is some other flag to use to get rid of that behavior.

    If your desire was to learn how to make an assembly function that you can call from C, you should have just done that. Make a C function that somewhat resembles the framework of the function you want to create in asm:

    extern unsigned int Double ( unsigned int );
    /* C model of the assembly routine: calls Double(42) and returns its result. */
    unsigned int Start ( void )
    {
        return(Double(42));
    }
    

    assemble then disassemble

    00000000 <Start>:
       0:   b508        push    {r3, lr}
       2:   202a        movs    r0, #42 ; 0x2a
       4:   f7ff fffe   bl  0 <Double>
       8:   bd08        pop {r3, pc}
       a:   46c0        nop         ; (mov r8, r8)
    

    and start with that as your assembly function.

    @-----------------------------------------------------------------------
    @ unsigned int Start(void)
    @ Calls the C function Double(42) and returns whatever Double returns.
    @ Target: Cortex-M3 (Thumb only), ARM procedure call standard (AAPCS).
    @ In:     nothing
    @ Out:    r0 = Double(42)
    @ Clobb:  r0-r3, r12, lr, flags (the registers a called function may change)
    @ Stack:  8 bytes (lr plus one padding word)
    @-----------------------------------------------------------------------
    .syntax unified
    .cpu cortex-m3
    .thumb
    .globl Start
    .type Start, %function
    .thumb_func
    Start:
       push {r3, lr}      @ save the return address; r3 is only padding so sp stays 8-byte aligned at the call
       movs r0, #42       @ argument for Double
       bl   Double        @ result comes back in r0
       pop  {r3, pc}      @ drop the padding word and return (no mode switch is needed on a Thumb-only core)
    

    That, or read the arm abi for gcc and understand what registers you can and cant use without saving them on the stack, what registers are used for passing and returning parameters.