assembly arm thumb

Simple example of Table Branch Byte (TBB) in arm thumb


I am trying to figure out the details of how TBB works in ARM assembly. I'm just trying to get a simple example working, but no matter what I do, my code either goes into an infinite loop or doesn't compile.

@ example_TBB: loads index 1 into r1 and dispatches through BranchTable_Byte
@ with TBB to one of Case1/Case2/Case3; every case then branches to endTBB.
.syntax unified             
.thumb      

@ One byte per case: (case label - Case1) / 2.
@ NOTE(review): the table is 3 bytes long and nothing re-aligns afterwards, so
@ the Thumb code that follows may not start on a halfword boundary - confirm.
BranchTable_Byte:
 .byte 0 @; Case1 offset calculation
 .byte ((Case2-Case1)/2) @; Case2 offset calculation
 .byte ((Case3-Case1)/2) @; Case3 offset calculation

.text
.global example_TBB
.thumb_func
 example_TBB:
 @ r1 = table index (hard-coded to 1 here, i.e. the Case2 entry)
 mov r1, #1

 @ r0 = address of the table, computed PC-relative by ADR
 ADR.W r0, BranchTable_Byte
 TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table

 @ Case1 must directly follow the TBB: the table offsets are measured from it.
 Case1:
    @; an instruction sequence follows
    mov r2, #1
    b endTBB
 Case2:
    @; an instruction sequence follows
    mov r3, #2
    b endTBB
 Case3:
    @; an instruction sequence follows
    @ NOTE(review): r4 is callee-saved under the ARM calling convention;
    @ confirm before calling this routine from compiled code.
    mov r4, #3
    b endTBB

 @ Common exit for all cases
 endTBB:

 bx lr

I believe what should happen is that, since r1 = 1, the TBB instruction should branch to Case2, but I am getting infinite loops and/or compilation errors no matter how long I've played with it.


Solution

  • so.s

    @ Minimal entry point: call example_TBB once, then spin forever.
    @ There is no .thumb directive in this file, so it is assembled as ARM code.
    .globl _start
    _start:
        bl example_TBB
        @ "b ." branches to itself: park here after the call returns
        b .
    

    tbb.s

    @ example_TBB: sets r1 = 1, dispatches through BranchTable_Byte with TBB,
    @ and returns 1, 2 or 3 in r0 depending on which case was selected.
    .syntax unified
    .thumb
    
    @ One byte per case: (case label - Case1) / 2.
    @ The table is 3 bytes long and is not padded, so whatever is emitted right
    @ after it does not start on a halfword boundary.
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
    
    .text
    .global example_TBB
    .thumb_func
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    
     @ r0 = PC-relative address of the table
     ADR.W r0, BranchTable_Byte
     TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table
    
     @ Case1 directly follows the TBB; the table offsets are measured from it.
     Case1:
        @; an instruction sequence follows
        mov r0, #1
        b endTBB
     Case2:
        @; an instruction sequence follows
        mov r0, #2
        b endTBB
     Case3:
        @; an instruction sequence follows
        mov r0, #3
        b endTBB
    
     @ Common exit: r0 holds the value written by the selected case
     endTBB:
    
     bx lr
    

    Link with explicit addresses for .text and .data. The actual addresses don't really matter; this is just to see what the tools are doing:

    arm-none-eabi-ld -Ttext=0x1000 -Tdata=0x2000 so.o tbb.o -o so.elf
    
    00001000 <_start>:
        1000:   fb000000    blx 100a <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <BranchTable_Byte>:
        1008:       svcmi   0x00060300
    
    0000100a <example_TBB>:
        100a:   01f04f06    mvnseq  r4, r6, lsl #30
        100e:   af01        add r7, sp, #4
        1010:   08f2        lsrs    r2, r6, #3
    

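    And there you go, there is a huge problem: the table is at 0x1008 and example_TBB is shown at 0x100a. How can 3 bytes of data from a table fit in two bytes? They can't, so the code that follows the table is misaligned.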

    Your code implies you maybe wanted to do this:

    @ Variant with the branch table moved into the .data section.
    @ example_TBB still sets r1 = 1 and returns 1, 2 or 3 in r0.
    .syntax unified
    .thumb
    .data
    @ One byte per case: (case label - Case1) / 2, now emitted into .data
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
    
    .text
    .global example_TBB
    .thumb_func
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    
     @ NOTE(review): ADR forms a PC-relative address, but the table is now in a
     @ different section from this code - check what address r0 actually gets.
     ADR.W r0, BranchTable_Byte
     TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table
    
     @ Case1 directly follows the TBB; the table offsets are measured from it.
     Case1:
        @; an instruction sequence follows
        mov r0, #1
        b endTBB
     Case2:
        @; an instruction sequence follows
        mov r0, #2
        b endTBB
     Case3:
        @; an instruction sequence follows
        mov r0, #3
        b endTBB
    
     @ Common exit: r0 holds the value written by the selected case
     endTBB:
    
     bx lr
    

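    Wow, that's even worse. ADR computes a PC-relative address (it is not a load of the label's absolute address), so it does not point at a table that now lives in .data: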

    00001000 <_start>:
        1000:   fa000000    blx 1008 <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <example_TBB>:
        1008:   f04f 0101   mov.w   r1, #1
        100c:   f2af 0004   subw    r0, pc, #4
        1010:   e8d0 f001   tbb [r0, r1]
    
    00001014 <Case1>:
        1014:   f04f 0001   mov.w   r0, #1
        1018:   e005        b.n 1026 <endTBB>
    
    0000101a <Case2>:
        101a:   f04f 0002   mov.w   r0, #2
        101e:   e002        b.n 1026 <endTBB>
    
    00001020 <Case3>:
        1020:   f04f 0003   mov.w   r0, #3
        1024:   e7ff        b.n 1026 <endTBB>
    
    00001026 <endTBB>:
        1026:   4770        bx  lr
    
    Disassembly of section .data:
    
    00002000 <__data_start>:
        2000:   Address 0x0000000000002000 is out of bounds.
    

    Your table is 3 bytes deep; make it four for alignment purposes:

    @ Variant with the table padded to four bytes so that the code emitted
    @ after it stays aligned. example_TBB sets r1 = 1 and returns 1, 2 or 3 in r0.
    .syntax unified
    .thumb
    @ One byte per case: (case label - Case1) / 2, plus one padding byte
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
     @ padding only: rounds the table up to 4 bytes, never used as an offset
     @ while the index stays in 0..2
     .byte 0
     
    .text
    .global example_TBB
    .thumb_func
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    
     @ r0 = PC-relative address of the table
     ADR.W r0, BranchTable_Byte
     TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table
    
     @ Case1 directly follows the TBB; the table offsets are measured from it.
     Case1:
        @; an instruction sequence follows
        mov r0, #1
        b endTBB
     Case2:
        @; an instruction sequence follows
        mov r0, #2
        b endTBB
     Case3:
        @; an instruction sequence follows
        mov r0, #3
        b endTBB
    
     @ Common exit: r0 holds the value written by the selected case
     endTBB:
    
     bx lr
    

    gives

    00001000 <_start>:
        1000:   fa000001    blx 100c <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <BranchTable_Byte>:
        1008:   00060300    andeq   r0, r6, r0, lsl #6
    
    0000100c <example_TBB>:
        100c:   f04f 0101   mov.w   r1, #1
        1010:   f2af 000c   subw    r0, pc, #12
        1014:   e8d0 f001   tbb [r0, r1]
    

    Much better: 4 bytes fit in 4 bytes now, which is good. But what's better, if you put data inline with code, is to align the code that follows it, or maybe put the data after the code:

    @ Variant that keeps the 3-byte table and re-aligns before the function
    @ instead of padding the table by hand.
    .syntax unified
    .thumb
    @ One byte per case: (case label - Case1) / 2
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
    
    .text
    @ pad after the 3-byte table so that example_TBB starts aligned
    .align
    .global example_TBB
    .thumb_func
    
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    ...
    

    and that fixes it too:

    00001000 <_start>:
        1000:   fa000001    blx 100c <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <BranchTable_Byte>:
        1008:   00060300    andeq   r0, r6, r0, lsl #6
    
    0000100c <example_TBB>:
        100c:   f04f 0101   mov.w   r1, #1
        1010:   f2af 000c   subw    r0, pc, #12
        1014:   e8d0 f001   tbb [r0, r1]
    

    You probably want your table in .text, which is where you specified it. If you put it in .data, then you have to get it from flash to RAM, assuming this is a microcontroller. But you would need to do things slightly differently:

    @ Variant with the table in .data whose address is loaded with
    @ "ldr r0,=label" instead of ADR. example_TBB sets r1 = 1 and returns
    @ 1, 2 or 3 in r0.
    .syntax unified
    .thumb
    
    .data
    @ One byte per case: (case label - Case1) / 2, emitted into .data
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
    
    .text
    .global example_TBB
    .thumb_func
    
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    
     @ r0 = address of the table, fetched from a literal word that the
     @ assembler places for us (it ends up after the function's bx lr)
     ldr r0,=BranchTable_Byte
     TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table
    
     @ Case1 directly follows the TBB; the table offsets are measured from it.
     Case1:
        @; an instruction sequence follows
        mov r0, #1
        b endTBB
     Case2:
        @; an instruction sequence follows
        mov r0, #2
        b endTBB
     Case3:
        @; an instruction sequence follows
        mov r0, #3
        b endTBB
    
     @ Common exit: r0 holds the value written by the selected case
     endTBB:
    
     bx lr
    
    
    Disassembly of section .text:
    
    00001000 <_start>:
        1000:   fa000000    blx 1008 <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <example_TBB>:
        1008:   f04f 0101   mov.w   r1, #1
        100c:   4806        ldr r0, [pc, #24]   ; (1028 <endTBB+0x4>)
        100e:   e8d0 f001   tbb [r0, r1]
    
    00001012 <Case1>:
        1012:   f04f 0001   mov.w   r0, #1
        1016:   e005        b.n 1024 <endTBB>
    
    00001018 <Case2>:
        1018:   f04f 0002   mov.w   r0, #2
        101c:   e002        b.n 1024 <endTBB>
    
    0000101e <Case3>:
        101e:   f04f 0003   mov.w   r0, #3
        1022:   e7ff        b.n 1024 <endTBB>
    
    00001024 <endTBB>:
        1024:   4770        bx  lr
        1026:   20000000    andcs   r0, r0, r0
        ...
    
    Disassembly of section .data:
    
    00002000 <__data_start>:
        2000:   Address 0x0000000000002000 is out of bounds.
    

    Don't you hate it when they do that?

    @ Variant with the table in .data and an explicit, hand-placed literal word
    @ (btbadd) holding its address. example_TBB sets r1 = 1 and returns
    @ 1, 2 or 3 in r0.
    .syntax unified
    .thumb
    
    .data
    @ One byte per case: (case label - Case1) / 2, emitted into .data
    BranchTable_Byte:
     .byte 0 @; Case1 offset calculation
     .byte ((Case2-Case1)/2) @; Case2 offset calculation
     .byte ((Case3-Case1)/2) @; Case3 offset calculation
    
    .text
    .global example_TBB
    .thumb_func
    
     example_TBB:
     @ r1 = table index (1 selects the Case2 entry)
     mov r1, #1
    
     @ r0 = contents of btbadd, i.e. the address of BranchTable_Byte
     ldr r0,btbadd
     TBB [r0, r1] @; R1 is the index, R0 is the base address of the branch table
    
     @ Case1 directly follows the TBB; the table offsets are measured from it.
     Case1:
        @; an instruction sequence follows
        mov r0, #1
        b endTBB
     Case2:
        @; an instruction sequence follows
        mov r0, #2
        b endTBB
     Case3:
        @; an instruction sequence follows
        mov r0, #3
        b endTBB
    
    @ Data word in the middle of the code: every case above ends in
    @ "b endTBB", so execution never falls through into it.
    .align
    btbadd: .word BranchTable_Byte
    
     @ Common exit: r0 holds the value written by the selected case
     endTBB:
    
     bx lr
    

    and that's better as far as that approach goes:

    Disassembly of section .text:
    
    00001000 <_start>:
        1000:   fa000000    blx 1008 <example_TBB>
        1004:   eafffffe    b   1004 <_start+0x4>
    
    00001008 <example_TBB>:
        1008:   f04f 0101   mov.w   r1, #1
        100c:   4805        ldr r0, [pc, #20]   ; (1024 <btbadd>)
        100e:   e8d0 f001   tbb [r0, r1]
    
    00001012 <Case1>:
        1012:   f04f 0001   mov.w   r0, #1
        1016:   e007        b.n 1028 <endTBB>
    
    00001018 <Case2>:
        1018:   f04f 0002   mov.w   r0, #2
        101c:   e004        b.n 1028 <endTBB>
    
    0000101e <Case3>:
        101e:   f04f 0003   mov.w   r0, #3
        1022:   e001        b.n 1028 <endTBB>
    
    00001024 <btbadd>:
        1024:   00002000    andeq   r2, r0, r0
    
    00001028 <endTBB>:
        1028:   4770        bx  lr
        102a:   46c0        nop         ; (mov r8, r8)
    
    Disassembly of section .data:
    
    00002000 <__data_start>:
        2000:   Address 0x0000000000002000 is out of bounds.
    

    But now you have .data out there; for something like this you don't need the table to be in .data.

    And note that if you are linking this in with compiled code, your compiler likely conforms to the ARM calling convention, which says you can't modify r4 in your function without preserving it. That's why I modified your code to use r0 (which I think you got from me when I ported it to gas for you?)

    I forgot .thumb in so.s. That's fine, as it is not the code of interest; I am not going to repair it above, but will below. You might also want to add some more paranoia to the code, and why not sprinkle it with .aligns...

    so.s

    @ Startup file: a four-word table at _start followed by the reset code,
    @ which calls example_TBB with r0 = 1 and then spins.
    .thumb
    .globl _start
    _start:
        @ presumably the initial stack pointer (first vector-table word) - confirm for your part
        .word 0x20001000
        @ address of the reset code below
        .word reset
        @ two further entries, both parked in the infinite loop
        .word loop
        .word loop
    
    .thumb_func
    loop: b loop
    .thumb_func
    reset:
        @ r0 = case selector passed to example_TBB
        mov r0,#1
        bl example_TBB
        @ "b ." branches to itself: park here so r0 can be inspected
        b .
    

    tbb.s

    @ example_TBB: four-way dispatch on r0 using a TBB byte table.
    @   In:   r0 = selector; only the low two bits are used (and r0,#3)
    @   Out:  r0 = 1, 2, 3 or 4 for Case0..Case3
    @   Uses: r1 (table base address)
    .syntax unified
    .thumb
    
    @ One byte per case: (case label - Case0) / 2. Four entries, so the table
    @ is exactly 4 bytes and covers every value the masked index can take.
    .align
    BranchTable_Byte:
        .byte ((Case0-Case0)/2)
        .byte ((Case1-Case0)/2)
        .byte ((Case2-Case0)/2)
        .byte ((Case3-Case0)/2)
    
    .align
    .global example_TBB
    .thumb_func
    example_TBB:
        @ keep the index within 0..3 so it cannot run past the table
        and r0,#3
        @ r1 = PC-relative address of the table
        adr.w r1, BranchTable_Byte
        tbb [r1, r0]
    
    @ Case0 directly follows the tbb; the table offsets are measured from it.
    .align
    Case0:
        mov r0, #1
        b endTBB
    Case1:
        mov r0, #2
        b endTBB
    Case2:
        mov r0, #3
        b endTBB
    Case3:
        mov r0, #4
        b endTBB
    
    @ Common exit: r0 holds the value written by the selected case
    .align
    endTBB:
        bx lr
    

    gives

    Disassembly of section .text:
    
    08000000 <_start>:
     8000000:   20001000    andcs   r1, r0, r0
     8000004:   08000013    stmdaeq r0, {r0, r1, r4}
     8000008:   08000011    stmdaeq r0, {r0, r4}
     800000c:   08000011    stmdaeq r0, {r0, r4}
    
    08000010 <loop>:
     8000010:   e7fe        b.n 8000010 <loop>
    
    08000012 <reset>:
     8000012:   2001        movs    r0, #1
     8000014:   f000 f804   bl  8000020 <example_TBB>
     8000018:   e7fe        b.n 8000018 <reset+0x6>
        ...
    
    0800001c <BranchTable_Byte>:
     800001c:   09060300    stmdbeq r6, {r8, r9}
    
    08000020 <example_TBB>:
     8000020:   f000 0003   and.w   r0, r0, #3
     8000024:   f2af 010c   subw    r1, pc, #12
     8000028:   e8d1 f000   tbb [r1, r0]
    
    0800002c <Case0>:
     800002c:   f04f 0001   mov.w   r0, #1
     8000030:   e008        b.n 8000044 <endTBB>
    
    08000032 <Case1>:
     8000032:   f04f 0002   mov.w   r0, #2
     8000036:   e005        b.n 8000044 <endTBB>
    
    08000038 <Case2>:
     8000038:   f04f 0003   mov.w   r0, #3
     800003c:   e002        b.n 8000044 <endTBB>
    
    0800003e <Case3>:
     800003e:   f04f 0004   mov.w   r0, #4
     8000042:   e7ff        b.n 8000044 <endTBB>
    
    08000044 <endTBB>:
     8000044:   4770        bx  lr
     8000046:   46c0        nop         ; (mov r8, r8)
    

    And that is a complete program that you can run on your STM32, using openocd to halt and examine the registers when finished to see what r0 is set to. You can also do this:

    @ Single-file version: _start calls example_TBB with r0 = 1 and then spins.
    @ example_TBB:
    @   In:   r0 = selector; only the low two bits are used (and r0,#3)
    @   Out:  r0 = 1, 2, 3 or 4 for Case0..Case3
    @   Uses: r1 (table base address)
    .syntax unified
    .thumb
    .globl _start
    _start:
        @ r0 = case selector passed to example_TBB
        mov r0,#1
        bl example_TBB
        @ "b ." branches to itself: park here so r0 can be inspected
        b .
    
    @ One byte per case: (case label - Case0) / 2. Placed between the two
    @ routines; the "b ." above keeps execution from running into it.
    .align
    BranchTable_Byte:
        .byte ((Case0-Case0)/2)
        .byte ((Case1-Case0)/2)
        .byte ((Case2-Case0)/2)
        .byte ((Case3-Case0)/2)
    
    .align
    .global example_TBB
    .thumb_func
    example_TBB:
        @ keep the index within 0..3 so it cannot run past the table
        and r0,#3
        @ r1 = PC-relative address of the table
        adr.w r1, BranchTable_Byte
        tbb [r1, r0]
    
    @ Case0 directly follows the tbb; the table offsets are measured from it.
    .align
    Case0:
        mov r0, #1
        b endTBB
    Case1:
        mov r0, #2
        b endTBB
    Case2:
        mov r0, #3
        b endTBB
    Case3:
        mov r0, #4
        b endTBB
    
    @ Common exit: r0 holds the value written by the selected case
    .align
    endTBB:
        bx lr
    

    Link for RAM at 0x20000000:

    Disassembly of section .text:
    
    20000000 <_start>:
    20000000:   f04f 0001   mov.w   r0, #1
    20000004:   f000 f804   bl  20000010 <example_TBB>
    20000008:   e7fe        b.n 20000008 <_start+0x8>
    2000000a:   46c0        nop         ; (mov r8, r8)
    
    2000000c <BranchTable_Byte>:
    2000000c:   09060300    stmdbeq r6, {r8, r9}
    
    20000010 <example_TBB>:
    20000010:   f000 0003   and.w   r0, r0, #3
    20000014:   f2af 010c   subw    r1, pc, #12
    20000018:   e8d1 f000   tbb [r1, r0]
    
    2000001c <Case0>:
    2000001c:   f04f 0001   mov.w   r0, #1
    20000020:   e008        b.n 20000034 <endTBB>
    
    20000022 <Case1>:
    20000022:   f04f 0002   mov.w   r0, #2
    20000026:   e005        b.n 20000034 <endTBB>
    
    20000028 <Case2>:
    20000028:   f04f 0003   mov.w   r0, #3
    2000002c:   e002        b.n 20000034 <endTBB>
    
    2000002e <Case3>:
    2000002e:   f04f 0004   mov.w   r0, #4
    20000032:   e7ff        b.n 20000034 <endTBB>
    
    20000034 <endTBB>:
    20000034:   4770        bx  lr
    20000036:   46c0        nop         
    

    Then you can load it, run it, halt and examine r0 from openocd talking to your STM32...