Tags: c, gcc, shared-libraries, strcpy, objdump

behavior of string literal in rodata section of shared object


#include <stdio.h>
#include <string.h>

#define CONFDIR "/opt/hp9300/pov64_IworkspaceIdocpv989Ieightonews-r7.6-dev_tests-000001"
#define NEW_CONFDIR "/etc" CONFDIR
// test.c
// Demonstration function: copies the NEW_CONFDIR string literal into a
// local stack buffer and returns 0. The buffer is never read afterwards.
int foo() {
  char confdir[127+1]; // 128-byte buffer; bounds checks intentionally omitted for this example
  strcpy(confdir, NEW_CONFDIR);
  // Bounded-copy variant, kept commented out so the two can be compared:
  // strncpy(confdir, NEW_CONFDIR, sizeof(confdir));
  return 0;
}

// output when strcpy
// objdump -s test.so
// no .rodata section, only .text

Contents of section .text:
 1040 488d3dc1 2f000048 8d05ba2f 00004839  H.=./..H.../..H9
 1050 f8741548 8b056e2f 00004885 c07409ff  .t.H..n/..H..t..
 1060 e00f1f80 00000000 c30f1f80 00000000  ................
 1070 488d3d91 2f000048 8d358a2f 00004829  H.=./..H.5./..H)
 1080 fe4889f0 48c1ee3f 48c1f803 4801c648  .H..H..?H...H..H
 1090 d1fe7414 488b053d 2f000048 85c07408  ..t.H..=/..H..t.
 10a0 ffe0660f 1f440000 c30f1f80 00000000  ..f..D..........
 10b0 f30f1efa 803d4d2f 00000075 2b554883  .....=M/...u+UH.
 10c0 3d1a2f00 00004889 e5740c48 8d3d2e2d  =./...H..t.H.=.-
 10d0 0000e859 ffffffe8 64ffffff c605252f  ...Y....d.....%/
 10e0 0000015d c30f1f00 c30f1f80 00000000  ...]............
 10f0 f30f1efa e977ffff ff554889 e54883ec  .....w...UH..H..
 1100 08488d45 8048ba2f 6574632f 6f707448  .H.E.H./etc/optH
 1110 b92f6870 39333030 2f488910 48894808  ./hp9300/H..H.H.
 1120 48be706f 7636345f 497748bf 6f726b73  H.pov64_IwH.orks
 1130 70616365 48897010 48897818 48ba4964  paceH.p.H.x.H.Id
 1140 6f637076 393848b9 39496569 6768746f  ocpv98H.9Ieighto
 1150 48895020 48894828 48be6e65 77732d72  H.P H.H(H.news-r
 1160 372e48bf 362d6465 765f7465 48897030  7.H.6-dev_teH.p0
 1170 48897838 48ba6576 5f746573 747348b9  H.x8H.ev_testsH.
 1180 2d303030 30303100 4889503b 48894843  -000001.H.P;H.HC
 1190 b8000000 00c9c3                      .......         


// output when strncpy
// objdump -s test.so
// readelf -p ".rodata" test.so
Contents of section .rodata:
 2000 2f657463 2f6f7074 2f687039 3330302f  /etc/opt/hp9300/
 2010 706f7636 345f4977 6f726b73 70616365  pov64_Iworkspace
 2020 49646f63 70763938 39496569 6768746f  Idocpv989Ieighto
 2030 6e657773 2d72372e 362d6465 765f7465  news-r7.6-dev_te
 2040 7374732d 30303030 303100             sts-000001.     

// output when strncpy
// readelf -p ".rodata" test.so
String dump of section '.rodata':
  [     0]  /etc/opt/hp9300/pov64_IworkspaceIdocpv989Ieightonews-r7.6-dev_tests-000001


When I compile it on Linux using gcc -o test.so -fPIC -shared -g and inspect the string constants of the shared library test.so using objdump or strings, I see a sub-string of NEW_CONFDIR, truncated to a multiple of 16 bytes.

But, if I change the code to use strncpy (as shown in the code above) and compile again with the same set of flags, it shows full string with no truncation.

Is this expected? Does the compiler do any optimization with string constants? Or is there an issue with using strcpy and strncpy in this case and their effect on the rodata of shared object files?

I am only concerned about the compilation phase and generation of shared objects not actual execution of code. The tool searches rodata of shared libs for NEW_CONFDIR.


Solution

  • I reproduced this behavior: take the OP's source, compile with gcc -o test.so -fPIC -shared -g, run strings test.so:

    _ITM_deregisterTMCloneTable
    _ITM_registerTMCloneTable
    __cxa_finalize
    u+UH
    /etc/optH
    /hp9300/H
    pov64_IwH
    orkspaceH
    Idocpv98H
    9IeightoH
    news-r7.H
    6-dev_teH
    ev_testsH
    -000001
    ;*3$"
    GCC: (Debian 13.2.0-5) 13.2.0
    ...
    

    So let's figure out what's going on. objdump -d test.so shows:

    00000000000010f9 <foo>:
        10f9:       55                      push   %rbp
        10fa:       48 89 e5                mov    %rsp,%rbp
        10fd:       48 83 ec 08             sub    $0x8,%rsp
    
    ; RAX = &confdir[0]
        1101:       48 8d 45 80             lea    -0x80(%rbp),%rax 
        1105:       48 ba 2f 65 74 63 2f    movabs $0x74706f2f6374652f,%rdx
        110c:       6f 70 74
        110f:       48 b9 2f 68 70 39 33    movabs $0x2f3030333970682f,%rcx
        1116:       30 30 2f
    
    ; copy 8 bytes "/etc/opt" to &confdir[0]
        1119:       48 89 10                mov    %rdx,(%rax)
    
    ; copy "/hp9300/" to &confdir[8]
        111c:       48 89 48 08             mov    %rcx,0x8(%rax)
    
    ; ... etc.
        1120:       48 be 70 6f 76 36 34    movabs $0x77495f3436766f70,%rsi
        1127:       5f 49 77
        112a:       48 bf 6f 72 6b 73 70    movabs $0x65636170736b726f,%rdi
        1131:       61 63 65
        1134:       48 89 70 10             mov    %rsi,0x10(%rax)
        1138:       48 89 78 18             mov    %rdi,0x18(%rax)
        113c:       48 ba 49 64 6f 63 70    movabs $0x38397670636f6449,%rdx
        1143:       76 39 38
        1146:       48 b9 39 49 65 69 67    movabs $0x6f74686769654939,%rcx
        114d:       68 74 6f
        1150:       48 89 50 20             mov    %rdx,0x20(%rax)
        1154:       48 89 48 28             mov    %rcx,0x28(%rax)
        1158:       48 be 6e 65 77 73 2d    movabs $0x2e37722d7377656e,%rsi
        115f:       72 37 2e
        1162:       48 bf 36 2d 64 65 76    movabs $0x65745f7665642d36,%rdi
        1169:       5f 74 65
        116c:       48 89 70 30             mov    %rsi,0x30(%rax)
        1170:       48 89 78 38             mov    %rdi,0x38(%rax)
        1174:       48 ba 65 76 5f 74 65    movabs $0x73747365745f7665,%rdx
        117b:       73 74 73
        117e:       48 b9 2d 30 30 30 30    movabs $0x3130303030302d,%rcx
        1185:       30 31 00
        1188:       48 89 50 3b             mov    %rdx,0x3b(%rax)
        118c:       48 89 48 43             mov    %rcx,0x43(%rax)
        1190:       b8 00 00 00 00          mov    $0x0,%eax
        1195:       c9                      leave
        1196:       c3                      ret
    

    This explains why the string is broken up into 8-byte chunks, but where does the (9-th) H come from?

    Look at the byte following the "string" constant starting at 0x1122. The next instruction, MOVABS ..., %rdi, is encoded as 0x48 0xbf, and 0x48 just happens to "spell" H, and that's why there is an H there.

    Also, 0xbf just happens to not be ASCII, thus breaking the string.

    If the opcode for MOVABS ..., %rcx was 0x48 0x5f instead, your "string" would look like /etc/optH_/hp9300/....

    Related answer about AWAVAUATUSH and similar strings.


    So what happens when you use strncpy ?

    The compiler decides to call the external version of strncpy instead of inlining it:

    objdump -d test.so
    ...
    0000000000001109 <foo>:
        1109:       55                      push   %rbp
        110a:       48 89 e5                mov    %rsp,%rbp
        110d:       48 83 c4 80             add    $0xffffffffffffff80,%rsp
        1111:       48 8d 45 80             lea    -0x80(%rbp),%rax
        1115:       ba 80 00 00 00          mov    $0x80,%edx
        111a:       48 8d 0d df 0e 00 00    lea    0xedf(%rip),%rcx        # 2000 <_fini+0xecc>
        1121:       48 89 ce                mov    %rcx,%rsi
        1124:       48 89 c7                mov    %rax,%rdi
        1127:       e8 04 ff ff ff          call   1030 <strncpy@plt>
        112c:       b8 00 00 00 00          mov    $0x0,%eax
        1131:       c9                      leave
        1132:       c3                      ret
    

    P.S. You can ask GCC to not inline strcpy with -fno-builtin-strcpy, and that will "restore" the full string (because GCC will call external strcpy from libc with that flag).