cassemblyx86variable-length-arrayautomatic-storage

stack space for a vector that its size is given at runtime? (C code)


Supose this C code:

int main(){
    int n;
    scanf("%d\n", &n);

    int a[n];
    int i;
    
    for (i = 0; i<n; i++){
        a[i] = 1;
    }

}

We have a vector which is in the stack space, but we don't know the size of the vector until the execution time (until the user gives a value to variable n). So my question is: when and how space is reserved for that vector int the stack section?

Until now I had understood that the stack space was reserved at compile time and the heap space at runtime (with functions like malloc). But we can't know the size of this vector until runtime.

I have thought that what could be done is to subtract the value of n at the moment of knowing it from the stack pointer and thus you enlarge the stack of that function so that the vector fits (this substraction that I mentioned would be seen only in the assembled code).

But I have been doing some testing watching the /proc/[pid]/maps content. And the stack space of the process is not changing, so what I thought (in assembly code an instruction that substracts n*sizeof(int) to the top of the stack) is not being done. I've watched the content of /proc/[pid]/maps in the very beggining of the main function and on the very end.

If I assembly this code for x86 (gcc -m32 -o test.c) y get the following assembly code (in case you need it):

.file   "test.c"
    .text
    .section    .rodata
.LC0:
    .string "%d\n"
    .text
    .globl  main
    .type   main, @function
main:
.LFB0:
    .cfi_startproc
    leal    4(%esp), %ecx
    .cfi_def_cfa 1, 0
    andl    $-16, %esp
    pushl   -4(%ecx)
    pushl   %ebp
    .cfi_escape 0x10,0x5,0x2,0x75,0
    movl    %esp, %ebp
    pushl   %esi
    pushl   %ebx
    pushl   %ecx
    .cfi_escape 0xf,0x3,0x75,0x74,0x6
    .cfi_escape 0x10,0x6,0x2,0x75,0x7c
    .cfi_escape 0x10,0x3,0x2,0x75,0x78
    subl    $44, %esp
    call    __x86.get_pc_thunk.ax
    addl    $_GLOBAL_OFFSET_TABLE_, %eax
    movl    %gs:20, %ecx
    movl    %ecx, -28(%ebp)
    xorl    %ecx, %ecx
    movl    %esp, %edx
    movl    %edx, %esi
    subl    $8, %esp
    leal    -44(%ebp), %edx
    pushl   %edx
    leal    .LC0@GOTOFF(%eax), %edx
    pushl   %edx
    movl    %eax, %ebx
    call    __isoc99_scanf@PLT
    addl    $16, %esp
    movl    -44(%ebp), %eax
    leal    -1(%eax), %edx
    movl    %edx, -36(%ebp)
    sall    $2, %eax
    leal    3(%eax), %edx
    movl    $16, %eax
    subl    $1, %eax
    addl    %edx, %eax
    movl    $16, %ebx
    movl    $0, %edx
    divl    %ebx
    imull   $16, %eax, %eax
    subl    %eax, %esp
    movl    %esp, %eax
    addl    $3, %eax
    shrl    $2, %eax
    sall    $2, %eax
    movl    %eax, -32(%ebp)
    movl    $0, -40(%ebp)
    jmp .L2
.L3:
    movl    -32(%ebp), %eax
    movl    -40(%ebp), %edx
    movl    $1, (%eax,%edx,4)
    addl    $1, -40(%ebp)
.L2:
    movl    -44(%ebp), %eax
    cmpl    %eax, -40(%ebp)
    jl  .L3
    movl    %esi, %esp
    movl    $0, %eax
    movl    -28(%ebp), %ecx
    xorl    %gs:20, %ecx
    je  .L5
    call    __stack_chk_fail_local
.L5:
    leal    -12(%ebp), %esp
    popl    %ecx
    .cfi_restore 1
    .cfi_def_cfa 1, 0
    popl    %ebx
    .cfi_restore 3
    popl    %esi
    .cfi_restore 6
    popl    %ebp
    .cfi_restore 5
    leal    -4(%ecx), %esp
    .cfi_def_cfa 4, 4
    ret
    .cfi_endproc
.LFE0:
    .size   main, .-main
    .section    .text.__x86.get_pc_thunk.ax,"axG",@progbits,__x86.get_pc_thunk.ax,comdat
    .globl  __x86.get_pc_thunk.ax
    .hidden __x86.get_pc_thunk.ax
    .type   __x86.get_pc_thunk.ax, @function
__x86.get_pc_thunk.ax:
.LFB1:
    .cfi_startproc
    movl    (%esp), %eax
    ret
    .cfi_endproc
.LFE1:
    .hidden __stack_chk_fail_local
    .ident  "GCC: (Ubuntu 7.5.0-3ubuntu1~18.04) 7.5.0"
    .section    .note.GNU-stack,"",@progbits

Solution

  • First of all, your code is badly broken: n isn't set until after it's used to set the size for int vector[n];. Changing n after that does not change the array dimension. Variable-length arrays are a C99 feature, and C99 removes the need for declarations to come before any other statements in a block, making it possible for you to scanf into n before the int vector[n]; statement reserves space on the stack for an array of that size.

    Until now I had understood that the stack space was reserved at compile time and the heap space at runtime

    The total stack region is reserved at program startup. Depending on the OS, the amount of space reserved for stack growth is chosen by OS settings, not metadata in the executable. (e.g. in Linux, by the ulimit -s setting for the initial thread's stack, and pthreads chooses how much space to allocate for each thread stack.)

    The layout of a stack frame is fixed at compile time (position of local variables relative to each other), but the actual allocation happens every time the function runs. That's how functions can be recursive and re-entrant! That's also what makes the stack a stack: make space at the end for the current function, release it right before returning. (Variable-length arrays and alloca have runtime-variable size so a compiler would typically put them below other locals.)

    Only static storage is truly reserved / allocated at compile time. (Global and static variables.)

    (ISO C doesn't require an actual stack, just the LIFO semantics of automatic-storage variable lifetimes. A few implementations on some ISAs basically dynamically allocate space for stack frames, like with malloc, instead of using a stack.)

    That rules out statically allocating space at compile time for local variables. On most C implementations, they're on the stack with x86-64 sub rsp, 24 or whatever. Of course the layout of the locals relative to each other is fixed at compile time, inside the large allocation, so compilers don't need to make code that stores pointers to objects, they just emit instructions that use addressing modes like [rsp + 4].

    So my question is: when and how space is reserved for that vector int the stack section?

    Logically in the C abstract machine: when the int vector[n] statement is reached, in this execution of the function. By contrast, fixed-size objects exist at the top of the enclosing scope.

    Thus your example is badly broken. You leave n uninitialized until after the VLA is allocated!! Compile your code with warnings enabled to catch problems like this. scanf should be before int vector[n]. (Also, don't call plain arrays "vector", that looks wrong to people who know C++.)

    But in this case, the C and x86 rules that mention that local variables should be placed in the order of their declaration would not be respected.

    There is no such rule. In ISO C it's Undefined Behaviour to even write vector < &n and compare addresses of separate objects. (C++ allows this with std::less; C doesn't have an equivalent Does C have an equivalent of std::less from C++?).

    A C compiler is allowed to lay out its stack frame however it chooses, e.g. grouping small objects together to avoid wasting space on padding for alignment of the larger more-aligned objects.

    x86 asm doesn't have variable declarations at all. It's up to you as a programmer (or the C compiler) to write instructions that move the stack pointer, and use memory addressing modes to access the memory you want to access. Often you'll do this in a way that implements the high-level concept of a "variable".

    For example, lets make a version of your function that takes n as a function arg, instead of bothering with scanf.

    #include <stdio.h>
    
    void use_mem(void*);   // compiler can't optimize away calls to this unknown function
    
    void foo(int size) {
        int n = size;  // uninitialized was UB
        int array[n];
        int i;
        i = 5;     // optimizes away, i is kept in a register
    
        //scanf("%d\n", &n);  // read some different size later???  makes no sense
        for (i = 0; i<n; i++){
            array[i] = 1;
        }
        use_mem(array);  // make the stores not be dead
    }
    

    On Godbolt with GCC10.1 -O2 -Wall, for x86-64 System V:

    foo(int):
            push    rbp
            movsx   rax, edi             # sign-extend n
            lea     rax, [15+rax*4]      # round size up
            and     rax, -16             # to a multiple of 16, to main stack alignment
            mov     rbp, rsp            # finish setting up a frame pointer
            sub     rsp, rax             # allocate space for array[]
            mov     r8, rsp              # keep a pointer to it
            test    edi, edi             # if ( n==0 ) skip the loop
            jle     .L2
            mov     edi, edi             # zero-extend n
            mov     rax, r8              # int *p = array
            lea     rdx, [r8+rdi*4]      # endp = &array[(unsigned)n]
    .L3:                                 # do{
            mov     DWORD PTR [rax], 1     # *p = 1
            add     rax, 4                 # pointer increment
            cmp     rax, rdx
            jne     .L3                  # }while(p != endp)
    .L2:
            mov     rdi, r8             # pass a pointer to the VLA
            call    use_mem(void*)
            leave                       # tear down frame pointer / stack frame
            ret
    

    Note that when call use_mem runs, the array[n] space is above the stack pointer, i.e. "allocated".

    If use_mem called back into this function, another instance of the VLA with its own size would be allocated on the stack.

    The leave instruction is just mov rsp, rbp / pop rbp, so it sets the stack pointer to point above the allocated space, deallocating it.