Tags: linux, debugging, assembly, nasm

Unable to parse a word in nasm


I was given the following function description: it accepts a buffer address and a buffer size as arguments, and reads the next word from stdin into the buffer, skipping leading whitespace. It stops and returns 0 if the word is too big for the specified buffer; otherwise it returns the buffer address. The function should null-terminate the accepted string. But the code I wrote doesn't work when I try to input something like `echo -n "" | ./read_word` or `echo -n "\t " | ./read_word`. The output is:

[3]    40467 done                              echo -n "\t   " | 
       40468 segmentation fault (core dumped)  ./read_word

What can be wrong in my code?

Here it is:

; read_word (asker's version, x86-64 NASM syntax)
; Intended contract, per the task description:
;   In:  rdi = buffer address, rsi = buffer size
;   Out: rax = buffer address on success (rdx = character count),
;        rax = 0 if the word does not fit in the buffer
; Calls read_char (defined elsewhere); this code uses its result in rax/al
; as the next input character.
; Register roles in this listing:
;   r8 = number of characters accepted so far
;   r9 = copy of the original buffer address, used as the return value
; NOTE(review): r8 and r9 are kept live across the calls to read_char without
; being saved; whether read_char preserves them is not visible here -- confirm.
read_word:
    xor r8, r8                 ; character counter = 0
    xor r9, r9                 ; (overwritten by the next instruction)
    mov r9, rdi                ; remember the start of the buffer
    push rdi                   ; keep buffer address and size safe while
    push rsi                   ; leading whitespace is being skipped
    ; Skip leading whitespace: loop until the checker reports a non-whitespace
    ; character (1) or the null terminator (2).
    .whitespace_reader:
        call read_char
        
        mov rdi, rax           ; argument for .whitespace_checker
        push rax               ; keep the character; the stack now holds rdi, rsi, rax
        call .whitespace_checker
        cmp rax, 2
        jz .fail               ; null terminator: leaves with rdi, rsi and rax still pushed
        
        cmp rax, 0
        pop rax                ; restore the character; pop does not change the flags

        jz .whitespace_reader  ; checker returned 0 (whitespace): read another character
    
    pop rsi                    ; restore buffer size
    pop rdi                    ; restore buffer address
         
    ; Store the current character (al), then fetch and classify the next one.
    .word_reader:
        inc r8
        cmp r8, rsi
        jnb .fail              ; if >=, fail; otherwise write to the buffer
        mov byte [rdi], al
        inc rdi                ; rdi now points at the next free byte

        push rdi               ; preserve write pointer and size across read_char
        push rsi
        
        call read_char

        pop rsi
        pop rdi
        push rdi               ; save the write pointer again; rdi carries the checker argument
        mov rdi, rax
        
        push rax               ; keep the character across the checker call
        call .whitespace_checker
        cmp rax, 0
        pop rax                ; restore character and write pointer; flags from cmp survive
        pop rdi
        jz .success            ; whitespace ends the word

        jmp .word_reader       ; results 1 and 2 (null terminator) both continue the loop


    ; returns 0 if the character is whitespace, 1 if it is non-whitespace, and 2 if it is the null terminator
    ; In: rdi = character, Out: rax = 0, 1 or 2
    .whitespace_checker:
        mov rax, rdi
        cmp rax, 0x20          ; space
        jz .ret_0
        cmp rax, 0x9           ; horizontal tab
        jz .ret_0
        cmp rax, 0xA           ; line feed
        jz .ret_0
        cmp rax, 0             ; null terminator
        jz .ret_2
        
        mov rax, 1
        ret 

        .ret_0:
            mov rax, 0
            ret

        .ret_2:
            mov rax, 2
            ret
    
    ; Failure exit: returns 0. Performs no stack cleanup of its own.
    .fail:
        xor rax, rax
        ret
        
    ; Success exit: terminate the string, return buffer address and length.
    .success:
        inc rdi                ; rdi was already advanced past the last stored character above
        mov byte [rdi], 0
        mov rax, r9            ; return the original buffer address
        mov rdx, r8            ; and the number of characters stored
        ret

Solution

  • Both of your examples contain nothing but whitespace followed by a terminating zero. The .whitespace_reader loop skips the whitespace and then stumbles upon the terminating zero, for which it jumps to .fail. The ret instruction there executes with an unbalanced stack: the return address is buried beneath the RDI, RSI, and RAX values that you pushed, so ret pops the saved RAX (the zero character) instead of the return address and jumps to it, which is what causes the segmentation fault. There's no quick fix for this in your present code, as it contains additional problems.

    This is my rewrite, incorporating all of the above:

    ;-----------------------------------------------------------------------
    ; read_word -- read the next whitespace-delimited word from stdin
    ; ABI:   System V AMD64, NASM syntax
    ; In:    rdi = buffer address
    ;        rsi = buffer size in bytes (must also hold the terminating zero)
    ; Out:   success: rax = buffer address, rdx = word length
    ;        failure: rax = 0, rdx = 0
    ;                 (the word plus its terminating zero does not fit)
    ; Notes: Leading spaces, tabs and line feeds are skipped. A word ends at
    ;        the next whitespace character or at a zero byte from read_char.
    ;        If the zero byte arrives before any word character, the result
    ;        is an empty string (success), as long as the buffer has room
    ;        for the terminator.
    ; Calls: read_char -> next character in al (defined elsewhere; assumed to
    ;        preserve the call-preserved registers rbx, r12 and r13)
    ; Regs:  rbx = characters stored so far, r12 = buffer address,
    ;        r13 = buffer size
    ;-----------------------------------------------------------------------
    read_word:
        push rbx                  ; Save call-preserved registers; three pushes
        push r12                  ; also leave rsp 16-byte aligned at each call
        push r13
        xor  ebx, ebx             ; WordLength = 0
        mov  r12, rdi             ; BufferAddress (never advanced)
        mov  r13, rsi             ; BufferSize

    .skip_whitespace:             ; Skip leading whitespace
        call read_char            ; -> AL
        cmp  al, ' '
        je   .skip_whitespace
        cmp  al, 9                ; horizontal tab
        je   .skip_whitespace
        cmp  al, 10               ; line feed
        je   .skip_whitespace
        test al, al
        jnz  .store_char          ; First character of the word
        test r13, r13             ; Input ended before any word: the empty
        jz   .fail                ; string still needs one byte
        jmp  .terminate

    .store_char:
        lea  rdx, [rbx + 1]       ; Length if this character is accepted
        cmp  rdx, r13
        jae  .fail                ; No room left for character + terminator
        mov  [r12 + rbx], al
        mov  rbx, rdx
        call read_char            ; -> AL
        test al, al
        jz   .terminate           ; Zero byte ends the word
        cmp  al, ' '
        je   .terminate
        cmp  al, 9                ; horizontal tab
        je   .terminate
        cmp  al, 10               ; line feed
        jne  .store_char          ; Normal character: keep going

    .terminate:
        mov  byte [r12 + rbx], 0  ; rbx < r13 here, so this stays in bounds
        mov  rax, r12             ; WordAddress to RAX
        mov  rdx, rbx             ; WordLength to RDX
    .done:
        pop  r13                  ; Restore call-preserved registers
        pop  r12
        pop  rbx
        ret

    .fail:
        xor  eax, eax             ; RAX = 0 signals failure
        xor  edx, edx             ; Deterministic length on failure
        jmp  .done