Tags: windows, assembly, x86, nasm, win64

Trying to read console input from x64 assembler using pure Win64 APIs (No C runtime)


I'm just learning x64 assembler and I've just encountered a problem I cannot quite explain. From how Kernel32.dll's ReadFile works from C code, I was expecting it to stop at the console and wait for me to input a full line before returning to the caller, something that surprisingly didn't work at all for me. The ReadFile procedure seems to return a zero length string no matter what is being pressed on the keyboard, or for that matter what is passed into it from a pipe on the command shell.

;%USERPROFILE%\nasm\learning\stdio.asm
;
;Basic usage of the standard input/output/error channels.
;
;nasm -f win64 stdio.asm
;golink /console /ni /entry main stdio.obj kernel32.dll

%include "\inc\nasmx.inc"
%include "\inc\win32\windows.inc"
%include "\inc\win32\kernel32.inc"

%ifidn __BITS__, 0x40
;// assert: set call stack for procedure prolog to max
;// invoke param bytes for 64-bit assembly mode
DEFAULT REL
NASMX_PRAGMA CALLSTACK, 0x30
%endif

entry toplevel
;
; Initialised data: message strings, each followed by an assemble-time
; length constant computed as "current position minus start of string".
section .data
errmsg      db      "No errors to report!",0xd,0xa
errmsglen   equ     $-errmsg
query       db      "What is your name?",0xd,0xa
querylen    equ     $-query
greet       db      "Welcome, "
greetlen    equ     $-greet
crlf        db      0xd,0xa
crlflen     equ     $-crlf
; NOTE(review): db stores the byte 0xff in memory, so the symbol bNamelim
; is the ADDRESS of that byte, not the value 0xff. The other limits and
; lengths in this section are equ constants.
bNamelim    db      0xff
; All bits set; passed as the timeout argument to WaitForSingleObject.
minusone    equ     0xffffffffffffffff
; Used wherever a null/zero argument is passed.
zero        equ     0x0

; Uninitialised data: handles, API output slots and the input buffer.
section .bss
    ; Values returned by GetStdHandle for input, output and error.
    hStdInput   resq    0x1
    hStdOutput  resq    0x1
    hStdError   resq    0x1
    ; Passed as the fourth argument of every WriteFile call.
    hNum        resq    0x1
    ; Only referenced by the commented-out GetConsoleMode code.
    hMode       resq    0x1
    ; 0x100-byte buffer handed to ReadFile.
    bName       resb    0x100
    ; Passed as the fourth argument of ReadFile.
    bNamelen    resq    0x1

section .text
; toplevel - program entry (named by "entry toplevel").
; Fetches the three standard handles, writes the prompt, waits on and reads
; from standard input, writes the greeting and the name, writes a message
; to standard error, then calls ExitProcess with zero.
; This is the failing version from the question; the NOTE(review) comments
; mark the operands that pass an address where a value is expected.
proc    toplevel, ptrdiff_t argcount, ptrdiff_t cmdline
locals none
    ; Look up and remember the standard input handle.
    invoke GetStdHandle, STD_INPUT_HANDLE
    mov qword [hStdInput], rax
;    invoke GetConsoleMode, qword [hStdInput],  hMode
;    mov rdx, [hMode]
;    and dl, ENABLE_PROCESSED_INPUT
;    and dl, ENABLE_LINE_INPUT
;    and dl, ENABLE_ECHO_INPUT
;    invoke SetConsoleMode, qword [hStdInput], rdx
    ; Look up and remember the standard output and standard error handles.
    invoke GetStdHandle, STD_OUTPUT_HANDLE
    mov qword [hStdOutput], rax
    invoke GetStdHandle, STD_ERROR_HANDLE
    mov qword [hStdError], rax

    ; Prompt for the name.
    invoke WriteFile, qword [hStdOutput], query, querylen, hNum, zero
    ; Wait on the input handle with timeout = minusone (all bits set).
    invoke WaitForSingleObject, qword[hStdInput], minusone
    ; NOTE(review): bNamelim is declared with db, so the third argument
    ; here is the address of that byte rather than the limit 0xff.
    invoke ReadFile, qword [hStdInput], bName, bNamelim, bNamelen, zero
    invoke WriteFile, qword [hStdOutput], greet, greetlen, hNum, zero
    ; NOTE(review): bNamelen is a .bss label, so the third argument here is
    ; its address; [bNamelen] would pass the count stored there.
    invoke WriteFile, qword [hStdOutput], bName, bNamelen, hNum, zero
    invoke WriteFile, qword [hStdOutput], crlf, crlflen, hNum, zero
    ; Written to standard error unconditionally; no call result is checked.
    invoke WriteFile, qword [hStdError], errmsg, errmsglen, hNum, zero
    invoke ExitProcess, zero
endproc

I've done the same function using the C runtime and that works, but now I'm trying to get a working version without using that crutch. I'm using NASM (with NASMX include files providing macros) and GoLink, linking against kernel32.dll. What am I doing wrong? What behaviour of which API did I miss? From the MSDN articles on the Win32 console APIs, the behaviour of ReadFile surprises me.

Also, if I remove the WaitForSingleObject call from the assembly, something that isn't present in the C equivalent, the entire program finishes running without ever stopping to wait for console input, despite ReadFile being supposed to do just that.

EDIT Well, Raymond Chen asked about the macro expansions and if they were correct according to calling conventions, so:

    invoke GetStdHandle, STD_INPUT_HANDLE
    mov qword [hStdInput], rax

this gets turned into

    sub rsp,byte +0x20
    mov rcx,0xfffffffffffffff6
    call qword 0x2000
    add rsp,byte +0x20
    mov [0x402038],rax

which seems to follow the calling conventions of Win64 for 0-4 integer argument calls just fine. How about the five argument form?

    invoke WriteFile, qword [hStdOutput], query, querylen, hNum, zero

This gets turned into

    sub rsp,byte +0x30
    mov rcx,[0x402040]
    mov rdx,0x402016
    mov r8d,0x14
    mov r9,0x402050
    mov qword [rsp+0x20],0x0
    call qword 0x2006
    add rsp,byte +0x30

And from that it seems to me like at least the invoke macro is correct. The proc-locals-endproc macro is harder, because it's spread out and I believe that the invoke macro somehow relies on it. Anyway, the prologue ends up expanding to this:

    push rbp
    mov rbp,rsp
    mov rax,rsp
    and rax,byte +0xf
    jz 0x15
    sub rsp,byte +0x10
    and spl,0xf0
    mov [rbp+0x10],rcx
    mov [rbp+0x18],rdx

and the epilogue ends up expanding into this:

    mov rsp,rbp
    pop rbp
    ret

Both of which, from my admittedly meagre knowledge of Win64, seem to be okay.

EDIT Okay, thanks to Harry Johnston's answer I got the code working:

;%USERPROFILE%\nasm\learning\stdio.asm
;
;Basic usage of the standard input/output/error channels.
;
;nasm -f win64 stdio.asm
;golink /console /ni /entry main stdio.obj kernel32.dll

%include "\inc\nasmx.inc"
%include "\inc\win32\windows.inc"
%include "\inc\win32\kernel32.inc"

%ifidn __BITS__, 0x40
;// assert: set call stack for procedure prolog to max
;// invoke param bytes for 64-bit assembly mode
DEFAULT REL
NASMX_PRAGMA CALLSTACK, 0x30
%endif

entry toplevel

; Initialised data: message strings, each followed by an assemble-time
; length constant computed as "current position minus start of string".
section .data
errmsg      db      "No errors to report!",0xd,0xa
errmsglen   equ     $-errmsg
query       db      "What is your name?",0xd,0xa
querylen    equ     $-query
greet       db      "Welcome, "
greetlen    equ     $-greet
crlf        db      0xd,0xa
crlflen     equ     $-crlf
; Maximum byte count requested from ReadFile. Defined with equ, so the
; symbol is the value 0xff itself rather than the address of a stored byte.
bNamelim    equ     0xff
; Not referenced by the code in this listing.
minusone    equ     0xffffffffffffffff
; Used wherever a null/zero argument is passed.
zero        equ     0x0

; Uninitialised data: handles, API output slots and the input buffer.
section .bss
    ; Values returned by GetStdHandle for input, output and error.
    hStdInput   resq    0x1
    hStdOutput  resq    0x1
    hStdError   resq    0x1
    ; Passed as the fourth argument of every WriteFile call.
    hNum        resq    0x1
    ; Not referenced by the code in this listing.
    hMode       resq    0x1
    ; 0x100-byte buffer handed to ReadFile (which is asked for bNamelim bytes).
    bName       resb    0x100
    ; Passed as the fourth argument of ReadFile; read back as the length
    ; of the name when it is echoed.
    bNamelen    resq    0x1

section .text
; toplevel - program entry (named by "entry toplevel").
; Fetches the three standard handles, writes the prompt, reads up to
; bNamelim bytes into bName, writes the greeting followed by the bytes
; that were read, writes a message to standard error, then calls
; ExitProcess with zero. No call result is checked.
proc    toplevel, ptrdiff_t argcount, ptrdiff_t cmdline
locals none
    ; Look up and remember the three standard handles.
    invoke GetStdHandle, STD_INPUT_HANDLE
    mov qword [hStdInput], rax
    invoke GetStdHandle, STD_OUTPUT_HANDLE
    mov qword [hStdOutput], rax
    invoke GetStdHandle, STD_ERROR_HANDLE
    mov qword [hStdError], rax

    ; Prompt, then read the reply; bNamelim is an equ constant here, so the
    ; limit is passed by value.
    invoke WriteFile, qword [hStdOutput], query, querylen, hNum, zero
    invoke ReadFile, qword [hStdInput], bName, bNamelim, bNamelen, zero
    invoke WriteFile, qword [hStdOutput], greet, greetlen, hNum, zero
    ; [bNamelen] passes the count stored by ReadFile, not the label address.
    invoke WriteFile, qword [hStdOutput], bName, [bNamelen], hNum, zero
    invoke WriteFile, qword [hStdOutput], crlf, crlflen, hNum, zero
    ; Written to standard error unconditionally.
    invoke WriteFile, qword [hStdError], errmsg, errmsglen, hNum, zero
    invoke ExitProcess, zero
endproc

However, that code still doesn't answer Raymond Chen's issues with the macros and whether they violate the Win64 ABI or not, so I'll have to look into that some more.

EDIT A version without macros that I believe follows the x64 ABI fully, including the unwind data.

;%USERPROFILE%\nasm\learning\stdio.asm
;
;Basic usage of the standard input/output/error channels.
;
;nasm -f win64 stdio.asm
;golink /console /ni /entry main stdio.obj kernel32.dll

;Image setup
bits 64
default rel
global main

;Linkage
extern GetStdHandle
extern WriteFile
extern ReadFile
extern ExitProcess

;Read only data
; Read-only section. Lines using db emit bytes into the section; lines
; using equ only define assemble-time constants and emit nothing.
section .rdata use64
    ; Used for the null fifth argument of WriteFile/ReadFile.
    zero:                   equ     0x0
    ; Message strings, each followed by its length in bytes.
    query:                  db      "What is your name?",0xd,0xa
    querylen:               equ     $-query
    greet:                  db      "Welcome, "
    greetlen:               equ     $-greet
    errmsg:                 db      "No errors to report!",0xd,0xa
    errmsglen:              equ     $-errmsg
    crlf:                   db      0xd,0xa
    crlflen:                equ     $-crlf
    ; Maximum byte count requested from ReadFile.
    bNamelim:               equ     0xff
    ; Identifiers passed to GetStdHandle.
    STD_INPUT_HANDLE:       equ     -10
    STD_OUTPUT_HANDLE:      equ     -11
    STD_ERROR_HANDLE:       equ     -12
    ; Version and flag values for the unwind information header; the
    ; .xdata record in this listing uses UNW_VERSION and UNW_FLAG_NHANDLER.
    UNW_VERSION:            equ     0x1
    UNW_FLAG_NHANDLER:      equ     0x0
    UNW_FLAG_EHANDLER:      equ     0x1
    UNW_FLAG_UHANDLER:      equ     0x2
    UNW_FLAG_CHAININFO:     equ     0x4
    ; Unwind operation codes; only UWOP_ALLOC_SMALL is used in this listing.
    UWOP_PUSH_NONVOL:       equ     0x0
    UWOP_ALLOC_LARGE:       equ     0x1
    UWOP_ALLOC_SMALL:       equ     0x2
    UWOP_SET_FPREG:         equ     0x3
    UWOP_SAVE_NONVOL:       equ     0x4
    UWOP_SAVE_NONVOL_FAR:   equ     0x5
    UWOP_SAVE_XMM128:       equ     0x8
    UWOP_SAVE_XMM128_FAR:   equ     0x9
    UWOP_PUSH_MACHFRAME:    equ     0xa

;Uninitialised data
section .bss use64
    ; Written once in main's prolog from rcx, rdx and r8; not read again
    ; in this listing.
    argc:       resq    0x1
    argv:       resq    0x1
    envp:       resq    0x1
    ; Values returned by GetStdHandle for input, output and error.
    hStdInput:  resq    0x1
    hStdOutput: resq    0x1
    hStdError:  resq    0x1
    ; Passed as the fourth argument of every WriteFile call.
    hNum:       resq    0x1
    ; Not referenced by the code in this listing.
    hMode:      resq    0x1
    ; 0x100-byte buffer handed to ReadFile (which is asked for bNamelim bytes).
    bName:      resb    0x100
    ; Passed as the fourth argument of ReadFile; its low dword is read back
    ; as the length of the name when it is echoed.
    bNamelen:   resq    0x1

;Program code
section .text use64
;-----------------------------------------------------------------------
; main - console entry point: prompt, read a name, greet, exit
; ABI:    Microsoft x64
; In:     rcx, rdx, r8 are stored to argc/argv/envp; nothing here reads
;         them back
; Out:    does not return normally; the process ends in ExitProcess(0)
; Stack:  0x8*0x4+0x8 = 0x28 bytes: 32 bytes of shadow space plus one
;         8-byte slot at [rsp+0x20] for the fifth argument of
;         WriteFile/ReadFile. With the caller's return address on the
;         stack this also leaves rsp 16-byte aligned at every call.
; Note:   the .pdata/.xdata records are computed from main.prolog,
;         main.body and main.end, so those labels must keep bracketing
;         the prolog and the whole function.
; Data addresses are formed with lea, which is RIP-relative under
; "default rel", instead of 64-bit absolute immediates.
;-----------------------------------------------------------------------
main:
.prolog:
.argc:    mov qword [argc], rcx
.argv:    mov qword [argv], rdx
.envp:    mov qword [envp], r8
.rsp:     sub rsp, 0x8*0x4+0x8

.body:
        ; hStdInput = GetStdHandle (STD_INPUT_HANDLE)
        ; The identifier is a 32-bit value, so a 32-bit move is enough.
        mov     ecx, STD_INPUT_HANDLE
        call    GetStdHandle
        mov     qword [hStdInput], rax

        ; hStdOutput = GetStdHandle (STD_OUTPUT_HANDLE)
        mov     ecx, STD_OUTPUT_HANDLE
        call    GetStdHandle
        mov     qword [hStdOutput], rax

        ; hStdError = GetStdHandle (STD_ERROR_HANDLE)
        mov     ecx, STD_ERROR_HANDLE
        call    GetStdHandle
        mov     qword [hStdError], rax

        ; WriteFile (*hStdOutput, &query, querylen, &hNum, NULL)
        mov     rcx, qword [hStdOutput]
        lea     rdx, [query]
        mov     r8d, querylen
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero  ; fifth argument lives above the shadow space
        call    WriteFile

        ; ReadFile (*hStdInput, &bName, bNamelim, &bNamelen, NULL)
        mov     rcx, qword [hStdInput]
        lea     rdx, [bName]
        mov     r8d, bNamelim           ; limit by value (equ), not by address
        lea     r9, [bNamelen]
        mov     qword [rsp+0x20], zero
        call    ReadFile

        ; WriteFile (*hStdOutput, &crlf, crlflen, &hNum, NULL)
        mov     rcx, qword [hStdOutput]
        lea     rdx, [crlf]
        mov     r8d, crlflen
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero
        call    WriteFile

        ; WriteFile (*hStdOutput, &greet, greetlen, &hNum, NULL)
        mov     rcx, qword [hStdOutput]
        lea     rdx, [greet]
        mov     r8d, greetlen
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero
        call    WriteFile

        ; WriteFile (*hStdOutput, &bName, *bNamelen, &hNum, NULL)
        mov     rcx, qword [hStdOutput]
        lea     rdx, [bName]
        mov     r8d, dword [bNamelen]   ; count stored by ReadFile, loaded by value
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero
        call    WriteFile

        ; WriteFile (*hStdOutput, &crlf, crlflen, &hNum, NULL)
        mov     rcx, qword [hStdOutput]
        lea     rdx, [crlf]
        mov     r8d, crlflen
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero
        call    WriteFile

        ; WriteFile (*hStdError, &errmsg, errmsglen, &hNum, NULL)
        mov     rcx, qword [hStdError]
        lea     rdx, [errmsg]
        mov     r8d, errmsglen
        lea     r9, [hNum]
        mov     qword [rsp+0x20], zero
        call    WriteFile

        ; ExitProcess(0)
.exit:  xor     ecx, ecx
        call    ExitProcess

        ; Only reached if ExitProcess were to return; kept so the function
        ; still ends with a matching epilog.
.rval:  xor     eax, eax                ; return 0
.epilog:
        add     rsp, 0x8*0x4+0x8
        ret
.end:

; Win64 Windows API x64 Structured Exception Handling (SEH) - procedure data
; One function-table record for main, made of three 32-bit image-relative
; addresses: start of the function, end of the function, and its unwind
; information.
section .pdata  rdata align=4 use64
    pmain:
    .start: dd      main     wrt ..imagebase ; address of main
    .end:   dd      main.end wrt ..imagebase ; address of the label placed after main's final ret
    .info:  dd      xmain    wrt ..imagebase ; address of the xmain unwind record

; Win64 Windows API x64 Structured Exception Handling (SEH) - unwind information
; Unwind record for main: a four-byte header followed by the unwind codes
; that describe what main's prolog did to the stack.
section .xdata  rdata align=8 use64
    xmain:
    .versionandflags:
            db      UNW_VERSION + (UNW_FLAG_NHANDLER << 0x3) ; Version = 1, no handler flags
    ; Version is the low 3 bits. Handler flags are the high 5 bits.
    .size:  db      main.body-main.prolog ; size of the prolog in bytes
    .count: db      0x1 ; Only one unwind code
    .frame: db      0x0 + (0x0 << 0x4) ; Zero if no frame pointer taken
    ; Frame register is the low 4 bits, frame register offset is the high 4 bits,
    ; rsp + 16 * offset at time of establishing
    .codes: db      main.body-main.prolog ; offset of the instruction following "sub rsp"
            db      UWOP_ALLOC_SMALL + (0x4 << 0x4) ; UWOP_INFO: 4*8+8 bytes
    ; Low 4 bits are the unwind operation, high 4 bits are the op info.
    ; Some ops use one or two 16 bit slots more for addressing here
            db      0x0,0x0 ; Unused record to bring the number to be even
    ; The two labels below mark where optional fields would go; nothing is
    ; emitted for them here (the flags byte above is UNW_FLAG_NHANDLER).
    .handl: ; 32 bit image relative address to entry of exception handler
    .einfo: ; implementation defined structure exception info

Solution

  • I suspect this is your problem:

    bNamelim    db      0xff
    

    [...]

    invoke ReadFile, qword [hStdInput], bName, bNamelim, bNamelen, zero
    

    You're passing the address rather than the value of bNamelim.

    I'm not sure exactly how ReadFile should be expected to respond to a value larger than 32 bits, but it certainly isn't what you wanted to do.