python assembly x86 disassembly capstone

capstone disassembler python returns same instructions on every offset


I am trying to disassemble a PE file using Capstone with its Python bindings.

import pefile
from capstone import *

exe_file = 'C:\\Users\\Philip\\file.exe'
pe = pefile.PE(exe_file)

# find text section
# `offset` doubles as a "found" flag: it stays False when no section is
# named '.text' (section names are compared as 8-byte, NUL-padded values).
offset = False
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        offset = section.VirtualAddress
        break

# NOTE: this reads the *entire* file, so `code` begins with the PE headers,
# not with the bytes of the .text section.
with open(exe_file, 'rb') as f:
    code = f.read()

# start disassembling text section
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
if offset:
    # `offset` only sets the address reported for the first instruction;
    # decoding itself starts at the first byte of `code`.
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))

But it keeps returning the same ASM instructions at every offset.

0x1000: dec ebp
0x1001: pop edx
0x1002: nop 
0x1003: add byte ptr [ebx], al
0x1005: add byte ptr [eax], al
0x1007: add byte ptr [eax + eax], al
0x100a: add byte ptr [eax], al
for i in md.disasm(code, 0x2000):
        print('0x%x:\t%s\t%s' %(i.address, i.mnemonic, i.op_str))
0x2000: dec ebp
0x2001: pop edx
0x2002: nop 
0x2003: add byte ptr [ebx], al
0x2005: add byte ptr [eax], al
0x2007: add byte ptr [eax + eax], al
0x200a: add byte ptr [eax], al

And if I keep looping, I get an endless stream of the same output.

from typing import Iterable, Any, Tuple

def signal_last(it: Iterable[Any]) -> Iterable[Tuple[bool, Any]]:
    """Yield ``(is_last, item)`` pairs for every item of *it*.

    ``is_last`` is ``True`` only for the final item and ``False`` for all
    earlier ones. The input is consumed lazily with a one-item look-ahead,
    so it may be any iterable, including a generator.

    An empty iterable yields nothing. (Calling ``next()`` unguarded inside a
    generator would let ``StopIteration`` escape, which Python 3.7+ turns
    into ``RuntimeError``.)
    """
    iterator = iter(it)
    try:
        pending = next(iterator)
    except StopIteration:
        # Nothing to report for an empty input.
        return
    for upcoming in iterator:
        # Another item exists, so the held one cannot be the last.
        yield False, pending
        pending = upcoming
    yield True, pending

offset = 0x1000
# Endless loop: there is no exit condition. Each pass calls md.disasm() on
# the same `code` buffer again, changing only the starting address label.
while True:
    for last, i in signal_last(md.disasm(code, offset)):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))
        if last:
            # Restart one past the address of the last decoded instruction.
            offset = i.address + 1
0xbc113:    dec ebp
0xbc114:    pop edx
0xbc115:    nop 
0xbc116:    add byte ptr [ebx], al
0xbc118:    add byte ptr [eax], al
0xbc11a:    add byte ptr [eax + eax], al
0xbc11d:    add byte ptr [eax], al
0xbc11e:    dec ebp
0xbc11f:    pop edx
0xbc120:    nop 
0xbc121:    add byte ptr [ebx], al
0xbc123:    add byte ptr [eax], al
0xbc125:    add byte ptr [eax + eax], al
0xbc128:    add byte ptr [eax], al
0xbc129:    dec ebp
0xbc12a:    pop edx
0xbc12b:    nop 
0xbc12c:    add byte ptr [ebx], al
0xbc12e:    add byte ptr [eax], al
0xbc130:    add byte ptr [eax + eax], al
0xbc133:    add byte ptr [eax], al
0xbc134:    dec ebp
0xbc135:    pop edx
0xbc136:    nop 
0xbc137:    add byte ptr [ebx], al
0xbc139:    add byte ptr [eax], al
0xbc13b:    add byte ptr [eax + eax], al
0xbc13e:    add byte ptr [eax], al
0xbc13f:    dec ebp
0xbc140:    pop edx
0xbc141:    nop 
0xbc142:    add byte ptr [ebx], al
0xbc144:    add byte ptr [eax], al
0xbc146:    add byte ptr [eax + eax], al
0xbc149:    add byte ptr [eax], al
0xbc14a:    dec ebp
0xbc14b:    pop edx
0xbc14c:    nop 
0xbc14d:    add byte ptr [ebx], al
0xbc14f:    add byte ptr [eax], al
0xbc151:    add byte ptr [eax + eax], al
0xbc154:    add byte ptr [eax], al
0xbc155:    dec ebp
0xbc156:    pop edx
0xbc157:    nop 
0xbc158:    add byte ptr [ebx], al
0xbc15a:    add byte ptr [eax], al
0xbc15c:    add byte ptr [eax + eax], al
0xbc15f:    add byte ptr [eax], al
0xbc160:    dec ebp
0xbc161:    pop edx
0xbc162:    nop 
0xbc163:    add byte ptr [ebx], al
0xbc165:    add byte ptr [eax], al
0xbc167:    add byte ptr [eax + eax], al
0xbc16a:    add byte ptr [eax], al
0xbc16b:    dec ebp
0xbc16c:    pop edx
0xbc16d:    nop 
0xbc16e:    add byte ptr [ebx], al
0xbc170:    add byte ptr [eax], al
0xbc172:    add byte ptr [eax + eax], al
0xbc175:    add byte ptr [eax], al

Does anyone know what I am doing wrong? I don't think this is how Capstone is supposed to work.


Solution

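  • `md.disasm(code, offset)` always starts decoding at the first byte of `code`; the second argument is only the address assigned to the first instruction, not a position inside the buffer. Because `code` holds the whole file, you are disassembling the PE headers every time (`dec ebp; pop edx; nop` is the `MZ\x90` signature, bytes 4D 5A 90, decoded as instructions). You should pass the raw data corresponding to the code section, not the beginning of the PE file, where the PE headers reside: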

    import pefile
    from capstone import *
    
    exe_file = 'C:\\Users\\Philip\\file.exe'
    pe = pefile.PE(exe_file)
    
    # find text section
    # `offset` doubles as a "found" flag: it stays False when the file has no
    # '.text' section, in which case nothing is disassembled below.
    offset = False
    code = b''
    for section in pe.sections:
        if section.Name == b'.text\x00\x00\x00':
            # RVA of the section; used only as the address reported for the
            # first decoded instruction.
            offset = section.VirtualAddress
            # Raw bytes of this section as stored in the file. Do not slice
            # pe.get_memory_mapped_image() with PointerToRawData: that image
            # is laid out by virtual address, so file offsets select the
            # wrong bytes whenever file and section alignment differ.
            code = section.get_data()
            break

    # start disassembling text section
    md = Cs(CS_ARCH_X86, CS_MODE_32)
    md.detail = True
    if offset:
        for i in md.disasm(code, offset):
            print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))