I am trying to disassemble a PE file using Capstone with its Python bindings.
import pefile
from capstone import *
exe_file = 'C:\\Users\\Philip\\file.exe'
pe = pefile.PE(exe_file)

# Take the virtual address of the first section named ".text" (the name
# field is NUL-padded to 8 bytes); fall back to False when none matches.
offset = next(
    (sec.VirtualAddress
     for sec in pe.sections
     if sec.Name == b'.text\x00\x00\x00'),
    False,
)

# Read the entire file from disk into memory.
with open(exe_file, 'rb') as fh:
    code = fh.read()

# 32-bit x86 disassembler with detailed instruction info enabled.
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True

# Disassemble the buffer, labelling instructions starting at `offset`.
if offset:
    for insn in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (insn.address, insn.mnemonic, insn.op_str))
But it keeps returning the same ASM instructions at every offset.
0x1000: dec ebp
0x1001: pop edx
0x1002: nop
0x1003: add byte ptr [ebx], al
0x1005: add byte ptr [eax], al
0x1007: add byte ptr [eax + eax], al
0x100a: add byte ptr [eax], al
# Disassemble the same buffer again, this time with 0x2000 as the address
# assigned to the first instruction.
for insn in md.disasm(code, 0x2000):
    line = '0x%x:\t%s\t%s' % (insn.address, insn.mnemonic, insn.op_str)
    print(line)
0x2000: dec ebp
0x2001: pop edx
0x2002: nop
0x2003: add byte ptr [ebx], al
0x2005: add byte ptr [eax], al
0x2007: add byte ptr [eax + eax], al
0x200a: add byte ptr [eax], al
And if I keep looping, I get an endless stream of the same output.
from typing import Iterable, Any, Tuple
def signal_last(it: Iterable[Any]) -> Iterable[Tuple[bool, Any]]:
    """Yield ``(is_last, item)`` pairs for every item of *it*.

    ``is_last`` is ``False`` for every item except the final one, for which
    it is ``True``. Items are produced lazily with a one-item look-ahead, so
    *it* may be any iterable, including a generator.

    An empty iterable yields nothing.
    """
    iterator = iter(it)
    try:
        pending = next(iterator)
    except StopIteration:
        # A StopIteration escaping a generator body is converted into
        # RuntimeError (PEP 479), so empty input must be handled explicitly.
        return
    for value in iterator:
        # Another item exists, so the held-back one is not the last.
        yield False, pending
        pending = value
    yield True, pending
offset = 0x1000
# NOTE: this loop has no exit condition; it runs until interrupted.
while True:
    for is_final, insn in signal_last(md.disasm(code, offset)):
        print('0x%x:\t%s\t%s' % (insn.address, insn.mnemonic, insn.op_str))
        if is_final:
            # Next pass starts one past the address of the last decoded
            # instruction.
            offset = insn.address + 1
0xbc113: dec ebp
0xbc114: pop edx
0xbc115: nop
0xbc116: add byte ptr [ebx], al
0xbc118: add byte ptr [eax], al
0xbc11a: add byte ptr [eax + eax], al
0xbc11d: add byte ptr [eax], al
0xbc11e: dec ebp
0xbc11f: pop edx
0xbc120: nop
0xbc121: add byte ptr [ebx], al
0xbc123: add byte ptr [eax], al
0xbc125: add byte ptr [eax + eax], al
0xbc128: add byte ptr [eax], al
0xbc129: dec ebp
0xbc12a: pop edx
0xbc12b: nop
0xbc12c: add byte ptr [ebx], al
0xbc12e: add byte ptr [eax], al
0xbc130: add byte ptr [eax + eax], al
0xbc133: add byte ptr [eax], al
0xbc134: dec ebp
0xbc135: pop edx
0xbc136: nop
0xbc137: add byte ptr [ebx], al
0xbc139: add byte ptr [eax], al
0xbc13b: add byte ptr [eax + eax], al
0xbc13e: add byte ptr [eax], al
0xbc13f: dec ebp
0xbc140: pop edx
0xbc141: nop
0xbc142: add byte ptr [ebx], al
0xbc144: add byte ptr [eax], al
0xbc146: add byte ptr [eax + eax], al
0xbc149: add byte ptr [eax], al
0xbc14a: dec ebp
0xbc14b: pop edx
0xbc14c: nop
0xbc14d: add byte ptr [ebx], al
0xbc14f: add byte ptr [eax], al
0xbc151: add byte ptr [eax + eax], al
0xbc154: add byte ptr [eax], al
0xbc155: dec ebp
0xbc156: pop edx
0xbc157: nop
0xbc158: add byte ptr [ebx], al
0xbc15a: add byte ptr [eax], al
0xbc15c: add byte ptr [eax + eax], al
0xbc15f: add byte ptr [eax], al
0xbc160: dec ebp
0xbc161: pop edx
0xbc162: nop
0xbc163: add byte ptr [ebx], al
0xbc165: add byte ptr [eax], al
0xbc167: add byte ptr [eax + eax], al
0xbc16a: add byte ptr [eax], al
0xbc16b: dec ebp
0xbc16c: pop edx
0xbc16d: nop
0xbc16e: add byte ptr [ebx], al
0xbc170: add byte ptr [eax], al
0xbc172: add byte ptr [eax + eax], al
0xbc175: add byte ptr [eax], al
Does anyone know what I am doing wrong? I don't think this is how Capstone is supposed to work.
disasm will disassemble from the start of code. You should pass the raw data corresponding to the code section, not the beginning of the PE file, where the PE headers reside:
import pefile
from capstone import *
exe_file = 'C:\\Users\\Philip\\file.exe'
pe = pefile.PE(exe_file)

# Find the .text section and grab its bytes.
# `offset` stays None (and `code` empty) when the file has no such section,
# so the disassembly step below is skipped instead of failing on an
# undefined name.
offset = None
code = b''
for section in pe.sections:
    if section.Name == b'.text\x00\x00\x00':
        # RVA of the section; used as the address of the first instruction.
        offset = section.VirtualAddress
        # get_data() returns this section's own bytes. Slicing
        # pe.get_memory_mapped_image() with PointerToRawData would be wrong:
        # that image is laid out by RVA, while PointerToRawData is a file
        # offset.
        code = section.get_data()
        break

# Start disassembling the text section.
md = Cs(CS_ARCH_X86, CS_MODE_32)
md.detail = True
# Compare against None rather than truthiness so an RVA of 0 is not skipped.
if offset is not None:
    for i in md.disasm(code, offset):
        print('0x%x:\t%s\t%s' % (i.address, i.mnemonic, i.op_str))