Tags: python, debugging, windbg, pdb, pydbg

Parse PDB Symbol and Resolve Address


Using a Python-based disassembler and debugger, I found the instructions below (an example). Now I want to parse the Microsoft-provided public symbols to find the exact functions they are calling.

I want to know what options/modules are available for doing this. Can we simply get the information from a static PDB file, or does the PDB have to be loaded into memory while debugging?

call ntdll!0x33dec
call ntdll!0x22280
call ntdll!0x2df40
call ntdll!0x33cdb
call ntdll!0x2df29
call ntdll!0x325a0
call ntdll!0x32a96
call ntdll!0x32a79
call ntdll!0x220a4


Solution

  • A sample that uses Capstone for disassembly and the DbgHelp APIs to resolve the symbol targeted by an immediate (E8) call

    import sys  #for argv[]
    import binascii #for hexlify repr() spits out ugly mix like
            #'\xe8y\xff\' instead of  '\xe8\x79\xff' :(
    from ctypes import *
    from capstone import *
    class SYMBOL_INFO(Structure):
        """ctypes layout of the record filled in by dbghelp symbol lookups.

        The trailing ``Name`` buffer is declared with room for 2001 bytes so
        that a symbol name can be written directly after the fixed header.
        """

        _fields_ = [
            # --- sizes / type information ---
            ("SizeOfStruct", c_ulong),
            ("TypeIndex", c_ulong),
            ("Reserved", c_ulonglong * 2),
            ("Index", c_ulong),
            ("Size", c_ulong),
            # --- location of the symbol ---
            ("ModBase", c_ulonglong),
            ("Flags", c_ulong),
            ("Value", c_ulonglong),
            ("Address", c_ulonglong),
            ("Register", c_ulong),
            ("Scope", c_ulong),
            ("Tag", c_ulong),
            # --- name bookkeeping and the inline name buffer ---
            ("NameLen", c_ulong),
            ("MaxNameLen", c_ulong),
            ("Name", c_char * 2001),
        ]
    # Script body: load a module into the current process, resolve the symbol
    # at <module base + offset> through dbghelp, dump 16 bytes found there,
    # disassemble the first instruction with capstone and, when it is a direct
    # call, resolve the call target to a symbol as well.

    def _hexstr(data):
        """Return the bytes in *data* as a lowercase hexadecimal text string."""
        return binascii.hexlify(data).decode("ascii")

    def _lookup(address):
        """Return (name, displacement) for *address*, or None if lookup fails.

        Checking the SymFromAddr result matters: on failure the shared
        SYMBOL_INFO buffer still holds whatever the previous call wrote.
        """
        if not dbghelp.SymFromAddr(hproc, address, byref(Displacement), byref(sinfo)):
            return None
        return sinfo.Name.decode("ascii", "replace"), Displacement.value

    if len(sys.argv) != 3:
        sys.exit("usage: python dumpsym.py <module> <hex offset>")
    modname = sys.argv[1]
    offset  = int(sys.argv[2], 16)   # offset is given in hexadecimal
    sympath = b"xxx:\\yyyyy"  # substitute actual path
    base    = windll.LoadLibrary(modname)._handle
    symaddr = c_ulonglong(base + offset)
    print("Module name = %s\nModule Base = %s\nSymFromAddr = %s" %
          (modname, hex(base), hex(symaddr.value)))
    dbghelp = windll.dbghelp
    k32 = windll.kernel32
    # Declare prototypes: without argtypes ctypes marshals plain Python ints
    # as C int, which truncates handles and pointers in a 64-bit process.
    k32.GetCurrentProcess.restype = c_void_p
    dbghelp.SymInitialize.argtypes = [c_void_p, c_char_p, c_int]
    dbghelp.SymFromAddr.argtypes = [c_void_p, c_ulonglong,
                                    POINTER(c_ulonglong), POINTER(SYMBOL_INFO)]
    dbghelp.SymCleanup.argtypes = [c_void_p]
    hproc = k32.GetCurrentProcess()
    if not dbghelp.SymInitialize(hproc, sympath, 1):
        sys.exit("SymInitialize failed")
    try:
        sinfo = SYMBOL_INFO()
        # Header size only: the structure minus the extra name bytes.
        sinfo.SizeOfStruct = sizeof(SYMBOL_INFO) - 2000
        sinfo.MaxNameLen = 2000
        Displacement = c_ulonglong()
        found = _lookup(symaddr.value)
        if found is None:
            print("Sym At Addr = <no symbol>")
        else:
            print("Sym At Addr = %s + %s" % (found[0], hex(found[1])))
        # The module is mapped into this process, so its code bytes can be
        # copied straight out of our own address space.
        opcodebuff = create_string_buffer(16)
        memmove(opcodebuff, symaddr.value, 16)
        hexed = _hexstr(opcodebuff.raw)
        print(" ".join(hexed[j:j + 2] for j in range(0, len(hexed), 2)))
        # The disassembled code belongs to this process, so the capstone mode
        # follows the interpreter's pointer size instead of assuming 32-bit.
        MyDisasm = Cs(CS_ARCH_X86,
                      CS_MODE_64 if sizeof(c_void_p) == 8 else CS_MODE_32)
        for insn in MyDisasm.disasm(opcodebuff.raw, symaddr.value, 1):
            print("0x%x: %s %s  %s" % (insn.address, _hexstr(insn.bytes),
                                       insn.mnemonic, insn.op_str))
            if insn.mnemonic != 'call':
                continue
            try:
                # Only an immediate operand parses as a hexadecimal number.
                target = int(insn.op_str, 16)
            except ValueError:
                print("Indirect/register Calls Not Handled Yet")
                continue
            found = _lookup(target)
            if found is None:
                print("(<no symbol>) (%s+0x%X)" % (modname, target - base))
            else:
                print("(%s+%s) (%s+0x%X)" % (found[0], hex(found[1]),
                                             modname, target - base))
    finally:
        # Release the symbol handler state set up by SymInitialize.
        dbghelp.SymCleanup(hproc)
    

    usage as follows

    python dumpsym.py ntdll 1041

    first argument is a string that represents a module 
    
    second argument is a string that represents an offset in the module
    
    so if the module is loaded at base address 0xXXXXXXXX, the offset 1041
    (interpreted as hexadecimal) points to the address 0xXXXXXXXX + 0x1041
    

    output

    Module name = ntdll
    Module Base = 0x7c900000
    SymFromAddr = 0x7c901041L
    Sym At Addr = RtlEnterCriticalSection + 0x41L
    e8 79 a1 01 00 64 8b 0d 18 00 00 00 8b 54 24 04
    0x7c901041: e879a10100 call  0x7c91b1bf
    (RtlpWaitForCriticalSection+0x0L) (ntdll+0x1B1BF)