disasm.py   [plain text]


#!/usr/bin/env python

"""
Run gdb to disassemble a function, feed the bytes to 'llvm-mc -disassemble' command,
and display the disassembly result.

"""

import os
import sys
from optparse import OptionParser

def is_exe(fpath):
    """Check whether fpath is an executable."""
    return os.path.isfile(fpath) and os.access(fpath, os.X_OK)

def which(program):
    """Find the full path to a program, or return None."""
    fpath, fname = os.path.split(program)
    if fpath:
        if is_exe(program):
            return program
    else:
        for path in os.environ["PATH"].split(os.pathsep):
            exe_file = os.path.join(path, program)
            if is_exe(exe_file):
                return exe_file
    return None

def do_llvm_mc_disassembly(gdb_commands, gdb_options, exe, func, mc, mc_options):
    from cStringIO import StringIO 
    import pexpect

    gdb_prompt = "\r\n\(gdb\) "
    gdb = pexpect.spawn(('gdb %s' % gdb_options) if gdb_options else 'gdb')
    # Turn on logging for what gdb sends back.
    gdb.logfile_read = sys.stdout
    gdb.expect(gdb_prompt)

    # See if there any extra command(s) to execute before we issue the file command.
    for cmd in gdb_commands:
        gdb.sendline(cmd)
        gdb.expect(gdb_prompt)

    # Now issue the file command.
    gdb.sendline('file %s' % exe)
    gdb.expect(gdb_prompt)

    # Send the disassemble command.
    gdb.sendline('disassemble %s' % func)
    gdb.expect(gdb_prompt)

    # Get the output from gdb.
    gdb_output = gdb.before

    # Use StringIO to record the memory dump as well as the gdb assembler code.
    mc_input = StringIO()

    # These keep track of the states of our simple gdb_output parser.
    prev_line = None
    prev_addr = None
    curr_addr = None
    addr_diff = 0
    looking = False
    for line in gdb_output.split(os.linesep):
        if line.startswith('Dump of assembler code'):
            looking = True
            continue

        if line.startswith('End of assembler dump.'):
            looking = False
            prev_addr = curr_addr
            if mc_options and mc_options.find('arm') != -1:
                addr_diff = 4
            if mc_options and mc_options.find('thumb') != -1:
                # It is obviously wrong to assume the last instruction of the
                # function has two bytes.
                # FIXME
                addr_diff = 2

        if looking and line.startswith('0x'):
            # It's an assembler code dump.
            prev_addr = curr_addr
            curr_addr = line.split(None, 1)[0]
            if prev_addr and curr_addr:
                addr_diff = int(curr_addr, 16) - int(prev_addr, 16)

        if prev_addr and addr_diff > 0:
            # Feed the examining memory command to gdb.
            gdb.sendline('x /%db %s' % (addr_diff, prev_addr))
            gdb.expect(gdb_prompt)
            x_output = gdb.before
            # Get the last output line from the gdb examine memory command,
            # split the string into a 3-tuple with separator '>:' to handle
            # objc method names.
            memory_dump = x_output.split(os.linesep)[-1].partition('>:')[2].strip()
            #print "\nbytes:", memory_dump
            disasm_str = prev_line.partition('>:')[2]
            print >> mc_input, '%s # %s' % (memory_dump, disasm_str)

        # We're done with the processing.  Assign the current line to be prev_line.
        prev_line = line

    # Close the gdb session now that we are done with it.
    gdb.sendline('quit')
    gdb.expect(pexpect.EOF)
    gdb.close()

    # Write the memory dump into a file.
    with open('disasm-input.txt', 'w') as f:
        f.write(mc_input.getvalue())

    mc_cmd = '%s -disassemble %s disasm-input.txt' % (mc, mc_options)
    print "\nExecuting command:", mc_cmd
    os.system(mc_cmd)

    # And invoke llvm-mc with the just recorded file.
    #mc = pexpect.spawn('%s -disassemble %s disasm-input.txt' % (mc, mc_options))
    #mc.logfile_read = sys.stdout
    #print "mc:", mc
    #mc.close()
    

def main():
    # This is to set up the Python path to include the pexpect-2.4 dir.
    # Remember to update this when/if things change.
    scriptPath = sys.path[0]
    sys.path.append(os.path.join(scriptPath, os.pardir, os.pardir, 'test', 'pexpect-2.4'))

    parser = OptionParser(usage="""\
Run gdb to disassemble a function, feed the bytes to 'llvm-mc -disassemble' command,
and display the disassembly result.

Usage: %prog [options]
""")
    parser.add_option('-C', '--gdb-command',
                      type='string', action='append', metavar='COMMAND',
                      default=[], dest='gdb_commands',
                      help='Command(s) gdb executes after starting up (can be empty)')
    parser.add_option('-O', '--gdb-options',
                      type='string', action='store',
                      dest='gdb_options',
                      help="""The options passed to 'gdb' command if specified.""")
    parser.add_option('-e', '--executable',
                      type='string', action='store',
                      dest='executable',
                      help="""The executable to do disassembly on.""")
    parser.add_option('-f', '--function',
                      type='string', action='store',
                      dest='function',
                      help="""The function name (could be an address to gdb) for disassembly.""")
    parser.add_option('-m', '--llvm-mc',
                      type='string', action='store',
                      dest='llvm_mc',
                      help="""The llvm-mc executable full path, if specified.
                      Otherwise, it must be present in your PATH environment.""")

    parser.add_option('-o', '--options',
                      type='string', action='store',
                      dest='llvm_mc_options',
                      help="""The options passed to 'llvm-mc -disassemble' command if specified.""")

    opts, args = parser.parse_args()

    gdb_commands = opts.gdb_commands
    gdb_options = opts.gdb_options

    if not opts.executable:
        parser.print_help()
        sys.exit(1)
    executable = opts.executable

    if not opts.function:
        parser.print_help()
        sys.exit(1)
    function = opts.function

    llvm_mc = opts.llvm_mc if opts.llvm_mc else which('llvm-mc')
    if not llvm_mc:
        parser.print_help()
        sys.exit(1)

    # This is optional.  For example:
    # --options='-triple=arm-apple-darwin -debug-only=arm-disassembler'
    llvm_mc_options = opts.llvm_mc_options

    # We have parsed the options.
    print "gdb commands:", gdb_commands
    print "gdb options:", gdb_options
    print "executable:", executable
    print "function:", function
    print "llvm-mc:", llvm_mc
    print "llvm-mc options:", llvm_mc_options

    do_llvm_mc_disassembly(gdb_commands, gdb_options, executable, function, llvm_mc, llvm_mc_options)

if __name__ == '__main__':
    main()