#!/usr/bin/env python2
'''
Dump some PE file features from memory images.

author: Willi Ballenthin
email: william.ballenthin@fireeye.com
website: https://gist.github.com/williballenthin/cbc102d561e2eb647f7aec3c3753ba55
'''
import os
import sys
import hashlib
import logging
import datetime
import contextlib

# from pypi::
#
#     pip install pytz argparse
import pytz
import argparse

# from vivisect::
#
#     pip install https://github.com/williballenthin/vivisect/zipball/master
import PE


logger = logging.getLogger(__name__)


@contextlib.contextmanager
def restoring_offset(f):
    '''
    context manager that restores the current offset of the file pointer
      after some block of operations.

    The offset is restored even if the block raises.

    Args:
      f: any object providing `tell()` and `seek(offset, whence)`.

    Example::

        with open('test.bin', 'rb') as f:
            assert f.tell() == 0x0
            with restoring_offset(f):
                f.seek(0x200)
                assert f.tell() == 0x200
            assert f.tell() == 0x0
    '''
    t = f.tell()
    try:
        yield
    finally:
        f.seek(t, os.SEEK_SET)


class FileView(object):
    '''
    Given an open file object, provide read access to a subsection of the file
      as if it were its own file object.

    This is a bit like `losetup(8)`, except a file-like object in Python.
    All offsets accepted and returned by the view are relative to `start`.

    Example::

        with open('logical-process-memory.bin', 'rb') as f:
            g = FileView(f, 0x401000)
            assert g.read(0x2) == 'MZ'
    '''
    def __init__(self, f, start=0, length=None):
        '''
        Args:
          f: the underlying, seekable file object (opened in binary mode).
          start: offset in `f` at which the view begins.
          length: size of the view in bytes. When None, the view extends to
            the end of `f`.

        Note: the underlying file pointer is moved to `start`.
        '''
        super(FileView, self).__init__()
        self.f = f
        self.start = start
        self.f.seek(self.start)

        if length is None:
            # measure the underlying file without disturbing its position.
            with restoring_offset(f):
                f.seek(0, os.SEEK_END)
                self.length = f.tell() - self.start
        else:
            self.length = length

    def tell(self):
        '''
        Returns: the current offset, relative to the start of the view.
        '''
        return self.f.tell() - self.start

    def seek(self, offset, whence=os.SEEK_SET):
        '''
        Move the file pointer within the view.

        Args:
          offset: offset relative to the position selected by `whence`.
            As with regular files, use zero or a negative value together
            with `os.SEEK_END`.
          whence: one of `os.SEEK_SET`, `os.SEEK_CUR`, `os.SEEK_END`.

        Returns: the new offset, relative to the start of the view.

        Raises:
          IOError: if `whence` is unknown, or the target lies outside the view.
        '''
        if whence == os.SEEK_SET:
            final_offset = self.start + offset
        elif whence == os.SEEK_CUR:
            final_offset = self.f.tell() + offset
        elif whence == os.SEEK_END:
            # standard file semantics: the offset is *added* to the end.
            final_offset = self.start + self.length + offset
        else:
            raise IOError('unknown seek whence')

        logger.debug('seek offset: 0x%x whence: 0x%x final offset: 0x%x', offset, whence, final_offset)

        if final_offset < self.start:
            raise IOError('cant read offset %d (underrun)' % (final_offset - self.start))

        if final_offset > self.start + self.length:
            raise IOError('cant read offset %d (overrun)' % (final_offset - self.start))

        self.f.seek(final_offset)
        return final_offset - self.start

    def read(self, length=None):
        '''
        Read bytes from the current offset, never going past the end of the view.

        Args:
          length: maximum number of bytes to read. None or a negative value
            means "read up to the end of the view".

        Returns: the bytes read; possibly fewer than requested.
        '''
        max_length = max(0, self.length - self.tell())

        # clamp the request so we never leak data beyond the view.
        if length is None or length < 0 or length > max_length:
            length = max_length

        logger.debug('read length: 0x%x', length)
        return self.f.read(length)


def md5(buf):
    '''
    Returns: the hex-encoded MD5 digest of the given bytes.
    '''
    m = hashlib.md5()
    m.update(buf)
    return m.hexdigest()


def get_imphash(pe):
    '''
    Compute the import hash of the given PE: the MD5 of the comma-joined,
      lowercased `library.function` import names, in import order.

    Args:
      pe: a parsed PE object whose `getImports()` yields
        `(offset, libname, funcname)` tuples.

    Returns: the hex-encoded import hash.
    '''
    impstrs = []
    exts = ['ocx', 'sys', 'dll']
    for (off, libname, funcname) in pe.getImports():
        # lowercase *before* stripping the extension, so that names like
        #  `KERNEL32.DLL` are normalized to `kernel32`.
        libname = libname.lower()
        parts = libname.rsplit('.', 1)
        if len(parts) > 1 and parts[1] in exts:
            libname = parts[0]

        impstrs.append('%s.%s' % (libname, funcname.lower()))
    return md5(','.join(impstrs).encode())


def guess_is_memory_image(f):
    '''
    guess if the provided file is a PE from memory or on disk.

    it works by exploiting the differing alignment between file sectors
      (0x200, PE file alignment) and memory pages (0x1000, PE section alignment).
    on disk, the first section's content typically begins at offset 0x400,
      while in memory, it usually begins at 0x1000.

    The file offset is left unchanged.

    Args:
      f: a seekable, binary file-like object positioned anywhere.

    Returns: True if the 0x200 bytes at offset 0x400 are all zero.

    Example::

        with open('kernel32.dll', 'rb') as f:
            assert guess_is_memory_image(f) == False

        with open('0x401000.bin', 'rb') as f:
            assert guess_is_memory_image(f) == True
    '''
    with restoring_offset(f):
        f.seek(0x400)
        # bytes literal: the file is read in binary mode.
        return f.read(0x200) == b'\x00' * 0x200


def output_normal_mode(pe, args):
    '''
    Print a human readable report of the PE features to stdout.

    Args:
      pe: the parsed PE object.
      args: parsed command line arguments; `args.no_imports` suppresses
        the import listing.
    '''
    ts = datetime.datetime.fromtimestamp(pe.IMAGE_NT_HEADERS.FileHeader.TimeDateStamp, pytz.utc)
    print('timestamp: ' + ts.isoformat())
    print('checksum: ' + hex(pe.IMAGE_NT_HEADERS.OptionalHeader.CheckSum))

    export_name = pe.getExportName()
    if export_name:
        print('export name: ' + export_name)

    print('exports:')
    for (_, ord_, funcname) in pe.getExports():
        print(' %d) %s' % (ord_, funcname))

    if not args.no_imports:
        print('imports:')
        for (_, libname, funcname) in pe.getImports():
            print(' - %s.%s' % (libname, funcname))

    print('sections:')
    for section in pe.getSections():
        print(' - ' + section.Name)
        print(' virtual address: ' + hex(section.VirtualAddress) + '\tsize: ' + hex(section.VirtualSize))
        print(' raw address: ' + hex(section.PointerToRawData) + '\tsize: ' + hex(section.SizeOfRawData))

    print('imphash: ' + get_imphash(pe))


def output_bulk_mode(pe, args):
    '''
    Print a single pipe-separated record to stdout::

        filename|offset|export_name|timestamp|checksum|imphash

    Args:
      pe: the parsed PE object.
      args: parsed command line arguments; `args.input` and `args.offset`
        are echoed in the record.
    '''
    ts = datetime.datetime.fromtimestamp(pe.IMAGE_NT_HEADERS.FileHeader.TimeDateStamp, pytz.utc)

    # pass the fields explicitly rather than via `locals()`,
    #  so the record layout is visible in one place.
    print('{filename}|{offset}|{export_name}|{timestamp}|{checksum}|{imphash}'.format(
        filename=args.input,
        offset=args.offset,
        export_name=pe.getExportName() or '',
        timestamp=ts.isoformat(),
        checksum=hex(pe.IMAGE_NT_HEADERS.OptionalHeader.CheckSum),
        imphash=get_imphash(pe)))


def number(s):
    '''
    Parse a command line integer.

    Args:
      s: a decimal string, or a hexadecimal string prefixed with `0x`/`0X`.

    Returns: the parsed integer.

    Raises:
      ValueError: if the string is not a valid number.
    '''
    if s.lower().startswith('0x'):
        return int(s, 0x10)
    else:
        return int(s)


def main(argv=None):
    '''
    Command line entry point.

    Args:
      argv: list of command line arguments, excluding the program name.
        Defaults to `sys.argv[1:]`.

    Returns: the process exit code.
    '''
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Dump some PE file features from memory images.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("offset", type=number,
                        help="Offset from which to parse the PE image.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("--no-imports", dest='no_imports', action="store_true",
                        help="Don't show imports")
    parser.add_argument("--bulk-mode", dest='bulk_mode', action="store_true",
                        help="Output in bulk mode (|SV)")
    # honor the `argv` parameter, so that `main()` can be invoked programmatically.
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    logger.debug('offset: 0x%x', args.offset)

    with open(args.input, 'rb') as f:
        fv = FileView(f, args.offset)

        # check the magic at the start of the view *before* parsing:
        #  the parser may leave the file pointer anywhere, and the warning
        #  is most useful if it is emitted even when parsing fails.
        with restoring_offset(fv):
            fv.seek(0)
            if fv.read(0x2) != b'MZ':
                logger.warning('missing PE header!')

        pe = PE.PE(fv, inmem=guess_is_memory_image(fv))

        # the PE object reads from the view, so keep the file open while reporting.
        if args.bulk_mode:
            output_bulk_mode(pe, args)
        else:
            output_normal_mode(pe, args)

    return 0


if __name__ == "__main__":
    sys.exit(main())