#!/usr/bin/env python2
'''
Dump some PE file features from memory images.

author: Willi Ballenthin
email: william.ballenthin@fireeye.com
website: https://gist.github.com/williballenthin/cbc102d561e2eb647f7aec3c3753ba55
'''
import os
import sys
import hashlib
import logging
import datetime
import contextlib


# from pypi::
#
#     pip install pytz argparse
import pytz
import argparse


# from vivisect::
#
#     pip install https://github.com/williballenthin/vivisect/zipball/master
import PE


# module-level logger used throughout this file; the root logging level is
#  configured by the `logging.basicConfig` calls in `main`.
logger = logging.getLogger(__name__)


@contextlib.contextmanager
def restoring_offset(f):
    '''
    context manager that remembers where the file pointer of `f` is when
     the block is entered, and seeks back to that absolute position when the
     block is left (whether it finishes normally or raises).

    `f` only needs to provide `tell()` and `seek(offset, whence)`.

    Example::

        with open('test.bin', 'rb') as f:
            assert f.tell() == 0x0

            with restoring_offset(f):
                f.seek(0x200)
                assert f.tell() == 0x200

            assert f.tell() == 0x0
    '''
    saved_offset = f.tell()
    try:
        yield
    finally:
        # always rewind, even when the body raised.
        f.seek(saved_offset, os.SEEK_SET)


class FileView(object):
    '''
    Given an open file object, provide read access to a subsection of the file
     as if it were its own file object. This is a bit like `losetup(8)`, except a
     file-like object in Python.

    The view covers the bytes `[start, start + length)` of the underlying file.
     All offsets accepted and returned by this class are relative to `start`.
     The view shares the file pointer of the underlying file object.

    Args:
      f: an open, seekable file object.
      start: offset into `f` at which the view begins.
      length: size of the view in bytes. when `None`, the view extends to
       the current end of `f`.

    Example::

        with open('logical-process-memory.bin', 'rb') as f:
            g = FileView(f, 0x401000)
            assert g.read(0x2) == b'MZ'
    '''
    def __init__(self, f, start=0, length=None):
        super(FileView, self).__init__()
        self.f = f
        self.start = start

        # position the shared file pointer at the beginning of the view.
        self.f.seek(self.start)

        if length is None:
            # measure the distance to the end of the file without disturbing
            #  the position that was just established.
            with restoring_offset(f):
                f.seek(0, os.SEEK_END)
                self.length = f.tell() - self.start
        else:
            self.length = length

    def tell(self):
        '''
        return the current position, relative to the start of the view.
        '''
        return self.f.tell() - self.start

    def seek(self, offset, whence=os.SEEK_SET):
        '''
        move the file pointer within the view.

        `whence` follows the usual file object convention: `os.SEEK_SET` is
         relative to the start of the view, `os.SEEK_CUR` to the current
         position, and `os.SEEK_END` to the end of the view (so offsets
         relative to the end are zero or negative).

        Raises:
          IOError: if `whence` is unknown, or if the target position falls
           before the start or beyond the end of the view.
        '''
        if whence == os.SEEK_SET:
            final_offset = self.start + offset

        elif whence == os.SEEK_CUR:
            final_offset = self.f.tell() + offset

        elif whence == os.SEEK_END:
            # the offset is *added* to the end position, like regular file
            #  objects do; `seek(-2, os.SEEK_END)` lands two bytes before the end.
            final_offset = self.start + self.length + offset

        else:
            raise IOError('unknown seek whence')

        logger.debug('seek offset: 0x%x whence: 0x%x final offset: 0x%x',
                     offset, whence, final_offset)

        if final_offset < self.start:
            raise IOError('cant read offset %d (underrun)' % (final_offset - self.start))

        if final_offset > self.start + self.length:
            raise IOError('cant read offset %d (overrun)' % (final_offset - self.start))

        self.f.seek(final_offset)

    def read(self, length=None):
        '''
        read up to `length` bytes from the current position, never reading
         past the end of the view.

        when `length` is `None` or negative, read everything that remains
         in the view.
        '''
        max_length = max(self.length - self.tell(), 0)

        # a negative length means "read it all" for file objects; clamp it here,
        #  otherwise the underlying file would read beyond the end of the view.
        if length is None or length < 0 or length > max_length:
            length = max_length

        logger.debug('read length: 0x%x', length)
        return self.f.read(length)


def md5(buf):
    '''
    return the MD5 digest of the byte string `buf` as a hex string.
    '''
    return hashlib.md5(buf).hexdigest()


def get_imphash(pe):
    '''
    compute an import hash for the given PE: the MD5 of the comma separated,
     lower-cased `library.function` names of all imports, in the order
     that `pe.getImports()` yields them.

    the extensions `ocx`, `sys`, and `dll` are stripped from the library
     names. the match is case-insensitive, so `KERNEL32.DLL` and
     `kernel32.dll` contribute the same string to the hash.

    Args:
      pe: an object whose `getImports()` yields `(offset, libname, funcname)`
       tuples.

    Returns:
      the hex encoded MD5 digest.
    '''
    exts = ('ocx', 'sys', 'dll')
    impstrs = []

    for (_, libname, funcname) in pe.getImports():
        # normalize the case *before* inspecting the extension; import tables
        #  commonly spell library names in upper or mixed case.
        libname = libname.lower()
        parts = libname.rsplit('.', 1)

        if len(parts) > 1 and parts[1] in exts:
            libname = parts[0]

        impstrs.append('%s.%s' % (libname, funcname.lower()))

    return md5(','.join(impstrs).encode())


def guess_is_memory_image(f):
    '''
    guess if the provided file is a PE from memory or on disk.

    it works by exploiting the differing alignment between file
     sectors (0x200, PE file alignment) and memory pages (0x1000,
     PE section alignment). on disk, the first section's content
     typically begins at offset 0x400, while in memory, it usually
     begins at 0x1000.

    concretely: the 0x200 bytes at offset 0x400 are read, and the file is
     considered a memory image when all of them are zero. the offset of the
     file pointer is restored before returning.

    Args:
      f: a seekable file-like object opened in binary mode.

    Returns:
      True if the file looks like a PE image mapped in memory, False otherwise
       (including when fewer than 0x200 bytes could be read).

    Example::

        with open('kernel32.dll', 'rb') as f:
            assert guess_is_memory_image(f) == False

        with open('0x401000.bin', 'rb') as f:
            assert guess_is_memory_image(f) == True

    '''
    with restoring_offset(f):
        f.seek(0x400)
        # compare against a bytes literal: data read from a binary file never
        #  equals a text string on Python 3, while b'...' works on both 2 and 3.
        return f.read(0x200) == b'\x00' * 0x200


def output_normal_mode(pe, args):
    '''
    print a human readable, multi-line report about the PE to stdout:
     timestamp (rendered in UTC), checksum, export name (when there is one),
     exports, imports (unless `args.no_imports` is set), sections, and
     the import hash.
    '''
    raw_timestamp = pe.IMAGE_NT_HEADERS.FileHeader.TimeDateStamp
    compiled_at = datetime.datetime.fromtimestamp(raw_timestamp, pytz.utc)
    print('timestamp: ' + compiled_at.isoformat())

    raw_checksum = pe.IMAGE_NT_HEADERS.OptionalHeader.CheckSum
    print('checksum: ' + hex(raw_checksum))

    if pe.getExportName():
        print('export name: ' + pe.getExportName())

    print('exports:')
    for (_, ordinal, export) in pe.getExports():
        print('  %d) %s' % (ordinal, export))

    show_imports = not args.no_imports
    if show_imports:
        print('imports:')
        for (_, library, symbol) in pe.getImports():
            print('  - %s.%s' % (library, symbol))

    print('sections:')
    for sec in pe.getSections():
        print('  - ' + sec.Name)
        # one line for the in-memory layout, one for the on-disk layout.
        print(''.join(['    virtual address: ', hex(sec.VirtualAddress),
                       '\tsize: ', hex(sec.VirtualSize)]))
        print(''.join(['    raw address:     ', hex(sec.PointerToRawData),
                       '\tsize: ', hex(sec.SizeOfRawData)]))

    print('imphash: ' + get_imphash(pe))


def output_bulk_mode(pe, args):
    '''
    print a single pipe-separated record about the PE to stdout, with the
     fields: input path, offset, export name (empty when there is none),
     timestamp (rendered in UTC), checksum, and import hash.
    '''
    record = {}
    record['filename'] = args.input
    record['offset'] = args.offset
    record['export_name'] = pe.getExportName() or ''

    compiled_at = datetime.datetime.fromtimestamp(
        pe.IMAGE_NT_HEADERS.FileHeader.TimeDateStamp, pytz.utc)
    record['timestamp'] = compiled_at.isoformat()

    record['checksum'] = hex(pe.IMAGE_NT_HEADERS.OptionalHeader.CheckSum)
    record['imphash'] = get_imphash(pe)

    print('{filename}|{offset}|{export_name}|{timestamp}|{checksum}|{imphash}'.format(**record))


def number(s):
    '''
    parse an integer from the string `s`, which is either decimal (`1024`)
     or hexadecimal with a `0x` prefix (`0x400`). the prefix is matched
     case-insensitively, and surrounding whitespace is ignored.

    Raises:
      ValueError: if `s` is not a valid number. argparse reports this as a
       usage error when the function is used as an argument `type`.
    '''
    text = s.strip()
    if text.lower().startswith('0x'):
        return int(text, 0x10)
    return int(text)


def main(argv=None):
    '''
    command line entry point: parse the arguments, open the input file at
     the requested offset, and print the PE features in either normal or
     bulk mode.

    Args:
      argv: list of command line arguments, without the program name.
       defaults to `sys.argv[1:]`.
    '''
    if argv is None:
        argv = sys.argv[1:]

    parser = argparse.ArgumentParser(description="Dump some PE file features from memory images.")
    parser.add_argument("input", type=str,
                        help="Path to input file")
    parser.add_argument("offset", type=number,
                        help="Offset from which to parse the PE image.")
    parser.add_argument("-v", "--verbose", action="store_true",
                        help="Enable debug logging")
    parser.add_argument("-q", "--quiet", action="store_true",
                        help="Disable all output but errors")
    parser.add_argument("--no-imports", dest='no_imports', action="store_true",
                        help="Don't show imports")

    parser.add_argument("--bulk-mode", dest='bulk_mode', action="store_true",
                        help="Output in bulk mode (|SV)")

    # parse the arguments that were handed in, so that callers that provide
    #  their own `argv` are honored.
    args = parser.parse_args(argv)

    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.quiet:
        logging.basicConfig(level=logging.ERROR)
    else:
        logging.basicConfig(level=logging.INFO)

    logger.debug('offset: 0x%x', args.offset)

    with open(args.input, 'rb') as f:
        fv = FileView(f, args.offset)

        # check the magic *before* handing the view to the PE parser: the parser
        #  shares the file pointer, so afterwards the current position is no
        #  longer known to be the start of the view.
        with restoring_offset(fv):
            fv.seek(0)
            if fv.read(0x2) != b'MZ':
                logger.warning('missing PE header!')

        pe = PE.PE(fv, inmem=guess_is_memory_image(fv))

        if args.bulk_mode:
            output_bulk_mode(pe, args)
        else:
            output_normal_mode(pe, args)


# when run as a script, hand the return value of `main` to `sys.exit`.
#  `main` has no return statement, so the exit status is zero unless an
#  exception propagates.
if __name__ == "__main__":
    sys.exit(main())