Created
March 8, 2012 07:11
-
-
Save christopherhesse/1999345 to your computer and use it in GitHub Desktop.
Revisions
-
christopherhesse created this gist
Mar 8, 2012 .There are no files selected for viewing
# Decode saved cache-viewer HTML pages back into the response bodies they
# describe.
#
# Each input file is expected to contain a series of <pre> blocks: the first
# holds the raw HTTP response headers, the following ones hold hex dumps of
# the cached data streams.  (Presumably pages saved from a browser's cache
# viewer -- confirm against the tool that produced your files.)
#
# Usage: python <this script> FILE [FILE ...]
#
# Written to run on both Python 2.7 and Python 3: the Python-2-only modules
# ``mimetools`` and ``StringIO`` are replaced by ``email`` and ``io``.

import email
import gzip
import io
import mimetypes
import re
import sys

# Non-greedy so every <pre>...</pre> pair is captured on its own; DOTALL lets
# one block span several lines.
_PRE_BLOCK = re.compile('<pre>(.*?)</pre>', re.DOTALL)

# Maximum number of hex byte columns read from one dump line (the columns
# directly after the leading offset field).
_BYTES_PER_LINE = 16


def parse_headers(raw_headers):
    """Parse a raw HTTP response header block into a dict.

    The first line (the status line) is discarded; the remaining lines are
    parsed as RFC 822 style ``Name: value`` headers.

    Args:
        raw_headers: Header text, status line first, lines separated by
            ``'\\n'``.

    Returns:
        A dict mapping lower-cased header names to their values.  When a
        header occurs more than once the last occurrence wins.  A block that
        consists of a status line only yields an empty dict.
    """
    # partition() instead of split()-and-unpack: a header block without any
    # newline simply has no header lines rather than raising ValueError.
    _status_line, _sep, headers_text = raw_headers.partition('\n')
    message = email.message_from_string(headers_text)
    # email keeps the original spelling of header names; callers look them
    # up in lower case, so normalise here.
    return dict((name.lower(), value) for name, value in message.items())


def filter_blank(lines):
    """Yield each item of *lines* stripped of surrounding whitespace.

    Items that are empty after stripping are skipped.
    """
    for line in lines:
        stripped = line.strip()
        if stripped:
            yield stripped


def convert_cache_line(cache_line):
    """Convert one hex dump line into the bytes it encodes.

    The line is split on single spaces.  The first field (the offset) is
    skipped and up to 16 following fields are read as two-digit hex byte
    values.  Reading stops at the first empty field, which is what the
    padding after a short final row produces.

    Args:
        cache_line: One dump line, e.g. ``'00000000: 48 54 54 50  HTTP'``.

    Returns:
        The decoded bytes (``str`` on Python 2, ``bytes`` on Python 3).

    Raises:
        ValueError: If a field is not a valid hex byte value.
    """
    decoded = bytearray()
    for token in cache_line.split(' ')[1:1 + _BYTES_PER_LINE]:
        if not token:
            break
        decoded.append(int(token, 16))
    return bytes(decoded)


def gzip_decompress(data):
    """Return the decompressed content of the gzip-compressed bytes *data*."""
    with gzip.GzipFile(fileobj=io.BytesIO(data), mode='rb') as stream:
        return stream.read()


def decode_cache_html(cache_html):
    """Extract the headers and data segments from a cache viewer page.

    Args:
        cache_html: Text of the saved page.

    Returns:
        A ``(headers, segments)`` tuple.  ``headers`` is the dict produced by
        ``parse_headers`` from the first ``<pre>`` block.  ``segments`` is a
        list with one bytes object per following non-blank ``<pre>`` block,
        each being the concatenation of that block's decoded dump lines.

    Raises:
        IndexError: If the page contains no ``<pre>`` block at all.
    """
    blocks = _PRE_BLOCK.findall(cache_html)
    if not blocks:
        # Same exception type the bare ``matches[0]`` lookup used to raise,
        # but with a message that says what is wrong.
        raise IndexError('no <pre> blocks found in cache page')

    headers = parse_headers(blocks[0])
    segments = [
        b''.join(convert_cache_line(line)
                 for line in filter_blank(block.split('\n')))
        for block in filter_blank(blocks[1:])
    ]
    return headers, segments


def _read_text(filename):
    """Read *filename* and return its content as the native ``str`` type."""
    with open(filename, 'rb') as input_file:
        raw = input_file.read()
    if isinstance(raw, str):
        # Python 2: bytes already are the native string type.
        return raw
    # Python 3: latin-1 maps every byte to a character, so decoding cannot
    # fail; the hex dump itself only needs ASCII.
    return raw.decode('latin-1')


def _guess_extension(headers):
    """Return a file extension for the response's content type, or ``''``."""
    content_type = headers.get('content-type', '').split(';')[0].strip()
    candidates = mimetypes.guess_all_extensions(content_type)
    # A missing or unknown content type has no known extension; write the
    # file without one rather than failing.
    return candidates[0] if candidates else ''


def main(argv=None):
    """Decode every cache page named on the command line.

    For each input file the second data segment is taken as the response
    body, gunzipped when the ``Content-Encoding`` header says ``gzip``, and
    written to ``<input name>-decoded<extension>``, where the extension is
    derived from the ``Content-Type`` header (empty when unknown).

    Args:
        argv: List of input file names.  Defaults to ``sys.argv[1:]``.

    Raises:
        IndexError: If a page holds fewer than two data segments.
    """
    filenames = sys.argv[1:] if argv is None else argv
    for filename in filenames:
        headers, segments = decode_cache_html(_read_text(filename))

        # segments[0] is usually the header + certificate
        if len(segments) < 2:
            raise IndexError(
                '{0}: expected at least 2 data segments, found {1}'.format(
                    filename, len(segments)))
        data = segments[1]

        encoding = headers.get('content-encoding', '').strip().lower()
        if encoding == 'gzip':
            data = gzip_decompress(data)

        output_filename = filename + '-decoded' + _guess_extension(headers)
        print('writing to {0}'.format(output_filename))
        # Binary mode: the body is raw bytes and must not go through newline
        # translation or text encoding.
        with open(output_filename, 'wb') as output_file:
            output_file.write(data)


if __name__ == "__main__":
    main()