#!/usr/bin/env python # -*- coding: utf-8 -*- """ WSGI middleware to record requests and responses. """ from __future__ import print_function, unicode_literals import logging import time # Required to be loaded early to avoid hitting deadlock situation when processing requests # See http://code.google.com/p/modwsgi/wiki/ApplicationIssues (at the bottom, under Non Blocking Module Imports) import _strptime import itertools from cStringIO import StringIO class RequestResponseState(object): """Capture the data for a request-response.""" def __init__(self, id, method, url, request_headers, content_length, request_body): self.request_id = id self.method = method self.url = url self.request_headers = request_headers self.content_length = content_length self.request_body = request_body self.status = -1 self.response_headers = None self.response_chunks = None self.duration_msecs = 0 self.started_at = time.time() def start_response(self, status, response_headers): self.status = status self.response_headers = response_headers def finish_response(self, response_chunks): self.duration_msecs = 1000.0 * (time.time() - self.started_at) self.response_chunks = response_chunks return response_chunks class SessionRecorderMiddleware(object): """WSGI Middleware for recording of request-response""" def __init__(self, app, recorder): self.app = app self.recorder = recorder self.request_counter = itertools.count().next # Threadsafe counter def __call__(self, environ, start_response): state = RequestResponseState( self.request_counter(), environ['REQUEST_METHOD'], self.request_url(environ), [(k, v) for k,v in self.parse_request_headers(environ)], *self.request_body(environ) ) def _start_response(status, response_headers, *args): # Capture status and response_headers for later processing state.start_response(status, response_headers) return start_response(status, response_headers, *args) response_chunks = state.finish_response( self.app(environ, _start_response) ) self.recorder(state) # return data to WSGI server return response_chunks def request_url(self, environ): return '{0}{1}{2}'.format( environ.get('SCRIPT_NAME', ''), environ.get('PATH_INFO', ''), '?' + environ['QUERY_STRING'] if environ.get('QUERY_STRING') else '', ) _parse_headers_special = { 'HTTP_CGI_AUTHORIZATION': 'Authorization', 'CONTENT_LENGTH': 'Content-Length', 'CONTENT_TYPE': 'Content-Type', } def parse_request_headers(self, environ): for cgi_var, value in environ.iteritems(): if cgi_var in self._parse_headers_special: yield self._parse_headers_special[cgi_var], value elif cgi_var.startswith('HTTP_'): yield cgi_var[5:].title().replace('_', '-'), value def request_body(self, environ): content_length = environ.get('CONTENT_LENGTH') body = '' if content_length: if content_length == '-1': # This is a special case, where the content length is basically undetermined body = environ['wsgi.input'].read(-1) content_length = len(body) else: content_length = int(content_length) body = environ['wsgi.input'].read(content_length) environ['wsgi.input'] = StringIO(body) # reset request body for the nested app else: content_length = 0 return content_length, body def is_binary_content_type(content_type): type_subtype = content_type.split(';') _type, subtype = type_subtype.split('/') if _type == 'text': return False elif _type == 'application': return subtype not in ( 'atom+xml', 'ecmascript', 'json', 'javascript', 'rss+xml', 'soap+xml', 'xhtml+xml') else: return True def log_results(state): # TODO: create an HttpArchive data = [ 'SR: {0}'.format(state.request_id), '{0} {1}'.format(state.method, state.url), str(state.request_headers), # TODO: sanitize binary request body => look at request Content-Type '{0} bytes: {1}'.format(state.content_length, state.request_body or ''), '=> {0} :: {1:.3f} ms :: {2}'.format( state.status, state.duration_msecs, str(state.response_headers)), ] + ( # TODO: sanitize binary response body => look at response Content-Type state.response_chunks ) + ['========'] logging.info('\n'.join(data)) # TODO: unit tests