#!/usr/bin/env python
"""Tail an nginx access log and report per-request metrics to Datadog statsd.

Metrics submitted (see ``statsd_type``):

* ``nginx.net.response_code.<code>``  -- counter (statsd.increment)
* ``nginx.net.method.<method>``       -- counter (statsd.increment)
* ``nginx.net.cache.<status>``        -- counter (statsd.increment)
* ``nginx.net.response_time``         -- histogram (statsd.histogram)
* ``nginx.net.bytes_sent``            -- histogram (statsd.histogram)
"""
from pygtail import Pygtail
from datadog import statsd
import re

log_file = '/var/log/nginx/spfoodie-access.log'

# sample_line = ('127.0.0.1 - MISS - [19/Mar/2015:22:25:16 +0000] '
#                '"GET /stub-status HTTP/1.1" 404 32276 0.332 "-" "curl/7.35.0"')
sample_line = (
    '127.0.0.1 - - - [20/Mar/2015:08:51:16 -0400] "PURGE /vampire-cocktail/'
    'HTTP/1.1" 200 23122 0.509 "http://pinterest.com/pin/462111611740773604/'
    '?source_app=android" "Mozilla/5.0 '
    '(Linux; U; Android 4.1.2; el-gr; ST26i Build/11.2.A.0.31) '
    'AppleWebKit/534.30 (KHTML, like Gecko) Version/4.0 '
    'Mobile Safari/534.30 mod_pagespeed/1.8.31.5-4307"'
)

# Prefix shared by every metric name submitted by main().
name_prefix = 'nginx.net'

# Maps a parsed field name to the statsd call used to report it.
statsd_type = {
    'response_code': 'increment',
    'method': 'increment',
    'cache': 'increment',
    'response_time': 'histogram',
    'bytes_sent': 'histogram'
}

# Every fragment is a raw string so the regex escapes (\d, \w, \[ ...) reach
# the regex engine untouched instead of relying on invalid string escapes.
# NOTE(review): the named groups had lost their names in the source; they are
# restored from the keys of statsd_type and the parse_line() docstring.
# 'path' is a best guess for the request-target group -- main() ignores it
# because it is not a key of statsd_type. Confirm against the original.
log_re = re.compile(
    r'(\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) '
    r'- (?P<cache>(-|\w+)) - '
    r'\[(\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} '
    r'(\+|\-)\d{4})\] '
    r'((\"(?P<method>\w+) )(?P<path>.+)(http\/\d\.\d\")) '
    r'(?P<response_code>\d{3}) '
    r'(?P<bytes_sent>\d+) '
    r'(?P<response_time>\d+\.\d+) '
    r'(["]((\-)|(.+))["]) '
    r'(["](.+)["])',
    re.IGNORECASE)


def parse_line(line):
    """
    Given a log line, returns a dict of values, or None when the line does
    not match ``log_re``.

    All values are strings exactly as they appear in the line:

    cache: '-' or a word (e.g. HIT, MISS)
    method: (GET, POST, PUT, PURGE, ...)
    path: text between the method and the HTTP version
    response_time: (seconds)
    response_code: (404, 302, ...)
    bytes_sent: digits
    """
    match = log_re.match(line.strip())
    return match.groupdict() if match else None


def main():
    """
    Pull unprocessed lines from a log file. For each line and
    each extracted metric, submit the data.

    Lines that do not match ``log_re`` are reported on stdout and skipped.
    """
    for line in Pygtail(log_file):
        data = parse_line(line)
        if not data:
            # Single-argument print() behaves the same on Python 2 and 3.
            print("Error parsing {0}".format(line))
            continue

        # A '-' in the cache field is reported under the name 'disabled'.
        if data['cache'] == '-':
            data['cache'] = 'disabled'

        # .items() works on both Python 2 and 3 (iteritems() is 2-only).
        for k, v in data.items():
            kind = statsd_type.get(k)
            if kind == 'increment':
                # One counter per distinct value, e.g. nginx.net.method.get
                metric_name = "{0}.{1}.{2}".format(name_prefix, k, v.lower())
                statsd.increment(metric_name)
            elif kind == 'histogram':
                metric_name = "{0}.{1}".format(name_prefix, k)
                # The regex only lets digits (and one '.') through, so the
                # conversion cannot fail; submit a number, not a string.
                statsd.histogram(metric_name, float(v))


def test():
    """Print the fields parsed from the built-in sample line."""
    print(parse_line(sample_line))


if __name__ == '__main__':
    main()
    # test()