Skip to content

Instantly share code, notes, and snippets.

@phonglh79
Forked from hreeder/parser.py
Created January 6, 2021 08:35
Show Gist options
  • Save phonglh79/1c2da1d6ea626cc9e08c61200a5dda6f to your computer and use it in GitHub Desktop.

Revisions

  1. @hreeder hreeder revised this gist Apr 15, 2015. 1 changed file with 25 additions and 13 deletions.
    38 changes: 25 additions & 13 deletions parser.py
    Original file line number Diff line number Diff line change
    @@ -1,15 +1,12 @@
    #!/usr/bin/env python
    import apachelog
    #!/usr/bin/env python
    import gzip
    import os
    import sys
    import re

    from apachelog import ApacheLogParserError
    INPUT_DIR = "nginx-logs"

    INPUT_DIR = "nginx-logs"

    nformat = r'%h %l %u %t \"%r\" %>s %b \"%i\" \"%{User-Agent}i\" \"%V\"'
    logparser = apachelog.parser(nformat)
    lineformat = re.compile(r"""(?P<ipaddress>\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}) - - \[(?P<dateandtime>\d{2}\/[a-z]{3}\/\d{4}:\d{2}:\d{2}:\d{2} (\+|\-)\d{4})\] ((\"(GET|POST) )(?P<url>.+)(http\/1\.1")) (?P<statuscode>\d{3}) (?P<bytessent>\d+) (["](?P<refferer>(\-)|(.+))["]) (["](?P<useragent>.+)["])""", re.IGNORECASE)

    for f in os.listdir(INPUT_DIR):
    if f.endswith(".gz"):
    @@ -18,10 +15,25 @@
    logfile = open(os.path.join(INPUT_DIR, f))

    for l in logfile.readlines():
    try:
    data = logparser.parse(l)
    print data
    except ApacheLogParserError:
    sys.stderr.write("Unable to parse %s" % l)
    data = re.search(lineformat, l)
    if data:
    datadict = data.groupdict()
    ip = datadict["ipaddress"]
    datetimestring = datadict["dateandtime"]
    url = datadict["url"]
    bytessent = datadict["bytessent"]
    referrer = datadict["refferer"]
    useragent = datadict["useragent"]
    status = datadict["statuscode"]
    method = data.group(6)

    logfile.close()
    print ip, \
    datetimestring, \
    url, \
    bytessent, \
    referrer, \
    useragent, \
    status, \
    method

    logfile.close()
  2. @hreeder hreeder created this gist Apr 15, 2015.
    27 changes: 27 additions & 0 deletions parser.py
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,27 @@
    #!/usr/bin/env python
    import apachelog
    import gzip
    import os
    import sys

    from apachelog import ApacheLogParserError

    INPUT_DIR = "nginx-logs"

    nformat = r'%h %l %u %t \"%r\" %>s %b \"%i\" \"%{User-Agent}i\" \"%V\"'
    logparser = apachelog.parser(nformat)

    # Walk every entry in INPUT_DIR, parse each log line with the module-level
    # `logparser`, and print the parsed record; unparseable lines are echoed
    # to stderr instead of aborting the run.
    for f in os.listdir(INPUT_DIR):
        path = os.path.join(INPUT_DIR, f)

        # os.listdir also returns sub-directories; opening one would raise,
        # so only regular files are processed.
        if not os.path.isfile(path):
            continue

        # Files ending in ".gz" are read through gzip, everything else as-is.
        opener = gzip.open if f.endswith(".gz") else open
        logfile = opener(path)

        # try/finally (rather than `with`) so the handle is always closed,
        # even on an unexpected error, without requiring context-manager
        # support from the gzip file object.
        try:
            # Iterate the file object directly so the whole log is never
            # held in memory at once (the previous readlines() call did that).
            for l in logfile:
                try:
                    data = logparser.parse(l)
                except ApacheLogParserError:
                    # `l` still carries its trailing newline, so none is added.
                    sys.stderr.write("Unable to parse %s" % l)
                else:
                    # Single-argument call form prints identically under the
                    # Python 2 print statement and the Python 3 function.
                    print(data)
        finally:
            logfile.close()