Skip to content

Instantly share code, notes, and snippets.

@mk-qi
Forked from eqhmcow/apache-logtop-README
Last active August 29, 2015 14:23
Show Gist options
  • Save mk-qi/dd43d55ff0c2e700231d to your computer and use it in GitHub Desktop.
Save mk-qi/dd43d55ff0c2e700231d to your computer and use it in GitHub Desktop.

Revisions

  1. @eqhmcow eqhmcow renamed this gist Mar 22, 2013. 1 changed file with 0 additions and 0 deletions.
    File renamed without changes.
  2. @eqhmcow eqhmcow revised this gist Jul 5, 2011. 1 changed file with 3 additions and 3 deletions.
    6 changes: 3 additions & 3 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -65,10 +65,10 @@ sub init_time
    $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s # remote logname
    (?:\S+\s)+ # remote user
    \S+\s+ # remote logname
    (?:\S+\s+)+ # remote user
    \[([^]]+)\]\s # date
    "(\S+)\s? # method
    "(\S*)\s? # method
    (?:((?:[^"]*(?:\\")?)*)\s # URL
    ([^"]*)"\s| # protocol
    ((?:[^"]*(?:\\")?)*)"\s) # or, possibly URL with no protocol
  3. @eqhmcow eqhmcow revised this gist Jun 24, 2011. 3 changed files with 102 additions and 30 deletions.
    32 changes: 19 additions & 13 deletions README
    Original file line number Diff line number Diff line change
    @@ -2,7 +2,8 @@ DESCRIPTION

    logstat.pl and logtop.pl

    These scripts show current and average request-per-second counts based on apache access logs in real-time.
    These scripts show current and average request-per-second counts based on
    apache access logs in real-time.

    You can see the total requests-per-second as well as a breakdown by:
    * vhost
    @@ -24,14 +25,16 @@ The output shows:
    * and the request's vhost, URL, user agent or referrer.

    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and
    http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to
    mimic apachetop, but the functionality is broadly similar.
    http://packages.debian.org/search?keywords=apachetop . This script is not
    based on or intended to mimic apachetop, but the functionality is broadly
    similar.

    SYNOPSIS

    To use:

    In one screen, run the log parser. It writes out a stats file which the other scripts use.
    In one screen, run the log parser. It writes out a stats file which the
    other scripts use.

    $ cd /dev/shm # use shared memory
    $ tail -F /var/log/httpd/access.log | ~/logstat.pl # centralized logging helps here
    @@ -68,7 +71,7 @@ etc.

    EXAMPLE

    Every 5.0s: ~/logtop.pl Fri Jun 24 03:31:59 2011
    Every 5.0s: ~/logtop.pl Fri Jun 24 03:31:59 2011

    Fri Jun 24 03:31:58 2011 hps: 2, average: 5.35, 4.45, 2.37, 0 seconds ago, total
    vhosts sorted by five
    @@ -79,17 +82,20 @@ Fri Jun 24 03:31:34 2011 hps: 2, average: 0.99, 0.33, 0.12, 24 seconds ag

    NOTES

    * The script's tally sub is properly abstracted so this script could be quite easily modified to tally
    and report averages on anything you can count (not just httpd logs)
    * The script's tally sub is properly abstracted so this script could be
    quite easily modified to tally and report averages on anything you can count
    (not just httpd logs)

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)
    * The log parsing regex matches the "v-combined" log format (combined with
    the virtual host at the front)

    * The logtop script accepts "match" and/or "skip" arguments to only show events that match (or do
    not match) a given regex.
    * The logtop script accepts "match" and/or "skip" arguments to only show
    events that match (or do not match) a given regex.

    TODO

    * Persist historical data and reload on restart (this can be done asynchronously with another client script)
    * Persist historical data and reload on restart (this can be done
    asynchronously with another client script)

    * Persist position in log file (using ftell) to recover from where we left off on restart (this should
    account for log file rotation)
    * Persist position in log file (using ftell) to recover from where we left
    off on restart (this should account for log file rotation)
    83 changes: 69 additions & 14 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -6,10 +6,12 @@
    use Time::HiRes 'time';
    use List::Util qw/sum/;
    use Storable;
    use Digest::MD5 'md5_hex';
    #use Data::Dumper;

    my %data; # internal (server-only) data
    my %shared; # data shared with clients
    my %keys; # keys to data shared with clients

    sub init
    {
    @@ -48,6 +50,15 @@ sub init_time
    # TODO: read previous data from persistent store on restart
    # (have a separate client that persists data)

    my $stotal = 0; # FIXME debug

    # update this when adding new categories
    my @data = qw/vhosts urls ua referrer/;

    foreach my $key (@data) {
    mkdir $key;
    }

    while (<>) {
    my $nt = time();
    my ($vhost, $date, $method, $url, $protocol, $alt_url, $code, $bytes,
    @@ -72,24 +83,29 @@ sub init_time

    # vhost counts
    $data{vhosts}{$vhost} ||= $shared{vhosts}{$vhost} ||= {};
    tally($data{vhosts}{$vhost}, $shared{vhosts}{$vhost}, $nt);
    tally($data{vhosts}{$vhost}, $shared{vhosts}{$vhost}, $nt,
    'vhosts', $vhost);

    # url counts
    $data{urls}{"$vhost $url"} ||= $shared{urls}{"$vhost $url"} ||= {};
    tally($data{urls}{"$vhost $url"}, $shared{urls}{"$vhost $url"}, $nt);
    my $vhost_url = "$vhost $url";
    $data{urls}{$vhost_url} ||= $shared{urls}{$vhost_url} ||= {};
    tally($data{urls}{$vhost_url}, $shared{urls}{$vhost_url}, $nt,
    'urls', $vhost_url);

    # user agent counts
    $data{ua}{$ua} ||= $shared{ua}{$ua} ||= {};
    tally($data{ua}{$ua}, $shared{ua}{$ua}, $nt);
    tally($data{ua}{$ua}, $shared{ua}{$ua}, $nt, 'ua', $ua);

    # referrer counts
    $data{referrer}{$referrer} ||= $shared{referrer}{$referrer} ||= {};
    tally($data{referrer}{$referrer}, $shared{referrer}{$referrer}, $nt);
    tally($data{referrer}{$referrer}, $shared{referrer}{$referrer}, $nt,
    'referrer', $referrer);

    # TODO: status codes? bytes? (bytes would require some additional work to
    # be useful)

    # total hit count
    print '.'; # FIXME debug
    next unless tally(\%data, \%shared, $nt);

    # show the last line we parsed; this can be used to confirm we're parsing
    @@ -100,29 +116,49 @@ sub init_time
    # due to a slow network) the date may be seconds or even minutes in the
    # past, even though the line was actually written to the log much more
    # recently than that (i.e. just now)
    print "last line parsed: [$vhost] [$date] [$method] [$url] [$protocol] [$alt_url] [$code] [$bytes] [$referrer] [$ua]\n\n";
    print "\n\nlast line parsed: [$vhost] [$date] [$method] [$url] " .
    "[$protocol] [$alt_url] [$code] [$bytes] [$referrer] [$ua]\n\n";

    # show the last second's total hits-per-second and the historical average
    # and last prune data
    # and the result of the last prune
    my $pt = $prune_time - $nt;
    show(\%shared, sprintf(" total\nnext prune in %.0f seconds; last $last_prune_took\n\n", $pt));
    show(\%shared, sprintf(
    " total\nnext prune in %.0f seconds; " .
    "last $last_prune_took\n", $pt));

    # store the data for clients to read.
    # store the total and keys to the categorized data for clients to read.
    # NOTE: change the working directory to /dev/shm before starting these
    # scripts to use shared memory; this can be considerably faster than
    # writing the shared data to local disk
    # NOTE: rename is used to atomically update the data file so clients do
    # not try to read the data while we're writing it
    store(\%shared, 'logstats.data.tmp');
    rename('logstats.data.tmp', 'logstats.data');
    my $nst = time(); # FIXME debug
    $keys{total} = {
    hps => $shared{hps},
    one => $shared{one},
    five => $shared{five},
    fifteen => $shared{fifteen},
    time => $shared{time},
    };
    my $entry_count = 0;
    foreach my $key (@data) {
    my $count = keys %{ $keys{$key} };
    $entry_count += $count;
    print "$count $key; ";
    }
    store(\%keys, 'logstats.keys.tmp');
    rename('logstats.keys.tmp', 'logstats.keys');
    $stotal += time() - $nst; # FIXME debug
    printf "$entry_count total entries; store took %.3f seconds\n\n", $stotal;
    $stotal = 0;

    # prune old data to prevent using too much memory
    # NOTE: this does block (for a hopefully short amount of time); we
    # should catch back up quickly, though
    next unless $pt <= 0;
    $pt = time();
    print "pruning data...\n";
    my $entry_count = 0;
    $entry_count = 0;
    my $prune_count = 0;
    $prune_time = $nt + $prune_every;
    foreach my $k (keys %data) {
    @@ -133,6 +169,8 @@ sub init_time
    $entry_count++;
    next unless (ref($val) || '') eq 'HASH'
    and $nt - $val->{time} > $prune_every;
    unlink("$k/" . $keys{$k}{$key});
    delete $keys{$k}{$key};
    delete $v->{$key};
    delete $shared{$k}{$key};
    $prune_count++;
    @@ -147,11 +185,15 @@ sub init_time
    sub tally
    {
    my $nt = $_[2] || time();
    init($_[0], $_[1], $nt) unless defined $_[0]->{count};
    my $init = defined $_[0]->{count};
    init($_[0], $_[1], $nt) unless $init;

    # count the event
    $_[0]->{count}++;

    # save data if this is the first time we've seen this event
    save($_[0], $_[1], $_[3], $_[4]) if $_[3] && !$init;

    # add $count to per-minute count and reset $count every second
    my $diff = $nt - $_[1]->{time};
    return 0 unless $diff >= 1;
    @@ -183,6 +225,7 @@ sub tally
    $shared->{one} = $data->{counts}[1] / 60;
    $shared->{five} = sum(@{$data->{counts}}[1..5]) / 5 / 60;
    $shared->{fifteen} = sum(@{$data->{counts}}[1..15]) / 15 / 60;
    save($data, $shared, $_[3], $_[4]) if $_[3];
    return 1;
    }

    @@ -198,14 +241,26 @@ sub tally
    no warnings 'uninitialized';
    $shared->{five} = sum($count, @{$data->{counts}}[1..5]) / 6 / 60;
    $shared->{fifteen} = sum($count, @{$data->{counts}}[1..15]) / 16 / 60;
    save($data, $shared, $_[3], $_[4]) if $_[3];
    return 1;
    }

    sub show
    {
    my $shared = $_[0];
    print scalar localtime($shared->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ", $shared->{hps}, $shared->{one}, $shared->{five}, $shared->{fifteen};
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ",
    $shared->{hps}, $shared->{one}, $shared->{five}, $shared->{fifteen};
    print $_[1] || "\n";
    return;
    }

    sub save
    {
    my $nst = time(); # FIXME debug
    my $file = $_[2] . '/' . ($keys{$_[2]}{$_[3]} ||= md5_hex($_[3]));
    store($_[1], "$file.tmp");
    rename("$file.tmp", $file);
    $stotal += time() - $nst; # FIXME debug
    return;
    }
    17 changes: 14 additions & 3 deletions logtop.pl
    Original file line number Diff line number Diff line change
    @@ -23,13 +23,24 @@
    $match_re = qr/$match/i if $match;
    $skip_re = qr/$skip/i if $skip;

    my $data = retrieve("logstats.data");
    # load data
    my $rt = time();
    my $datakeys = retrieve('logstats.keys');

    my $now = time();
    my $data = $datakeys->{total};
    foreach my $key (keys %{ $datakeys->{$type} }) {
    my $md5 = $datakeys->{$type}{$key};
    my $d;
    # file may have been pruned; skip if we can't read it
    eval { $d = retrieve("$type/$md5") };
    next if $@;
    $data->{$type}{$key} = $d;
    }

    my $now = time();
    show($data, " total\n", $now - $data->{time});

    print "$type sorted by $sort\n";
    printf "loaded data in %.3f seconds - $type sorted by $sort\n", $now - $rt;

    my @stale;
    foreach my $event (sort
  4. @eqhmcow eqhmcow revised this gist Jun 24, 2011. 1 changed file with 2 additions and 2 deletions.
    4 changes: 2 additions & 2 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -54,8 +54,8 @@ sub init_time
    $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s
    \S+\s
    \S+\s # remote logname
    (?:\S+\s)+ # remote user
    \[([^]]+)\]\s # date
    "(\S+)\s? # method
    (?:((?:[^"]*(?:\\")?)*)\s # URL
  5. @eqhmcow eqhmcow revised this gist Jun 24, 2011. 4 changed files with 178 additions and 108 deletions.
    81 changes: 60 additions & 21 deletions README
    Original file line number Diff line number Diff line change
    @@ -1,15 +1,27 @@
    DESCRIPTION

    These scripts show total, per-vhost and per-URL request-per-second counts based on apache
    access logs in real-time.
    logstat.pl and logtop.pl

    These scripts show current and average request-per-second counts based on apache access logs in real-time.

    You can see the total requests-per-second as well as a breakdown by:
    * vhost
    * URL
    * user agent
    * or referrer.

    You can sort by:
    * number of requests in the last second
    * time of last stats update
    * average requests-per-second over the last one, five or fifteen minutes

    The output shows:
    * date of last stats update
    * last second's hits per second (hps)
    * one, five and fifteen minute hps average
    * seconds since last request
    * asterisk marks request hit in the last 5 seconds
    * vhost or URL requested
    * the date of the last stats update
    * the last second's hits per second (hps)
    * the one, five and fifteen minute hps average
    * the seconds since last request
    * an asterisk to mark requests hit in the last 5 seconds
    * and the request's vhost, URL, user agent or referrer.

    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and
    http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to
    @@ -24,33 +36,60 @@ In one screen, run the log parser. It writes out a stats file which the other sc
    $ cd /dev/shm # use shared memory
    $ tail -F /var/log/httpd/access.log | ~/logstat.pl # centralized logging helps here

    In another screen, view vhost stats:
    In another screen, view your stats:

    $ cd /dev/shm # use shared memory

    # defaults to vhosts sorted by five minute average
    $ watch -n 5 -- ~/logtop.pl

    In a third screen, view URL stats:
    or

    $ cd /dev/shm # use shared memory
    $ watch -n 5 -- '~/logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''
    # URLs sorted by five minute average, ignoring images, JS and CSS
    $ watch -n 5 -- '~/logtop.pl urls \
    --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''

    EXAMPLE
    or

    # user agents sorted by requests in the last second
    $ watch -n 5 -- ~/logtop.pl ua --sort hps

    or

    Every 5.0s: ./logtop.pl Wed Jun 22 09:55:54 2011
    # referrers sorted by one minute average
    $ watch -n 5 -- ~/logtop.pl referrer --sort one

    Wed Jun 22 09:55:54 2011 hps: 9, average: 12.87, 13.19, 13.33, 0 seconds ago, total
    or

    Wed Jun 22 09:55:48 2011 hps: 17, average: 5.66, 1.90, 0.86, 6 seconds ago, example.com
    Wed Jun 22 09:55:45 2011 hps: 6, average: 1.17, 1.43, 1.48, 9 seconds ago, example.org
    Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ago, * example.net
    # URLs sorted by last stats update
    $ watch -n 5 -- ~/logtop.pl urls --sort time

    etc.

    EXAMPLE

    Every 5.0s: ~/logtop.pl Fri Jun 24 03:31:59 2011

    Fri Jun 24 03:31:58 2011 hps: 2, average: 5.35, 4.45, 2.37, 0 seconds ago, total
    vhosts sorted by five
    Fri Jun 24 03:31:52 2011 hps: 0, average: 0.41, 0.59, 0.35, 6 seconds ago, example.com
    Fri Jun 24 03:31:54 2011 hps: 1, average: 1.17, 0.58, 0.23, 4 seconds ago, * example.net
    Fri Jun 24 03:31:34 2011 hps: 2, average: 0.99, 0.33, 0.12, 24 seconds ago, example.org
    ...

    NOTES

    * The script's tally sub is properly abstracted so this script could be modified to tally and report
    averages on anything you can count (not just httpd logs)
    * The script's tally sub is properly abstracted so this script could be quite easily modified to tally
    and report averages on anything you can count (not just httpd logs)

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)

    * The logtop-urls script accepts "match" and/or "skip" arguments to only show URLs that match (or do
    * The logtop script accepts "match" and/or "skip" arguments to only show events that match (or do
    not match) a given regex.

    TODO

    * Persist historical data and reload on restart (this can be done asynchronously with another client script)

    * Persist position in log file (using ftell) to recover from where we left off on restart (this should
    account for log file rotation)
    96 changes: 81 additions & 15 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -17,10 +17,12 @@ sub init
    my $shared = $_[1];
    $data->{count} = 0;
    $data->{counts} = [0];
    $shared->{hps} = 0;
    $shared->{one} = 0;
    $shared->{five} = 0;
    $shared->{fifteen} = 0;
    init_time($data, $shared, $_[2]);
    return;
    }

    sub init_time
    @@ -36,8 +38,16 @@ sub init_time
    }
    return if $mode == 1;
    $data->{minute} = $shared->{time} + 60;
    return;
    }

    my $prune_every = 60 * 20; # prune every 20 minutes
    my $prune_time = time() + $prune_every;
    my $last_prune_took = "prune hasn't happened yet";

    # TODO: read previous data from persistent store on restart
    # (have a separate client that persists data)

    while (<>) {
    my $nt = time();
    my ($vhost, $date, $method, $url, $protocol, $alt_url, $code, $bytes,
    @@ -59,41 +69,96 @@ sub init_time
    die "Couldn't match $_" unless $vhost;
    $alt_url ||= '';
    $url ||= $alt_url;

    # vhost counts
    $data{vhosts}{$vhost} ||= {};
    $shared{vhosts}{$vhost} ||= {};
    $data{vhosts}{$vhost} ||= $shared{vhosts}{$vhost} ||= {};
    tally($data{vhosts}{$vhost}, $shared{vhosts}{$vhost}, $nt);

    # url counts
    $data{urls}{"$vhost $url"} ||= {};
    $shared{urls}{"$vhost $url"} ||= {};
    $data{urls}{"$vhost $url"} ||= $shared{urls}{"$vhost $url"} ||= {};
    tally($data{urls}{"$vhost $url"}, $shared{urls}{"$vhost $url"}, $nt);

    # TODO: user agents? referrers? status codes? bytes?
    # user agent counts
    $data{ua}{$ua} ||= $shared{ua}{$ua} ||= {};
    tally($data{ua}{$ua}, $shared{ua}{$ua}, $nt);

    # referrer counts
    $data{referrer}{$referrer} ||= $shared{referrer}{$referrer} ||= {};
    tally($data{referrer}{$referrer}, $shared{referrer}{$referrer}, $nt);

    # TODO: status codes? bytes? (bytes would require some additional work to
    # be useful)

    # total hit count
    if (tally(\%data, \%shared, $nt)) {
    print "last line parsed: [$vhost] [$date] [$method] [$url] [$protocol] [$alt_url] [$code] [$bytes] [$referrer] [$ua]\n\n";
    show(\%shared, " total\n\n");
    store(\%shared, 'logstats.data.tmp');
    rename('logstats.data.tmp', 'logstats.data');
    next unless tally(\%data, \%shared, $nt);

    # show the last line we parsed; this can be used to confirm we're parsing
    # data in real-time (and that we're parsing the log correctly)
    # NOTE: the date from the apache log is the date the HTTP request
    # started, but apache writes the request to the log when the request
    # finishes. This means if the request took a long time to serve (e.g.
    # due to a slow network) the date may be seconds or even minutes in the
    # past, even though the line was actually written to the log much more
    # recently than that (i.e. just now)
    print "last line parsed: [$vhost] [$date] [$method] [$url] [$protocol] [$alt_url] [$code] [$bytes] [$referrer] [$ua]\n\n";

    # show the last second's total hits-per-second and the historical average
    # and last prune data
    my $pt = $prune_time - $nt;
    show(\%shared, sprintf(" total\nnext prune in %.0f seconds; last $last_prune_took\n\n", $pt));

    # store the data for clients to read.
    # NOTE: change the working directory to /dev/shm before starting these
    # scripts to use shared memory; this can be considerably faster than
    # writing the shared data to local disk
    # NOTE: rename is used to atomically update the data file so clients do
    # not try to read the data while we're writing it
    store(\%shared, 'logstats.data.tmp');
    rename('logstats.data.tmp', 'logstats.data');

    # prune old data to prevent using too much memory
    # NOTE: this does block (for a hopefully short amount of time); we
    # should catch back up quickly, though
    next unless $pt <= 0;
    $pt = time();
    print "pruning data...\n";
    my $entry_count = 0;
    my $prune_count = 0;
    $prune_time = $nt + $prune_every;
    foreach my $k (keys %data) {
    my $v = $data{$k};
    next unless (ref($v) || '') eq 'HASH';
    foreach my $key (keys %$v) {
    my $val = $shared{$k}{$key};
    $entry_count++;
    next unless (ref($val) || '') eq 'HASH'
    and $nt - $val->{time} > $prune_every;
    delete $v->{$key};
    delete $shared{$k}{$key};
    $prune_count++;
    }
    }
    my $elapsed = time() - $pt;
    $last_prune_took =
    sprintf "pruned $prune_count stale out of $entry_count total entries in %.3f seconds.", $elapsed;
    print "$last_prune_took\n\n";
    }

    sub tally
    {
    my $data = $_[0];
    my $shared = $_[1];
    my $nt = $_[2] || time();
    init($data, $shared, $nt) unless defined $data->{count};
    init($_[0], $_[1], $nt) unless defined $_[0]->{count};

    # count the event
    $data->{count}++;
    $_[0]->{count}++;

    # add $count to per-minute count and reset $count every second
    my $diff = $nt - $shared->{time};
    my $diff = $nt - $_[1]->{time};
    return 0 unless $diff >= 1;

    my $data = $_[0];
    my $shared = $_[1];

    init_time($data, $shared, $nt, 1);
    $shared->{hps} = $data->{count} / $diff;
    $data->{count} = 0;
    @@ -142,4 +207,5 @@ sub show
    print scalar localtime($shared->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ", $shared->{hps}, $shared->{one}, $shared->{five}, $shared->{fifteen};
    print $_[1] || "\n";
    return;
    }
    60 changes: 0 additions & 60 deletions logtop-urls.pl
    Original file line number Diff line number Diff line change
    @@ -1,60 +0,0 @@
    #!/usr/bin/perl -w

    use strict;
    use warnings;

    use Time::HiRes 'time';
    use Storable;

    use Getopt::Long;
    my ($match, $skip, $match_re, $skip_re);
    GetOptions(
    'match=s' => \$match,
    'skip=s' => \$skip,
    );

    $match_re = qr/$match/i if $match;
    $skip_re = qr/$skip/i if $skip;

    my $data = retrieve("logstats.data");

    my $now = time;

    show($data, " total\n\n", $now - $data->{time});

    my @stale;
    foreach my $url (sort { $data->{urls}{$b}{five} <=> $data->{urls}{$a}{five} } keys %{ $data->{urls} }) {
    next if $skip and $url =~ m/$skip_re/;
    next if $match and $url !~ m/$match_re/;
    my $d = $data->{urls}{$url};
    my $t = $now - $d->{time};
    if ($t > 60) {
    push @stale, $url;
    next;
    }
    my $text = ' ';
    if ($now - $d->{time} < 6) {
    $text = " * ";
    }
    $text .= " $url";

    show($d, "$text\n", $t);
    }

    print "\nstale:\n" if @stale;

    my $i = 0;
    foreach my $url (@stale) {
    my $d = $data->{urls}{$url};
    my $t = $now - $d->{time};
    show($d, " $url\n", $t);
    last if ++$i > 100;
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f seconds ago, ", $data->{hps} || 0, $data->{one}, $data->{five}, $data->{fifteen}, $_[2];
    print $_[1] || "\n";
    }
    49 changes: 37 additions & 12 deletions logtop.pl
    Original file line number Diff line number Diff line change
    @@ -6,41 +6,66 @@
    use Time::HiRes 'time';
    use Storable;

    use Getopt::Long;
    my ($match, $skip, $match_re, $skip_re, $sort);
    GetOptions(
    'match=s' => \$match,
    'skip=s' => \$skip,
    'sort=s' => \$sort,
    );

    # time, hps, one, five or fifteen
    $sort ||= 'five';

    # vhosts, urls, ua or referrer
    my $type = shift() || 'vhosts';

    $match_re = qr/$match/i if $match;
    $skip_re = qr/$skip/i if $skip;

    my $data = retrieve("logstats.data");

    my $now = time;
    my $now = time();

    show($data, " total\n", $now - $data->{time});

    show($data, " total\n\n", $now - $data->{time});
    print "$type sorted by $sort\n";

    my @stale;
    foreach my $vhost (sort { $data->{vhosts}{$b}{five} <=> $data->{vhosts}{$a}{five} } keys %{ $data->{vhosts} }) {
    my $d = $data->{vhosts}{$vhost};
    foreach my $event (sort
    { $data->{$type}{$b}{$sort} <=> $data->{$type}{$a}{$sort} }
    keys %{ $data->{$type} }) {
    next if $skip and $event =~ m/$skip_re/;
    next if $match and $event !~ m/$match_re/;
    my $d = $data->{$type}{$event};
    my $t = $now - $d->{time};
    if ($t > 60) {
    push @stale, $vhost;
    push @stale, $event;
    next;
    }
    my $text = ' ';
    if ($now - $d->{time} < 6) {
    $text = " * ";
    }
    $text .= " $vhost";

    show($d, "$text\n", $t);
    show($d, "$text $event\n", $t);
    }

    print "\nstale:\n" if @stale;

    foreach my $vhost (@stale) {
    my $d = $data->{vhosts}{$vhost};
    my $i = 0;
    foreach my $event (@stale) {
    my $d = $data->{$type}{$event};
    my $t = $now - $d->{time};
    show($d, " $vhost\n", $t);
    show($d, " $event\n", $t);
    last if ++$i > 100;
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f seconds ago, ", $data->{hps} || 0, $data->{one}, $data->{five}, $data->{fifteen}, $_[2];
    my $s = "s";
    $s = " " if sprintf("%.0f", $_[2]) eq '1';
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f second$s ago, ", $data->{hps}, $data->{one}, $data->{five}, $data->{fifteen}, $_[2];
    print $_[1] || "\n";
    }
  6. @eqhmcow eqhmcow revised this gist Jun 24, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion README
    Original file line number Diff line number Diff line change
    @@ -32,7 +32,7 @@ $ watch -n 5 -- ~/logtop.pl
    In a third screen, view URL stats:

    $ cd /dev/shm # use shared memory
    $ watch -n 5 -- './logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''
    $ watch -n 5 -- '~/logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''

    EXAMPLE

  7. @eqhmcow eqhmcow revised this gist Jun 24, 2011. 2 changed files with 106 additions and 90 deletions.
    22 changes: 15 additions & 7 deletions README
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,7 @@
    DESCRIPTION

    These scripts show total, per-vhost and per-URL request-per-second counts based on apache access logs in real-time.
    These scripts show total, per-vhost and per-URL request-per-second counts based on apache
    access logs in real-time.

    The output shows:
    * date of last stats update
    @@ -10,23 +11,28 @@ The output shows:
    * asterisk marks request hit in the last 5 seconds
    * vhost or URL requested

    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to mimic apachetop, but the functionality is broadly similar.
    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and
    http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to
    mimic apachetop, but the functionality is broadly similar.

    SYNOPSIS

    To use:

    In one screen, run the log parser. It writes out a stats file which the other scripts use.

    $ tail -F access.log | ./logstat.pl # centralized logging helps here
    $ cd /dev/shm # use shared memory
    $ tail -F /var/log/httpd/access.log | ~/logstat.pl # centralized logging helps here

    In another screen, view vhost stats:

    $ watch -n 5 -- ./logtop.pl
    $ cd /dev/shm # use shared memory
    $ watch -n 5 -- ~/logtop.pl

    In a third screen, view URL stats:

    watch -n 5 -- './logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''
    $ cd /dev/shm # use shared memory
    $ watch -n 5 -- './logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''

    EXAMPLE

    @@ -41,8 +47,10 @@ Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ag

    NOTES

    * The script's tally sub is properly abstracted so this script could be modified to tally and report averages on anything you can count (not just httpd logs)
    * The script's tally sub is properly abstracted so this script could be modified to tally and report
    averages on anything you can count (not just httpd logs)

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)

    * The logtop-urls script accepts "match" and/or "skip" arguments to only show URLs that match (or do not match) a given regex.
    * The logtop-urls script accepts "match" and/or "skip" arguments to only show URLs that match (or do
    not match) a given regex.
    174 changes: 91 additions & 83 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -8,17 +8,19 @@
    use Storable;
    #use Data::Dumper;

    my %data;
    my %data; # internal (server-only) data
    my %shared; # data shared with clients

    sub init
    sub init
    {
    my $data = $_[0];
    my $shared = $_[1];
    $data->{count} = 0;
    $data->{counts} = [0];
    $data->{one} = 0;
    $data->{five} = 0;
    $data->{fifteen} = 0;
    init_time($data);
    $shared->{one} = 0;
    $shared->{five} = 0;
    $shared->{fifteen} = 0;
    init_time($data, $shared, $_[2]);
    }

    sub init_time
    @@ -27,111 +29,117 @@ sub init_time
    # mode 1 == init time and second only
    # mode 2 == init minute only
    my $data = $_[0];
    my $mode = $_[2] || 0;
    my $shared = $_[1];
    my $mode = $_[3] || 0;
    unless ($mode == 2) {
    $data->{time} = $_[1] || time();
    $shared->{time} = $_[2] || time();
    }
    return if $mode == 1;
    $data->{minute} = $data->{time} + 60;
    $data->{minute} = $shared->{time} + 60;
    }

    while (<>) {
    my $nt = time();
    my ($vhost, $method, $url, $protocol, $alt_url, $code, $bytes, $referrer, $ua) = (m/
    my ($vhost, $date, $method, $url, $protocol, $alt_url, $code, $bytes,
    $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s
    \S+\s
    \[[^]]+\]\s # date
    "(\S+)\s? # method
    (?:((?:[^"]*(?:\\")?)*)\s # URL
    ([^"]*)"\s| # protocol
    ((?:[^"]*(?:\\")?)*)"\s) # or, possibly URL with no protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "((?:[^"]*(?:\\")?)*)"\s # referrer
    "(.*)"$ # user agent
    /x);
    \S+\s
    \S+\s
    \[([^]]+)\]\s # date
    "(\S+)\s? # method
    (?:((?:[^"]*(?:\\")?)*)\s # URL
    ([^"]*)"\s| # protocol
    ((?:[^"]*(?:\\")?)*)"\s) # or, possibly URL with no protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "((?:[^"]*(?:\\")?)*)"\s # referrer
    "(.*)"$ # user agent
    /x);
    die "Couldn't match $_" unless $vhost;
    # print "$vhost $method $url $protocol $alt_url $code $bytes $referrer $ua\n";
    $alt_url ||= '';
    $url ||= $alt_url;
    # vhost counts
    $data{vhosts}{$vhost} ||= {};
    if (tally($data{vhosts}{$vhost}, $nt)) {
    show($data{vhosts}{$vhost}, " $vhost\n");
    }
    $shared{vhosts}{$vhost} ||= {};
    tally($data{vhosts}{$vhost}, $shared{vhosts}{$vhost}, $nt);

    # url counts
    $data{urls}{"$vhost $url"} ||= {};
    if (tally($data{urls}{"$vhost $url"}, $nt)) {
    show($data{urls}{"$vhost $url"}, " $vhost $url\n");
    }

    $shared{urls}{"$vhost $url"} ||= {};
    tally($data{urls}{"$vhost $url"}, $shared{urls}{"$vhost $url"}, $nt);

    # TODO: user agents? referrers? status codes? bytes?

    # total hit count
    if (tally(\%data, $nt)) {
    print "\n";
    show(\%data, " total *\n\n");
    store(\%data, "logstats.data.tmp");
    rename("logstats.data.tmp", "logstats.data");
    if (tally(\%data, \%shared, $nt)) {
    print "last line parsed: [$vhost] [$date] [$method] [$url] [$protocol] [$alt_url] [$code] [$bytes] [$referrer] [$ua]\n\n";
    show(\%shared, " total\n\n");
    store(\%shared, 'logstats.data.tmp');
    rename('logstats.data.tmp', 'logstats.data');
    }
    }

    sub tally
    {
    my $data = $_[0];
    # reset $count every second
    init($data) unless defined $data->{count};
    my $shared = $_[1];
    my $nt = $_[2] || time();
    init($data, $shared, $nt) unless defined $data->{count};

    # count the event
    $data->{count}++;
    my $nt = $_[1] || time();
    my $diff = $nt - $data->{time};
    my $gimme_a_sec = 0;
    if ($diff >= 1) {
    $gimme_a_sec = 1;
    init_time($data, $nt, 1);
    $data->{hps} = $data->{count} / $diff;
    $data->{count} = 0;
    # keep per-minute count
    $data->{counts}[0] += $data->{hps};
    # update per-minute counter
    $diff = $nt - $data->{minute};
    if ($diff >= 0) {
    init_time($data, $nt, 2);
    # log "0" counts if this is an infrequent stat
    my $count = $data->{counts}[0];
    $data->{counts}[0] = 0;
    while ($diff >= 60) {
    unshift @{$data->{counts}}, 0;
    $diff -= 60;
    }
    $data->{counts}[0] = $count;
    unshift @{$data->{counts}}, 0;
    no warnings qw/uninitialized misc/;
    splice @{$data->{counts}}, 16;
    my @count = @{$data->{counts}};
    $data->{one} = $count[1] / 60;
    $data->{five} = sum(@count[1..5]) / 5 / 60;
    $data->{fifteen} = sum(@count[1..15]) / 15 / 60;
    } else {
    # extrapolate running average
    $diff += 60;
    my $count = $data->{counts}[0];
    $count *= 60 / $diff;
    my @count = @{$data->{counts}};
    defined($count[1]) or $count[1] = $count;
    $data->{one} = sum($count, $count[1]) / 2 / 60;
    no warnings 'uninitialized';
    $data->{five} = sum($count, @count[1..5]) / 6 / 60;
    $data->{fifteen} = sum($count, @count[1..15]) / 16 / 60;
    }

    # add $count to per-minute count and reset $count every second
    my $diff = $nt - $shared->{time};
    return 0 unless $diff >= 1;

    init_time($data, $shared, $nt, 1);
    $shared->{hps} = $data->{count} / $diff;
    $data->{count} = 0;
    $data->{counts}[0] += $shared->{hps};

    # add per-minute count to 15-minute historical data set and reset
    # per-minute count every minute
    $diff = $nt - $data->{minute};
    if ($diff >= 0) {
    init_time($data, $shared, $nt, 2);

    # log "0" counts to historical data set if this is an infrequent stat
    splice(@{$data->{counts}}, 1, 0, (0) x int($diff / 60));

    # FIXME: reduce value of per-minute count by remainder if it's been
    # over a minute (that is, add a fractional "0" count)

    # compute historical average
    unshift @{$data->{counts}}, 0;
    no warnings qw/uninitialized misc/;
    splice @{$data->{counts}}, 16;
    $shared->{one} = $data->{counts}[1] / 60;
    $shared->{five} = sum(@{$data->{counts}}[1..5]) / 5 / 60;
    $shared->{fifteen} = sum(@{$data->{counts}}[1..15]) / 15 / 60;
    return 1;
    }
    return $gimme_a_sec;

    # if it hasn't been a minute yet, extrapolate a running historical average
    my $count = $data->{counts}[0];
    # this gets more accurate as the minute progresses,
    # but it does tend to over-estimate infrequent events
    $count *= 60 / ($diff + 60);
    # egregious HACK - but this only matters for the first minute(s)
    # after we start up
    defined($data->{counts}[1]) or $data->{counts}[1] = $count;
    $shared->{one} = sum($count, $data->{counts}[1]) / 2 / 60;
    no warnings 'uninitialized';
    $shared->{five} = sum($count, @{$data->{counts}}[1..5]) / 6 / 60;
    $shared->{fifteen} = sum($count, @{$data->{counts}}[1..15]) / 16 / 60;
    return 1;
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ", $data->{hps}, $data->{one}, $data->{five}, $data->{fifteen};
    my $shared = $_[0];
    print scalar localtime($shared->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ", $shared->{hps}, $shared->{one}, $shared->{five}, $shared->{fifteen};
    print $_[1] || "\n";
    }
  8. @eqhmcow eqhmcow revised this gist Jun 23, 2011. 1 changed file with 12 additions and 0 deletions.
    12 changes: 12 additions & 0 deletions logtop-urls.pl
    Original file line number Diff line number Diff line change
    @@ -6,6 +6,16 @@
    use Time::HiRes 'time';
    use Storable;

    use Getopt::Long;
    my ($match, $skip, $match_re, $skip_re);
    GetOptions(
    'match=s' => \$match,
    'skip=s' => \$skip,
    );

    $match_re = qr/$match/i if $match;
    $skip_re = qr/$skip/i if $skip;

    my $data = retrieve("logstats.data");

    my $now = time;
    @@ -14,6 +24,8 @@

    my @stale;
    foreach my $url (sort { $data->{urls}{$b}{five} <=> $data->{urls}{$a}{five} } keys %{ $data->{urls} }) {
    next if $skip and $url =~ m/$skip_re/;
    next if $match and $url !~ m/$match_re/;
    my $d = $data->{urls}{$url};
    my $t = $now - $d->{time};
    if ($t > 60) {
  9. @eqhmcow eqhmcow revised this gist Jun 23, 2011. 4 changed files with 88 additions and 26 deletions.
    24 changes: 14 additions & 10 deletions README
    Original file line number Diff line number Diff line change
    @@ -1,38 +1,42 @@
    DESCRIPTION

    These scripts show total and per-vhost request-per-second counts based on apache access logs in real-time.
    These scripts show total, per-vhost and per-URL request-per-second counts based on apache access logs in real-time.

    The output shows:
    * date of last stats update
    * last second's hits per second (hps)
    * one, five and fifteen minute hps average
    * seconds since last request
    * vhost
    * asterisk marks vhosts with hits in the last 5 seconds
    * asterisk marks request hit in the last 5 seconds
    * vhost or URL requested

    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to mimic apachetop, but the functionality is broadly similar.

    SYNOPSIS

    To use:

    In one screen:
    In one screen, run the log parser. It writes out a stats file which the other scripts use.

    $ tail -F access.log | ./logstat.pl # centralized logging helps here

    In another screen:
    In another screen, view vhost stats:

    $ watch -n 5 -- ./logtop.pl

    In a third screen, view URL stats:

    watch -n 5 -- './logtop-urls.pl --skip '\''\.(jpg|png|gif|js|css)$|^\S+\s(/robots\.txt$|/favicon\.ico$)'\'''

    EXAMPLE

    Every 5.0s: ./logtop.pl Wed Jun 22 09:55:54 2011

    Wed Jun 22 09:55:54 2011 hps: 9, average: 12.87, 13.19, 13.33, 0 seconds ago, total *
    Wed Jun 22 09:55:54 2011 hps: 9, average: 12.87, 13.19, 13.33, 0 seconds ago, total

    Wed Jun 22 09:55:48 2011 hps: 17, average: 5.66, 1.90, 0.86, 6 seconds ago, example.com
    Wed Jun 22 09:55:45 2011 hps: 6, average: 1.17, 1.43, 1.48, 9 seconds ago, example.org
    Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ago, example.net *
    Wed Jun 22 09:55:48 2011 hps: 17, average: 5.66, 1.90, 0.86, 6 seconds ago, example.com
    Wed Jun 22 09:55:45 2011 hps: 6, average: 1.17, 1.43, 1.48, 9 seconds ago, example.org
    Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ago, * example.net
    ...

    NOTES
    @@ -41,4 +45,4 @@ NOTES

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)

    * This currently only breaks down requests by vhost (not by URL, etc); though as noted above it's easy to add more counters
    * The logtop-urls script accepts "match" and/or "skip" arguments to only show URLs that match (or do not match) a given regex.
    33 changes: 21 additions & 12 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -37,27 +37,36 @@ sub init_time

    while (<>) {
    my $nt = time();
    my ($vhost, $method, $url, $code, $bytes, $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    my ($vhost, $method, $url, $protocol, $alt_url, $code, $bytes, $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s
    \S+\s
    \[[^]]+\]\s # date
    "(\S+)\s # method
    ((?:[^"]*(?:\\")?)*)\s? # URL
    [^"]*"\s # protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "((?:[^"]*(?:\\")?)*)"\s # referrer
    "(.*)"$ # user agent
    \[[^]]+\]\s # date
    "(\S+)\s? # method
    (?:((?:[^"]*(?:\\")?)*)\s # URL
    ([^"]*)"\s| # protocol
    ((?:[^"]*(?:\\")?)*)"\s) # or, possibly URL with no protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "((?:[^"]*(?:\\")?)*)"\s # referrer
    "(.*)"$ # user agent
    /x);
    die "Couldn't match $_" unless $vhost;
    # print "$vhost $method $url $protocol $alt_url $code $bytes $referrer $ua\n";
    $url ||= $alt_url;
    # vhost counts
    $data{vhosts}{$vhost} ||= {};
    if (tally($data{vhosts}{$vhost}, $nt)) {
    show($data{vhosts}{$vhost}, " $vhost\n");
    }
    # TODO: urls? user agents? referrers? status codes?
    # url counts
    $data{urls}{"$vhost $url"} ||= {};
    if (tally($data{urls}{"$vhost $url"}, $nt)) {
    show($data{urls}{"$vhost $url"}, " $vhost $url\n");
    }

    # TODO: user agents? referrers? status codes? bytes?

    # total hit count
    if (tally(\%data, $nt)) {
    48 changes: 48 additions & 0 deletions logtop-urls.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,48 @@
    #!/usr/bin/perl -w

    use strict;
    use warnings;

    use Time::HiRes 'time';
    use Storable;

    my $data = retrieve("logstats.data");

    my $now = time;

    show($data, " total\n\n", $now - $data->{time});

    my @stale;
    foreach my $url (sort { $data->{urls}{$b}{five} <=> $data->{urls}{$a}{five} } keys %{ $data->{urls} }) {
    my $d = $data->{urls}{$url};
    my $t = $now - $d->{time};
    if ($t > 60) {
    push @stale, $url;
    next;
    }
    my $text = ' ';
    if ($now - $d->{time} < 6) {
    $text = " * ";
    }
    $text .= " $url";

    show($d, "$text\n", $t);
    }

    print "\nstale:\n" if @stale;

    my $i = 0;
    foreach my $url (@stale) {
    my $d = $data->{urls}{$url};
    my $t = $now - $d->{time};
    show($d, " $url\n", $t);
    last if ++$i > 100;
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f seconds ago, ", $data->{hps} || 0, $data->{one}, $data->{five}, $data->{fifteen}, $_[2];
    print $_[1] || "\n";
    }
    9 changes: 5 additions & 4 deletions logtop.pl
    Original file line number Diff line number Diff line change
    @@ -10,7 +10,7 @@

    my $now = time;

    show($data, " total *\n\n", $now - $data->{time});
    show($data, " total\n\n", $now - $data->{time});

    my @stale;
    foreach my $vhost (sort { $data->{vhosts}{$b}{five} <=> $data->{vhosts}{$a}{five} } keys %{ $data->{vhosts} }) {
    @@ -20,10 +20,11 @@
    push @stale, $vhost;
    next;
    }
    my $text = " $vhost";
    if ($now - $d->{time} <= 5) {
    $text .= " *";
    my $text = ' ';
    if ($now - $d->{time} < 6) {
    $text = " * ";
    }
    $text .= " $vhost";

    show($d, "$text\n", $t);
    }
  10. @eqhmcow eqhmcow revised this gist Jun 22, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion logstat.pl
    Original file line number Diff line number Diff line change
    @@ -44,7 +44,7 @@ sub init_time
    \S+\s
    \[[^]]+\]\s # date
    "(\S+)\s # method
    ((?:[^"]*(?:\\")?)*)\s # URL
    ((?:[^"]*(?:\\")?)*)\s? # URL
    [^"]*"\s # protocol
    (\S+)\s # status code
    (\S+)\s # bytes
  11. @eqhmcow eqhmcow revised this gist Jun 22, 2011. 1 changed file with 1 addition and 1 deletion.
    2 changes: 1 addition & 1 deletion README
    Original file line number Diff line number Diff line change
    @@ -1,6 +1,6 @@
    DESCRIPTION

    These scripts show total and per-vhost request-per-second counts based on apache access logs.
    These scripts show total and per-vhost request-per-second counts based on apache access logs in real-time.

    The output shows:
    * date of last stats update
  12. @eqhmcow eqhmcow revised this gist Jun 22, 2011. 2 changed files with 27 additions and 11 deletions.
    18 changes: 17 additions & 1 deletion README
    Original file line number Diff line number Diff line change
    @@ -1,5 +1,21 @@
    DESCRIPTION

    These scripts show total and per-vhost request-per-second counts based on apache access logs.

    The output shows:
    * date of last stats update
    * last second's hits per second (hps)
    * one, five and fifteen minute hps average
    * seconds since last request
    * vhost
    * asterisk marks vhosts with hits in the last 5 seconds

    See also the apachetop tool - http://freshmeat.net/projects/apachetop/ and http://packages.debian.org/search?keywords=apachetop . This script is not based on or intended to mimic apachetop, but the functionality is broadly similar.

    SYNOPSIS

    To use:

    In one screen:

    $ tail -F access.log | ./logstat.pl # centralized logging helps here
    @@ -21,7 +37,7 @@ Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ag

    NOTES

    * The tally sub is properly abstracted so this can be used to tally and report averages on anything you can count (not just httpd logs)
    * The script's tally sub is properly abstracted so this script could be modified to tally and report averages on anything you can count (not just httpd logs)

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)

    20 changes: 10 additions & 10 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -38,18 +38,18 @@ sub init_time
    while (<>) {
    my $nt = time();
    my ($vhost, $method, $url, $code, $bytes, $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s
    \S+\s
    \[[^]]+\]\s # date
    "(\S+)\s # method
    (\S*)\s # URL
    [^"]*"\s # protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "([^"]*)"\s # referrer
    "(.*)"$ # user agent
    \[[^]]+\]\s # date
    "(\S+)\s # method
    ((?:[^"]*(?:\\")?)*)\s # URL
    [^"]*"\s # protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "((?:[^"]*(?:\\")?)*)"\s # referrer
    "(.*)"$ # user agent
    /x);
    die "Couldn't match $_" unless $vhost;
    # vhost counts
  13. @invalid-email-address Anonymous created this gist Jun 22, 2011.
    28 changes: 28 additions & 0 deletions README
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,28 @@
    SYNOPSIS

    In one screen:

    $ tail -F access.log | ./logstat.pl # centralized logging helps here

    In another screen:

    $ watch -n 5 -- ./logtop.pl

    EXAMPLE

    Every 5.0s: ./logtop.pl Wed Jun 22 09:55:54 2011

    Wed Jun 22 09:55:54 2011 hps: 9, average: 12.87, 13.19, 13.33, 0 seconds ago, total *

    Wed Jun 22 09:55:48 2011 hps: 17, average: 5.66, 1.90, 0.86, 6 seconds ago, example.com
    Wed Jun 22 09:55:45 2011 hps: 6, average: 1.17, 1.43, 1.48, 9 seconds ago, example.org
    Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ago, example.net *
    ...

    NOTES

    * The tally sub is properly abstracted so this can be used to tally and report averages on anything you can count (not just httpd logs)

    * The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)

    * This currently only breaks down requests by vhost (not by URL, etc); though as noted above it's easy to add more counters
    128 changes: 128 additions & 0 deletions logstat.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,128 @@
    #!/usr/bin/perl -w

    use strict;
    use warnings;

    use Time::HiRes 'time';
    use List::Util qw/sum/;
    use Storable;
    #use Data::Dumper;

    my %data;

    sub init
    {
    my $data = $_[0];
    $data->{count} = 0;
    $data->{counts} = [0];
    $data->{one} = 0;
    $data->{five} = 0;
    $data->{fifteen} = 0;
    init_time($data);
    }

    sub init_time
    {
    # mode 0 == init all
    # mode 1 == init time and second only
    # mode 2 == init minute only
    my $data = $_[0];
    my $mode = $_[2] || 0;
    unless ($mode == 2) {
    $data->{time} = $_[1] || time();
    }
    return if $mode == 1;
    $data->{minute} = $data->{time} + 60;
    }

    while (<>) {
    my $nt = time();
    my ($vhost, $method, $url, $code, $bytes, $referrer, $ua) = (m/
    ^(\S+)\s # vhost
    \S+\s # IP
    \S+\s
    \S+\s
    \[[^]]+\]\s # date
    "(\S+)\s # method
    (\S*)\s # URL
    [^"]*"\s # protocol
    (\S+)\s # status code
    (\S+)\s # bytes
    "([^"]*)"\s # referrer
    "(.*)"$ # user agent
    /x);
    die "Couldn't match $_" unless $vhost;
    # vhost counts
    $data{vhosts}{$vhost} ||= {};
    if (tally($data{vhosts}{$vhost}, $nt)) {
    show($data{vhosts}{$vhost}, " $vhost\n");
    }
    # TODO: urls? user agents? referrers? status codes?

    # total hit count
    if (tally(\%data, $nt)) {
    print "\n";
    show(\%data, " total *\n\n");
    store(\%data, "logstats.data.tmp");
    rename("logstats.data.tmp", "logstats.data");
    }
    }

    sub tally
    {
    my $data = $_[0];
    # reset $count every second
    init($data) unless defined $data->{count};
    $data->{count}++;
    my $nt = $_[1] || time();
    my $diff = $nt - $data->{time};
    my $gimme_a_sec = 0;
    if ($diff >= 1) {
    $gimme_a_sec = 1;
    init_time($data, $nt, 1);
    $data->{hps} = $data->{count} / $diff;
    $data->{count} = 0;
    # keep per-minute count
    $data->{counts}[0] += $data->{hps};
    # update per-minute counter
    $diff = $nt - $data->{minute};
    if ($diff >= 0) {
    init_time($data, $nt, 2);
    # log "0" counts if this is an infrequent stat
    my $count = $data->{counts}[0];
    $data->{counts}[0] = 0;
    while ($diff >= 60) {
    unshift @{$data->{counts}}, 0;
    $diff -= 60;
    }
    $data->{counts}[0] = $count;
    unshift @{$data->{counts}}, 0;
    no warnings qw/uninitialized misc/;
    splice @{$data->{counts}}, 16;
    my @count = @{$data->{counts}};
    $data->{one} = $count[1] / 60;
    $data->{five} = sum(@count[1..5]) / 5 / 60;
    $data->{fifteen} = sum(@count[1..15]) / 15 / 60;
    } else {
    # extrapolate running average
    $diff += 60;
    my $count = $data->{counts}[0];
    $count *= 60 / $diff;
    my @count = @{$data->{counts}};
    defined($count[1]) or $count[1] = $count;
    $data->{one} = sum($count, $count[1]) / 2 / 60;
    no warnings 'uninitialized';
    $data->{five} = sum($count, @count[1..5]) / 6 / 60;
    $data->{fifteen} = sum($count, @count[1..15]) / 16 / 60;
    }
    }
    return $gimme_a_sec;
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ", $data->{hps}, $data->{one}, $data->{five}, $data->{fifteen};
    print $_[1] || "\n";
    }
    45 changes: 45 additions & 0 deletions logtop.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,45 @@
    #!/usr/bin/perl -w

    use strict;
    use warnings;

    use Time::HiRes 'time';
    use Storable;

    my $data = retrieve("logstats.data");

    my $now = time;

    show($data, " total *\n\n", $now - $data->{time});

    my @stale;
    foreach my $vhost (sort { $data->{vhosts}{$b}{five} <=> $data->{vhosts}{$a}{five} } keys %{ $data->{vhosts} }) {
    my $d = $data->{vhosts}{$vhost};
    my $t = $now - $d->{time};
    if ($t > 60) {
    push @stale, $vhost;
    next;
    }
    my $text = " $vhost";
    if ($now - $d->{time} <= 5) {
    $text .= " *";
    }

    show($d, "$text\n", $t);
    }

    print "\nstale:\n" if @stale;

    foreach my $vhost (@stale) {
    my $d = $data->{vhosts}{$vhost};
    my $t = $now - $d->{time};
    show($d, " $vhost\n", $t);
    }

    sub show
    {
    my $data = $_[0];
    print scalar localtime($data->{time});
    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f seconds ago, ", $data->{hps} || 0, $data->{one}, $data->{five}, $data->{fifteen}, $_[2];
    print $_[1] || "\n";
    }