Skip to content

Instantly share code, notes, and snippets.

@mk-qi
Forked from eqhmcow/apache-logtop-README
Last active August 29, 2015 14:23
Show Gist options
  • Save mk-qi/dd43d55ff0c2e700231d to your computer and use it in GitHub Desktop.
Save mk-qi/dd43d55ff0c2e700231d to your computer and use it in GitHub Desktop.
DESCRIPTION
These scripts show total and per-vhost request-per-second counts based on apache access logs.
The output shows:
* date of last stats update
* last second's hits per second (hps)
* one, five and fifteen minute hps average
* seconds since last request
* vhost
* asterisk marks vhosts with hits in the last 5 seconds
See also the apachetop tool (http://freshmeat.net/projects/apachetop/ and http://packages.debian.org/search?keywords=apachetop). These scripts are not based on or intended to mimic apachetop, but the functionality is broadly similar.
SYNOPSIS
To use:
In one screen:
$ tail -F access.log | ./logstat.pl # centralized logging helps here
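In another screen (run from the same working directory, because logstat.pl writes the file logstats.data there and logtop.pl reads it):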
$ watch -n 5 -- ./logtop.pl
EXAMPLE
Every 5.0s: ./logtop.pl Wed Jun 22 09:55:54 2011
Wed Jun 22 09:55:54 2011 hps: 9, average: 12.87, 13.19, 13.33, 0 seconds ago, total *
Wed Jun 22 09:55:48 2011 hps: 17, average: 5.66, 1.90, 0.86, 6 seconds ago, example.com
Wed Jun 22 09:55:45 2011 hps: 6, average: 1.17, 1.43, 1.48, 9 seconds ago, example.org
Wed Jun 22 09:55:50 2011 hps: 3, average: 0.94, 1.33, 1.14, 4 seconds ago, example.net *
...
NOTES
* The script's tally sub is properly abstracted so this script could be modified to tally and report averages on anything you can count (not just httpd logs)
* The log parsing regex matches the "v-combined" log format (combined with the virtual host at the front)
* This currently only breaks down requests by vhost (not by URL, etc); though as noted above it's easy to add more counters
#!/usr/bin/perl -w
use strict;
use warnings;
use Time::HiRes 'time';
use List::Util qw/sum/;
use Storable;
#use Data::Dumper;
# Shared counter state. The top level of this hash carries the counters for
# all requests combined; $data{vhosts}{$vhost} holds a separate set of the
# same counters for each virtual host seen in the log.
my %data;
# Reset a counter set to its starting state.
#
# Takes the hashref to prepare. The pending hit count and the three running
# averages are zeroed, the per-minute history is reduced to a single empty
# slot, and the timestamps are then set through init_time().
sub init
{
    my ($stats) = @_;

    # All scalar counters start from zero.
    @{$stats}{qw/count one five fifteen/} = (0) x 4;

    # History starts with just the in-progress minute.
    $stats->{counts} = [0];

    init_time($stats);
}
# Set the timestamps of a counter set.
#
# Arguments:
#   $_[0]  hashref of counter state
#   $_[1]  timestamp to record; a false or missing value means "now"
#   $_[2]  mode (defaults to 0):
#            0 - set both {time} and {minute}
#            1 - set {time} only
#            2 - set {minute} only, derived from the existing {time}
#
# {minute} is always written as {time} + 60.
sub init_time
{
    my ($stats, $stamp, $mode) = @_;
    $mode ||= 0;

    if ($mode != 2) {
        $stats->{time} = $stamp || time();
    }

    # Mode 1 stops here and leaves the minute marker untouched.
    return if $mode == 1;

    return $stats->{minute} = $stats->{time} + 60;
}
# Main loop: read access-log lines from the files named on the command line
# (or STDIN), tally every hit against its vhost and against the grand total,
# and write a snapshot of all counters to logstats.data each time the total's
# per-second window closes.
while (my $line = <>) {
    my $nt = time();

    # Only $vhost is used today; the remaining captures are kept for the
    # extra counters mentioned in the TODO below.
    my ($vhost, $method, $url, $code, $bytes, $referrer, $ua) = ($line =~ m/
        ^(\S+)\s # vhost
        \S+\s # IP
        \S+\s
        \S+\s
        \[[^]]+\]\s # date
        "(\S+)\s # method
        ((?:[^"]*(?:\\")?)*)\s # URL
        [^"]*"\s # protocol
        (\S+)\s # status code
        (\S+)\s # bytes
        "((?:[^"]*(?:\\")?)*)"\s # referrer
        "(.*)"$ # user agent
    /x);

    # Test definedness rather than truth so that a vhost of "0" is accepted.
    die "Couldn't match $line" unless defined $vhost;

    # vhost counts
    $data{vhosts}{$vhost} ||= {};
    if (tally($data{vhosts}{$vhost}, $nt)) {
        show($data{vhosts}{$vhost}, " $vhost\n");
    }

    # TODO: urls? user agents? referrers? status codes?

    # total hit count
    if (tally(\%data, $nt)) {
        print "\n";
        show(\%data, " total *\n\n");

        # Write to a temporary file and rename it into place so a reader
        # never sees a half-written snapshot. store() returns undef on I/O
        # failure; in that case the previous snapshot is left alone instead
        # of being replaced by a bad temporary file.
        if (store(\%data, "logstats.data.tmp")) {
            rename("logstats.data.tmp", "logstats.data")
                or warn "Couldn't rename logstats.data.tmp to logstats.data: $!\n";
        }
        else {
            warn "Couldn't write logstats.data.tmp: $!\n";
        }
    }
}
# Count one event against a counter set and refresh its rate statistics.
#
# Arguments:
#   $_[0]  hashref of counter state; prepared through init() on first use
#   $_[1]  optional event time in epoch seconds (defaults to time())
#
# State kept in the hashref:
#   count    hits seen since the last per-second update
#   hps      hits per second over the window that was just closed
#   counts   per-minute hit totals: [0] is the minute in progress,
#            [1..15] are earlier minutes, newest first
#   one, five, fifteen
#            average hits per second over 1, 5 and 15 minutes
#
# Returns 1 when at least one second has elapsed since the previous update
# (the statistics were recomputed), 0 otherwise.
sub tally
{
    my $data = $_[0];

    # reset $count every second
    init($data) unless defined $data->{count};
    $data->{count}++;

    my $nt = $_[1] || time();
    my $diff = $nt - $data->{time};
    my $gimme_a_sec = 0;

    if ($diff >= 1) {
        $gimme_a_sec = 1;
        init_time($data, $nt, 1);    # mode 1: move {time} to $nt only

        my $hits = $data->{count};
        $data->{hps} = $hits / $diff;
        $data->{count} = 0;

        # keep per-minute count. This must be the number of hits, not the
        # rate: a window can be much longer than one second for a quiet
        # counter, and adding hits/diff would understate the minute total
        # (and every average derived from it) by the window length.
        $data->{counts}[0] += $hits;

        # update per-minute counter
        $diff = $nt - $data->{minute};
        if ($diff >= 0) {
            # The minute in progress has ended; start a new one at $nt.
            init_time($data, $nt, 2);

            # log "0" counts if this is an infrequent stat: one empty slot
            # is inserted for every further full minute that passed.
            my $count = $data->{counts}[0];
            $data->{counts}[0] = 0;
            while ($diff >= 60) {
                unshift @{$data->{counts}}, 0;
                $diff -= 60;
            }
            $data->{counts}[0] = $count;

            # Open a fresh slot for the new minute; the total just
            # finished becomes counts[1].
            unshift @{$data->{counts}}, 0;

            # Short histories make splice warn about an offset past the end
            # of the array, and make the slices below contain undef.
            no warnings qw/uninitialized misc/;

            # Keep the in-progress slot plus fifteen earlier minutes.
            splice @{$data->{counts}}, 16;

            my @count = @{$data->{counts}};
            $data->{one} = $count[1] / 60;
            $data->{five} = sum(@count[1..5]) / 5 / 60;
            $data->{fifteen} = sum(@count[1..15]) / 15 / 60;
        } else {
            # extrapolate running average
            # {minute} marks the end of the minute in progress, so adding 60
            # gives the seconds elapsed since that minute began.
            $diff += 60;

            # Scale the partial minute up to a full-minute estimate.
            my $count = $data->{counts}[0];
            $count *= 60 / $diff;

            my @count = @{$data->{counts}};

            # With no earlier minute yet, use the estimate in its place.
            defined($count[1]) or $count[1] = $count;
            $data->{one} = sum($count, $count[1]) / 2 / 60;

            # Missing history slots are undef and count as zero.
            no warnings 'uninitialized';
            $data->{five} = sum($count, @count[1..5]) / 6 / 60;
            $data->{fifteen} = sum($count, @count[1..15]) / 16 / 60;
        }
    }

    return $gimme_a_sec;
}
# Print one status line for a counter set to the selected output handle.
#
# Arguments:
#   $_[0]  hashref with the keys time, hps, one, five and fifteen
#   $_[1]  text appended after the figures; a newline is used when it is
#          missing or false
sub show
{
    my ($stats, $suffix) = @_;

    # Assigning to a scalar yields localtime's human-readable date string.
    my $stamp = localtime($stats->{time});
    print $stamp;

    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, ",
        @{$stats}{qw/hps one five fifteen/};

    print $suffix || "\n";
}
#!/usr/bin/perl -w
use strict;
use warnings;
use Time::HiRes 'time';
use Storable;
# Load the counter snapshot from logstats.data and print the grand total,
# then every vhost ordered by its five-minute average (highest first).
# Vhosts whose last update is more than 60 seconds old are listed separately
# under "stale:"; vhosts updated within the last 5 seconds get an asterisk.

# retrieve() can return undef when the file cannot be read back; stop with a
# clear message instead of printing a meaningless row built from no data.
my $data = retrieve("logstats.data")
    or die "Couldn't read logstats.data\n";
my $now = time;
show($data, " total *\n\n", $now - $data->{time});

my @stale;
foreach my $vhost (sort { $data->{vhosts}{$b}{five} <=> $data->{vhosts}{$a}{five} } keys %{ $data->{vhosts} }) {
    my $d = $data->{vhosts}{$vhost};
    my $t = $now - $d->{time};    # seconds since this vhost was last updated

    if ($t > 60) {
        push @stale, $vhost;
        next;
    }

    my $text = " $vhost";
    $text .= " *" if $t <= 5;
    show($d, "$text\n", $t);
}

# Stale vhosts keep the order established by the sort above.
print "\nstale:\n" if @stale;
foreach my $vhost (@stale) {
    my $d = $data->{vhosts}{$vhost};
    show($d, " $vhost\n", $now - $d->{time});
}
# Print one report line for a counter set to the selected output handle.
#
# Arguments:
#   $_[0]  hashref with the keys time, hps, one, five and fifteen
#          (a missing or false hps is shown as 0)
#   $_[1]  text appended after the figures; a newline is used when it is
#          missing or false
#   $_[2]  age of the counter set in seconds, shown as "N seconds ago"
sub show
{
    my ($stats, $suffix, $age) = @_;

    # Assigning to a scalar yields localtime's human-readable date string.
    my $stamp = localtime($stats->{time});
    print $stamp;

    printf " hps: %6.0f, average: %.2f, %.2f, %.2f, %2.0f seconds ago, ",
        $stats->{hps} || 0,
        @{$stats}{qw/one five fifteen/},
        $age;

    print $suffix || "\n";
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment