-
-
Save xtetsuji/1446584 to your computer and use it in GitHub Desktop.
| #!/usr/bin/perl | |
| # "pflog" enhancement for parsing maillog of postfix 2.3 or higher version. | |
| # "pflog" see: http://www.tmtm.org/ruby/pflog/pflog-0.3 | |
| use strict; | |
| use warnings; | |
| use Time::Local; | |
| use Getopt::Long; | |
| #use Data::Dumper; | |
| my %month = (qw(Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec 12)); | |
| my $SEPARATOR = q(,); | |
| my (%queue, @found_queue); | |
| GetOptions( | |
| 'year|y=s' => \ my $year, | |
| 'no-header|H' => \ my $no_header, | |
| 'help' => \ my $help, | |
| ); | |
| if ( $help ) { | |
| print <<END_HELP; | |
| Usage: | |
| $0 -y YEAR mail.log > mail.csv | |
| END_HELP | |
| exit; | |
| } | |
| if ( !defined $year || $year !~ /^\d{4}$/ ) { | |
| die "specify -y YEAR (YEAR is 4 letter digits, e.g. 2011).\n"; | |
| } | |
| my $re_date = qr/[A-Z][a-z][a-z] ?\d+ \d{2}:\d{2}:\d{2}/; | |
| my $re_host = qr/\S+/; | |
| #my $re_following_capture = qr/\s*\(([^()]+)\)/; | |
| my $re_line = qr{^($re_date) ($re_host) postfix/(\w+)\[\d+\]: (\w+):\s*(.*)}; | |
| while (<>) { | |
| my ($date, $host, $service, $queue_id, $following) = /$re_line/ | |
| or next; | |
| if ( !exists $queue{$queue_id} ) { | |
| $queue{$queue_id} = {}; | |
| # queue_id は見つかった順番に記録される | |
| # queue_id is recorded order by found. | |
| push @found_queue, $queue_id; | |
| } | |
| my $q = $queue{$queue_id}; | |
| my ($information) = $following =~ /\s*\((.+)\)$/ | |
| and $following =~ s/\s\(.+\)$//; | |
| my %param; | |
| if ( $following =~ /=/ ) { | |
| my @param = map { split /=/, $_, 2 } split /,\s*/, $following; | |
| if ( @param % 2 == 0 ) { | |
| %param = @param; | |
| } | |
| else { | |
| warn "found odd number of key/value pair."; | |
| } | |
| } | |
| # %param 手直し | |
| # %param modification. | |
| if ( exists $param{client} && defined $param{client} ) { | |
| my ($hostname, $ipaddr) = $param{client} =~ /^(.+?)\[([0-9.]+)\]/; | |
| $param{client_hostname} = $hostname; | |
| $param{client_ipaddr} = $ipaddr; | |
| } | |
| if ( $information && $param{status} ) { | |
| $param{information} = $information; | |
| } | |
| for my $key ( qw(from to) ) { | |
| if ( defined $param{$key} && $param{$key} =~ /^<(.*)>$/ ) { | |
| $param{$key} = $1; | |
| } | |
| } | |
| # 今回の行で取得することができた情報を追加 | |
| # Addition of information that be got current line. | |
| for my $key (keys %param) { | |
| my $value = $param{$key}; | |
| if ( !exists $q->{$key} ) { | |
| # 新規採用 / newly | |
| $q->{$key} = $value; | |
| } | |
| elsif ( !ref $q->{$key} ) { # 文字列 / string | |
| # 配列リファレンスにして追加 / Addition as array reference | |
| $q->{$key} = [$q->{$key}, $value]; | |
| } | |
| elsif ( ref $q->{$key} eq 'ARRAY' ) { | |
| # 配列リファレンスに push / push to array reference | |
| push @{$q->{$key}}, $value; | |
| } | |
| else { | |
| die "unknown situation."; # 想定外 / non-supposition | |
| } | |
| } | |
| # 12/31 -> 01/01 などの流れの場合の年の調整 | |
| # Adjustoment flow of year on 12/31 -> 01/01 | |
| skew_date($date) | |
| and $year++; | |
| # Add _meta | |
| my $meta_q = $q->{_meta} ||= {}; | |
| if ( !defined $meta_q->{host} ) { | |
| $meta_q->{host} = $host; | |
| } | |
| if ( $param{client} || $param{uid} ) { | |
| $meta_q->{start_date} = date_format($date); | |
| } | |
| if ( $param{status} && $param{status} eq 'sent' ) { | |
| $meta_q->{success}++; | |
| $meta_q->{end_date} = date_format($date); | |
| } | |
| } | |
| #print Dumper(\%queue); | |
| # ### DEBUG: | |
| # my @list = map { [$_ => $queue{$_}] } @found_queue; | |
| # print Dumper(@list); | |
| # exit; | |
| # ヘッダ出力 | |
| # Header output | |
| if ( !$no_header && @found_queue ) { | |
| # 内容行 (@found_queue) が見つからなかったら | |
| # ヘッダ行も出力しないとした | |
| # 内容がなければファイルサイズが 0 のほうが | |
| # パッと見てわかりやすいからという意図 | |
| # If content row is not found, we do not output header line too. | |
| # It is cleary that | |
| printf "%s\n", join $SEPARATOR, map { s/^\s+//; qq("$_") } split /\n/, <<END_LIST; | |
| queue id | |
| arrived time | |
| processed time | |
| smtp client hostname / uid | |
| smtp client IP address / username | |
| envelope from | |
| envelope to | |
| message-id | |
| status | |
| relay to | |
| delay time | |
| size | |
| information (reason of defered, local mailbox name, successful message...) | |
| END_LIST | |
| } | |
| ### pflog compatible output | |
| for my $queue_id (@found_queue) { | |
| my $q = $queue{$queue_id}; # 'HASH' | |
| my @row = ($queue_id, | |
| @{$q->{_meta}}{qw(start_date end_date)}, | |
| @$q{qw(client_hostname client_ipaddr from to message-id status relay delay size information)}); | |
| for ( grep { ref $_ eq 'ARRAY' } @row ) { | |
| $_ = join $SEPARATOR, @$_; | |
| } | |
| for (@row) { | |
| $_ = '' if !defined $_; | |
| if ( /,/ || /"/ ) { | |
| s/"/""/g; | |
| $_ = qq("$_"); | |
| } | |
| elsif ( !/^\d+(?:\.\d+)?$/ && length $_ ) { | |
| # Excel は数字のみではないものならダブルクォートするので | |
| # それを真似る | |
| # Excel quotes not only digits, | |
| # so this program imimtations it. | |
| $_ = qq("$_"); | |
| } | |
| elsif ( /^\d{11}$/ ) { | |
| # queue id (と思われるもの)がたまたま全部数字だった場合 | |
| # queue id (we think so) character is all digits unexpectedly. | |
| $_ = qq("$_"); | |
| } | |
| } | |
| printf "%s\n", join $SEPARATOR, @row; | |
| } | |
| ### from pflog | |
| # output: | |
| # queue id | |
| # arrived time | |
| # processed time | |
| # smtp client hostname / uid | |
| # smtp client IP address / username | |
| # envelope from | |
| # envelope to | |
| # message-id | |
| # status | |
| # relay to | |
| # delay time | |
| # size | |
| # information (reason of defered, local mailbox name, successful message...) | |
| sub date_format { | |
| my $date_str = shift; # e.g. Jan 31 00:00:01 | |
| my ($mon_name, $day, $hhmmss) = split /\s+/, $date_str; | |
| # my ($hh, $mm, $ss) = map { sprintf '%d', $_ } split /:/, $hhmmss; | |
| # sprintf に8進数と勘違いされないように | |
| # We avoid that sprintf confuses it octet. | |
| $day =~ s/^0//; | |
| my $mon = $month{$mon_name}; | |
| return sprintf '%d/%02d/%02d %s', $year, $mon, $day, $hhmmss; | |
| } | |
| # skew_date($syslog_date_string) | |
| # 前回 skew_date を呼び出したときの日付(年月日)よりも逆行しているなら真 | |
| # 分や秒の細かいことまで見ない | |
| # Ture if Date (year month day) previous calling of skwe_date goes backward. | |
| # We do not see detail of minutes or second. | |
| { | |
| my $prev_mm_dd; # state | |
| sub skew_date { | |
| my $cur_mm_dd = join '/', (split m{/}, date_format(shift))[1,2]; | |
| my $is_skew; | |
| if ( !$prev_mm_dd # 初回呼び出し / initial calling | |
| || $prev_mm_dd le $cur_mm_dd # 順番通り / right order (e.g. 07/22 le 07/23) | |
| ) { | |
| $is_skew = 0; | |
| } | |
| else { | |
| $is_skew = 1; | |
| } | |
| $prev_mm_dd = $cur_mm_dd; | |
| return $is_skew; | |
| } | |
| } | |
| __END__ | |
| =pod | |
| =encoding utf-8 | |
| =head1 NAME | |
| maillog-hashnize.pl - enhancement of "pflog" for parsing maillog of postfix 2.3 or higher version. | |
| =head1 SYNOPSIS | |
| # input postfix log of syslog format, output csv format. | |
| maillog-hasnize.pl -y 2011 mail.log > maillog.csv | |
| =head1 DESCRIPTIONS | |
| this program is postfix "mail.log" parser, convert from syslog format to csv format for MS-Excel and more spreadsheet viewer. | |
| =head1 LIMITATION | |
| because syslog format does not include "year" information, | |
| specify the "mail.log"'s year as "-y" option. | |
| support from the year to from *next* new year. | |
| but years are missing from the "mail.log", e.g. next "2009/??/??" line is "2011/??/??", it is not support that leap 2 year and more. | |
| =head1 ACKNOWLEDGEMENT | |
| pflog: L<http://www.tmtm.org/ruby/pflog/pflog-0.3> | |
| =head1 COPYRIGHT AND LICENCE | |
| Copyright 2010-2011 fonfun corporation. | |
| This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself. | |
| =cut |
I am glad to be used it by many users!
This is great!
It parsed the maillog and produced CSV. Although I saw few exceptions which might be because not all lines in maillog has equal number of columns and the perl script might be failing to recognize it:
perl maillog-hashnize.pl -y 2016 maillog-20160124 > maillog.csv
found odd number of key/value pair. at maillog-hashnize.pl line 61, <> line 512132.
found odd number of key/value pair. at maillog-hashnize.pl line 61, <> line 520825.
found odd number of key/value pair. at maillog-hashnize.pl line 61, <> line 520834.
@friendyogl Thank you for your praise. I am realy glad.
I realize the "odd number" warning. Please wait. I will fix this problems.
I create maillog-hashnize GitHub repositry xtetsuji/p5-Mail-Log-Hashnize. If you found some problem, and/or have some requests, then write to issue page.
thank you for this! great tool for publishing postfix logs to end users