xtetsuji · October 11, 2022 08:08 · Feb 10, 2016 · Dec 19, 2013 · Dec 8, 2011
diff --git a/maillog-hashnize.pl b/maillog-hashnize.pl
@@ -44,7 +44,9 @@
         or next;
     if ( !exists $queue{$queue_id} ) {
         $queue{$queue_id} = {};
-        push @found_queue, $queue_id; # queue_id は見つかった順番に記録される
+        # queue_id は見つかった順番に記録される
+        # queue_id is recorded order by found.
+        push @found_queue, $queue_id;
     }
     my $q = $queue{$queue_id};
     my ($information) = $following =~ /\s*\((.+)\)$/
@@ -55,9 +57,13 @@
         if ( @param % 2 == 0 ) {
             %param = @param;
         }
+        else {
+            warn "found odd number of key/value pair.";
+        }
     }
 
     # %param 手直し
+    # %param modification.
     if ( exists $param{client} && defined $param{client} ) {
         my ($hostname, $ipaddr) = $param{client} =~ /^(.+?)\[([0-9.]+)\]/;
         $param{client_hostname} = $hostname;
@@ -73,30 +79,32 @@
     }
 
     # 今回の行で取得することができた情報を追加
+    # Addition of information that be got current line.
     for my $key (keys %param) {
         my $value = $param{$key};
         if ( !exists $q->{$key} ) {
-            # 新規採用
+            # 新規採用 / newly
             $q->{$key} = $value;
         }
-        elsif ( !ref $q->{$key} ) { # 文字列
-            # 配列リファレンスにして追加
+        elsif ( !ref $q->{$key} ) { # 文字列 / string
+            # 配列リファレンスにして追加 / Addition as array reference
             $q->{$key} = [$q->{$key}, $value];
         }
         elsif ( ref $q->{$key} eq 'ARRAY' ) {
-            # 配列リファレンスに push
+            # 配列リファレンスに push / push to array reference
             push @{$q->{$key}}, $value;
         }
         else {
-            die "unknown situation."; # 想定外
+            die "unknown situation."; # 想定外 / non-supposition
         }
     }
 
     # 12/31 -> 01/01 などの流れの場合の年の調整
+    # Adjustoment flow of year on 12/31 -> 01/01
     skew_date($date)
         and $year++;
 
-    # _meta 追加
+    # Add _meta
     my $meta_q = $q->{_meta} ||= {};
     if ( !defined $meta_q->{host} ) {
         $meta_q->{host} = $host;
@@ -112,17 +120,20 @@
 
 #print Dumper(\%queue);
 
-# ### デバッグ
+# ### DEBUG:
 # my @list = map { [$_ => $queue{$_}] } @found_queue;
 # print Dumper(@list);
 # exit;
 
 # ヘッダ出力
+# Header output
 if ( !$no_header && @found_queue ) {
     # 内容行 (@found_queue) が見つからなかったら
     # ヘッダ行も出力しないとした
     # 内容がなければファイルサイズが 0 のほうが
     # パッと見てわかりやすいからという意図
+    # If content row is not found, we do not output header line too.
+    # It is cleary that 
     printf "%s\n", join $SEPARATOR, map { s/^\s+//; qq("$_") } split /\n/, <<END_LIST;
   queue id
   arrived time
@@ -140,7 +151,7 @@
 END_LIST
 }
 
-### pflog 互換出力
+### pflog compatible output
 for my $queue_id (@found_queue) {
     my $q = $queue{$queue_id}; # 'HASH'
 
@@ -161,10 +172,13 @@
         elsif ( !/^\d+(?:\.\d+)?$/ && length $_ ) {
             # Excel は数字のみではないものならダブルクォートするので
             # それを真似る
+            # Excel quotes not only digits,
+            # so this program imimtations it.
             $_ = qq("$_");
         }
         elsif ( /^\d{11}$/ ) {
             # queue id (と思われるもの)がたまたま全部数字だった場合
+            # queue id (we think so) character is all digits unexpectedly.
             $_ = qq("$_");
         }
     }
@@ -188,25 +202,29 @@
 #  information (reason of defered, local mailbox name, successful message...)
 
 sub date_format {
-    my $date_str = shift; # Jan 31 00:00:01
+    my $date_str = shift; # e.g. Jan 31 00:00:01
     my ($mon_name, $day, $hhmmss) = split /\s+/, $date_str;
 #    my ($hh, $mm, $ss) = map { sprintf '%d', $_ } split /:/, $hhmmss;
-    $day =~ s/^0//; # sprintf に8進数と勘違いされないように
+    # sprintf に8進数と勘違いされないように
+    # We avoid that sprintf confuses it octet.
+    $day =~ s/^0//;
     my $mon = $month{$mon_name};
     return sprintf '%d/%02d/%02d %s', $year, $mon, $day, $hhmmss;
 }
 
 # skew_date($syslog_date_string)
 # 前回 skew_date を呼び出したときの日付(年月日)よりも逆行しているなら真
 # 分や秒の細かいことまで見ない
+# Ture if Date (year month day) previous calling of skwe_date goes backward.
+# We do not see detail of minutes or second.
 {
 my $prev_mm_dd; # state
 sub skew_date {
     my $cur_mm_dd = join '/', (split m{/}, date_format(shift))[1,2];
 
     my $is_skew;
-    if (    !$prev_mm_dd              # 初回呼び出し
-         || $prev_mm_dd le $cur_mm_dd # 順番通り (e.g. 07/22 le 07/23)
+    if (    !$prev_mm_dd              # 初回呼び出し / initial calling
+         || $prev_mm_dd le $cur_mm_dd # 順番通り / right order (e.g. 07/22 le 07/23)
      ) {
         $is_skew = 0;
     }

diff --git a/maillog-hashnize.pl b/maillog-hashnize.pl
@@ -0,0 +1,258 @@
+#!/usr/bin/perl
+# "pflog" enhancement for parsing maillog of postfix 2.3 or higher version.
+# "pflog" see: http://www.tmtm.org/ruby/pflog/pflog-0.3
+
+use strict;
+use warnings;
+
+use Time::Local;
+use Getopt::Long;
+#use Data::Dumper;
+
+my %month = (qw(Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec 12));
+
+my $SEPARATOR = q(,);
+
+my (%queue, @found_queue);
+
+GetOptions(
+    'year|y=s'    => \ my $year,
+    'no-header|H' => \ my $no_header,
+    'help'        => \ my $help,
+);
+
+if ( $help ) {
+    print <<END_HELP;
+Usage:
+  $0 -y YEAR mail.log > mail.csv
+END_HELP
+    exit;
+}
+
+if ( !defined $year || $year !~ /^\d{4}$/ ) {
+    die "specify -y YEAR (YEAR is 4 letter digits, e.g. 2011).\n";
+}
+
+my $re_date = qr/[A-Z][a-z][a-z]  ?\d+ \d{2}:\d{2}:\d{2}/;
+my $re_host = qr/\S+/;
+#my $re_following_capture = qr/\s*\(([^()]+)\)/;
+
+my $re_line = qr{^($re_date) ($re_host) postfix/(\w+)\[\d+\]: (\w+):\s*(.*)};
+
+while (<>) {
+    my ($date, $host, $service, $queue_id, $following) = /$re_line/
+        or next;
+    if ( !exists $queue{$queue_id} ) {
+        $queue{$queue_id} = {};
+        push @found_queue, $queue_id; # queue_id は見つかった順番に記録される
+    }
+    my $q = $queue{$queue_id};
+    my ($information) = $following =~ /\s*\((.+)\)$/
+        and $following =~ s/\s\(.+\)$//;
+    my %param;
+    if ( $following =~ /=/ ) {
+        my @param = map { split /=/, $_, 2 } split /,\s*/, $following;
+        if ( @param % 2 == 0 ) {
+            %param = @param;
+        }
+    }
+
+    # %param 手直し
+    if ( exists $param{client} && defined $param{client} ) {
+        my ($hostname, $ipaddr) = $param{client} =~ /^(.+?)\[([0-9.]+)\]/;
+        $param{client_hostname} = $hostname;
+        $param{client_ipaddr}   = $ipaddr;
+    }
+    if ( $information && $param{status} ) {
+        $param{information} = $information;
+    }
+    for my $key ( qw(from to) ) {
+        if ( defined $param{$key} && $param{$key} =~ /^<(.*)>$/ ) {
+            $param{$key} = $1;
+        }
+    }
+
+    # 今回の行で取得することができた情報を追加
+    for my $key (keys %param) {
+        my $value = $param{$key};
+        if ( !exists $q->{$key} ) {
+            # 新規採用
+            $q->{$key} = $value;
+        }
+        elsif ( !ref $q->{$key} ) { # 文字列
+            # 配列リファレンスにして追加
+            $q->{$key} = [$q->{$key}, $value];
+        }
+        elsif ( ref $q->{$key} eq 'ARRAY' ) {
+            # 配列リファレンスに push
+            push @{$q->{$key}}, $value;
+        }
+        else {
+            die "unknown situation."; # 想定外
+        }
+    }
+
+    # 12/31 -> 01/01 などの流れの場合の年の調整
+    skew_date($date)
+        and $year++;
+
+    # _meta 追加
+    my $meta_q = $q->{_meta} ||= {};
+    if ( !defined $meta_q->{host} ) {
+        $meta_q->{host} = $host;
+    }
+    if ( $param{client} || $param{uid} ) {
+        $meta_q->{start_date} = date_format($date);
+    }
+    if ( $param{status} && $param{status} eq 'sent' ) {
+        $meta_q->{success}++;
+        $meta_q->{end_date} = date_format($date);
+    }
+}
+
+#print Dumper(\%queue);
+
+# ### デバッグ
+# my @list = map { [$_ => $queue{$_}] } @found_queue;
+# print Dumper(@list);
+# exit;
+
+# ヘッダ出力
+if ( !$no_header && @found_queue ) {
+    # 内容行 (@found_queue) が見つからなかったら
+    # ヘッダ行も出力しないとした
+    # 内容がなければファイルサイズが 0 のほうが
+    # パッと見てわかりやすいからという意図
+    printf "%s\n", join $SEPARATOR, map { s/^\s+//; qq("$_") } split /\n/, <<END_LIST;
+  queue id
+  arrived time
+  processed time
+  smtp client hostname / uid
+  smtp client IP address / username
+  envelope from
+  envelope to
+  message-id
+  status
+  relay to
+  delay time
+  size
+  information (reason of defered, local mailbox name, successful message...)
+END_LIST
+}
+
+### pflog 互換出力
+for my $queue_id (@found_queue) {
+    my $q = $queue{$queue_id}; # 'HASH'
+
+    my @row = ($queue_id,
+               @{$q->{_meta}}{qw(start_date end_date)},
+               @$q{qw(client_hostname client_ipaddr from to message-id status relay delay size information)});
+
+    for ( grep { ref $_ eq 'ARRAY' } @row ) {
+        $_ = join $SEPARATOR, @$_;
+    }
+
+    for (@row) {
+        $_ = '' if !defined $_;
+        if ( /,/ || /"/ ) {
+            s/"/""/g;
+            $_ = qq("$_");
+        }
+        elsif ( !/^\d+(?:\.\d+)?$/ && length $_ ) {
+            # Excel は数字のみではないものならダブルクォートするので
+            # それを真似る
+            $_ = qq("$_");
+        }
+        elsif ( /^\d{11}$/ ) {
+            # queue id (と思われるもの)がたまたま全部数字だった場合
+            $_ = qq("$_");
+        }
+    }
+    printf "%s\n", join $SEPARATOR, @row;
+}
+
+### from pflog
+# output:
+#  queue id
+#  arrived time
+#  processed time
+#  smtp client hostname / uid
+#  smtp client IP address / username
+#  envelope from
+#  envelope to
+#  message-id
+#  status
+#  relay to
+#  delay time
+#  size
+#  information (reason of defered, local mailbox name, successful message...)
+
+sub date_format {
+    my $date_str = shift; # Jan 31 00:00:01
+    my ($mon_name, $day, $hhmmss) = split /\s+/, $date_str;
+#    my ($hh, $mm, $ss) = map { sprintf '%d', $_ } split /:/, $hhmmss;
+    $day =~ s/^0//; # sprintf に8進数と勘違いされないように
+    my $mon = $month{$mon_name};
+    return sprintf '%d/%02d/%02d %s', $year, $mon, $day, $hhmmss;
+}
+
+# skew_date($syslog_date_string)
+# 前回 skew_date を呼び出したときの日付(年月日)よりも逆行しているなら真
+# 分や秒の細かいことまで見ない
+{
+my $prev_mm_dd; # state
+sub skew_date {
+    my $cur_mm_dd = join '/', (split m{/}, date_format(shift))[1,2];
+
+    my $is_skew;
+    if (    !$prev_mm_dd              # 初回呼び出し
+         || $prev_mm_dd le $cur_mm_dd # 順番通り (e.g. 07/22 le 07/23)
+     ) {
+        $is_skew = 0;
+    }
+    else {
+        $is_skew = 1;
+    }
+    $prev_mm_dd = $cur_mm_dd;
+    return $is_skew;
+}
+}
+
+__END__
+
+=pod
+
+=encoding utf-8
+
+=head1 NAME
+
+maillog-hashnize.pl - enhancement of "pflog" for parsing maillog of postfix 2.3 or higher version.
+
+=head1 SYNOPSIS
+
+ # input postfix log of syslog format, output csv format.
+ maillog-hasnize.pl -y 2011 mail.log > maillog.csv
+
+=head1 DESCRIPTIONS
+
+this program is postfix "mail.log" parser, convert from syslog format to csv format for MS-Excel and more spreadsheet viewer.
+
+=head1 LIMITATION
+
+because syslog format does not include "year" information,
+specify the "mail.log"'s year as "-y" option.
+
+support from the year to from *next* new year.
+but years are missing from the "mail.log", e.g. next "2009/??/??" line is "2011/??/??", it is not support that leap 2 year and more.
+
+=head1 ACKNOWLEDGEMENT
+
+pflog: L<http://www.tmtm.org/ruby/pflog/pflog-0.3>
+
+=head1 COPYRIGHT AND LICENCE
+
+Copyright 2010-2011 fonfun corporation.
+
+This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
+
+=cut
No results found