Skip to content

Instantly share code, notes, and snippets.

@xtetsuji
Last active October 11, 2022 08:08
Show Gist options
  • Select an option

  • Save xtetsuji/1446584 to your computer and use it in GitHub Desktop.

Select an option

Save xtetsuji/1446584 to your computer and use it in GitHub Desktop.

Revisions

  1. xtetsuji revised this gist Feb 10, 2016. No changes.
  2. xtetsuji revised this gist Dec 19, 2013. 1 changed file with 31 additions and 13 deletions.
    44 changes: 31 additions & 13 deletions maillog-hashnize.pl
    100644 → 100755
    Original file line number Diff line number Diff line change
    @@ -44,7 +44,9 @@
    or next;
    if ( !exists $queue{$queue_id} ) {
    $queue{$queue_id} = {};
    push @found_queue, $queue_id; # queue_id は見つかった順番に記録される
    # queue_id は見つかった順番に記録される
    # queue_id is recorded order by found.
    push @found_queue, $queue_id;
    }
    my $q = $queue{$queue_id};
    my ($information) = $following =~ /\s*\((.+)\)$/
    @@ -55,9 +57,13 @@
    if ( @param % 2 == 0 ) {
    %param = @param;
    }
    else {
    warn "found odd number of key/value pair.";
    }
    }

    # %param 手直し
    # %param modification.
    if ( exists $param{client} && defined $param{client} ) {
    my ($hostname, $ipaddr) = $param{client} =~ /^(.+?)\[([0-9.]+)\]/;
    $param{client_hostname} = $hostname;
    @@ -73,30 +79,32 @@
    }

    # 今回の行で取得することができた情報を追加
    # Addition of information that be got current line.
    for my $key (keys %param) {
    my $value = $param{$key};
    if ( !exists $q->{$key} ) {
    # 新規採用
    # 新規採用 / newly
    $q->{$key} = $value;
    }
    elsif ( !ref $q->{$key} ) { # 文字列
    # 配列リファレンスにして追加
    elsif ( !ref $q->{$key} ) { # 文字列 / string
    # 配列リファレンスにして追加 / Addition as array reference
    $q->{$key} = [$q->{$key}, $value];
    }
    elsif ( ref $q->{$key} eq 'ARRAY' ) {
    # 配列リファレンスに push
    # 配列リファレンスに push / push to array reference
    push @{$q->{$key}}, $value;
    }
    else {
    die "unknown situation."; # 想定外
    die "unknown situation."; # 想定外 / non-supposition
    }
    }

    # 12/31 -> 01/01 などの流れの場合の年の調整
    # Adjustoment flow of year on 12/31 -> 01/01
    skew_date($date)
    and $year++;

    # _meta 追加
    # Add _meta
    my $meta_q = $q->{_meta} ||= {};
    if ( !defined $meta_q->{host} ) {
    $meta_q->{host} = $host;
    @@ -112,17 +120,20 @@

    #print Dumper(\%queue);

    # ### デバッグ
    # ### DEBUG:
    # my @list = map { [$_ => $queue{$_}] } @found_queue;
    # print Dumper(@list);
    # exit;

    # ヘッダ出力
    # Header output
    if ( !$no_header && @found_queue ) {
    # 内容行 (@found_queue) が見つからなかったら
    # ヘッダ行も出力しないとした
    # 内容がなければファイルサイズが 0 のほうが
    # パッと見てわかりやすいからという意図
    # If content row is not found, we do not output header line too.
    # It is cleary that
    printf "%s\n", join $SEPARATOR, map { s/^\s+//; qq("$_") } split /\n/, <<END_LIST;
    queue id
    arrived time
    @@ -140,7 +151,7 @@
    END_LIST
    }

    ### pflog 互換出力
    ### pflog compatible output
    for my $queue_id (@found_queue) {
    my $q = $queue{$queue_id}; # 'HASH'

    @@ -161,10 +172,13 @@
    elsif ( !/^\d+(?:\.\d+)?$/ && length $_ ) {
    # Excel は数字のみではないものならダブルクォートするので
    # それを真似る
    # Excel quotes not only digits,
    # so this program imimtations it.
    $_ = qq("$_");
    }
    elsif ( /^\d{11}$/ ) {
    # queue id (と思われるもの)がたまたま全部数字だった場合
    # queue id (we think so) character is all digits unexpectedly.
    $_ = qq("$_");
    }
    }
    @@ -188,25 +202,29 @@
    # information (reason of defered, local mailbox name, successful message...)

    sub date_format {
    my $date_str = shift; # Jan 31 00:00:01
    my $date_str = shift; # e.g. Jan 31 00:00:01
    my ($mon_name, $day, $hhmmss) = split /\s+/, $date_str;
    # my ($hh, $mm, $ss) = map { sprintf '%d', $_ } split /:/, $hhmmss;
    $day =~ s/^0//; # sprintf に8進数と勘違いされないように
    # sprintf に8進数と勘違いされないように
    # We avoid that sprintf confuses it octet.
    $day =~ s/^0//;
    my $mon = $month{$mon_name};
    return sprintf '%d/%02d/%02d %s', $year, $mon, $day, $hhmmss;
    }

    # skew_date($syslog_date_string)
    # 前回 skew_date を呼び出したときの日付(年月日)よりも逆行しているなら真
    # 分や秒の細かいことまで見ない
    # Ture if Date (year month day) previous calling of skwe_date goes backward.
    # We do not see detail of minutes or second.
    {
    my $prev_mm_dd; # state
    sub skew_date {
    my $cur_mm_dd = join '/', (split m{/}, date_format(shift))[1,2];

    my $is_skew;
    if ( !$prev_mm_dd # 初回呼び出し
    || $prev_mm_dd le $cur_mm_dd # 順番通り (e.g. 07/22 le 07/23)
    if ( !$prev_mm_dd # 初回呼び出し / initial calling
    || $prev_mm_dd le $cur_mm_dd # 順番通り / right order (e.g. 07/22 le 07/23)
    ) {
    $is_skew = 0;
    }
  3. OGATA Tetsuji created this gist Dec 8, 2011.
    258 changes: 258 additions & 0 deletions maillog-hashnize.pl
    Original file line number Diff line number Diff line change
    @@ -0,0 +1,258 @@
    #!/usr/bin/perl
    # "pflog" enhancement for parsing maillog of postfix 2.3 or higher version.
    # "pflog" see: http://www.tmtm.org/ruby/pflog/pflog-0.3

    use strict;
    use warnings;

    use Time::Local;
    use Getopt::Long;
    #use Data::Dumper;

    my %month = (qw(Jan 1 Feb 2 Mar 3 Apr 4 May 5 Jun 6 Jul 7 Aug 8 Sep 9 Oct 10 Nov 11 Dec 12));

    my $SEPARATOR = q(,);

    my (%queue, @found_queue);

    GetOptions(
    'year|y=s' => \ my $year,
    'no-header|H' => \ my $no_header,
    'help' => \ my $help,
    );

    if ( $help ) {
    print <<END_HELP;
    Usage:
    $0 -y YEAR mail.log > mail.csv
    END_HELP
    exit;
    }

    if ( !defined $year || $year !~ /^\d{4}$/ ) {
    die "specify -y YEAR (YEAR is 4 letter digits, e.g. 2011).\n";
    }

    my $re_date = qr/[A-Z][a-z][a-z] ?\d+ \d{2}:\d{2}:\d{2}/;
    my $re_host = qr/\S+/;
    #my $re_following_capture = qr/\s*\(([^()]+)\)/;

    my $re_line = qr{^($re_date) ($re_host) postfix/(\w+)\[\d+\]: (\w+):\s*(.*)};

    while (<>) {
    my ($date, $host, $service, $queue_id, $following) = /$re_line/
    or next;
    if ( !exists $queue{$queue_id} ) {
    $queue{$queue_id} = {};
    push @found_queue, $queue_id; # queue_id は見つかった順番に記録される
    }
    my $q = $queue{$queue_id};
    my ($information) = $following =~ /\s*\((.+)\)$/
    and $following =~ s/\s\(.+\)$//;
    my %param;
    if ( $following =~ /=/ ) {
    my @param = map { split /=/, $_, 2 } split /,\s*/, $following;
    if ( @param % 2 == 0 ) {
    %param = @param;
    }
    }

    # %param 手直し
    if ( exists $param{client} && defined $param{client} ) {
    my ($hostname, $ipaddr) = $param{client} =~ /^(.+?)\[([0-9.]+)\]/;
    $param{client_hostname} = $hostname;
    $param{client_ipaddr} = $ipaddr;
    }
    if ( $information && $param{status} ) {
    $param{information} = $information;
    }
    for my $key ( qw(from to) ) {
    if ( defined $param{$key} && $param{$key} =~ /^<(.*)>$/ ) {
    $param{$key} = $1;
    }
    }

    # 今回の行で取得することができた情報を追加
    for my $key (keys %param) {
    my $value = $param{$key};
    if ( !exists $q->{$key} ) {
    # 新規採用
    $q->{$key} = $value;
    }
    elsif ( !ref $q->{$key} ) { # 文字列
    # 配列リファレンスにして追加
    $q->{$key} = [$q->{$key}, $value];
    }
    elsif ( ref $q->{$key} eq 'ARRAY' ) {
    # 配列リファレンスに push
    push @{$q->{$key}}, $value;
    }
    else {
    die "unknown situation."; # 想定外
    }
    }

    # 12/31 -> 01/01 などの流れの場合の年の調整
    skew_date($date)
    and $year++;

    # _meta 追加
    my $meta_q = $q->{_meta} ||= {};
    if ( !defined $meta_q->{host} ) {
    $meta_q->{host} = $host;
    }
    if ( $param{client} || $param{uid} ) {
    $meta_q->{start_date} = date_format($date);
    }
    if ( $param{status} && $param{status} eq 'sent' ) {
    $meta_q->{success}++;
    $meta_q->{end_date} = date_format($date);
    }
    }

    #print Dumper(\%queue);

    # ### デバッグ
    # my @list = map { [$_ => $queue{$_}] } @found_queue;
    # print Dumper(@list);
    # exit;

    # ヘッダ出力
    if ( !$no_header && @found_queue ) {
    # 内容行 (@found_queue) が見つからなかったら
    # ヘッダ行も出力しないとした
    # 内容がなければファイルサイズが 0 のほうが
    # パッと見てわかりやすいからという意図
    printf "%s\n", join $SEPARATOR, map { s/^\s+//; qq("$_") } split /\n/, <<END_LIST;
    queue id
    arrived time
    processed time
    smtp client hostname / uid
    smtp client IP address / username
    envelope from
    envelope to
    message-id
    status
    relay to
    delay time
    size
    information (reason of defered, local mailbox name, successful message...)
    END_LIST
    }

    ### pflog 互換出力
    for my $queue_id (@found_queue) {
    my $q = $queue{$queue_id}; # 'HASH'

    my @row = ($queue_id,
    @{$q->{_meta}}{qw(start_date end_date)},
    @$q{qw(client_hostname client_ipaddr from to message-id status relay delay size information)});

    for ( grep { ref $_ eq 'ARRAY' } @row ) {
    $_ = join $SEPARATOR, @$_;
    }

    for (@row) {
    $_ = '' if !defined $_;
    if ( /,/ || /"/ ) {
    s/"/""/g;
    $_ = qq("$_");
    }
    elsif ( !/^\d+(?:\.\d+)?$/ && length $_ ) {
    # Excel は数字のみではないものならダブルクォートするので
    # それを真似る
    $_ = qq("$_");
    }
    elsif ( /^\d{11}$/ ) {
    # queue id (と思われるもの)がたまたま全部数字だった場合
    $_ = qq("$_");
    }
    }
    printf "%s\n", join $SEPARATOR, @row;
    }

    ### from pflog
    # output:
    # queue id
    # arrived time
    # processed time
    # smtp client hostname / uid
    # smtp client IP address / username
    # envelope from
    # envelope to
    # message-id
    # status
    # relay to
    # delay time
    # size
    # information (reason of defered, local mailbox name, successful message...)

    sub date_format {
    my $date_str = shift; # Jan 31 00:00:01
    my ($mon_name, $day, $hhmmss) = split /\s+/, $date_str;
    # my ($hh, $mm, $ss) = map { sprintf '%d', $_ } split /:/, $hhmmss;
    $day =~ s/^0//; # sprintf に8進数と勘違いされないように
    my $mon = $month{$mon_name};
    return sprintf '%d/%02d/%02d %s', $year, $mon, $day, $hhmmss;
    }

    # skew_date($syslog_date_string)
    # 前回 skew_date を呼び出したときの日付(年月日)よりも逆行しているなら真
    # 分や秒の細かいことまで見ない
    {
    my $prev_mm_dd; # state
    sub skew_date {
    my $cur_mm_dd = join '/', (split m{/}, date_format(shift))[1,2];

    my $is_skew;
    if ( !$prev_mm_dd # 初回呼び出し
    || $prev_mm_dd le $cur_mm_dd # 順番通り (e.g. 07/22 le 07/23)
    ) {
    $is_skew = 0;
    }
    else {
    $is_skew = 1;
    }
    $prev_mm_dd = $cur_mm_dd;
    return $is_skew;
    }
    }

    __END__
    =pod
    =encoding utf-8
    =head1 NAME
    maillog-hashnize.pl - enhancement of "pflog" for parsing maillog of postfix 2.3 or higher version.
    =head1 SYNOPSIS
    # input postfix log of syslog format, output csv format.
    maillog-hasnize.pl -y 2011 mail.log > maillog.csv
    =head1 DESCRIPTIONS
    this program is postfix "mail.log" parser, convert from syslog format to csv format for MS-Excel and more spreadsheet viewer.
    =head1 LIMITATION
    because syslog format does not include "year" information,
    specify the "mail.log"'s year as "-y" option.
    support from the year to from *next* new year.
    but years are missing from the "mail.log", e.g. next "2009/??/??" line is "2011/??/??", it is not support that leap 2 year and more.
    =head1 ACKNOWLEDGEMENT
    pflog: L<http://www.tmtm.org/ruby/pflog/pflog-0.3>
    =head1 COPYRIGHT AND LICENCE
    Copyright 2010-2011 fonfun corporation.
    This library is free software; you can redistribute it and/or modify it under the same terms as Perl itself.
    =cut