#!/usr/bin/perl
#############################################################################
#  Copyright (C) 2008 Nippon Telegraph and Telephone Corporation
#############################################################################
#####################################################################
# Function: pg_get_logfilterinfo
#
#
# summary:
# This is a log-filter module for PostgreSQL server messages read from
# stdin. Every line is passed through to stdout unchanged; when a message
# belongs to an analysis target, its values are also appended to a
# dedicated text file.
# 
#| Analysis target is 
#|   - autovacuum log (log_autovacuum_min_duration)
#|   - checkpoint log (log_checkpoints)
#
# Usage example: pg_ctl -D $PGDATA start | pg_get_logfilterinfo [--postgres-datadir=<DATADIR>]
#
#####################################################################

use File::Basename;
use File::Path;
use Getopt::Long;
use Carp;
use Fcntl;

#####################################################################
# Constants: fixed values and message texts
#####################################################################

# Error messages. Each one is a prefix; the caller appends the offending
# log line or file path.
use constant STATS_MSG_ATVCMLOG_UNMATCH       => "detected invalid autovacuum log message: ";
use constant STATS_MSG_CHKPTLOG_UNMATCH       => "detected invalid checkpoint log message: ";
use constant STATS_MSG_CANT_CREATE_OUTPUT_DIR => "could not create out put directory: ";
use constant STATS_MSG_CANT_OPEN_FILE         => "could not open file: ";

# Program name printed in the usage text
use constant CMDNAME => basename($0);

# sprintf format of the timestamp column (year, month, day, hour, min, sec)
use constant FORMAT_DATETIME => qq(%04d-%02d-%02d %02d:%02d:%02d);

# Autovacuum info file name. The writer prepends "<database name>-" to it.
use constant OUTPUT_ATVCM_FILENAME => "autovacuum.log";

# Checkpoint info file name
use constant OUTPUT_CHKPT_FILENAME => "checkpoint.log";

# Name of the directory that holds the output files
use constant OUTPUT_DIRNAME => "pg_statslog";

# Lock file path prefix; ".<postmaster pid>" is appended to it
use constant LOCK_FILE_PREFIX => "/tmp/lock_pg_statsinfo";

# Name of the postmaster pid file inside the database cluster directory
use constant POSTMASTER_PID_FILE => "postmaster.pid";

# autovacuum log format
#
# These are pattern source strings, not compiled regexes. They are
# interpolated into matches that use the /xms modifiers, so the unescaped
# blanks below are layout only and every literal blank is written as \s.
# The "[0-9]* [:]*" part lets optional digits and colons follow "LOG:  "
# (presumably an optional prefix in the server message - TODO confirm).
#
# IN1 - first line.  Captures: $1 database, $2 schema, $3 table,
#       $4 number of index scans.
# IN2 - second line. Captures: $1 pages removed, $2 pages remaining.
# IN3 - third line.  Captures: $1 tuples removed, $2 tuples remaining.
# IN4 - fourth line. Captures: $1 the CPU value suffixed "s", $2 the CPU
#       value suffixed "u" (presumably system / user seconds - verify
#       against the server's message), $3 elapsed seconds.
$FORMAT_ATVCM_IN1 = q(LOG: \s\s [0-9]* [:]* \s* automatic \s vacuum \s of \s table \s \"(\w+)\.(\w+)\.(\w+)\": \s index \s scans: \s (\d+) \Z);
$FORMAT_ATVCM_IN2 = q( \s+ pages: \s (\d+) \s removed, \s (\d+) \s remain \Z);
$FORMAT_ATVCM_IN3 = q( \s+ tuples: \s (\d+) \s removed, \s (\d+) \s remain \Z);
$FORMAT_ATVCM_IN4 = q( \s+ system\susage: \s CPU \s (\d+\.\d+)s/(\d+\.\d+)u \s sec \s elapsed \s (\d+\.\d+) \s sec \Z);

# checkpoint log format
#
# IN1 - "checkpoint starting" line. Captures: $1 the text after
#       "starting: " (word characters and whitespace only).
# IN2 - "checkpoint complete" line. Captures: $1 buffers written,
#       $2 the percentage in parentheses, $3 log files added, $4 removed,
#       $5 recycled, $6 write seconds, $7 sync seconds, $8 total seconds.
#       Inside q() the sequence "\\(" yields the regex "\(", i.e. a
#       literal parenthesis.
$FORMAT_CHKPT_IN1 = q(LOG: \s\s [0-9]* [:]* \s* checkpoint \s starting: \s ([\w\s]+) \Z);
$FORMAT_CHKPT_IN2 = q(LOG: \s\s [0-9]* [:]* \s* checkpoint \s complete: \s wrote \s (\d+) \s buffers \s \\((\d+\.\d+)\%\\); \s (\d+)\s transaction \s log \s file \\(s\\) \s added, \s (\d+) \s removed, \s (\d+) \s recycled; \s write=(\d+\.\d+) \s s, \s sync=(\d+\.\d+) \s s, \s total=(\d+\.\d+) \s s \Z);

# variable: G_checkpoint_info
# Fields of the checkpoint record being assembled. It is reset and given
# its timestamp and trigger by the "checkpoint starting" line, then
# completed and written out by the "checkpoint complete" line.
#
my %G_checkpoint_info;

# variable: G_output_dir
# Full path of the output directory.
# This is a scalar path: every user of it reads or assigns $G_output_dir.
# It used to be declared as a hash (%G_output_dir), which left the scalar
# as an undeclared package global.
#
my $G_output_dir = "";

# variable: G_postmaster_pid
# pid of postmaster
# NOTE(review): no code visible in this file reads or writes this
# variable - confirm before removing it.
#
my $G_postmaster_pid = 0;

# variable: G_lock_file
# Path of the lock file that serialises appends to the output files
#
my $G_lock_file = "";

# Files and directories created from here on are not accessible to
# group and others.
umask 0066;
main();
1;

#####################################################################
# Function: get_output_dir
#
#
# summary:
# Build the path of the output directory. The base directory is the
# argument, or $PGDATA (environment variable) when the argument is empty;
# the result is "<base directory>/pg_statslog".
#
# parameters:
#| base directory path (may be empty, may start with '~' or '~user')
#
# return:
#| output directory path, or "" when neither the argument nor $PGDATA
#| gives a base directory
#
# exceptions:
#| none
#
# recital:
#| The function only builds the path. It neither checks that the
#| directory exists nor creates it.
#
#####################################################################
sub get_output_dir
{
    my ($tmp_output_dir) = @_;

    # No directory from the caller: fall back to the environment.
    $tmp_output_dir = $ENV{PGDATA} if $tmp_output_dir eq "";

    # Still nothing to work with: report that with an empty path.
    return "" if $tmp_output_dir eq "";

    # A leading '~' stands for the current user's home directory and a
    # leading '~name' for the home directory of user "name".
    $tmp_output_dir =~ s{ \A~([^/]*) }
            { $1
                ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])
            }ex;

    # Drop one trailing '/' so that the join below does not double it.
    $tmp_output_dir =~ s/\/\z//xms;

    return join("/", $tmp_output_dir, OUTPUT_DIRNAME);
}

#####################################################################
# Function: get_postmaster_pid
#
#
# summary:
# Read the postmaster pid, i.e. the first line of "postmaster.pid" in the
# given directory. When the file cannot be opened, the open is retried
# once after one second.
#
# parameters:
#| path to postgresql database cluster
#
# return:
#| first line of postmaster.pid without its line terminator
#| (undef when the file is empty)
#
# exception:
#| croaks when postmaster.pid cannot be opened on the second attempt
#
# recital:
#| The file is opened with three-argument open, so characters such as
#| '>' or '|' in the directory path cannot change the open mode.
#
#####################################################################
sub get_postmaster_pid
{
    my ($output_dir) = @_;
    my $pid_file = $output_dir . "/" . POSTMASTER_PID_FILE;
    my $pid_fh;

    if (!open($pid_fh, '<', $pid_file))
    {
        # First attempt failed: wait a moment and try once more before
        # giving up.
        sleep 1;
        open($pid_fh, '<', $pid_file)
            or croak STATS_MSG_CANT_OPEN_FILE . POSTMASTER_PID_FILE;
    }

    # Only the first record is of interest.
    my $pid = <$pid_fh>;
    close($pid_fh);

    chomp($pid) if defined $pid;

    return $pid;
}

#####################################################################
# Function: create_lockfile
#
#
# summary:
# Create the lock file and return its path. The path is
# "/tmp/lock_pg_statsinfo.PID", where PID is the value read from
# postmaster.pid in the database cluster directory.
#
# parameters:
#| path to postgresql database cluster (when empty, $PGDATA is used;
#| may start with '~' or '~user')
#
# return:
#| path to lock file, or "" when neither the argument nor $PGDATA gives
#| a directory
#
# exceptions:
#| croaks when postmaster.pid cannot be opened (the original message is
#| printed to stdout first) or when the lock file cannot be opened
#
# recital:
#| An already existing lock file is reused as is (O_CREAT without
#| O_EXCL).
#
#####################################################################
sub create_lockfile
{
    my ($tmp_output_dir) = @_;
    my $lockfile_path    = "";

    # No directory from the caller: fall back to the environment.
    if (!defined $tmp_output_dir || $tmp_output_dir eq "")
    {
        $tmp_output_dir = $ENV{PGDATA};
    }

    # Nothing to work with: the caller detects this by the empty path.
    if (!defined $tmp_output_dir || $tmp_output_dir eq "")
    {
        return $lockfile_path;
    }

    #
    # if '~' is in head of param, we replace it with the home directory
    #
    $tmp_output_dir =~ s{ \A~([^/]*) }
            { $1
                ? (getpwnam($1))[7] : ( $ENV{HOME} || $ENV{LOGDIR} || (getpwuid($<))[7])
            }ex;

    #
    # delete one trailing '/'
    #
    $tmp_output_dir =~ s/\/\z//xms;

    #
    # The pid is kept in a lexical; it used to leak into an undeclared
    # package variable.
    #
    my $postmaster_pid = eval { get_postmaster_pid($tmp_output_dir) };
    if ($@)
    {
        print $@;
        croak;
    }
    $lockfile_path = LOCK_FILE_PREFIX . "." . $postmaster_pid;

    #
    # Create the lock file. It is only created here; the lock itself is
    # taken by the code that writes the output files.
    #
    sysopen(my $lock_fh, $lockfile_path, O_RDWR|O_CREAT, 0600)
        or croak STATS_MSG_CANT_OPEN_FILE . $lockfile_path;
    close($lock_fh);

    return $lockfile_path;
}

#####################################################################
# Function: get_autovacuum_info
#
#
# summary:
# Read the three continuation lines of an autovacuum log message (pages,
# tuples, system usage), echo each of them to stdout, and append one
# tab-separated record to "<output dir>/<db_name>-autovacuum.log".
#
# parameters:
#| handler      file handle to read the continuation lines from
#| db_name      \
#| schema_name   > taken from the first line of the message
#| table_name   /
#
# return:
#| 'TRUE'  - record written
#| 'FALSE' - a continuation line did not match the expected format
#|           (the line is reported on stderr)
#| Both values are strings and callers compare them as strings; note
#| that 'FALSE' is a true value in boolean context.
#
# exceptions:
#| croaks when the lock file or the output file cannot be opened; the
#| error text is printed to stdout and the lock file is removed first
#
# recital:
#| The record columns are: timestamp, db.schema.table, pages removed,
#| pages remaining, tuples removed, tuples remaining, elapsed seconds.
#| The ($$$$) prototype is kept so that the interface stays unchanged.
#
#####################################################################
sub get_autovacuum_info($$$$)
{
    my ($handler, $db_name, $schema_name, $table_name) = @_;

    #
    # The timestamp is taken when the first line has been seen, before
    # the remaining lines are read.
    #
    my $param_datetime   = getcurrenttimestamp();
    my $param_table_name = "$db_name.$schema_name.$table_name";

    #
    # Read and check the 2nd, 3rd and 4th line of the message in order.
    # $matched[n] holds the captures of the n-th continuation line.
    #
    my @matched;
    for my $format ($FORMAT_ATVCM_IN2, $FORMAT_ATVCM_IN3, $FORMAT_ATVCM_IN4)
    {
        my $line = <$handler>;

        # End of input in the middle of a message: treat it as an empty
        # line, which fails the format check below.
        $line = "" if !defined $line;

        #
        # input log messages are output to stdout as is
        #
        print "$line";

        chomp($line);

        # Every format has capture groups, so an empty list means
        # "no match".
        my @fields = ($line =~ /$format/xms);
        if (!@fields)
        {
            print STDERR STATS_MSG_ATVCMLOG_UNMATCH . $line . "\n";
            return 'FALSE';
        }
        push @matched, [@fields];
    }

    my ($param_page_remove,  $param_page_remain)  = @{ $matched[0] };
    my ($param_tuple_remove, $param_tuple_remain) = @{ $matched[1] };
    my $param_total_elapsed                       = $matched[2][2];

    #
    # open output file
    #   - we also open lock file for exclusive control
    #
    my $output_file = "$G_output_dir/$db_name-" . OUTPUT_ATVCM_FILENAME;
    my $lock_fh;
    my $out_fh;
    eval
    {
        sysopen($lock_fh, $G_lock_file, O_RDWR|O_CREAT, 0600)
            or croak STATS_MSG_CANT_OPEN_FILE . $G_lock_file;

        # 2 is the exclusive-lock operation (LOCK_EX). The lock is held
        # until the lock handle is closed.
        flock($lock_fh, 2);

        open($out_fh, '>>', $output_file)
            or croak STATS_MSG_CANT_OPEN_FILE . $output_file;
    };
    if ($@)
    {
        my $error = $@;
        close($lock_fh) if $lock_fh;
        print $error;
        unlink $G_lock_file;
        croak;
    }

    #
    # add the record to the autovacuum info file
    #
    print {$out_fh} join("\t",
                         $param_datetime,
                         $param_table_name,
                         $param_page_remove,
                         $param_page_remain,
                         $param_tuple_remove,
                         $param_tuple_remain,
                         $param_total_elapsed) . "\n";
    close($out_fh);
    close($lock_fh);

    return 'TRUE';
}

#####################################################################
# Function: get_checkpoint_info
#
#
# summary:
# Collect checkpoint information. The start line resets the stored
# checkpoint info and records the timestamp and the trigger; the end line
# adds the counters and durations and appends one tab-separated record to
# "<output dir>/checkpoint.log".
#
# parameters:
#| Hash has following keys
#|   LINE_MODE       Line analysis mode (0: Start line, 1: End line)
#| [In LINE_MODE = 0 case]
#|   TRIGGER         checkpoint execute trigger
#| [In LINE_MODE = 1 case]
#|   NUM_BUFFERS     number of written buffers
#|   CREATE_WAL      number of created WAL
#|   DELETE_WAL      number of deleted WAL
#|   RECYCLE_WAL     number of recycled WAL
#|   WRITE_DURATION  write duration
#|   SYNC_DURATION   sync  duration
#|   TOTAL_DURATION  total duration
#
# return:
#| 'TRUE'  - normal exit
#| 'FALSE' - LINE_MODE is missing or is neither 0 nor 1
#| Both values are strings and callers compare them as strings; note
#| that 'FALSE' is a true value in boolean context.
#
# exceptions:
#| croaks when the lock file or the output file cannot be opened; the
#| error text is printed to stdout and the lock file is removed first
#
# recital:
#| The record columns are: timestamp, trigger, buffers, created WAL,
#| deleted WAL, recycled WAL, write, sync and total duration.
#| If an end line arrives without a preceding start line, the timestamp
#| and trigger columns are written empty.
#
#####################################################################
sub get_checkpoint_info
{
    my (%params) = @_;
    my $line_mode = $params{LINE_MODE};

    #
    # A missing mode must not be mistaken for mode 0 (undef == 0 is true).
    #
    if (!defined $line_mode)
    {
        return 'FALSE';
    }

    #
    # If start line..
    #
    if ($line_mode == 0)
    {
        %G_checkpoint_info = ();

        # The record carries the time at which the start line was seen.
        $G_checkpoint_info{Datetime} = getcurrenttimestamp();
        $G_checkpoint_info{Trigger}  = $params{TRIGGER};

        return 'TRUE';
    }

    #
    # Neither start line nor end line
    #
    if ($line_mode != 1)
    {
        return 'FALSE';
    }

    #
    # End line: set output parameters
    #
    $G_checkpoint_info{Num_buffers}    = $params{NUM_BUFFERS};
    $G_checkpoint_info{Create_wal}     = $params{CREATE_WAL};
    $G_checkpoint_info{Delete_wal}     = $params{DELETE_WAL};
    $G_checkpoint_info{Recycle_wal}    = $params{RECYCLE_WAL};
    $G_checkpoint_info{Write_duration} = $params{WRITE_DURATION};
    $G_checkpoint_info{Sync_duration}  = $params{SYNC_DURATION};
    $G_checkpoint_info{Total_duration} = $params{TOTAL_DURATION};

    #
    # open output file
    #   - we also open lock file for exclusive control
    #
    my $output_file = "$G_output_dir/" . OUTPUT_CHKPT_FILENAME;
    my $lock_fh;
    my $out_fh;
    eval
    {
        sysopen($lock_fh, $G_lock_file, O_RDWR|O_CREAT, 0600)
            or croak STATS_MSG_CANT_OPEN_FILE . $G_lock_file;

        # 2 is the exclusive-lock operation (LOCK_EX). The lock is held
        # until the lock handle is closed.
        flock($lock_fh, 2);

        open($out_fh, '>>', $output_file)
            or croak STATS_MSG_CANT_OPEN_FILE . $output_file;
    };
    if ($@)
    {
        my $error = $@;
        close($lock_fh) if $lock_fh;
        print $error;
        unlink $G_lock_file;
        croak;
    }

    #
    # add the record to the checkpoint info file; a field that was never
    # set is written as an empty column
    #
    my @columns = qw(Datetime Trigger Num_buffers Create_wal Delete_wal
                     Recycle_wal Write_duration Sync_duration Total_duration);
    print {$out_fh} join("\t",
                         map { defined $_ ? $_ : "" } @G_checkpoint_info{@columns}) . "\n";
    close($out_fh);
    close($lock_fh);

    return 'TRUE';
}

#####################################################################
# Function: getcurrenttimestamp
#
#
# summary:
# Format the current local time with FORMAT_DATETIME
# (year-month-day hour:minute:second, zero padded).
#
# parameters:
#| none
#
# return:
#| current timestamp string
#
# exceptions:
#| none
#
# recital:
#| none
#
#####################################################################
sub getcurrenttimestamp()
{
    # localtime() fields: 0 sec, 1 min, 2 hour, 3 mday, 4 mon (0-based),
    # 5 year (years since 1900)
    my @now = localtime(time);

    return sprintf(FORMAT_DATETIME,
                   $now[5] + 1900, $now[4] + 1, @now[3, 2, 1, 0]);
}

#####################################################################
# Function: usage
#
#
# summary:
# Print the command line synopsis to stdout.
#
# parameters:
#| none
#
# return:
#| none
#
# exceptions:
#| none
#
# recital:
#| none
#
#####################################################################
sub usage()
{
    # Heading line, then the tab-indented synopsis line.
    my @pieces = ("Usage:\n", "\t", CMDNAME, " [--postgres-datadir=<DATADIR>]\n");

    print join("", @pieces);
}

#####################################################################
# Function: main
#
#
# summary:
# Extract messages about analysis target information from PostgreSQL's
# server log. The log is read from stdin and every line is copied to
# stdout unchanged.
# -- Now we check about autovacuum and checkpoint info. --
#
# parameters:
#| ARGV - parameters list (--postgres-datadir=<DATADIR>)
#
# return:
#| The function does not return; it ends the process with exit status
#| 0 - normal exit
#| 1 - abnormal exit (no data directory given, or no lock file path)
#
# exceptions:
#| dies when the output directory cannot be created; errors raised while
#| creating the lock file or writing the output files are not caught
#
# recital:
#|- Adding new monitoring information
#|  If you want to add a new monitoring information, please add the
#|  appropriate regular expression matching pattern in the while loop too.
#####################################################################
sub main
{
    #
    # Turn off stdout buffering so that every log line is passed on
    # immediately.
    #
    $| = 1;

    my $opt_output_dir = "";

    #
    # get run-time option
    #
    GetOptions('postgres-datadir=s' => \$opt_output_dir);

    #
    # File handle for reading the server log. A glob reference is used
    # instead of the bare word STDIN, which is only a string that happens
    # to work as a symbolic handle name.
    #
    my $in_handler = \*STDIN;

    #
    # checkpoint info storing area
    #
    %G_checkpoint_info = ();

    #
    # set output directory
    #
    $G_output_dir = get_output_dir($opt_output_dir);
    if ($G_output_dir eq "")
    {
        usage();
        exit(1);
    }

    #
    # create lock file
    #
    $G_lock_file = create_lockfile($opt_output_dir);
    if ($G_lock_file eq "")
    {
        exit(1);
    }

    #
    # create output directory
    #
    eval
    {
        mkpath($G_output_dir);
    };
    if ($@)
    {
        my $errmsg = $@;
        chomp($errmsg);
        unlink $G_lock_file;
        die STATS_MSG_CANT_CREATE_OUTPUT_DIR, $errmsg . "\n";
    }

    #
    # Main loop: read records from stdin. The line is held in a lexical
    # so that the global $_ is left alone.
    #
    while (defined(my $line = <$in_handler>))
    {
        #
        # put reading line to stdout as is..
        #
        print "$line";

        chomp($line);

        #####
        # If you add new monitoring info, add its pattern match below.
        #####

        #
        # first record of an autovacuum message? The remaining records
        # are read from the same handle by get_autovacuum_info, which
        # reports format errors itself.
        #
        if ($line =~ /$FORMAT_ATVCM_IN1/xms)
        {
            get_autovacuum_info($in_handler, $1, $2, $3);
        }
        #
        # "checkpoint starting" record?
        #
        elsif ($line =~ /$FORMAT_CHKPT_IN1/xms)
        {
            my %cp_hash = ('LINE_MODE' => 0,
                           'TRIGGER' => $1);
            get_checkpoint_info(%cp_hash);
        }
        #
        # "checkpoint complete" record? ($2, the percentage, is not used)
        #
        elsif ($line =~ /$FORMAT_CHKPT_IN2/xms)
        {
            my %cp_hash = ('LINE_MODE' => 1,
                           'NUM_BUFFERS' => $1,
                           'CREATE_WAL' => $3,
                           'DELETE_WAL' => $4,
                           'RECYCLE_WAL' => $5,
                           'WRITE_DURATION' => $6,
                           'SYNC_DURATION' => $7,
                           'TOTAL_DURATION' => $8);
            my $rcode = get_checkpoint_info(%cp_hash);

            # The result is the string 'TRUE' or 'FALSE'.
            if ($rcode ne 'TRUE')
            {
                print STDERR STATS_MSG_CHKPTLOG_UNMATCH . $line . "\n";
            }
        }
    }

    #
    # delete lock file
    #
    unlink $G_lock_file;

    exit(0);
}
