# -*- perl -*-
# ers_monitor
# Monitor Replication
# Copyright (C) 2000 PostgreSQL, Inc.
###############################################################################
# This program monitors 3 values:
# 1) difference (in sec) between now() & last successful slave-to-master sync
#    (status=1 in _rserv_sync_)
# 2) for pending replication (if exists) difference between prepare end time
#    and last slave sync
# 3) for 3 sequential successful slave-to-master sync times t0, t1, t2 ratio is
#    defined as (t2-t1)/(t1-t0). Ratio > 1 means that intervals between
#    slave-to-master syncs grow.
#
#  Result is stored in "log" file which is command line parameter - default is
#  /tmp/__replic_monitor.log.
#
###############################################################################

# Portability idiom (see perlrun): perl itself never executes this statement
# because of the trailing "if 0".  It only matters when the file is handed to
# a shell instead of perl, in which case the shell runs the "exec perl" and
# restarts the script under perl with the original arguments.
eval '(exit $?0)' && eval 'exec perl -S $0 ${1+"$@"}'
    & eval 'exec perl -S $0 $argv:q'
    if 0;

use Getopt::Long;
use Sys::Syslog;
use Time::Local;
use IO::File;
use Pg;

# Parse the command line.  GetOptions is used in its legacy mode, so every
# recognised option lands in the package variable $opt_<name>.
$goodopts = GetOptions(qw(debug! verbose! help
                          log=s nowlimit=s palimit=s ratiolimit=s
                          masterhost=s slavehost=s host=s
                          masteruser=s slaveuser=s user=s
                          masterpassword=s slavepassword=s password=s));

# A malformed command line (bad option, or fewer than the two database names)
# is an error; an explicit --help on an otherwise valid command line is not.
my $bad_invocation = (!$goodopts || scalar(@ARGV) < 2) ? 1 : 0;

if ($bad_invocation || defined($opt_help)) {
    print STDERR
        "Usage: $0 --log=file --host=name --user=name --password=string masterdb slavedb\n",
        "\t--masterhost=name --masteruser=name --masterpassword=string\n",
        "\t--slavehost=name --slaveuser=name --slavepassword=string\n",
        "\t--nowlimit=number --palimit=number --ratiolimit=number\n",
        "\t-debug -verbose -help\n";
    exit($bad_invocation);
}


# File-level configuration shared by the subs below ($debug, $verbose,
# $TIME_NULL, @LIMIT, @LIMIT_MSG, @times are read from inside them).
my $debug = $opt_debug || 0;
my $verbose = $opt_verbose || $opt_debug;


#
# Set defaults in case they were not explicitly specified

# we actually require that these be specified above or we don't even get here
my $master = $ARGV[0] || "master_g";
my $slave = $ARGV[1] || "slave_g";
my $server = 0;

# make sure that we have some parameters for the master database
$opt_masterhost = $opt_host if (!defined($opt_masterhost) && defined($opt_host));
$opt_masteruser = $opt_user if (!defined($opt_masteruser) && defined($opt_user));
$opt_masterpassword = $opt_password if (!defined($opt_masterpassword) && defined($opt_password));

# make sure that we have some parameters for the slave database
$opt_slavehost = $opt_host if (!defined($opt_slavehost) && defined($opt_host));
$opt_slaveuser = $opt_user if (!defined($opt_slaveuser) && defined($opt_user));
$opt_slavepassword = $opt_password if (!defined($opt_slavepassword) && defined($opt_password));

# generate some local connection info for the master
my $minfo = "dbname=$master";
$minfo = "$minfo host=$opt_masterhost" if (defined($opt_masterhost));
$minfo = "$minfo user=$opt_masteruser" if (defined($opt_masteruser));
$minfo = "$minfo password=$opt_masterpassword" if (defined($opt_masterpassword));

# generate connection info for our invocation of PrepareSnapshotPTE
# Start from an empty string so the first concatenation is well defined, and
# forward --debug only when debugging is really on: "--nodebug" leaves
# $opt_debug defined but false.
my $margs = "";
$margs = "$margs --debug" if ($opt_debug);
$margs = "$margs --host=$opt_masterhost" if (defined($opt_masterhost));
$margs = "$margs --user=$opt_masteruser" if (defined($opt_masteruser));
$margs = "$margs --password=$opt_masterpassword" if (defined($opt_masterpassword));
$margs = "$margs $master";

# generate some local connection info for the slave
my $sinfo = "dbname=$slave";
$sinfo = "$sinfo host=$opt_slavehost" if (defined($opt_slavehost));
$sinfo = "$sinfo user=$opt_slaveuser" if (defined($opt_slaveuser));
$sinfo = "$sinfo password=$opt_slavepassword" if (defined($opt_slavepassword));

# Value the get_* subs return when the database has no matching row.
my $TIME_NULL = "1970-01-01 00:00:00-00";

# Default alarm thresholds; see @LIMIT below for the index meaning.
my $LIMIT_NOW = 1000;
my $LIMIT_LAST_APPLY = 1000;
my $LIMIT_RATIO = 1;

my @times=();

print_array("INC",@INC) if $debug;
# The environment lives in the hash %ENV (there is no useful @ENV array), so
# dump it as sorted "NAME=value" entries.
print_array("ENV", map { $_ . "=" . $ENV{$_} } sort keys %ENV) if $debug;

# Init:
#my $master="master_g";
#my $minfo="dbname=$master";
#my $slave="slave_g";
#my $sinfo="dbname=$slave";
# State file holding the sync times seen by previous runs, and the alarm log.
my $file="/tmp/__replic_monitor";
my $log=$opt_log || "/tmp/__replic_monitor.log";

# Thresholds indexed like @LIMIT_MSG:
#   0 - seconds from now to the last successful sync
#   1 - seconds from the pending prepare to the last slave sync
#   2 - growth ratio of the interval between successful syncs
my @LIMIT = ();
$LIMIT[0] = $opt_nowlimit || $LIMIT_NOW;
$LIMIT[1] = $opt_palimit || $LIMIT_LAST_APPLY;
$LIMIT[2] = $opt_ratiolimit || $LIMIT_RATIO;
print_array("LIMIT Array", @LIMIT) if $debug;

my @LIMIT_MSG = ("LIMIT TIME FROM NOW TO LAST SYNC-SYNC EXCEEDED",
                 "LIMIT TIME FROM PREPARE TO LAST APPLIED EXCEEDED",
                 "LIMIT MASTER SYNC INTERVAL GROWS RATIO EXCEEDED",);

#############
# MAIN Begins
#

    # Connect to both databases; nothing can be checked without them.  The
    # connection's own error text is included so the failure can be diagnosed.
    my $mconn = Pg::connectdb($minfo);
    if ($mconn->status != PGRES_CONNECTION_OK) {
        my $why = $mconn->errorMessage;
        $why = "" unless defined($why);
        chomp($why);
        die "Failed to connect to Master $master: $why\n";
    }

    my $sconn = Pg::connectdb($sinfo);
    if ($sconn->status != PGRES_CONNECTION_OK) {
        my $why = $sconn->errorMessage;
        $why = "" unless defined($why);
        chomp($why);
        die "Failed to connect to Slave $slave: $why\n";
    }

    # get_now_time returns "" (after reporting the error on STDERR) when the
    # query fails; stop here, as the individual checks do, instead of handing
    # an empty string to get_epoch_time.
    my $now_date = get_now_time($mconn);
    exit(1) if ($now_date eq "");

    # $now_time must stay a file-level lexical: check_now_limit reads it.
    my $now_time=get_epoch_time($now_date);
    print "now_date=$now_date; now_time=$now_time\n" if $debug;

    #
    # Check if limits are exceeded:
    #
    my $master_sync_time = check_now_limit($mconn, $log, $now_date);
    check_pa_limit($mconn, $sconn, $log, $now_date);
    check_ratio_limit($file, $master_sync_time, $log, $now_date);

#
# MAIN Ends
###########

#
# Check if LIMIT MASTER SYNC INTERVAL GROWS RATIO EXCEEDED:
# check_ratio_limit($file, $master_sync_time, $log, $now_date);
#
sub check_ratio_limit {
    # Compare the interval between the two most recent successful syncs with
    # the interval before it, raise alarm 2 when the ratio exceeds the limit,
    # and remember the newest sync time in $file for the next run.
    #
    #   $file             - state file with up to two earlier sync times
    #   $master_sync_time - epoch of the latest successful sync
    #   $log, $now_date   - passed through to write_alarm_to_log
    #
    # Dies when the state file holds more than two entries or cannot be
    # written.
    my ($file, $master_sync_time, $log, $now_date) = @_;

    # get times from previous syncs (oldest first):
    my @sync_times = get_times($file);
    my $dt2 = 0;
    my $dt1 = 0;

    if (@sync_times > 2) {
        die "Wrong number of entries in times array!\n";
    } elsif (@sync_times == 2) {

        # calculate ratio:
        $dt2 = $master_sync_time - $sync_times[1];
        $dt1 = $sync_times[1] - $sync_times[0];
        print "\ncheck_ratio_limit: $master_sync_time - $sync_times[1] is dt2=$dt2; $sync_times[1] - $sync_times[0] is dt1=$dt1;\n\n" if $debug;

        # Both intervals must be positive for the ratio to be meaningful.
        if ($dt1 > 0 && $dt2 > 0) {
            my $ratio = $dt2 / $dt1;
            print "check_ratio_limit: ratio=$ratio\n" if $debug;
            write_alarm_to_log($log, $now_date, 2, $ratio) if ($dt1 > 1);
        }
        print_array("check_ratio_limit: times array", @sync_times) if $debug;
    }

    # Record the new sync time only when it is newer than the last stored one
    # (dt2 > 0) or when the history is still incomplete.
    if ($dt2 > 0 || @sync_times < 2) {
        print "master_sync_time=$master_sync_time\n" if $debug;
        shift @sync_times if (@sync_times == 2);
        push @sync_times, "$master_sync_time";
        print_array("times-2", @sync_times) if $debug;

        open(my $out, '>', $file) || die("Cannot Open $file: $!");
        print $out "$_\n" for @sync_times;
        # Buffered write errors only surface at close time.
        close($out) || die("Cannot Close $file: $!");
    }
}

#
# Check if LIMIT TIME FROM PREPARE TO LAST APPLIED EXCEEDED:
# check_pa_limit($mconn, $sconn, $log, $now_date);
#
sub check_pa_limit {
    # Raise alarm 1 when the newest pending (status=0) master sync is more
    # than the configured number of seconds ahead of the last slave sync.
    #
    #   $mconn, $sconn  - master and slave database connections
    #   $log, $now_date - passed through to write_alarm_to_log
    #
    # Exits with status 1 when either query fails (the get_* helper has
    # already printed the error).
    my ($mconn, $sconn, $log, $now_date) = @_;

    # Get last master sync time with status=0 :
    my $date0 = get_sync_time_m($mconn, 0);
    exit(1) if ($date0 eq "");

    # Get slave_sync_time
    my $sdate = get_sync_time_s($sconn);
    exit(1) if ($sdate eq "");

    # $TIME_NULL means there is no status=0 row, i.e. no pending replication,
    # so there is nothing to measure (same convention as check_now_limit).
    if ($date0 eq $TIME_NULL) {
        print "\ncheck_pa_limit: no pending replication\n" if $debug;
        return;
    }

    my $master_sync_time0 = get_epoch_time($date0);
    print "\ncheck_pa_limit: master_sync_time0=$master_sync_time0; date0=$date0\n" if $debug;

    my $slave_sync_time = get_epoch_time($sdate);
    print "\ncheck_pa_limit: slave_sync_time=$slave_sync_time\n" if $debug;

    my $delta = $master_sync_time0 - $slave_sync_time;
    print "\ncheck_pa_limit: delta=$delta\n\n" if $debug;

    write_alarm_to_log($log, $now_date, 1, $delta);
}

#
# Get the last master sync time with status=1 and check whether
# LIMIT TIME FROM NOW TO LAST SYNC-SYNC is EXCEEDED:
#  $master_sync_time = check_now_limit($mconn, $log, $now_date);
#
sub check_now_limit {
    # Look up the latest successful (status=1) master sync, raise alarm 0 when
    # it lies too far behind the file-level $now_time, and return its epoch
    # time (0 when no such sync exists).  Exits with status 1 when the query
    # fails.
    my ($mconn, $log, $now_date) = @_;

    my $last_sync = get_sync_time_m($mconn, 1);
    exit(1) if ($last_sync eq "");

    # No successful sync recorded yet: nothing to compare against.
    return(0) if ($last_sync eq $TIME_NULL);

    my $master_sync_time = get_epoch_time($last_sync);
    print "check_now_limit: master_sync_time=$master_sync_time; date=$last_sync\n" if $debug;

    # Check if limit 0 is exceeded:
    my $delta_now = $now_time - $master_sync_time;
    print "\ncheck_now_limit: delta_now=$delta_now\n\n" if $debug;
    write_alarm_to_log($log, $now_date, 0, $delta_now);

    return($master_sync_time);
}

#
# write_alarm_to_log($log, $now, $limit_idx, $delta)
#
sub write_alarm_to_log {
    # Append one alarm line to $log when $delta exceeds $LIMIT[$limit_idx];
    # the line is echoed to STDOUT as well when $verbose is set.
    #
    #   $log       - path of the log file (appended to, created if missing)
    #   $now       - timestamp text placed at the start of the line
    #   $limit_idx - index into @LIMIT / @LIMIT_MSG
    #   $delta     - measured value compared against the limit
    #
    # Dies when the log cannot be opened or written.
    my ($log, $now, $limit_idx, $delta) = @_;
    # print "write_alarm_to_log: $log, $now, $limit_idx, $delta, $LIMIT[$limit_idx]\n" if $debug;
    return unless ($delta > $LIMIT[$limit_idx]);

    my $line = "[$now]  [".$LIMIT_MSG[$limit_idx]."]  [delta=$delta]\n";
    print "$line" if $verbose;

    # Three-argument open: the path comes from the command line and must never
    # be interpreted as part of the open mode.
    open(my $out, '>>', $log) || die("Cannot Open $log: $!");
    print $out $line;
    # Buffered write errors only surface at close time.
    close($out) || die("Cannot Close $log: $!");
}


#
# Get times from temp file
# get_times($file);
#
sub get_times {
    # Read the sync times stored in $file, one per line, and return them as a
    # list with the line endings removed.  A missing file yields an empty
    # list.  Dies when an existing file cannot be opened.
    my ($file) = @_;

    # Use a private list: reading into a shared array would hand back stale
    # entries from an earlier call when the file does not exist.
    my @entries = ();
    if (-e $file) {
        open(my $in, '<', $file) || die("Could not open $file: $!");
        @entries = <$in>;
        close($in);
        chomp(@entries);
    }

    print_array("get_times: times array", @entries) if $debug;
    return @entries;
}

#
# Get master (or, when $is_slave is true, slave) sync time in format
# "YYYY-MM-DD HH:MI:SS+ZZ"; $minfo is accepted but not used.
# get_sync_time($conn, $minfo, $is_slave);
#
sub get_sync_time {
    # Return max(synctime) from _rserv_sync_ (rows with status=0) or, when
    # $is_slave is true, from _rserv_slave_sync_ (all rows).  Returns
    # $TIME_NULL when the table yields no value and "" when the query fails.
    # $minfo is accepted but not used.
    my ($conn, $minfo, $is_slave) = @_;

    # Only the master table is filtered by status.
    my ($slave, $where) = $is_slave ? ("slave_", "") : ("", " where status=0");
    my $sql = "select max(synctime) from _rserv_${slave}sync_${where}";

    if ($debug) {
        print "get_sync_time: slave=$slave\n";
        print "get_sync_time: sql=$sql\n";
    }

    my $result = $conn->exec($sql);
    unless ($result->resultStatus eq PGRES_TUPLES_OK) {
        print "result->resultStatus=".$result->resultStatus."\n";
        print "ERROR: ".$conn->errorMessage."\n";
        return("");
    }

    # max() yields NULL (undef here) when no row matches.
    my ($latest) = $result->fetchrow;
    my $sync_time = defined($latest) ? $latest : $TIME_NULL;

    print "sync_time=$sync_time\n" if $debug;
    return $sync_time;
}

#
# Get the database's current time (select now()) in format
# "YYYY-MM-DD HH:MI:SS+ZZ"
# get_now_time($conn);
#
sub get_now_time {
    # Ask the database behind $conn for now() and return it as text.
    # Returns $TIME_NULL when the query yields no value and "" (after
    # reporting on STDERR) when the query fails.
    my ($conn) = @_;

    my $sql = "select now()";
    print "get_now_time: sql=$sql\n" if $debug;

    my $result = $conn->exec($sql);
    unless ($result->resultStatus eq PGRES_TUPLES_OK) {
        print STDERR "result->resultStatus=".$result->resultStatus."\n";
        print STDERR "ERROR: ".$conn->errorMessage."\n";
        return("");
    }

    my ($current) = $result->fetchrow;
    my $time = defined($current) ? $current : $TIME_NULL;

    print "get_now_time: time=$time\n" if $debug;
    return $time;
}

#
# Get master sync time in format "YYYY-MM-DD HH:MI:SS+ZZ" for rows with
# status=1 ($is_sync true) or status=0 ($is_sync false)
# get_sync_time_m($conn, $is_sync);
#
sub get_sync_time_m {
    # Return max(synctime) from the master table _rserv_sync_ for rows with
    # status=1 ($is_sync true) or status=0 ($is_sync false).
    # Returns $TIME_NULL when no row matches and "" (after reporting on
    # STDERR) when the query fails.
    my ($conn, $is_sync) = @_;

    # $status is always the literal 0 or 1, never caller-supplied text.
    my $status = $is_sync ? "1" : "0";

    my $sql = "select max(synctime) from _rserv_sync_ where status=$status";
    print "get_sync_time_m: sql=$sql\n" if $debug;

    my $result = $conn->exec($sql);
    if ($result->resultStatus ne PGRES_TUPLES_OK)
    {
        # Both diagnostics go to STDERR so they stay together.
        print STDERR "result->resultStatus=".$result->resultStatus."\n";
        my $msg = $conn->errorMessage;
        print STDERR "ERROR: $msg\n";
        return("");
    }

    # max() yields NULL (undef here) when no row matches.
    my @row = $result->fetchrow;
    my $sync_time = $TIME_NULL;
    $sync_time = $row[0] if (defined $row[0]);
    print "get_sync_time_m: sync_time=$sync_time\n" if $debug;
    return $sync_time;
}

#
# Get slave sync time in format "YYYY-MM-DD HH:MI:SS+ZZ"
# get_sync_time_s($conn);
#
sub get_sync_time_s {
    # Return max(synctime) from the slave table _rserv_slave_sync_.
    # Returns $TIME_NULL when the table yields no value and "" (after
    # reporting on STDERR) when the query fails.
    my ($conn) = @_;

    my $sql = "select max(synctime) from _rserv_slave_sync_";
    print "get_sync_time_s: sql=$sql\n" if $debug;

    my $result = $conn->exec($sql);
    if ($result->resultStatus ne PGRES_TUPLES_OK)
    {
        # Diagnostics belong on STDERR, as in get_now_time.
        print STDERR "result->resultStatus=".$result->resultStatus."\n";
        my $msg = $conn->errorMessage;
        print STDERR "ERROR: $msg\n";
        return("");
    }

    # max() yields NULL (undef here) when the table is empty.
    my @row = $result->fetchrow;
    my $sync_time = $TIME_NULL;
    $sync_time = $row[0] if (defined $row[0]);
    print "get_sync_time_s: sync_time=$sync_time\n" if $debug;
    return $sync_time;
}

# Usage:
# $epoch_time=get_epoch_time("YYYY-MM-DD HH:MI:SS+ZZ");
#
sub get_epoch_time {
    # Convert a timestamp such as "YYYY-MM-DD HH:MI:SS+ZZ" to seconds since
    # the epoch.
    #
    # Accepted variations: fractional seconds ("...:SS.ffffff", truncated),
    # an offset with minutes ("+05:30" or "+0530"), and no offset at all (the
    # value is then taken as local time).
    #
    # The UTC offset is honoured, so timestamps carrying different offsets
    # (for example from the master and the slave) compare correctly.  The
    # result therefore differs from a plain timelocal() conversion by the
    # local UTC offset.
    #
    # Dies when the text cannot be parsed.
    my $tmptime = $_[0];
    $tmptime = "" unless defined($tmptime);
    print "get_epoch_time: tmptime=$tmptime\n" if $debug;

    my ($year, $mon, $mday, $hour, $min, $sec, $sign, $tz_hour, $tz_min) =
        $tmptime =~ /^(\d{4})-(\d{1,2})-(\d{1,2}) (\d{1,2}):(\d{1,2}):(\d{1,2})(?:\.\d+)?(?:([\+\-])(\d{1,2})(?::?(\d{2}))?)?$/;
    die "get_epoch_time: cannot parse timestamp '$tmptime'\n" unless defined($year);

    # No offset in the text: interpret the value in the local time zone.
    return timelocal($sec, $min, $hour, $mday, $mon - 1, $year) unless defined($sign);

    my $offset = $tz_hour * 3600 + (defined($tz_min) ? $tz_min * 60 : 0);
    $offset = -$offset if ($sign eq "-");

    # The fields are wall-clock time at the given offset; UTC = wall - offset.
    return timegm($sec, $min, $hour, $mday, $mon - 1, $year) - $offset;
}

####################################################
# <p> PRINT Array
# @param - $string - header string
# @param @array - array to print
####################################################
sub print_array {
    # Print the header string followed by every remaining argument on its
    # own line, numbered from 0.
    my ($header, @items) = @_;

    print "$header \n";

    my $position = 0;
    foreach my $item (@items) {
        print "  $position:    [$item] \n";
        $position++;
    }
}


####################################################
# <p> trim all \s characters in the beginning & end
#  @ param $string  to trim
#  @ return trimmed string
###################################################
sub trim  {
    # Return a copy of the argument with the trailing line ending and all
    # leading and trailing whitespace removed.
    my ($text) = @_;

    chomp($text);
    for ($text) {
        s/^\s*//;
        s/\s*$//;
    }

    return $text;
}

