#!/usr/bin/perl
=head1 NAME
p4-watchreplica - Check replication health condition
=head1 SYNOPSIS
p4-watchreplica {-m/aster <master address> -r/eplica <replica address>}
=head1 DESCRIPTION
Check replication health condition
The script does a couple of useful things.
It verifies that a master/replica relationship exists between the two servers.
It tolerates a configurable amount of lag while the replica catches up on the journal.
Ideally, run this script as a cron job and let cron mail you its output.
=head1 OPTIONS
-help - Output help information.
-m/aster - Perforce master host address
-r/eplica - Perforce replica host address
=head1 EXAMPLES
p4-watchreplica -m master:2666 -r replica:2777
=head1 SEE ALSO
p4 help pull
=head1 CONTACT
Thandesha V K - thanvk@gmail.com
=cut
use warnings;
use strict;
use Getopt::Long qw( GetOptions );
use Pod::Usage qw( pod2usage );
# $P4               - path of the p4 client binary the checks execute.
# $ALLOWED_SEEK_LAG - largest master-minus-replica journal sequence gap that is
#                     still reported as a WARNING instead of an ERROR.
my ( $P4, $ALLOWED_SEEK_LAG ) = ( '/sbin/p4', 5_000 );
################################################################################
# NAME :
# validate_input
#
# PURPOSE :
# Process and validate the options specified on the command line by the user,
# confirm that both addresses answer "p4 info", and confirm that the replica's
# "p4 configure show" output contains a P4TARGET line naming the master.
#
# PARAMETERS :
# NONE (options are read from @ARGV by GetOptions)
#
# RETURNS :
# Reference to the options hash (keys "master" and "replica").
# Does not return on failure: prints usage or an ERROR line and exits 1.
# With -help it prints usage and exits 0.
#
# GLOBALS :
# $P4 - Path to p4 command
#
# COMMENTS:
# p4 is started with a list-form pipe open, so the user-supplied addresses are
# passed as plain arguments and never interpreted by a shell.
# The master address is matched literally (\Q...\E) in the P4TARGET check, so
# characters such as "." in a host name are not treated as regex operators.
#
################################################################################
sub validate_input
{
    my %options;
    GetOptions( \%options, 'help', 'master|m=s', 'replica|r=s' )
        or pod2usage( "ERROR: Error in usage." );

    if ( $options{help} ) {
        pod2usage( -exitval => 0, -verbose => 1 );
    }

    # Run "$P4 -p <port> <args...>" without a shell.
    # Returns ( $ok, @stdout_lines ); $ok is false when p4 could not be started
    # or finished with a non-zero wait status.
    my $run_p4 = sub {
        my ( $port, @args ) = @_;
        my $pid = open( my $pipe, '-|', $P4, '-p', $port, @args );
        return ( 0 ) if !$pid;
        my @lines = <$pipe>;
        close( $pipe );    # waits for p4 and sets $?
        return ( ( $? == 0 ? 1 : 0 ), @lines );
    };

    if ( !$options{master} ) {
        pod2usage( -message =>"ERROR: Please pass Perforce master host address", -exitval => 1, -verbose => 1 );
    }
    my ( $master_ok ) = $run_p4->( $options{master}, 'info' );
    if ( !$master_ok ) {
        print "ERROR: $options{master} doesn't seem to be a valid perforce server!!";
        exit (1);
    }

    if ( !$options{replica} ) {
        pod2usage( -message =>"ERROR: Please pass Perforce replica host address", -exitval => 1, -verbose => 1 );
    }
    my ( $replica_ok ) = $run_p4->( $options{replica}, 'info' );
    if ( !$replica_ok ) {
        print "ERROR: $options{replica} doesn't seem to be a valid perforce server!!";
        exit (1);
    }

    my ( $show_ok, @config ) = $run_p4->( $options{replica}, 'configure', 'show' );
    if ( !$show_ok ) {
        print "ERROR: Trouble running \"p4 configure show\" for $options{replica}!!";
        exit (1);
    }

    # Expected line: P4TARGET=master:2666 (configure)
    my $target_re = qr/^P4TARGET=\Q$options{master}\E \(configure\)$/;
    my $found = grep { $_ =~ $target_re } @config;
    if ( !$found ) {
        print "ERROR: Can't find master->replica relation between $options{master} and $options{replica}!!";
        exit (1);
    }

    return \%options;
}
################################################################################
# NAME :
# check_replication
#
# PURPOSE :
# Compare journal count as well as seek position with some configured seek delay
#
# PARAMETERS :
# $master - Master perforce server
# $replica - Replica perforce server
#
# RETURNS :
# 0 when master and replica are at the same journal and sequence position.
# Every other outcome prints a WARNING/ERROR report and exits with status 1
# (the in-threshold WARNING case has always exited 1 and still does).
#
# GLOBALS :
# $P4 - Path to p4 command
# $ALLOWED_SEEK_LAG - Configurable seek lag limit
#
# COMMENTS:
# p4 is started with a list-form pipe open, so the addresses are passed as
# plain arguments and never interpreted by a shell.
# Output that cannot be parsed is reported as an error rather than being
# compared as undefined values.
#
################################################################################
sub check_replication
{
    my ( $master, $replica ) = @_;

    # Run "$P4 -p <port> <args...>" without a shell.
    # Returns ( $ok, @stdout_lines ); $ok is false when p4 could not be started
    # or finished with a non-zero wait status.
    my $run_p4 = sub {
        my ( $port, @args ) = @_;
        my $pid = open( my $pipe, '-|', $P4, '-p', $port, @args );
        return ( 0 ) if !$pid;
        my @lines = <$pipe>;
        close( $pipe );    # waits for p4 and sets $?
        return ( ( $? == 0 ? 1 : 0 ), @lines );
    };

    my ( $pull_ok, @pull_out ) = $run_p4->( $replica, 'pull', '-l', '-j' );
    if ( !$pull_ok ) {
        print "ERROR: Trouble checking \"Replica status\" for $replica!!";
        exit (1);
    }

    my ( $journal_rep, $seek_rep, $journal_master, $seek_master );
    foreach my $line ( @pull_out ) {
        #Current replica journal state is: Journal 5034, Sequence 144466684940.
        if ( $line =~ m/^Current replica journal state is:\s+Journal (\d+),\s+Sequence (\d+)\.?\s*$/ ) {
            ( $journal_rep, $seek_rep ) = ( $1, $2 );
        }
        #Current master journal state is: Journal 5034, Sequence 144466684940.
        elsif ( $line =~ m/^Current master journal state is:\s+Journal (\d+),\s+Sequence (\d+)\.?\s*$/ ) {
            ( $journal_master, $seek_master ) = ( $1, $2 );
        }
    }

    # Without both state lines there is nothing meaningful to compare.
    if ( grep { !defined } $journal_rep, $seek_rep, $journal_master, $seek_master ) {
        print "ERROR: Could not parse journal state from \"p4 pull -l -j\" output for $replica!!";
        exit (1);
    }

    if ( $journal_rep != $journal_master ) {
        print "ERROR: MASTER ($master) and REPLICA ($replica) journal counters are not matching each other!!";
        exit (1);
    }

    my ( $counter_ok, @counter_out ) = $run_p4->( $master, 'counter', 'journal' );
    my $journal = join( '', @counter_out );
    $journal =~ s/\s+\z//;    # drop the trailing newline so reports stay tidy
    if ( !$counter_ok || $journal !~ m/^\d+$/ ) {
        print "ERROR: Trouble checking \"Journal value\" for $master!!";
        exit (1);
    }

    if ( $journal_rep != $journal ) {
        print <<EOF;
ERROR: MASTER ($master) and REPLICA ($replica) journal counters are matching but they are not same as current journal value
Check if
1. Just now journal got rotated and replication is yet to catch up
2. Replication has stopped
3. Replication has some problem and failing
MASTER ($master) journal = $journal_master
REPLICA ($replica) journal = $journal_rep
=========================================================
Current journal counter from master = $journal
EOF
        exit(1);
    }

    my $diff = $seek_master - $seek_rep;
    if ( $diff == 0 ) {
        print <<EOF;
INFO: Replication is going just fine. Relax :)
EOF
        return 0;
    }

    if ( $diff <= $ALLOWED_SEEK_LAG ) {
        print <<EOF;
WARNING: Replica is in good health but busy replicating and lagging quite a bit from master server.
MASTER ($master) journal = $journal_master
REPLICA ($replica) journal = $journal_rep
MASTER ($master) journal SEEK position = $seek_master
REPLICA ($replica) journal SEEK position = $seek_rep
=====================================================================
REPLICA is lagging in SEEK position by = $diff
EOF
        exit (1);
    }

    print <<EOF;
ERROR: Replication is lagging behind more than threshold. Please Check
MASTER ($master) journal = $journal_master
REPLICA ($replica) journal = $journal_rep
Current journal counter from master = $journal
MASTER ($master) journal SEEK position = $seek_master
REPLICA ($replica) journal SEEK position = $seek_rep
Configurable allowed SEEK lag = $ALLOWED_SEEK_LAG
REPLICA is lagging in SEEK position by = $diff
EOF
    # Lag beyond the threshold is a failure; make the exit status say so.
    exit (1);
}
################################################################################
############################### main #######################################
################################################################################
# Output record separator: every print() gets a trailing newline, which the
# single-line messages printed by the subs depend on.
$\ = "\n";
# Validate the command line, then compare the journal state of the
# master/replica pair it names.
my $options = validate_input();
check_replication( @{$options}{ qw( master replica ) } );
# Change history:
#   # | Change | User        | Description                        | Committed
#   1 | 8262   | VK Thandesh | Check replication health condition |