@@ -211,6 +211,9 @@ package check_postgres;
211211 ' pgb-backends-msg' => q{ $1 of $2 connections ($3%)} ,
212212 ' pgb-backends-none' => q{ No connections} ,
213213 ' pgb-backends-users' => q{ $1 for number of users must be a number or percentage} ,
214+ ' pgb-maxwait-msg' => q{ longest wait: $1s} ,
215+ ' pgb-maxwait-nomatch' => q{ No matching rows were found} ,
216+ ' pgb-maxwait-skipped' => q{ No matching rows were found (skipped rows: $1)} ,
214217 ' PID' => q{ PID} ,
215218 ' port' => q{ port} ,
216219 ' preptxn-none' => q{ No prepared transactions found} ,
@@ -1913,6 +1916,7 @@ package check_postgres;
19131916 pgb_pool_maxwait => [1, ' Check the current maximum wait time for client connections in pgbouncer pools.' ],
19141917 pgbouncer_backends => [0, ' Check how many clients are connected to pgbouncer compared to max_client_conn.' ],
19151918 pgbouncer_checksum => [0, ' Check that no pgbouncer settings have changed since the last check.' ],
1919+ pgbouncer_maxwait => [0, ' Check how long the first (oldest) client in queue has been waiting.' ],
19161920 pgagent_jobs => [0, ' Check for no failed pgAgent jobs within a specified period of time.' ],
19171921 prepared_txns => [1, ' Checks number and age of prepared transactions.' ],
19181922 query_runtime => [0, ' Check how long a specific query takes to run.' ],
@@ -2769,6 +2773,9 @@ sub finishup {
27692773# # Check the current maximum wait time for client connections in pgbouncer pools
27702774check_pgb_pool(' maxwait' ) if $action eq ' pgb_pool_maxwait' ;
27712775
2776+ # # Check how long the first (oldest) client in queue has been waiting.
2777+ check_pgbouncer_maxwait() if $action eq ' pgbouncer_maxwait' ;
2778+
27722779# # Check how many clients are connected to pgbouncer compared to max_client_conn.
27732780check_pgbouncer_backends() if $action eq ' pgbouncer_backends' ;
27742781
@@ -6758,6 +6765,107 @@ sub check_pgbouncer_checksum {
67586765
67596766} # # end of check_pgbouncer_checksum
67606767
sub check_pgbouncer_maxwait {

    ## Check how long the first (oldest) client in queue has waited, in
    ## seconds.
    ## Supports: Nagios, MRTG
    ## Warning and critical are time limits - defaults to seconds
    ## Valid units: s[econd], m[inute], h[our], d[ay]
    ## All above may be written as plural as well (e.g. "2 hours")
    ## Can also ignore databases with exclude and limit with include
    ##
    ## Takes no arguments and returns nothing; results are reported through
    ## add_ok / add_warning / add_critical (or do_mrtg when $MRTG is set).
    ## NOTE(review): $SQL, $db and $MRTG are not declared in this sub; they
    ## are presumably file-level variables shared with the helpers - confirm.

    my ($warning, $critical) = validate_range
        ({
          type => 'time',
          });

    ## Ask pgbouncer for its pool list; the columns used below are
    ## database, user, cl_active, cl_waiting and maxwait
    $SQL = qq{SHOW POOLS};

    my $info = run_command($SQL, { regex => qr{\d+}, emptyok => 1 } );

    for $db (@{$info->{db}}) {

        ## Start from a clean slate for every target, so that a winner found
        ## on one target is never reported against the next one
        my ($maxwait, $database, $user, $cl_active, $cl_waiting) =
            (0,'?','?',0,0);
        my $skipped = 0;
        my $matched = 0;

        ## Walk the parsed rows and remember the pool with the longest wait
        ROW: for my $r (@{$db->{slurp}}) {

            ## Apply --exclude and --include arguments to the database name
            if (skip_item($r->{database})) {
                $skipped++;
                next ROW;
            }

            $matched++;

            ## The first row that survives filtering always becomes the
            ## winner, so an idle pgbouncer (maxwait of 0 everywhere) is
            ## still reported as a real row; on a tie the earlier row is kept
            if (1 == $matched or $r->{maxwait} > $maxwait) {
                $database   = $r->{database};
                $user       = $r->{user};
                $cl_active  = $r->{cl_active};
                $cl_waiting = $r->{cl_waiting};
                $maxwait    = $r->{maxwait};
            }
        }

        ## Nothing survived filtering (or there were no rows at all):
        ## say so, and move on to the next target rather than bailing out
        if (! $matched) {
            $MRTG and do_mrtg({one => 0, msg => 'No rows'});
            $db->{perf} = "0;$warning;$critical";

            if ($skipped) {
                add_ok msg('pgb-maxwait-skipped', $skipped);
            }
            else {
                ## This message has no placeholders, so no arguments are passed
                add_ok msg('pgb-maxwait-nomatch');
            }
            next;
        }

        ## Details on who the offender was
        my $whodunit = sprintf q{%s:%s %s:%s cl_active:%s cl_waiting:%s},
            msg('database'),
            $database,
            msg('username'),
            $user,
            $cl_active,
            $cl_waiting;

        $MRTG and do_mrtg({one => $maxwait, msg => "$whodunit"});

        $db->{perf} .= sprintf q{'%s'=%s;%s;%s},
            $whodunit,
            $maxwait,
            $warning,
            $critical;

        my $m = msg('pgb-maxwait-msg', $maxwait);
        my $msg = sprintf '%s (%s)', $m, $whodunit;

        ## An empty threshold means that level was not requested
        if (length $critical and $maxwait >= $critical) {
            add_critical $msg;
        }
        elsif (length $warning and $maxwait >= $warning) {
            add_warning $msg;
        }
        else {
            add_ok $msg;
        }
    }

    return;

} ## end of check_pgbouncer_maxwait

6868+
67616869sub check_pgbouncer_backends {
67626870
67636871 # # Check the number of connections to pgbouncer compared to
@@ -10504,6 +10612,30 @@ =head2 B<pgbouncer_checksum>
1050410612checksum must be provided as the C<--mrtg > argument. The fourth line always gives the
1050510613current checksum.
1050610614
10615+ =head2 B<pgbouncer_maxwait >
10616+
10617+ (C<symlink: check_postgres_pgbouncer_maxwait > ) Checks how long the first
10618+ (oldest) client in the queue has been waiting, in seconds. If this starts
10619+ increasing, then the current pool of servers does not handle requests quick
10620+ enough. Reason may be either overloaded server or just too small of a
10621+ pool_size setting in pgbouncer config file. Databases can be filtered by use
10622+ of the I<--include > and I<--exclude > options. See the L</"BASIC FILTERING">
10623+ section for more details. The values for the I<--warning > and I<--critical >
10624+ options are units of time, and must be provided (no default). Valid units are
10625+ 'seconds', 'minutes', 'hours', or 'days'. Each may be written singular or
10626+ abbreviated to just the first letter. If no units are given, the units are
10627+ assumed to be seconds.
10628+
10629+ This action requires Postgres 8.3 or better.
10630+
10631+ Example 1: Give a critical if any client has been waiting in the queue for
10632+ more than 10 minutes:
10633+
10634+ check_postgres_pgbouncer_maxwait -p 6432 -u pgbouncer --critical='10 minutes'
10635+
10636+ For MRTG output, returns the longest time in seconds a client has been
10637+ waiting on the first line. The fourth line gives the database, username, cl_active and cl_waiting values of that pool.
10638+
1050710639=head2 B<pgagent_jobs >
1050810640
1050910641(C<symlink: check_postgres_pgagent_jobs > ) Checks that all the pgAgent jobs
0 commit comments