Skip to content

Commit ad43a8b

Browse files
author
Muhammad Shahzeb
committed
Add vars for grouping
1 parent 077a5d1 commit ad43a8b

File tree

2 files changed

+25
-20
lines changed

2 files changed

+25
-20
lines changed

postgres_mixin/alerts/postgres.libsonnet

Lines changed: 22 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -11,12 +11,12 @@
1111
summary: 'Postgres connections count is over the maximum amount.',
1212
},
1313
expr: |||
14-
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
14+
sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s})
1515
>=
1616
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
1717
-
18-
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
19-
||| % $._config,
18+
sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
19+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
2020
'for': '1m',
2121
labels: {
2222
severity: 'warning',
@@ -29,14 +29,14 @@
2929
summary: 'Postgres connections count is over 80% of maximum amount.',
3030
},
3131
expr: |||
32-
sum by (instance) (pg_stat_activity_count{%(postgresExporterSelector)s})
32+
sum by (%(agg)s) (pg_stat_activity_count{%(postgresExporterSelector)s})
3333
>
3434
(
35-
sum by (instance) (pg_settings_max_connections{%(postgresExporterSelector)s})
35+
sum by (%(agg)s) (pg_settings_max_connections{%(postgresExporterSelector)s})
3636
-
37-
sum by (instance) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
37+
sum by (%(agg)s) (pg_settings_superuser_reserved_connections{%(postgresExporterSelector)s})
3838
) * 0.8
39-
||| % $._config,
39+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
4040
'for': '10m',
4141
labels: {
4242
severity: 'warning',
@@ -61,12 +61,12 @@
6161
summary: 'PostgreSQL high number of slow queries.',
6262
},
6363
expr: |||
64-
avg by (datname) (
64+
avg by (%(agg)s) (
6565
rate (
6666
pg_stat_activity_max_tx_duration{%(dbNameFilter)s,%(postgresExporterSelector)s}[2m]
6767
)
6868
) > 2 * 60
69-
||| % $._config,
69+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
7070
'for': '2m',
7171
labels: {
7272
severity: 'warning',
@@ -79,7 +79,7 @@
7979
summary: 'PostgreSQL high number of queries per second.',
8080
},
8181
expr: |||
82-
avg by (datname) (
82+
avg by (datname, %(agg)s) (
8383
irate(
8484
pg_stat_database_xact_commit{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
8585
)
@@ -88,7 +88,7 @@
8888
pg_stat_database_xact_rollback{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m]
8989
)
9090
) > 10000
91-
||| % $._config,
91+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
9292
'for': '5m',
9393
labels: {
9494
severity: 'warning',
@@ -101,7 +101,7 @@
101101
summary: 'PostgreSQL low cache hit rate.',
102102
},
103103
expr: |||
104-
avg by (datname) (
104+
avg by (datname, %(agg)s) (
105105
rate(pg_stat_database_blks_hit{%(dbNameFilter)s,%(postgresExporterSelector)s}[5m])
106106
/
107107
(
@@ -114,7 +114,7 @@
114114
)
115115
)
116116
) < 0.98
117-
||| % $._config,
117+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
118118
'for': '5m',
119119
labels: {
120120
severity: 'warning',
@@ -157,9 +157,9 @@
157157
summary: 'PostgreSQL has high number of acquired locks.',
158158
},
159159
expr: |||
160-
max by( server, job, datname, namespace) ((pg_locks_count{%(dbNameFilter)s}) /
161-
on(instance, namespace) group_left(server) (pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{})) > 0.20
162-
||| % $._config,
160+
max by(datname, %(agg)s) ((pg_locks_count{%(dbNameFilter)s}) /
161+
on(%(agg)s) group_left(server) (pg_settings_max_locks_per_transaction{} * pg_settings_max_connections{})) > 0.20
162+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
163163
'for': '5m',
164164
labels: {
165165
severity: 'warning',
@@ -171,7 +171,9 @@
171171
description: '{{ $labels.instance }} replication lag exceeds 1 hour. Check for network issues or load imbalances.',
172172
summary: 'PostgreSQL replication lagging more than 1 hour.',
173173
},
174-
expr: '(pg_replication_lag{} > 3600) and on (instance) (pg_replication_is_replica{} == 1)',
174+
expr: |||
175+
(pg_replication_lag{} > 3600) and on (%(agg)s) (pg_replication_is_replica{} == 1)
176+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
175177
'for': '5m',
176178
labels: {
177179
severity: 'warning',
@@ -223,12 +225,12 @@
223225
timestamp(
224226
pg_stat_user_tables_n_dead_tup{} >
225227
pg_stat_user_tables_n_live_tup{}
226-
* on(namespace, job, service, instance, server) group_left pg_settings_autovacuum_vacuum_scale_factor{}
227-
+ on(namespace, job, service, instance, server) group_left pg_settings_autovacuum_vacuum_threshold{}
228+
* on(%(agg)s) group_left pg_settings_autovacuum_vacuum_scale_factor{}
229+
+ on(%(agg)s) group_left pg_settings_autovacuum_vacuum_threshold{}
228230
)
229231
< time() - 36000
230232
)
231-
|||,
233+
||| % $._config { agg: std.join(',', $._config.groupLabels + $._config.instanceLabels) },
232234
'for': '30m',
233235
labels: {
234236
severity: 'critical',

postgres_mixin/config.libsonnet

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,5 +2,8 @@
22
_config+:: {
33
dbNameFilter: 'datname!~"template.*"',
44
postgresExporterSelector: '',
5+
groupLabels: if self.enableMultiCluster then ['job', 'cluster'] else ['job'],
6+
instanceLabels: ['instance', 'server'],
7+
enableMultiCluster: false,
58
},
69
}

0 commit comments

Comments
 (0)