Skip to content

Commit 53764b3

Browse files
committed
chore: update queries for docker image
1 parent 167fa71 commit 53764b3

File tree

1 file changed

+59
-10
lines changed

1 file changed

+59
-10
lines changed

docker/all-in-one/opt/postgres_exporter/queries.yml

+59-10
Original file line numberDiff line numberDiff line change
@@ -1,13 +1,12 @@
# set_timeout: issues a SET that limits statement_timeout to 20 seconds.
1+
set_timeout:
2+
master: true
3+
cache_seconds: 5
4+
query: "set statement_timeout to '20s'"
5+
16
pg_database:
27
master: true
38
cache_seconds: 60
4-
query: |
5-
select sum(size_b) / (1024 * 1024) as size_mb
6-
from (select sum(pg_database_size(pg_database.datname)) as size_b
7-
from pg_database
8-
union all
9-
select sum(size) as size_b
10-
from pg_ls_waldir()) s;
9+
query: "SELECT SUM(pg_database_size(pg_database.datname)) / (1024 * 1024) as size_mb FROM pg_database"
1110
metrics:
1211
- size_mb:
1312
usage: "GAUGE"
@@ -178,10 +177,10 @@ pg_ls_archive_statusdir:
178177
- wal_pending_count:
179178
usage: "COUNTER"
180179
description: "Number of not yet archived WAL files"
181-
180+
182181
auth_users:
183182
master: true
184-
cache_seconds: 60
183+
cache_seconds: 21600 # 6 hours
185184
query: "select count(id) as user_count from auth.users"
186185
metrics:
187186
- user_count:
@@ -215,6 +214,15 @@ replication:
215214
usage: "GAUGE"
216215
description: "Replication Slot Active Status"
217216

# replication_slots: largest distance, in bytes, between the current WAL LSN
# and the restart_lsn of any row in pg_replication_slots.
# NOTE: max() over zero rows is NULL, so max_lag_bytes is NULL when no slots exist.
217+
replication_slots:
218+
master: true
219+
cache_seconds: 60
220+
query: "SELECT max(pg_wal_lsn_diff(pg_current_wal_lsn(), restart_lsn)) AS max_lag_bytes FROM pg_replication_slots"
221+
metrics:
222+
- max_lag_bytes:
223+
usage: "GAUGE"
224+
description: "Max Replication Lag"
225+
218226
storage:
219227
master: true
220228
cache_seconds: 60
@@ -261,17 +269,23 @@ supabase_usage_metrics:
261269
and lower(trim(regexp_replace(regexp_replace(query, E'\n', ' ', 'g'), E'\\s+', ' ', 'g'))) not in
262270
('with rows as ( select id from net.http_request_queue order by id limit $1 ) delete from net.http_request_queue q using rows where q.id = rows.id returning q.id, q.method, q.url, timeout_milliseconds, array(select key || $2 || value from jsonb_each_text(q.headers)), q.body',
263271
'with rows as ( select ctid from net._http_response where created < now() - $1 order by created limit $2 ) delete from net._http_response r using rows where r.ctid = rows.ctid',
272+
-- version of query above before https://github.com/supabase/pg_net/commit/eaa721e11761da07d01fc04b5114c90cd7973b83
273+
'with rows as ( select ctid from net._http_response where created < $1 - $2 order by created limit $3 ) delete from net._http_response r using rows where r.ctid = rows.ctid',
264274
'select exists ( select $2 from pg_catalog.pg_class c where c.relname = $1 and c.relkind = $3 )',
265275
'select description from pg_namespace n left join pg_description d on d.objoid = n.oid where n.nspname = $1',
266276
'select concat(schemaname, $1, tablename, $2, policyname) as policy from pg_policies order by 1 desc',
267277
'select concat(table_schema, $1, table_name) as table from information_schema.tables where table_schema not like $2 and table_schema <> $3 order by 1 desc',
268278
'select concat(conrelid::regclass, $1, conname) as fk from pg_constraint where contype = $2 order by 1 desc',
269-
'select datname from pg_database where datallowconn = $1 order by oid asc')
279+
'select datname from pg_database where datallowconn = $1 order by oid asc',
280+
'select count(*) > $1 as pgsodium_enabled from pg_extension where extname = $2',
281+
'select count(*) > $1 as keys_created from pgsodium.key')
270282
and query <> 'insert into schema_migrations (version) values ($1)'
271283
-- temporarily included for older versions of pg_net
272284
and query not like 'SELECT%FROM net.http_request_queue%'
273285
and query not like 'DELETE FROM net.http_request_queue%'
274286
and query not like '%source: project usage%'
287+
and query not like 'select name, setting from pg_settings where name in ($1, $2)%'
288+
and userid <> (select oid from pg_roles where rolname = 'authenticator')
275289
metrics:
276290
- user_queries_total:
277291
usage: "COUNTER"
@@ -294,3 +308,38 @@ pg_status:
294308
- in_recovery:
295309
usage: "GAUGE"
296310
description: "Database in recovery"
311+
312+
# specific to read replicas
313+
# for primary databases, all columns will always return a value of 0
314+
# ---
315+
# for checking replication lag (physical_replication_lag_seconds)
316+
# we first check if the replica is connected to its primary
317+
# and if the last WAL received is equivalent to the last WAL replayed
318+
# if so return 0
319+
# otherwise calculate replication lag as per usual
320+
physical_replication_lag:
321+
master: true
322+
cache_seconds: 60
323+
query: |
324+
select
325+
case
326+
when (select count(*) from pg_stat_wal_receiver) = 1 and pg_last_wal_receive_lsn() = pg_last_wal_replay_lsn()
327+
then 0
328+
else coalesce(extract(epoch from now() - pg_last_xact_replay_timestamp()),0)
329+
end as physical_replication_lag_seconds,
330+
case
331+
when pg_is_in_recovery()
332+
then case when pg_is_wal_replay_paused() = false then 0 else 1 end
333+
else 0
334+
end as is_wal_replay_paused,
335+
(select count(*) from pg_stat_wal_receiver) as is_connected_to_primary
336+
metrics:
337+
- physical_replication_lag_seconds:
338+
usage: "GAUGE"
339+
description: "Physical replication lag in seconds"
340+
- is_wal_replay_paused:
341+
usage: "GAUGE"
342+
description: "Check if WAL replay has been paused"
343+
- is_connected_to_primary:
344+
usage: "GAUGE"
345+
description: "Monitor connection to the primary database"

0 commit comments

Comments
 (0)