-pg_replication:
- query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) AS lag_seconds"
+pg_process_idle:
+ query: |
+ -- Idle-time histogram for backends whose state begins with 'idle',
+ -- producing one result row per (state, application_name).
+ WITH
+ -- Per group: total time spent in the current state (whole seconds per
+ -- backend, summed) and the number of backends in the group.
+ metrics AS (
+   SELECT
+     state,
+     application_name,
+     SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
+     COUNT(*) AS process_idle_seconds_count
+   FROM pg_stat_activity
+   WHERE state ~ '^idle'
+   GROUP BY state, application_name
+ ),
+ -- Per group and per upper bound le (seconds): how many backends have been
+ -- in their current state for at most le seconds (cumulative counts).
+ buckets AS (
+   SELECT
+     state,
+     application_name,
+     le,
+     SUM(
+       CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
+         THEN 1
+         ELSE 0
+       END
+     )::bigint AS bucket
+   FROM pg_stat_activity
+   CROSS JOIN UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
+   -- Same filter as metrics; rows in other states would be discarded by the
+   -- join below anyway, so skip aggregating them.
+   WHERE state ~ '^idle'
+   GROUP BY state, application_name, le
+ )
+ SELECT
+   state,
+   application_name,
+   process_idle_seconds_sum AS seconds_sum,
+   process_idle_seconds_count AS seconds_count,
+   -- Order both arrays by le inside the aggregate so bound i always pairs
+   -- with count i; sorting rows inside a CTE does not fix aggregate input order.
+   ARRAY_AGG(le ORDER BY le) AS seconds,
+   ARRAY_AGG(bucket ORDER BY le) AS seconds_bucket
+ FROM metrics
+ INNER JOIN buckets USING (state, application_name)
+ GROUP BY state, application_name, process_idle_seconds_sum, process_idle_seconds_count
master: true
metrics:
- - lag_seconds:
- usage: "GAUGE"
- description: "Replication lag behind master in seconds"
+ - state:
+ usage: "LABEL"
+ description: "State"
+ - application_name:
+ usage: "LABEL"
+ description: "Application Name"
+ - seconds:
+ usage: "HISTOGRAM"
+ description: "Idle time of server processes"
-pg_database:
- query: "SELECT pg_database.oid AS datid, pg_database.datname, pg_database_size(pg_database.datname) AS size_bytes FROM pg_database"
+# Counts the files listed by pg_ls_waldir() whose names consist of exactly
+# 24 characters drawn from 0-9 and A-Z.
+pg_wal:
+ query: "SELECT COUNT(*) AS segment_count FROM pg_ls_waldir() AS wal_file WHERE wal_file.name ~ '^[0-9A-Z]{24}$'"
master: true
- cache_seconds: 30
metrics:
- - datid:
- usage: "LABEL"
- description: "ID of the database"
- - datname:
- usage: "LABEL"
- description: "Name of the database"
- - size_bytes:
+ - segment_count:
usage: "GAUGE"
- description: "Disk space used by the database"
+ description: "Number of WAL segments"