]> git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/postgresql/templates/default/postgres_queries.yml.erb
Merge remote-tracking branch 'github/pull/603'
[chef.git] / cookbooks / postgresql / templates / default / postgres_queries.yml.erb
index 50c9ae49fdd7374d488276d3d49f88e0df7e3634..a92382edc0ec0e2a2118165eacbcfcfae94542ac 100644 (file)
@@ -1,22 +1,58 @@
-pg_replication:
-  query: "SELECT EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp())) AS lag_seconds"
+# pg_process_idle: histogram of how long idle sessions have been in their
+# current state, grouped by state and application_name.
+#
+# Result columns: state, application_name, seconds_sum, seconds_count,
+# seconds (array of bucket upper bounds, ascending) and seconds_bucket
+# (array of cumulative session counts, one per bound, in the same order).
+pg_process_idle:
+  query: |
+    WITH
+      -- Total idle time and session count per (state, application_name).
+      metrics AS (
+        SELECT
+          state,
+          application_name,
+          SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
+          COUNT(*) AS process_idle_seconds_count
+        FROM pg_stat_activity
+        WHERE state ~ '^idle'
+        GROUP BY state, application_name
+      ),
+      -- Sessions idle for at most le seconds, per (state, application_name, le).
+      -- Same state filter as metrics: other rows would be discarded by the join.
+      buckets AS (
+        SELECT
+          state,
+          application_name,
+          le,
+          SUM(
+            CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
+              THEN 1
+              ELSE 0
+            END
+          )::bigint AS bucket
+        FROM pg_stat_activity
+        CROSS JOIN UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
+        WHERE state ~ '^idle'
+        GROUP BY state, application_name, le
+      )
+    -- Order inside the aggregates: row order from a CTE is not guaranteed
+    -- to survive the join and grouping.
+    SELECT
+      state,
+      application_name,
+      process_idle_seconds_sum AS seconds_sum,
+      process_idle_seconds_count AS seconds_count,
+      ARRAY_AGG(le ORDER BY le) AS seconds,
+      ARRAY_AGG(bucket ORDER BY le) AS seconds_bucket
+    FROM metrics JOIN buckets USING (state, application_name)
+    GROUP BY state, application_name, process_idle_seconds_sum, process_idle_seconds_count
   master: true
   metrics:
-    - lag_seconds:
-        usage: "GAUGE"
-        description: "Replication lag behind master in seconds"
+    # Result columns of the query above and how each one is exported.
+    # state and application_name are declared as labels.
+    - state:
+        usage: "LABEL"
+        description: "State"
+    - application_name:
+        usage: "LABEL"
+        description: "Application Name"
+    # The query also returns seconds_sum, seconds_count and seconds_bucket;
+    # presumably the exporter combines them with this column to build the
+    # histogram - TODO confirm against the exporter's HISTOGRAM handling.
+    - seconds:
+        usage: "HISTOGRAM"
+        description: "Idle time of server processes"
 
-pg_database:
-  query: "SELECT pg_database.oid AS datid, pg_database.datname, pg_database_size(pg_database.datname) AS size_bytes FROM pg_database"
+# pg_wal: number of WAL segment files listed by pg_ls_waldir().
+# Segment file names are exactly 24 upper-case hexadecimal digits; the
+# anchored pattern skips every other entry in the directory.
+pg_wal:
+  query: "SELECT count(*) AS segment_count FROM pg_ls_waldir() WHERE name ~ '^[0-9A-F]{24}$'"
   master: true
-  cache_seconds: 30
   metrics:
-    - datid:
-        usage: "LABEL"
-        description: "ID of the database"
-    - datname:
-        usage: "LABEL"
-        description: "Name of the database"
-    - size_bytes:
+    # Exports the query's segment_count column as a gauge.
+    - segment_count:
         usage: "GAUGE"
-        description: "Disk space used by the database"
+        description: "Number of WAL segments"