git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/postgresql/templates/default/postgres_queries.yml.erb
Add ulimit increase documentation to CONTRIBUTING.md
[chef.git] / cookbooks / postgresql / templates / default / postgres_queries.yml.erb
index 37d854f7725221589d40bef998a6b9f153b0bd49..a92382edc0ec0e2a2118165eacbcfcfae94542ac 100644 (file)
-pg_replication:
-  query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag_seconds"
+pg_process_idle:
+  query: |
+    -- Idle-time histogram of server processes: one row per (state, application_name).
+    -- seconds and seconds_bucket are parallel arrays; seconds_bucket[i] is the number
+    -- of sessions whose idle time is at most seconds[i] (cumulative counts).
+    WITH
+      -- Idle sessions and how long each has been in its current state.
+      -- Shared by both CTEs below so that sums, counts and buckets are derived
+      -- from the same filtered rows rather than two separate reads of pg_stat_activity.
+      idle AS (
+        SELECT
+          state,
+          application_name,
+          EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) AS idle_seconds
+        FROM pg_stat_activity
+        WHERE state ~ '^idle'
+      ),
+      -- Histogram sum and count per (state, application_name).
+      metrics AS (
+        SELECT
+          state,
+          application_name,
+          -- Each session's idle time is rounded to whole seconds before summing.
+          SUM(idle_seconds::bigint)::float AS process_idle_seconds_sum,
+          COUNT(*) AS process_idle_seconds_count
+        FROM idle
+        GROUP BY state, application_name
+      ),
+      -- One row per (state, application_name, upper bound) with the number of
+      -- sessions at or below that bound.
+      buckets AS (
+        SELECT
+          state,
+          application_name,
+          le,
+          COUNT(*) FILTER (WHERE idle_seconds <= le) AS bucket
+        FROM idle
+        CROSS JOIN UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
+        GROUP BY state, application_name, le
+      )
+    SELECT
+      state,
+      application_name,
+      process_idle_seconds_sum AS seconds_sum,
+      process_idle_seconds_count AS seconds_count,
+      -- Explicit ORDER BY keeps both arrays in ascending bucket order and aligned
+      -- element by element; aggregate input order is otherwise unspecified.
+      ARRAY_AGG(le ORDER BY le) AS seconds,
+      ARRAY_AGG(bucket ORDER BY le) AS seconds_bucket
+    FROM metrics
+    INNER JOIN buckets USING (state, application_name)
+    GROUP BY
+      state,
+      application_name,
+      process_idle_seconds_sum,
+      process_idle_seconds_count
   master: true
   metrics:
-    - lag_seconds:
-        usage: "GAUGE"
-        description: "Replication lag behind master in seconds"
-
-pg_postmaster:
-  query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
-  master: true
-  metrics:
-    - start_time_seconds:
-        usage: "GAUGE"
-        description: "Time at which postmaster started"
-<% if node[:postgresql][:monitor_tables] -%>
-
-pg_stat_user_tables:
-  query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
-  metrics:
-    - datname:
-        usage: "LABEL"
-        description: "Name of current database"
-    - schemaname:
+    - state:
         usage: "LABEL"
-        description: "Name of the schema that this table is in"
-    - relname:
+        description: "State"
+    - application_name:
         usage: "LABEL"
-        description: "Name of this table"
-    - seq_scan:
-        usage: "COUNTER"
-        description: "Number of sequential scans initiated on this table"
-    - seq_tup_read:
-        usage: "COUNTER"
-        description: "Number of live rows fetched by sequential scans"
-    - idx_scan:
-        usage: "COUNTER"
-        description: "Number of index scans initiated on this table"
-    - idx_tup_fetch:
-        usage: "COUNTER"
-        description: "Number of live rows fetched by index scans"
-    - n_tup_ins:
-        usage: "COUNTER"
-        description: "Number of rows inserted"
-    - n_tup_upd:
-        usage: "COUNTER"
-        description: "Number of rows updated"
-    - n_tup_del:
-        usage: "COUNTER"
-        description: "Number of rows deleted"
-    - n_tup_hot_upd:
-        usage: "COUNTER"
-        description: "Number of rows HOT updated (i.e., with no separate index update required)"
-    - n_live_tup:
-        usage: "GAUGE"
-        description: "Estimated number of live rows"
-    - n_dead_tup:
-        usage: "GAUGE"
-        description: "Estimated number of dead rows"
-    - n_mod_since_analyze:
-        usage: "GAUGE"
-        description: "Estimated number of rows changed since last analyze"
-    - last_vacuum:
-        usage: "GAUGE"
-        description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
-    - last_autovacuum:
-        usage: "GAUGE"
-        description: "Last time at which this table was vacuumed by the autovacuum daemon"
-    - last_analyze:
-        usage: "GAUGE"
-        description: "Last time at which this table was manually analyzed"
-    - last_autoanalyze:
-        usage: "GAUGE"
-        description: "Last time at which this table was analyzed by the autovacuum daemon"
-    - vacuum_count:
-        usage: "COUNTER"
-        description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
-    - autovacuum_count:
-        usage: "COUNTER"
-        description: "Number of times this table has been vacuumed by the autovacuum daemon"
-    - analyze_count:
-        usage: "COUNTER"
-        description: "Number of times this table has been manually analyzed"
-    - autoanalyze_count:
-        usage: "COUNTER"
-        description: "Number of times this table has been analyzed by the autovacuum daemon"
+        description: "Application Name"
+    - seconds:
+        usage: "HISTOGRAM"
+        description: "Idle time of server processes"
 
-pg_statio_user_tables:
-  query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
-  metrics:
-    - datname:
-        usage: "LABEL"
-        description: "Name of current database"
-    - schemaname:
-        usage: "LABEL"
-        description: "Name of the schema that this table is in"
-    - relname:
-        usage: "LABEL"
-        description: "Name of this table"
-    - heap_blks_read:
-        usage: "COUNTER"
-        description: "Number of disk blocks read from this table"
-    - heap_blks_hit:
-        usage: "COUNTER"
-        description: "Number of buffer hits in this table"
-    - idx_blks_read:
-        usage: "COUNTER"
-        description: "Number of disk blocks read from all indexes on this table"
-    - idx_blks_hit:
-        usage: "COUNTER"
-        description: "Number of buffer hits in all indexes on this table"
-    - toast_blks_read:
-        usage: "COUNTER"
-        description: "Number of disk blocks read from this table's TOAST table (if any)"
-    - toast_blks_hit:
-        usage: "COUNTER"
-        description: "Number of buffer hits in this table's TOAST table (if any)"
-    - tidx_blks_read:
-        usage: "COUNTER"
-        description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
-    - tidx_blks_hit:
-        usage: "COUNTER"
-        description: "Number of buffer hits in this table's TOAST table indexes (if any)"
-<% end -%>
-
-pg_unfrozen_ids:
-  query: "SELECT current_database() AS datname, max(age(relfrozenxid)) AS xid_age, max(mxid_age(relminmxid)) AS mxid_age FROM pg_class WHERE relkind IN ('r', 'm')"
+pg_wal:
+  query: "SELECT count(*) AS segment_count FROM pg_ls_waldir() WHERE name ~ '^[0-9A-Z]{24}$'"
+  master: true
   metrics:
-    - datname:
-        usage: "LABEL"
-        description: "Name of the database"
-    - xid_age:
-        usage: "GAUGE"
-        description: "Age of the oldest unfrozen transaction ID in this database"
-    - mxid_age:
+    - segment_count:
         usage: "GAUGE"
-        description: "Age of the oldest unfrozen multixact ID in this database"
+        description: "Number of WAL segments"