]> git.openstreetmap.org Git - chef.git/blob - cookbooks/postgresql/templates/default/postgres_queries.yml.erb
Add metric for idle postgres processes
[chef.git] / cookbooks / postgresql / templates / default / postgres_queries.yml.erb
1 pg_replication:
2   query: "SELECT CASE WHEN NOT pg_is_in_recovery() THEN 0 ELSE GREATEST (0, EXTRACT(EPOCH FROM (now() - pg_last_xact_replay_timestamp()))) END AS lag_seconds"
3   master: true
4   metrics:
5     - lag_seconds:
6         usage: "GAUGE"
7         description: "Replication lag behind master in seconds"
8
9 pg_postmaster:
10   query: "SELECT pg_postmaster_start_time as start_time_seconds from pg_postmaster_start_time()"
11   master: true
12   metrics:
13     - start_time_seconds:
14         usage: "GAUGE"
15         description: "Time at which postmaster started"
16 <% if node[:postgresql][:monitor_tables] -%>
17
18 pg_stat_user_tables:
19   query: "SELECT current_database() datname, schemaname, relname, seq_scan, seq_tup_read, idx_scan, idx_tup_fetch, n_tup_ins, n_tup_upd, n_tup_del, n_tup_hot_upd, n_live_tup, n_dead_tup, n_mod_since_analyze, COALESCE(last_vacuum, '1970-01-01Z') as last_vacuum, COALESCE(last_autovacuum, '1970-01-01Z') as last_autovacuum, COALESCE(last_analyze, '1970-01-01Z') as last_analyze, COALESCE(last_autoanalyze, '1970-01-01Z') as last_autoanalyze, vacuum_count, autovacuum_count, analyze_count, autoanalyze_count FROM pg_stat_user_tables"
20   metrics:
21     - datname:
22         usage: "LABEL"
23         description: "Name of current database"
24     - schemaname:
25         usage: "LABEL"
26         description: "Name of the schema that this table is in"
27     - relname:
28         usage: "LABEL"
29         description: "Name of this table"
30     - seq_scan:
31         usage: "COUNTER"
32         description: "Number of sequential scans initiated on this table"
33     - seq_tup_read:
34         usage: "COUNTER"
35         description: "Number of live rows fetched by sequential scans"
36     - idx_scan:
37         usage: "COUNTER"
38         description: "Number of index scans initiated on this table"
39     - idx_tup_fetch:
40         usage: "COUNTER"
41         description: "Number of live rows fetched by index scans"
42     - n_tup_ins:
43         usage: "COUNTER"
44         description: "Number of rows inserted"
45     - n_tup_upd:
46         usage: "COUNTER"
47         description: "Number of rows updated"
48     - n_tup_del:
49         usage: "COUNTER"
50         description: "Number of rows deleted"
51     - n_tup_hot_upd:
52         usage: "COUNTER"
53         description: "Number of rows HOT updated (i.e., with no separate index update required)"
54     - n_live_tup:
55         usage: "GAUGE"
56         description: "Estimated number of live rows"
57     - n_dead_tup:
58         usage: "GAUGE"
59         description: "Estimated number of dead rows"
60     - n_mod_since_analyze:
61         usage: "GAUGE"
62         description: "Estimated number of rows changed since last analyze"
63     - last_vacuum:
64         usage: "GAUGE"
65         description: "Last time at which this table was manually vacuumed (not counting VACUUM FULL)"
66     - last_autovacuum:
67         usage: "GAUGE"
68         description: "Last time at which this table was vacuumed by the autovacuum daemon"
69     - last_analyze:
70         usage: "GAUGE"
71         description: "Last time at which this table was manually analyzed"
72     - last_autoanalyze:
73         usage: "GAUGE"
74         description: "Last time at which this table was analyzed by the autovacuum daemon"
75     - vacuum_count:
76         usage: "COUNTER"
77         description: "Number of times this table has been manually vacuumed (not counting VACUUM FULL)"
78     - autovacuum_count:
79         usage: "COUNTER"
80         description: "Number of times this table has been vacuumed by the autovacuum daemon"
81     - analyze_count:
82         usage: "COUNTER"
83         description: "Number of times this table has been manually analyzed"
84     - autoanalyze_count:
85         usage: "COUNTER"
86         description: "Number of times this table has been analyzed by the autovacuum daemon"
87
88 pg_statio_user_tables:
89   query: "SELECT current_database() datname, schemaname, relname, heap_blks_read, heap_blks_hit, idx_blks_read, idx_blks_hit, toast_blks_read, toast_blks_hit, tidx_blks_read, tidx_blks_hit FROM pg_statio_user_tables"
90   metrics:
91     - datname:
92         usage: "LABEL"
93         description: "Name of current database"
94     - schemaname:
95         usage: "LABEL"
96         description: "Name of the schema that this table is in"
97     - relname:
98         usage: "LABEL"
99         description: "Name of this table"
100     - heap_blks_read:
101         usage: "COUNTER"
102         description: "Number of disk blocks read from this table"
103     - heap_blks_hit:
104         usage: "COUNTER"
105         description: "Number of buffer hits in this table"
106     - idx_blks_read:
107         usage: "COUNTER"
108         description: "Number of disk blocks read from all indexes on this table"
109     - idx_blks_hit:
110         usage: "COUNTER"
111         description: "Number of buffer hits in all indexes on this table"
112     - toast_blks_read:
113         usage: "COUNTER"
114         description: "Number of disk blocks read from this table's TOAST table (if any)"
115     - toast_blks_hit:
116         usage: "COUNTER"
117         description: "Number of buffer hits in this table's TOAST table (if any)"
118     - tidx_blks_read:
119         usage: "COUNTER"
120         description: "Number of disk blocks read from this table's TOAST table indexes (if any)"
121     - tidx_blks_hit:
122         usage: "COUNTER"
123         description: "Number of buffer hits in this table's TOAST table indexes (if any)"
124 <% end -%>
125
126 pg_process_idle:
127   query: |
128     WITH
129       metrics AS (
130         SELECT
131           state,
132           application_name,
133           SUM(EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change))::bigint)::float AS process_idle_seconds_sum,
134           COUNT(*) AS process_idle_seconds_count
135         FROM pg_stat_activity
136         WHERE state ~ '^idle'
137         GROUP BY state, application_name
138       ),
139       buckets AS (
140         SELECT
141           state,
142           application_name,
143           le,
144           SUM(
145             CASE WHEN EXTRACT(EPOCH FROM (CURRENT_TIMESTAMP - state_change)) <= le
146               THEN 1
147               ELSE 0
148             END
149           )::bigint AS bucket
150         FROM
151           pg_stat_activity,
152           UNNEST(ARRAY[1, 2, 5, 15, 30, 60, 90, 120, 300]) AS le
153         GROUP BY state, application_name, le
154         ORDER BY state, application_name, le
155       )
156     SELECT
157       state,
158       application_name,
159       process_idle_seconds_sum as seconds_sum,
160       process_idle_seconds_count as seconds_count,
161       ARRAY_AGG(le) AS seconds,
162       ARRAY_AGG(bucket) AS seconds_bucket
163     FROM metrics JOIN buckets USING (state, application_name)
164     GROUP BY 1, 2, 3, 4
165   master: true
166   metrics:
167     - state:
168         usage: "LABEL"
169         description: "State"
170     - application_name:
171         usage: "LABEL"
172         description: "Application Name"
173     - seconds:
174         usage: "HISTOGRAM"
175         description: "Idle time of server processes"
176
177 pg_unfrozen_ids:
178   query: "SELECT current_database() AS datname, max(age(relfrozenxid)) AS xid_age, max(mxid_age(relminmxid)) AS mxid_age FROM pg_class WHERE relkind IN ('r', 'm')"
179   metrics:
180     - datname:
181         usage: "LABEL"
182         description: "Name of the database"
183     - xid_age:
184         usage: "GAUGE"
185         description: "Age of the oldest unfrozen transaction ID in this database"
186     - mxid_age:
187         usage: "GAUGE"
188         description: "Age of the oldest unfrozen multixact ID in this database"
189
190 pg_wal:
191   query: "SELECT count(*) AS segment_count FROM pg_ls_waldir() WHERE name ~ '^[0-9A-Z]{24}$'"
192   master: true
193   metrics:
194     - segment_count:
195         usage: "GAUGE"
196         description: "Number of WAL segments"