git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/prometheus/recipes/server.rb
Tighten timeouts and log request timeouts
[chef.git] / cookbooks / prometheus / recipes / server.rb
index 902844c3d8826dd57976e9e78305fa797fb22fec..45b60f946ace102c3921b9b9bbe4f77fe973db04 100644 (file)
@@ -19,6 +19,7 @@
 
 include_recipe "apache"
 include_recipe "apt::grafana"
+include_recipe "awscli"
 include_recipe "networking"
 
 passwords = data_bag_item("prometheus", "passwords")
@@ -55,7 +56,11 @@ prometheus_exporter "cloudwatch" do
   address "127.0.0.1"
   port 5000
   listen_switch "listen-address"
-  options "--config.file=/etc/prometheus/cloudwatch.yml"
+  options %w[
+    --config.file=/etc/prometheus/cloudwatch.yml
+    --enable-feature=aws-sdk-v2
+    --enable-feature=always-return-info-metrics
+  ]
   environment "AWS_ACCESS_KEY_ID" => "AKIASQUXHPE7JHG37EA6",
               "AWS_SECRET_ACCESS_KEY" => tokens["cloudwatch"]
   subscribes :restart, "template[/etc/prometheus/cloudwatch.yml]"
@@ -237,7 +242,7 @@ package "prometheus"
 systemd_service "prometheus-executable" do
   service "prometheus"
   dropin "executable"
-  exec_start "/opt/prometheus-server/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --web.external-url=https://prometheus.openstreetmap.org/prometheus --storage.tsdb.path=/var/lib/prometheus/metrics2 --storage.tsdb.retention.time=540d"
+  exec_start "/opt/prometheus-server/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --web.enable-admin-api --web.external-url=https://prometheus.openstreetmap.org/prometheus --storage.tsdb.path=/var/lib/prometheus/metrics2 --storage.tsdb.retention.time=540d"
   timeout_stop_sec 300
   notifies :restart, "service[prometheus]"
 end
@@ -373,3 +378,49 @@ template "/etc/cron.daily/prometheus-backup" do
   group "root"
   mode "750"
 end
+
+package %w[
+  curl
+  jq
+]
+
+directory "/var/lib/prometheus/.aws" do
+  user "prometheus"
+  group "prometheus"
+  mode "755"
+end
+
+template "/var/lib/prometheus/.aws/credentials" do
+  source "aws-credentials.erb"
+  user "prometheus"
+  group "prometheus"
+  mode "600"
+  variables :passwords => passwords
+end
+
+template "/usr/local/bin/prometheus-backup-data" do
+  source "backup-data.erb"
+  owner "root"
+  group "root"
+  mode "755"
+end
+
+systemd_service "prometheus-backup-data" do
+  description "Backup prometheus data to S3"
+  user "prometheus"
+  exec_start "/usr/local/bin/prometheus-backup-data"
+  read_write_paths %w[
+    /var/lib/prometheus/.aws
+    /var/lib/prometheus/metrics2/snapshots
+  ]
+  sandbox :enable_network => true
+end
+
+systemd_timer "prometheus-backup-data" do
+  description "Backup prometheus data to S3"
+  on_calendar "03:11"
+end
+
+service "prometheus-backup-data.timer" do
+  action [:enable, :start]
+end