git.openstreetmap.org Git - chef.git/blobdiff - cookbooks/prometheus/recipes/server.rb
Revert "Blackhole unreachable Amazon IPv6 block on equinix machines"
[chef.git] / cookbooks / prometheus / recipes / server.rb
index 45550368cd5b669f1cb1b49ee9c1159fccbdb4d1..4b9058c8eb5964dafd855879eafc272256891e35 100644 (file)
@@ -19,6 +19,7 @@
 
 include_recipe "apache"
 include_recipe "apt::grafana"
+include_recipe "awscli"
 include_recipe "networking"
 
 passwords = data_bag_item("prometheus", "passwords")
@@ -44,6 +45,27 @@ prometheus_exporter "statuscake" do
   environment "STATUSCAKE_APIKEY" => tokens["statuscake"]
 end
 
+template "/etc/prometheus/cloudwatch.yml" do # config file handed to the cloudwatch exporter via --config.file
+  source "cloudwatch.yml.erb"
+  owner "root"
+  group "root"
+  mode "644" # world-readable and rendered without variables; NOTE(review): presumably holds no secrets - confirm against cloudwatch.yml.erb
+end
+
+prometheus_exporter "cloudwatch" do # prometheus_exporter is defined outside this view; presumably runs the cloudwatch exporter as a service
+  address "127.0.0.1" # loopback address
+  port 5000
+  listen_switch "listen-address" # presumably the name of the exporter's listen flag - confirm in the prometheus_exporter resource
+  options %w[
+    --config.file=/etc/prometheus/cloudwatch.yml
+    --enable-feature=aws-sdk-v2
+    --enable-feature=always-return-info-metrics
+  ] # presumably appended to the exporter command line; --config.file points at the template rendered by this recipe
+  environment "AWS_ACCESS_KEY_ID" => "AKIASQUXHPE7JHG37EA6", # NOTE(review): key ID is hard-coded while the secret comes from tokens - consider sourcing both from the same place
+              "AWS_SECRET_ACCESS_KEY" => tokens["cloudwatch"] # tokens is defined outside this view
+  subscribes :restart, "template[/etc/prometheus/cloudwatch.yml]" # restart the exporter when its config template changes
+end
+
 cache_dir = Chef::Config[:file_cache_path]
 
 prometheus_version = "2.45.0"
@@ -220,7 +242,7 @@ package "prometheus"
 systemd_service "prometheus-executable" do
   service "prometheus"
   dropin "executable"
-  exec_start "/opt/prometheus-server/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --web.external-url=https://prometheus.openstreetmap.org/prometheus --storage.tsdb.path=/var/lib/prometheus/metrics2 --storage.tsdb.retention.time=540d"
+  exec_start "/opt/prometheus-server/prometheus/prometheus --config.file=/etc/prometheus/prometheus.yml --web.enable-admin-api --web.external-url=https://prometheus.openstreetmap.org/prometheus --storage.tsdb.path=/var/lib/prometheus/metrics2 --storage.tsdb.retention.time=540d" # adds --web.enable-admin-api; presumably so prometheus-backup-data can request TSDB snapshots under metrics2/snapshots - confirm against backup-data.erb
   timeout_stop_sec 300
   notifies :restart, "service[prometheus]"
 end
@@ -341,7 +363,7 @@ apache_module "proxy_http"
 apache_module "proxy_wstunnel"
 
 ssl_certificate "prometheus.openstreetmap.org" do
-  domains ["prometheus.openstreetmap.org", "prometheus.osm.org"]
+  domains ["prometheus.openstreetmap.org", "prometheus.osm.org", "munin.openstreetmap.org", "munin.osm.org"] # certificate now also covers the munin.* names; presumably this host answers for them - confirm against the apache site config
   notifies :reload, "service[apache2]"
 end
 
@@ -356,3 +378,49 @@ template "/etc/cron.daily/prometheus-backup" do
   group "root"
   mode "750"
 end
+
+package %w[
+  curl
+  jq
+] # presumably required by /usr/local/bin/prometheus-backup-data - confirm against backup-data.erb
+
+directory "/var/lib/prometheus/.aws" do # directory for the AWS credentials file rendered by this recipe
+  user "prometheus"
+  group "prometheus"
+  mode "755" # NOTE(review): directory is world-readable although the credentials file inside is 600 - consider 700
+end
+
+template "/var/lib/prometheus/.aws/credentials" do # AWS credentials file owned by the prometheus user
+  source "aws-credentials.erb"
+  user "prometheus"
+  group "prometheus"
+  mode "600" # readable and writable by the owner only
+  variables :passwords => passwords # passes the "passwords" data bag item loaded earlier in the recipe
+end
+
+template "/usr/local/bin/prometheus-backup-data" do # script run by the prometheus-backup-data systemd service
+  source "backup-data.erb"
+  owner "root"
+  group "root"
+  mode "755" # executable by all users, writable by root only
+end
+
+systemd_service "prometheus-backup-data" do # unit that runs the backup script; started via the timer of the same name
+  description "Backup prometheus data to S3"
+  user "prometheus" # same user that owns /var/lib/prometheus/.aws
+  exec_start "/usr/local/bin/prometheus-backup-data"
+  read_write_paths %w[
+    /var/lib/prometheus/.aws
+    /var/lib/prometheus/metrics2/snapshots
+  ] # presumably the only paths left writable inside the sandbox - confirm in the systemd_service resource
+  sandbox :enable_network => true # sandboxed but with network enabled; the description says the data goes to S3
+end
+
+systemd_timer "prometheus-backup-data" do # timer paired by name with the prometheus-backup-data service
+  description "Backup prometheus data to S3"
+  on_calendar "03:11" # presumably rendered as OnCalendar=03:11, i.e. once a day at 03:11 - confirm in the systemd_timer resource
+end
+
+service "prometheus-backup-data.timer" do # manages the timer unit, not the backup service itself
+  action [:enable, :start] # enable the timer and start it during this Chef run
+end