]> git.openstreetmap.org Git - chef.git/commitdiff
nominatim: complete rework of the setup
authorSarah Hoffmann <lonvia@denofr.de>
Thu, 30 Jun 2022 08:26:15 +0000 (10:26 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Thu, 30 Jun 2022 08:26:15 +0000 (10:26 +0200)
* switch to installing Nominatim into /usr/local
* use a dedicated project directory instead of running from build dir
* switch updates to use systemd

cookbooks/nominatim/recipes/version4.rb [new file with mode: 0644]
cookbooks/nominatim/templates/default/nominatim-daily-maintenance.erb [new file with mode: 0644]
cookbooks/nominatim/templates/default/nominatim-update-data.erb [new file with mode: 0644]
cookbooks/nominatim/templates/default/nominatim-update-refresh-db.erb [new file with mode: 0644]
cookbooks/nominatim/templates/default/nominatim-update-source.erb [new file with mode: 0644]
cookbooks/nominatim/templates/default/nominatim-update.erb [new file with mode: 0644]
roles/nominatim-v4.rb [new file with mode: 0644]

diff --git a/cookbooks/nominatim/recipes/version4.rb b/cookbooks/nominatim/recipes/version4.rb
new file mode 100644 (file)
index 0000000..e68be08
--- /dev/null
@@ -0,0 +1,520 @@
+#
+# Cookbook:: nominatim
+# Recipe:: version4
+#
+# Copyright:: 2015, OpenStreetMap Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     https://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+include_recipe "accounts"
+include_recipe "munin"
+include_recipe "php::fpm"
+include_recipe "prometheus"
+
+basedir = data_bag_item("accounts", "nominatim")["home"]
+email_errors = data_bag_item("accounts", "lonvia")["email"]
+
+directory basedir do
+  owner "nominatim"
+  group "nominatim"
+  mode "755"
+  recursive true
+end
+
+## Log directory setup
+
+directory node[:nominatim][:logdir] do
+  owner "nominatim"
+  group "nominatim"
+  mode "755"
+  recursive true
+end
+
+file "#{node[:nominatim][:logdir]}/query.log" do
+  action :create_if_missing
+  owner "www-data"
+  group "adm"
+  mode "664"
+end
+
+file "#{node[:nominatim][:logdir]}/update.log" do
+  action :create_if_missing
+  owner "nominatim"
+  group "adm"
+  mode "664"
+end
+
+## Postgresql
+
+include_recipe "postgresql"
+
+postgresql_version = node[:nominatim][:dbcluster].split("/").first
+postgis_version = node[:nominatim][:postgis]
+
+package "postgresql-#{postgresql_version}-postgis-#{postgis_version}"
+
+postgresql_user "nominatim" do
+  cluster node[:nominatim][:dbcluster]
+  superuser true
+  only_if { node[:nominatim][:state] != "slave" }
+end
+
+postgresql_user "www-data" do
+  cluster node[:nominatim][:dbcluster]
+  only_if { node[:nominatim][:state] != "slave" }
+end
+
+postgresql_munin "nominatim" do
+  cluster node[:nominatim][:dbcluster]
+  database node[:nominatim][:dbname]
+end
+
+directory "#{basedir}/tablespaces" do
+  owner "postgres"
+  group "postgres"
+  mode "700"
+end
+
+# NOTE: tablespaces must be exactly in the same location on each
+#       Nominatim instance when replication is in use. Therefore
+#       use symlinks to canonical directory locations.
+node[:nominatim][:tablespaces].each do |name, location|
+  directory location do
+    owner "postgres"
+    group "postgres"
+    mode "700"
+    recursive true
+  end
+
+  link "#{basedir}/tablespaces/#{name}" do
+    to location
+  end
+
+  postgresql_tablespace name do
+    cluster node[:nominatim][:dbcluster]
+    location "#{basedir}/tablespaces/#{name}"
+  end
+end
+
+## Nominatim backend
+
+include_recipe "git"
+
+package %w[
+  build-essential
+  cmake
+  g++
+  libboost-dev
+  libboost-system-dev
+  libboost-filesystem-dev
+  libexpat1-dev
+  zlib1g-dev
+  libbz2-dev
+  libpq-dev
+  libproj-dev
+  python3-pyosmium
+  python3-psycopg2
+  python3-dotenv
+  python3-psutil
+  python3-jinja2
+  python3-icu
+  python3-datrie
+  python3-yaml
+  php-pgsql
+  php-intl
+  ruby
+  ruby-file-tail
+  ruby-pg
+  ruby-webrick
+]
+
+source_directory = "#{basedir}/src/nominatim"
+build_directory = "#{basedir}/src/build"
+project_directory = "#{basedir}/planet-project"
+bin_directory = "#{basedir}/bin"
+cfg_directory = "#{basedir}/etc"
+ui_directory = "#{basedir}/ui"
+qa_bin_directory = "#{basedir}/src/Nominatim-Data-Analyser"
+qa_data_directory = "#{basedir}/qa-data"
+
+[basedir, "#{basedir}/src", cfg_directory, bin_directory, build_directory, project_directory].each do |path|
+  directory path do
+    owner "nominatim"
+    group "nominatim"
+    mode "755"
+    recursive true
+  end
+end
+
+directory "#{bin_directory}/maintenance" do
+  owner "nominatim"
+  group "nominatim"
+  mode "775"
+end
+
+if node[:nominatim][:flatnode_file]
+  directory File.dirname(node[:nominatim][:flatnode_file]) do
+    recursive true
+  end
+end
+
+# Normally syncing via chef is a bad idea because syncing might involve
+# an update of database functions which should not be done while an update
+# is ongoing. Therefore we sync in between update cycles. There is an
+# exception for slaves: they get DB function updates from the master, so
+# only the source code needs to be updated, which chef may do.
+git source_directory do
+  action node[:nominatim][:state] == "slave" ? :sync : :checkout
+  repository node[:nominatim][:repository]
+  revision node[:nominatim][:revision]
+  enable_submodules true
+  user "nominatim"
+  group "nominatim"
+  not_if { node[:nominatim][:state] != "slave" && File.exist?("#{source_directory}/README.md") }
+  notifies :run, "execute[compile_nominatim]"
+end
+
+remote_file "#{source_directory}/data/country_osm_grid.sql.gz" do
+  action :create_if_missing
+  source "https://www.nominatim.org/data/country_grid.sql.gz"
+  owner "nominatim"
+  group "nominatim"
+  mode "644"
+end
+
+execute "compile_nominatim" do
+  action :nothing
+  user "nominatim"
+  cwd build_directory
+  command "cmake #{source_directory} && make"
+  notifies :run, "execute[install_nominatim]"
+end
+
+execute "install_nominatim" do
+  action :nothing
+  cwd build_directory
+  command "make install"
+end
+
+# Project directory
+
+template "#{project_directory}/.env" do
+  source "nominatim.env.erb"
+  owner "nominatim"
+  group "nominatim"
+  mode "664"
+  variables :base_url => node[:nominatim][:state] == "off" ? node[:fqdn] : "nominatim.openstreetmap.org",
+            :dbname => node[:nominatim][:dbname],
+            :flatnode_file => node[:nominatim][:flatnode_file],
+            :log_file => "#{node[:nominatim][:logdir]}/query.log",
+            :tokenizer => node[:nominatim][:config][:tokenizer]
+end
+
+# %w[wikimedia-importance.sql.gz gb_postcodes.csv.gz us_postcodes.csv.gz].each do |fname|
+#   remote_file "#{project_directory}/#{fname}" do
+#     action :create
+#     source "https://www.nominatim.org/data/#{fname}"
+#     owner "nominatim"
+#     group "nominatim"
+#     mode "644"
+#   end
+# end
+
+# Webserver + frontend
+
+%w[user_agent referrer email generic].each do |name|
+  file "#{cfg_directory}/nginx_blocked_#{name}.conf" do
+    action :create_if_missing
+    owner "nominatim"
+    group "adm"
+    mode "664"
+  end
+end
+
+node[:nominatim][:fpm_pools].each do |name, data|
+  php_fpm name do
+    port data[:port]
+    pm data[:pm]
+    pm_max_children data[:max_children]
+    pm_start_servers 20
+    pm_min_spare_servers 10
+    pm_max_spare_servers 20
+    pm_max_requests 10000
+    prometheus_port data[:prometheus_port]
+  end
+end
+
+ssl_certificate node[:fqdn] do
+  domains [node[:fqdn],
+           "nominatim.openstreetmap.org",
+           "nominatim.osm.org",
+           "nominatim.openstreetmap.com",
+           "nominatim.openstreetmap.net",
+           "nominatim.openstreetmaps.org",
+           "nominatim.openmaps.org",
+           "nominatim.qgis.org"]
+  notifies :reload, "service[nginx]"
+end
+
+include_recipe "nginx"
+
+nginx_site "default" do
+  action [:delete]
+end
+
+frontends = search(:node, "recipes:web\\:\\:frontend").sort_by(&:name)
+
+nginx_site "nominatim" do
+  template "nginx.erb"
+  directory build_directory
+  variables :pools => node[:nominatim][:fpm_pools],
+            :frontends => frontends,
+            :confdir => "#{basedir}/etc",
+            :ui_directory => ui_directory
+end
+
+template "/etc/logrotate.d/nginx" do
+  source "logrotate.nginx.erb"
+  owner "root"
+  group "root"
+  mode "644"
+end
+
+# Updates
+
+%w[nominatim-update
+   nominatim-update-source
+   nominatim-update-refresh-db
+   nominatim-update-data
+   nominatim-daily-maintenance].each do |fname|
+  template "#{bin_directory}/#{fname}" do
+    source "#{fname}.erb"
+    owner "nominatim"
+    group "nominatim"
+    mode "554"
+    variables :bindir => bin_directory,
+              :srcdir => source_directory,
+              :builddir => build_directory,
+              :projectdir => project_directory,
+              :qabindir => qa_bin_directory,
+              :qadatadir => qa_data_directory
+  end
+end
+
+systemd_service "nominatim-update" do
+  description "Update the Nominatim database"
+  exec_start "#{bin_directory}/nominatim-update"
+  restart "on-success"
+  standard_output "append:#{node[:nominatim][:logdir]}/update.log"
+  standard_error "inherit"
+  working_directory project_directory
+end
+
+systemd_service "nominatim-update-maintenance-trigger" do
+  description "Trigger daily maintenance tasks for Nominatim DB"
+  exec_start "ln -sf #{bin_directory}/nominatim-daily-maintenance #{bin_directory}/maintenance/"
+  user "nominatim"
+end
+
+systemd_timer "nominatim-update-maintenance-trigger" do
+  action node[:nominatim][:state] != "off" ? :create : :delete
+  description "Schedule daily maintenance tasks for Nominatim DB"
+  on_calendar "*-*-* 02:03:00 UTC"
+end
+
+service "nominatim-update-maintenance-trigger" do
+  action node[:nominatim][:state] != "off" ? :enable : :disable
+end
+
+# Nominatim UI
+
+git ui_directory do
+  action :sync
+  repository node[:nominatim][:ui_repository]
+  revision node[:nominatim][:ui_revision]
+  user "nominatim"
+  group "nominatim"
+end
+
+template "#{ui_directory}/dist/theme/config.theme.js" do
+  source "ui-config.js.erb"
+  owner "nominatim"
+  group "nominatim"
+  mode "664"
+end
+
+# Nominatim QA
+
+if node[:nominatim][:enable_qa_tiles]
+  package "python3-geojson"
+
+  git qa_bin_directory do
+    repository node[:nominatim][:qa_repository]
+    revision node[:nominatim][:qa_revision]
+    enable_submodules true
+    user "nominatim"
+    group "nominatim"
+    notifies :run, "execute[compile_qa]"
+  end
+
+  execute "compile_qa" do
+    action :nothing
+    user "nominatim"
+    cwd "#{qa_bin_directory}/clustering-vt"
+    command "make"
+  end
+
+  directory qa_data_directory do
+    owner "nominatim"
+    group "nominatim"
+    mode "755"
+    recursive true
+  end
+
+  template "#{qa_bin_directory}/analyser/config/config.yaml" do
+    source "qa_config.erb"
+    owner "nominatim"
+    group "nominatim"
+    mode "755"
+    variables :outputdir => "#{qa_data_directory}/new"
+  end
+
+  ssl_certificate "qa-tile.nominatim.openstreetmap.org" do
+    domains ["qa-tile.nominatim.openstreetmap.org"]
+    notifies :reload, "service[nginx]"
+  end
+
+  nginx_site "qa-tiles.nominatim" do
+    template "nginx-qa-tiles.erb"
+    directory build_directory
+    variables :qa_data_directory => qa_data_directory
+  end
+
+end
+
+# Replication
+
+cron_d "nominatim-clean-db" do
+  action node[:nominatim][:state] == "master" ? :create : :delete
+  minute "5"
+  hour "*/4"
+  user "postgres"
+  command "#{bin_directory}/clean-db-nominatim"
+  mailto email_errors
+end
+
+if node[:nominatim][:state] == "master"
+  postgresql_user "replication" do
+    cluster node[:nominatim][:dbcluster]
+    password data_bag_item("nominatim", "passwords")["replication"]
+    replication true
+  end
+
+  directory node[:rsyncd][:modules][:archive][:path] do
+    owner "postgres"
+    group "postgres"
+    mode "700"
+  end
+
+  template "#{bin_directory}/clean-db-nominatim" do
+    source "clean-db-nominatim.erb"
+    owner "nominatim"
+    group "nominatim"
+    mode "755"
+    variables :archive_dir => node[:rsyncd][:modules][:archive][:path],
+              :update_stop_file => "#{basedir}/status/updates_disabled",
+              :streaming_clients => search(:node, "nominatim_state:slave").map { |slave| slave[:fqdn] }.join(" ")
+  end
+end
+
+# Maintenance
+
+cron_d "nominatim-backup" do
+  action (node[:nominatim][:enable_backup] && node[:nominatim][:state] != "off") ? :create : :delete
+  minute "0"
+  hour "3"
+  day "1"
+  user "nominatim"
+  command "#{bin_directory}/backup-nominatim"
+  mailto email_errors
+end
+
+cron_d "nominatim-vacuum-db" do
+  action node[:nominatim][:state] != "off" ? :create : :delete
+  minute "20"
+  hour "0"
+  user "postgres"
+  command "#{bin_directory}/vacuum-db-nominatim"
+  mailto email_errors
+end
+
+%w[backup-nominatim vacuum-db-nominatim].each do |fname|
+  template "#{bin_directory}/#{fname}" do
+    source "#{fname}.erb"
+    owner "nominatim"
+    group "nominatim"
+    mode "755"
+    variables :db => node[:nominatim][:dbname]
+  end
+end
+
+# Logging
+
+template "/etc/logrotate.d/nominatim" do
+  source "logrotate.nominatim.erb"
+  owner "root"
+  group "root"
+  mode "644"
+end
+
+# Monitoring
+
+munin_plugin_conf "nominatim" do
+  template "munin.erb"
+  variables :db => node[:nominatim][:dbname],
+            :querylog => "#{node[:nominatim][:logdir]}/query.log"
+end
+
+munin_plugin "nominatim_importlag" do
+  target "#{source_directory}/munin/nominatim_importlag"
+end
+
+munin_plugin "nominatim_query_speed" do
+  target "#{source_directory}/munin/nominatim_query_speed_querylog"
+end
+
+munin_plugin "nominatim_requests" do
+  target "#{source_directory}/munin/nominatim_requests_querylog"
+end
+
+prometheus_exporter "nominatim" do
+  port 8082
+  user "www-data"
+  options [
+    "--nominatim.query-log=#{node[:nominatim][:logdir]}/query.log",
+    "--nominatim.database-name=#{node[:nominatim][:dbname]}"
+  ]
+end
+
+include_recipe "fail2ban"
+
+frontend_addresses = frontends.collect { |f| f.ipaddresses(:role => :external) }
+
+fail2ban_jail "nominatim_limit_req" do
+  filter "nginx-limit-req"
+  logpath "#{node[:nominatim][:logdir]}/nominatim.openstreetmap.org-error.log"
+  ports [80, 443]
+  maxretry 20
+  ignoreips frontend_addresses.flatten.sort
+end
diff --git a/cookbooks/nominatim/templates/default/nominatim-daily-maintenance.erb b/cookbooks/nominatim/templates/default/nominatim-daily-maintenance.erb
new file mode 100644 (file)
index 0000000..837f4af
--- /dev/null
@@ -0,0 +1,15 @@
+# DO NOT EDIT - This file is being maintained by Chef
+
+nominatim refresh --postcodes --word-tokens --threads 4 -v
+
+<% if node[:nominatim][:enable_qa_tiles] -%>
+pushd <%= @qabindir %>
+if python3 cli.py --execute-all; then
+  rm -rf "<%= @qadatadir %>/old"
+  if [ -d "<%= @qadatadir %>/current" ]; then
+    mv "<%= @qadatadir %>/current" "<%= @qadatadir %>/old"
+  fi
+  mv "<%= @qadatadir %>/new" "<%= @qadatadir %>/current"
+fi
+popd
+<% end -%>
diff --git a/cookbooks/nominatim/templates/default/nominatim-update-data.erb b/cookbooks/nominatim/templates/default/nominatim-update-data.erb
new file mode 100644 (file)
index 0000000..733c381
--- /dev/null
@@ -0,0 +1,30 @@
+#!/bin/bash
+
+# DO NOT EDIT - This file is being maintained by Chef
+
+date "+%c === Downloading and updating latest data from OSM"
+
+cd <%= @projectdir %>
+
+# First execute any maintenance task that may be there.
+
+for task in `find <%= @bindir %>/maintenance -type f`; do
+  date "+%c === Running maintenance task ${task}"
+  . ${task}
+  rm ${task}
+done
+
+# Then catch up with the database.
+
+num_cpus=`cat /proc/cpuinfo | grep -c processor`
+num_cpus=$((num_cpus - 2))
+current_load=`cat /proc/loadavg | cut -f 2 -d ' ' | sed 's:\..*::'`
+
+if [[ $current_load -lt $num_cpus ]]
+then
+    INST=4
+else
+    INST=2
+fi
+
+nominatim replication --catch-up --threads $INST
diff --git a/cookbooks/nominatim/templates/default/nominatim-update-refresh-db.erb b/cookbooks/nominatim/templates/default/nominatim-update-refresh-db.erb
new file mode 100644 (file)
index 0000000..f3de243
--- /dev/null
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+# DO NOT EDIT - This file is being maintained by Chef
+
+date "+%c === Refresh database after software updates"
+
+cd <%= @projectdir %>
+
+nominatim admin --migrate
+nominatim refresh --functions --address-levels --website
diff --git a/cookbooks/nominatim/templates/default/nominatim-update-source.erb b/cookbooks/nominatim/templates/default/nominatim-update-source.erb
new file mode 100644 (file)
index 0000000..43a4865
--- /dev/null
@@ -0,0 +1,23 @@
+#!/bin/bash -e
+
+# DO NOT EDIT - This file is being maintained by Chef
+
+# Note: the script must return 0 only when new updates have been applied.
+
+date "+%c === Checking for new versions of Nominatim"
+
+cd <%= @srcdir %>
+
+git fetch origin
+
+if git diff --exit-code origin/<%= node[:nominatim][:revision] %> >/dev/null; then
+  # signal that there are no new updates
+  exit 99
+fi
+
+git merge origin/<%= node[:nominatim][:revision] %>
+git submodule update
+
+cd <%= @builddir %>
+cmake .
+make
diff --git a/cookbooks/nominatim/templates/default/nominatim-update.erb b/cookbooks/nominatim/templates/default/nominatim-update.erb
new file mode 100644 (file)
index 0000000..9f12220
--- /dev/null
@@ -0,0 +1,35 @@
+#!/bin/bash
+
+# DO NOT EDIT - This file is being maintained by Chef
+
+date "+%c === Starting Nominatim update cycle"
+
+starttime=`date +%s`
+
+<% if node[:nominatim][:enable_git_updates] -%>
+if /sbin/runuser -u nominatim -- <%= @bindir %>/nominatim-update-source; then
+  pushd <%= @builddir %>
+  make install
+
+  if ! /sbin/runuser -u nominatim -- <%= @bindir %>/nominatim-update-refresh-db; then
+    date "+%c === Database refresh failed. Stopping updates."
+    exit 1
+  fi
+fi
+<% end -%>
+
+if ! /sbin/runuser -u nominatim -- <%= @bindir %>/nominatim-update-data; then
+  date "+%c === Data update failed. Stopping updates."
+  exit 1
+fi
+
+date "+%c === Nominatim update cycle finished"
+
+# sleep a bit if updates take less than a minute
+endtime=`date +%s`
+elapsed=$((endtime - starttime))
+if [[ $elapsed -lt 60 ]]; then
+  sleepy=$((60 - $elapsed))
+  date "+%c === Sleeping for ${sleepy}s..."
+  sleep $sleepy
+fi
diff --git a/roles/nominatim-v4.rb b/roles/nominatim-v4.rb
new file mode 100644 (file)
index 0000000..f2c3157
--- /dev/null
@@ -0,0 +1,90 @@
+name "nominatim"
+description "Role applied to all nominatim servers. For Nominatim 4."
+
+default_attributes(
+  :accounts => {
+    :users => {
+      :lonvia => { :status => :administrator },
+      :nominatim => {
+        :status => :role,
+        :members => [:lonvia, :tomh]
+      }
+    }
+  },
+  :networking => {
+    :firewall => {
+      :http_rate_limit => "s:2/sec:15"
+    }
+  },
+  :postgresql => {
+    :settings => {
+      :defaults => {
+        :max_connections => "450",
+        :synchronous_commit => "off",
+        :checkpoint_segments => "32",
+        :checkpoint_timeout => "10min",
+        :checkpoint_completion_target => "0.9",
+        :jit => "off",
+        :shared_buffers => "2GB",
+        :autovacuum_max_workers => "1",
+        :max_parallel_workers_per_gather => "0",
+        :maintenance_work_mem => "10GB",
+        :random_page_cost => "1.5",
+        :effective_cache_size => "60GB"
+      }
+    }
+  },
+  :sysctl => {
+    :postgres => {
+      :comment => "Increase shared memory for postgres",
+      :parameters => {
+        "kernel.shmmax" => 26 * 1024 * 1024 * 1024,
+        "kernel.shmall" => 26 * 1024 * 1024 * 1024 / 4096
+      }
+    },
+    :kernel_scheduler_tune => {
+      :comment => "Tune kernel scheduler preempt",
+      :parameters => {
+        "kernel.sched_min_granularity_ns" => 10000000,
+        "kernel.sched_wakeup_granularity_ns" => 15000000
+      }
+    },
+    :swappiness => {
+      :comment => "Reduce swap usage",
+      :parameters => {
+        "vm.swappiness" => 10
+      }
+    },
+    :network_conntrack_time_wait => {
+      :comment => "Only track completed connections for 30 seconds",
+      :parameters => {
+        "net.netfilter.nf_conntrack_tcp_timeout_time_wait" => "30"
+      }
+    },
+    :network_conntrack_max => {
+      :comment => "Increase max number of connections tracked",
+      :parameters => {
+        "net.netfilter.nf_conntrack_max" => "196608"
+      }
+    }
+  },
+  :nominatim => {
+    :dbadmins => %w[lonvia tomh],
+    :tablespaces => {
+      "dosm" => "/ssd/tablespaces/dosm",
+      "iosm" => "/ssd/tablespaces/iosm",
+      "dplace" => "/ssd/tablespaces/dplace",
+      "iplace" => "/ssd/tablespaces/iplace",
+      "daddress" => "/ssd/tablespaces/daddress",
+      "iaddress" => "/ssd/tablespaces/iaddress",
+      "dsearch" => "/ssd/tablespaces/dsearch",
+      "isearch" => "/ssd/tablespaces/isearch",
+      "daux" => "/ssd/tablespaces/daux",
+      "iaux" => "/ssd/tablespaces/iaux"
+    }
+  }
+)
+
+run_list(
+  "recipe[nominatim::version4]"
+)