git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #3108 from mtmail/remove-legacy-wikipedia-tag-syntax
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 7 Dec 2023 08:24:32 +0000 (09:24 +0100)
committer GitHub <noreply@github.com>
Thu, 7 Dec 2023 08:24:32 +0000 (09:24 +0100)
These days the OSM wikipedia tag no longer contains URLs

160 files changed:
.github/actions/build-nominatim/action.yml
.github/workflows/ci-tests.yml
CMakeLists.txt
ChangeLog
SECURITY.md
cmake/tool-installed.tmpl
cmake/tool.tmpl
docs/CMakeLists.txt
docs/admin/Advanced-Installations.md
docs/admin/Deployment-PHP.md [moved from docs/admin/Deployment.md with 94% similarity]
docs/admin/Deployment-Python.md [new file with mode: 0644]
docs/admin/Import.md
docs/admin/Installation.md
docs/admin/Maintenance.md
docs/admin/Migration.md
docs/api/Details.md
docs/api/Lookup.md
docs/api/Overview.md
docs/api/Reverse.md
docs/api/Search.md
docs/api/Status.md
docs/customize/Settings.md
docs/customize/Tokenizers.md
docs/develop/Development-Environment.md
docs/develop/ICU-Tokenizer-Modules.md
docs/develop/Tokenizers.md
docs/extra.css
docs/index.md
docs/library/Configuration.md [new file with mode: 0644]
docs/library/Getting-Started.md [new file with mode: 0644]
docs/library/Input-Parameter-Types.md [new file with mode: 0644]
docs/library/Low-Level-DB-Access.md [new file with mode: 0644]
docs/library/NominatimAPI.md [new file with mode: 0644]
docs/library/Result-Handling.md [new file with mode: 0644]
docs/mkdocs.yml
lib-php/PlaceLookup.php
lib-php/ReverseGeocode.php
lib-php/admin/export.php [deleted file]
lib-php/admin/warm.php [deleted file]
lib-php/website/details.php
lib-sql/functions/address_lookup.sql
lib-sql/functions/place_triggers.sql
lib-sql/functions/utils.sql
man/create-manpage.tmpl
nominatim/api/__init__.py
nominatim/api/connection.py
nominatim/api/core.py
nominatim/api/logging.py
nominatim/api/lookup.py
nominatim/api/results.py
nominatim/api/reverse.py
nominatim/api/search/db_search_builder.py
nominatim/api/search/db_search_fields.py
nominatim/api/search/db_searches.py
nominatim/api/search/geocoder.py
nominatim/api/search/icu_tokenizer.py
nominatim/api/search/legacy_tokenizer.py
nominatim/api/search/query.py
nominatim/api/search/query_analyzer_factory.py
nominatim/api/search/token_assignment.py
nominatim/api/status.py
nominatim/api/types.py
nominatim/api/v1/format.py
nominatim/api/v1/format_json.py
nominatim/api/v1/helpers.py
nominatim/api/v1/server_glue.py
nominatim/cli.py
nominatim/clicmd/__init__.py
nominatim/clicmd/admin.py
nominatim/clicmd/api.py
nominatim/clicmd/args.py
nominatim/clicmd/convert.py [new file with mode: 0644]
nominatim/clicmd/export.py [new file with mode: 0644]
nominatim/clicmd/refresh.py
nominatim/clicmd/setup.py
nominatim/config.py
nominatim/data/postcode_format.py
nominatim/db/async_connection.py
nominatim/db/connection.py
nominatim/db/sqlalchemy_functions.py [new file with mode: 0644]
nominatim/db/sqlalchemy_schema.py
nominatim/db/sqlalchemy_types.py
nominatim/db/utils.py
nominatim/server/falcon/server.py
nominatim/server/starlette/server.py
nominatim/tokenizer/base.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
nominatim/tokenizer/sanitizers/config.py
nominatim/tokenizer/sanitizers/tag_japanese.py [new file with mode: 0644]
nominatim/tokenizer/token_analysis/base.py
nominatim/tools/admin.py
nominatim/tools/collect_os_info.py
nominatim/tools/convert_sqlite.py [new file with mode: 0644]
nominatim/tools/database_import.py
nominatim/tools/exec_utils.py
nominatim/tools/refresh.py
nominatim/typing.py
nominatim/version.py
osm2pgsql
settings/env.defaults
settings/flex-base.lua
settings/icu-rules/unicode-digits-to-decimal.yaml
settings/icu_tokenizer.yaml
settings/import-admin.lua
test/bdd/api/details/language.feature
test/bdd/api/details/params.feature
test/bdd/api/details/simple.feature
test/bdd/api/errors/formats.feature
test/bdd/api/lookup/simple.feature
test/bdd/api/reverse/geometry.feature
test/bdd/api/reverse/language.feature
test/bdd/api/reverse/layers.feature
test/bdd/api/reverse/queries.feature
test/bdd/api/reverse/v1_geocodejson.feature
test/bdd/api/reverse/v1_geojson.feature
test/bdd/api/reverse/v1_json.feature
test/bdd/api/reverse/v1_params.feature
test/bdd/api/reverse/v1_xml.feature
test/bdd/api/search/queries.feature
test/bdd/api/search/structured.feature
test/bdd/api/status/simple.feature
test/bdd/db/query/japanese.feature [new file with mode: 0644]
test/bdd/db/query/postcodes.feature
test/bdd/environment.py
test/bdd/osm2pgsql/update/tags.feature
test/bdd/steps/nominatim_environment.py
test/python/api/conftest.py
test/python/api/fake_adaptor.py [new file with mode: 0644]
test/python/api/search/test_api_search_query.py
test/python/api/search/test_db_search_builder.py
test/python/api/search/test_icu_query_analyzer.py
test/python/api/search/test_legacy_query_analyzer.py
test/python/api/search/test_query.py [new file with mode: 0644]
test/python/api/search/test_search_country.py
test/python/api/search/test_search_near.py
test/python/api/search/test_search_places.py
test/python/api/search/test_search_postcode.py
test/python/api/search/test_token_assignment.py
test/python/api/test_api_deletable_v1.py [new file with mode: 0644]
test/python/api/test_api_details.py
test/python/api/test_api_lookup.py
test/python/api/test_api_polygons_v1.py [new file with mode: 0644]
test/python/api/test_api_reverse.py
test/python/api/test_api_status.py
test/python/api/test_export.py [new file with mode: 0644]
test/python/api/test_helpers_v1.py
test/python/api/test_results.py
test/python/api/test_server_glue_v1.py
test/python/api/test_warm.py [new file with mode: 0644]
test/python/cli/conftest.py
test/python/cli/test_cli.py
test/python/cli/test_cmd_admin.py
test/python/cli/test_cmd_api.py
test/python/tokenizer/sanitizers/test_clean_postcodes.py
test/python/tokenizer/sanitizers/test_tag_japanese.py [new file with mode: 0644]
test/python/tools/test_admin.py
test/python/tools/test_exec_utils.py
vagrant/Install-on-Ubuntu-20.sh
vagrant/Install-on-Ubuntu-22.sh

index 724de3dec672b6915a5f0d5ef3990cdfb0903225..17ff0ccfc14d391958309bae4020e598523b69e5 100644 (file)
@@ -25,12 +25,12 @@ runs:
           shell: bash
         - name: Install${{ matrix.flavour }} prerequisites
           run: |
-            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson
+            sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
             if [ "$FLAVOUR" == "oldstuff" ]; then
-                pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg
+                pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
             else
                 sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
-                pip3 install sqlalchemy psycopg
+                pip3 install sqlalchemy psycopg aiosqlite
             fi
           shell: bash
           env:
index 48de6e0d51d50514ad5074f25f671c3c0d2b9e46..42c03edc17d9e76fd20463b6294c25ce1fa730bc 100644 (file)
@@ -7,7 +7,7 @@ jobs:
         runs-on: ubuntu-latest
 
         steps:
-            - uses: actions/checkout@v3
+            - uses: actions/checkout@v4
               with:
                 submodules: true
 
@@ -105,7 +105,7 @@ jobs:
               if: matrix.flavour != 'oldstuff'
 
             - name: Install newer pytest-asyncio
-              run: pip3 install -U pytest-asyncio
+              run: pip3 install -U pytest-asyncio==0.21.1
               if: matrix.flavour == 'ubuntu-20'
 
             - name: Install test prerequisites (from pip for Ubuntu 18)
@@ -113,18 +113,21 @@ jobs:
               if: matrix.flavour == 'oldstuff'
 
             - name: Install Python webservers
-              run: pip3 install falcon starlette
+              run: pip3 install falcon starlette asgi_lifespan
 
             - name: Install latest pylint
-              run: pip3 install -U pylint asgi_lifespan
+              run: pip3 install -U pylint
+              if: matrix.flavour != 'oldstuff'
 
             - name: PHP linting
               run: phpcs --report-width=120 .
               working-directory: Nominatim
+              if: matrix.flavour != 'oldstuff'
 
             - name: Python linting
               run: python3 -m pylint nominatim
               working-directory: Nominatim
+              if: matrix.flavour != 'oldstuff'
 
             - name: PHP unit tests
               run: phpunit ./
@@ -346,3 +349,95 @@ jobs:
             - name: Clean up database (reverse-only import)
               run: nominatim refresh --postcodes --word-tokens
               working-directory: /home/nominatim/nominatim-project
+
+    install-no-superuser:
+      runs-on: ubuntu-latest
+      needs: create-archive
+
+      strategy:
+          matrix:
+              name: [Ubuntu-22]
+              include:
+                  - name: Ubuntu-22
+                    image: "ubuntu:22.04"
+                    ubuntu: 22
+                    install_mode: install-apache
+
+      container:
+          image: ${{ matrix.image }}
+          env:
+              LANG: en_US.UTF-8
+
+      defaults:
+          run:
+              shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0}
+
+      steps:
+          - name: Prepare container (Ubuntu)
+            run: |
+                export APT_LISTCHANGES_FRONTEND=none
+                export DEBIAN_FRONTEND=noninteractive
+                apt-get update -qq
+                apt-get install -y git sudo wget
+                ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
+            shell: bash
+
+          - name: Setup import user
+            run: |
+                useradd -m nominatim
+                echo 'nominatim   ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominatim
+                echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh
+            shell: bash
+            env:
+              OS: ${{ matrix.name }}
+              INSTALL_MODE: ${{ matrix.install_mode }}
+
+          - uses: actions/download-artifact@v3
+            with:
+                name: full-source
+                path: /home/nominatim
+
+          - name: Install Nominatim
+            run: |
+              export USERNAME=nominatim
+              export USERHOME=/home/nominatim
+              export NOSYSTEMD=yes
+              export HAVE_SELINUX=no
+              tar xf nominatim-src.tar.bz2
+              . vagrant.sh
+            working-directory: /home/nominatim
+
+          - name: Prepare import environment
+            run: |
+                mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
+                mv Nominatim/settings/flex-base.lua flex-base.lua
+                mv Nominatim/settings/import-extratags.lua import-extratags.lua
+                mv Nominatim/settings/taginfo.lua taginfo.lua
+                rm -rf Nominatim
+                mkdir data-env-reverse
+            working-directory: /home/nominatim
+
+          - name: Prepare Database
+            run: |
+                nominatim import --prepare-database
+            working-directory: /home/nominatim/nominatim-project
+
+          - name: Create import user
+            run: |
+                sudo -u postgres createuser -S osm-import
+                sudo -u postgres psql -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import';"
+            working-directory: /home/nominatim/nominatim-project
+
+          - name: Grant import user rights
+            run: |
+                sudo -u postgres psql -c "GRANT INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO \"osm-import\";"
+            working-directory: /home/nominatim/nominatim-project
+
+          - name: Run import
+            run: |
+                NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file ../test.pbf
+            working-directory: /home/nominatim/nominatim-project
+
+          - name: Check full import
+            run: nominatim admin --check-database
+            working-directory: /home/nominatim/nominatim-project
\ No newline at end of file
index 8200e7572beb9548ee78f412e1e2146ee80347e0..536b21bc37c7c1c6d53215d5dca95e94998fc2a6 100644 (file)
@@ -19,7 +19,7 @@ list(APPEND CMAKE_MODULE_PATH "${CMAKE_SOURCE_DIR}/cmake")
 project(nominatim)
 
 set(NOMINATIM_VERSION_MAJOR 4)
-set(NOMINATIM_VERSION_MINOR 2)
+set(NOMINATIM_VERSION_MINOR 3)
 set(NOMINATIM_VERSION_PATCH 0)
 
 set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
@@ -92,16 +92,6 @@ if (BUILD_API OR BUILD_IMPORTER)
     else()
         message (STATUS "Using PHP binary " ${PHP_BIN})
     endif()
-    if (NOT PHPCGI_BIN)
-        find_program (PHPCGI_BIN php-cgi)
-    endif()
-    # sanity check if PHP binary exists
-    if (NOT EXISTS ${PHPCGI_BIN})
-        message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
-        set (PHPCGI_BIN "")
-    else()
-        message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
-    endif()
 endif()
 
 #-----------------------------------------------------------------------------
index 89861d0ba6a99a670a761bfc513ea6fcf0a381b2..fae0d68f3897ede3bc93cd08fe32f435640d0e68 100644 (file)
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,42 @@
+4.3.0
+ * fix failing importance recalculation command
+ * fix merging of linked names into unnamed boundaries
+ * fix a number of corner cases with interpolation splitting resulting in
+   invalid geometries
+ * fix failure in website generation when password contains curly brackets
+ * fix broken use of ST_Project in PostGIS 3.4
+ * new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
+   to known countries (thanks @alfmarcua)
+ * allow negative OSM IDs (thanks @alfmarcua)
+ * disallow import of Tiger data in a frozen DB
+ * avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
+ * update bundled osm2pgsql to 1.9.2
+ * reorganise osm2pgsql flex style and make it the default
+ * exclude names ending in :wikipedia from indexing
+ * no longer accept comma as a list separator in name tags
+ * process forward dependencies on update to catch updates in geometries
+   of ways and relations
+ * fix handling of isolated silent letters during transliteration
+ * no longer assign postcodes to large linear features like rivers
+ * introduce nominatim.paths module for finding data and libraries
+ * documentation layout changed to material theme
+ * new documentation section for library
+ * various smaller fixes to existing documentation
+   (thanks @woodpeck, @bloom256, @biswajit-k)
+ * updates to vagrant install scripts, drop support for Ubuntu 18
+   (thanks @n-timofeev)
+ * removed obsolete configuration variables from env.defaults
+ * add script for generating a taginfo description (thanks @biswajit-k)
+ * modernize Python code around BDD test and add testing of Python frontend
+ * lots of new BDD tests for API output
+
+4.2.3
+
+ * fix deletion handling for 'nominatim add-data'
+ * adapt place_force_delete() to new deletion handling
+ * flex style: avoid dropping of postcode areas
+ * fix update errors on address interpolation handling
+
 4.2.2
 
  * extend flex-style library to fully support all default styles
index d023c1e5b834cfc0fcc4fd74509fc9c0b9eb08a5..2cb351ce6011c5102e943059ad595b921e452d07 100644 (file)
@@ -9,10 +9,9 @@ versions.
 
 | Version | End of support for security updates |
 | ------- | ----------------------------------- |
+| 4.3.x   | 2025-09-07                          |
 | 4.2.x   | 2024-11-24                          |
 | 4.1.x   | 2024-08-05                          |
-| 4.0.x   | 2023-11-02                          |
-| 3.7.x   | 2023-04-05                          |
 
 ## Reporting a Vulnerability
 
@@ -36,5 +35,6 @@ incident. Announcements will also be published at the
 
 ## List of Previous Incidents
 
-* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
+* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
 * 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
index e38dafabebb2ed447839a95b94a455f86ea73866..a6384f148ef2e5369692f4d9ec89a9de570ac98c 100644 (file)
@@ -10,5 +10,4 @@ from nominatim import version
 version.GIT_COMMIT_HASH = '@GIT_HASH@'
 
 exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
-                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
index 96c6c6dcdb079f1ad8a3422c433f9b8471c9abef..fcdbe899295e4b207d72d68f779fdb2caa79e17a 100755 (executable)
@@ -10,5 +10,4 @@ from nominatim import version
 version.GIT_COMMIT_HASH = '@GIT_HASH@'
 
 exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
-                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
-                   phpcgi_path='@PHPCGI_BIN@'))
+                   osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
index edfc882942635fb81ada651bfd7fadca31353c69..637ecfe9416fbcb0f956aa04af9d210b7a60b132 100644 (file)
@@ -11,6 +11,7 @@ set (DOC_SOURCES
      develop
      api
      customize
+     library
      index.md
      extra.css
      styles.css
@@ -25,10 +26,10 @@ endforeach()
 ADD_CUSTOM_TARGET(doc
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
    COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
-   COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+   COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
 )
 
 ADD_CUSTOM_TARGET(serve-doc
-    COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
-    WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+    COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+    WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
 )
index 08c059841f48df4d40f45f1eef74f629f8b5d91f..3b98fec39579a5b286542349525bcd1bd63bcc5f 100644 (file)
@@ -36,16 +36,15 @@ which has the following structure:
 
 ```bash
 update
-    ├── europe
-    │   ├── andorra
-    │   │   └── sequence.state
-    │   └── monaco
-    │       └── sequence.state
-    └── tmp
-        └── europe
-                ├── andorra-latest.osm.pbf
-                └── monaco-latest.osm.pbf
-
+ ├── europe
+ │    ├── andorra
+ │    │    └── sequence.state
+ │    └── monaco
+ │         └── sequence.state
+ └── tmp
+      └── europe
+           ├── andorra-latest.osm.pbf
+           └── monaco-latest.osm.pbf
 
 ```
 
@@ -99,7 +98,7 @@ Change into the project directory and run the following command:
 
 This will get diffs from the replication server, import diffs and index
 the database. The default replication server in the
-script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
+script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
 
 ## Using an external PostgreSQL database
 
similarity index 94%
rename from docs/admin/Deployment.md
rename to docs/admin/Deployment-PHP.md
index 8b63554608a96d753b6c3598a7fc5de7066ae443..3ff86dad474182f2644fcc83e96d67d0f56ede8e 100644 (file)
@@ -1,4 +1,4 @@
-# Deploying Nominatim
+# Deploying Nominatim using the PHP frontend
 
 The Nominatim API is implemented as a PHP application. The `website/` directory
 in the project directory contains the configured website. You can serve this
@@ -8,13 +8,13 @@ PHP scripts.
 This section gives a quick overview on how to configure Apache and Nginx to
 serve Nominatim. It is not meant as a full system administration guide on how
 to run a web service. Please refer to the documentation of
-[Apache](http://httpd.apache.org/docs/current/) and
+[Apache](https://httpd.apache.org/docs/current/) and
 [Nginx](https://nginx.org/en/docs/)
 for background information on configuring the services.
 
 !!! Note
-    Throughout this page, we assume that your Nominatim project directory is
-    located in `/srv/nominatim-project` and that you have installed Nominatim
+    Throughout this page, we assume your Nominatim project directory is
+    located in `/srv/nominatim-project` and you have installed Nominatim
     using the default installation prefix `/usr/local`. If you have put it
     somewhere else, you need to adjust the commands and configuration
     accordingly.
diff --git a/docs/admin/Deployment-Python.md b/docs/admin/Deployment-Python.md
new file mode 100644 (file)
index 0000000..6fd2416
--- /dev/null
@@ -0,0 +1,140 @@
+# Deploying the Nominatim Python frontend
+
+Nominatim can be run as a Python-based
+[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
+choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
+and [Starlette](https://www.starlette.io/) as the ASGI framework.
+
+This section gives a quick overview on how to configure Nginx to serve
+Nominatim. Please refer to the documentation of
+[Nginx](https://nginx.org/en/docs/) for background information on how
+to configure it.
+
+!!! Note
+    Throughout this page, we assume your Nominatim project directory is
+    located in `/srv/nominatim-project` and you have installed Nominatim
+    using the default installation prefix `/usr/local`. If you have put it
+    somewhere else, you need to adjust the commands and configuration
+    accordingly.
+
+    We further assume that your web server runs as user `www-data`. Older
+    versions of CentOS may still use the user name `apache`. You also need
+    to adapt the instructions in this case.
+
+### Installing the required packages
+
+The recommended way to deploy a Python ASGI application is to run
+the ASGI runner [uvicorn](https://uvicorn.org/)
+together with the [gunicorn](https://gunicorn.org/) HTTP server. We use
+Falcon here as the web framework.
+
+Create a virtual environment for the Python packages and install the necessary
+dependencies:
+
+``` sh
+sudo apt install virtualenv
+virtualenv /srv/nominatim-venv
+/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
+   psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
+```
+
+### Setting up Nominatim as a systemd job
+
+Next you need to set up the service that runs the Nominatim frontend. This is
+most easily done with a systemd job.
+
+First you need to tell systemd to create a socket file to be used by
+gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
+
+``` systemd
+[Unit]
+Description=Gunicorn socket for Nominatim
+
+[Socket]
+ListenStream=/run/nominatim.sock
+SocketUser=www-data
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Now you can add the systemd service for Nominatim itself.
+Create the following file `/etc/systemd/system/nominatim.service`:
+
+``` systemd
+[Unit]
+Description=Nominatim running as a gunicorn application
+After=network.target
+Requires=nominatim.socket
+
+[Service]
+Type=simple
+Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
+User=www-data
+Group=www-data
+WorkingDirectory=/srv/nominatim-project
+ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
+ExecReload=/bin/kill -s HUP $MAINPID
+StandardOutput=append:/var/log/gunicorn-nominatim.log
+StandardError=inherit
+PrivateTmp=true
+TimeoutStopSec=5
+KillMode=mixed
+
+[Install]
+WantedBy=multi-user.target
+```
+
+This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
+its own Python process using
+[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
+connections to the database to serve requests in parallel.
+
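+To tune the total number of database connections, adjust the worker count
+and the pool size together. A minimal sketch for the project's `.env`
+(the value is illustrative):
+
+``` sh
+# each gunicorn worker opens up to this many database connections
+NOMINATIM_API_POOL_SIZE=5
+```
+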
+Make the new services known to systemd and start them:
+
+``` sh
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim.socket
+sudo systemctl start nominatim.socket
+sudo systemctl enable nominatim.service
+sudo systemctl start nominatim.service
+```
+
+This sets up the service so that Nominatim is started automatically
+on reboot.
+
+### Configuring nginx
+
+To make the service available to the world, you need to proxy it through
+nginx. Add the following definition to the default configuration:
+
+``` nginx
+upstream nominatim_service {
+  server unix:/run/nominatim.sock fail_timeout=0;
+}
+
+server {
+    listen 80;
+    listen [::]:80;
+
+    root /var/www/html;
+    index /search;
+
+    location / {
+            proxy_set_header Host $http_host;
+            proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+            proxy_set_header X-Forwarded-Proto $scheme;
+            proxy_redirect off;
+            proxy_pass http://nominatim_service;
+    }
+}
+```
+
+Reload nginx with
+
+```
+sudo systemctl reload nginx
+```
+
+and you should be able to see the status of your server under
+`http://localhost/status`.
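+
+A quick smoke test from the command line (assuming `curl` is available):
+
+``` sh
+curl http://localhost/status
+# should print 'OK' when the service is up
+```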
index d84a2376097605765ee3579732b39077acb7bda9..0fd5ec29b4256a357fee4b11bb036ec47aa8a0d3 100644 (file)
@@ -254,26 +254,70 @@ successfully.
 nominatim admin --check-database
 ```
 
-Now you can try out your installation by running:
+Now you can try out your installation by executing a simple query on the
+command line:
+
+``` sh
+nominatim search --query Berlin
+```
+
+or, when you have a reverse-only installation:
+
+``` sh
+nominatim reverse --lat 51 --lon 45
+```
+
+If you want to run Nominatim as a service, you need to choose between
+running the traditional PHP frontend and the new experimental Python frontend.
+Make sure you have installed the right packages as per
+[Installation](Installation.md#software).
+
+#### Testing the PHP frontend
+
+You can run a small test server with the PHP frontend like this:
 
 ```sh
 nominatim serve
 ```
 
-This runs a small test server normally used for development. You can use it
-to verify that your installation is working. Go to
-`http://localhost:8088/status.php` and you should see the message `OK`.
-You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations, a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the PHP frontend](Deployment-PHP.md).
+
+#### Testing the Python frontend
+
+To run the test server against the Python frontend, you must choose a
+web framework to use, either Falcon or Starlette. Make sure the appropriate
+packages are installed. Then run
+
+``` sh
+nominatim serve --engine falcon
+```
+
+or
+
+``` sh
+nominatim serve --engine starlette
+```
+
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations, a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
 
-Note that search query is not supported for reverse-only imports. You can run a
-reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the Python frontend](Deployment-Python.md).
 
-To run Nominatim via webservers like Apache or nginx, please read the
-[Deployment chapter](Deployment.md).
 
-## Adding search through category phrases
+## Enabling search by category phrases
 
-If you want to be able to search for places by their type through
+To be able to search for places by their type using
 [special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
 you also need to import these key phrases like this:
 
index 11442eed4635e8dd2aca1d6674a1589895428536..89e56c6e8e165621504e85c308fbfede5057c39b 100644 (file)
@@ -35,6 +35,7 @@ For compiling:
   * [bzip2](http://www.bzip.org/)
   * [zlib](https://www.zlib.net/)
   * [ICU](http://site.icu-project.org/)
+  * [nlohmann/json](https://json.nlohmann.me/)
   * [Boost libraries](https://www.boost.org/), including system and filesystem
   * PostgreSQL client libraries
   * a recent C++ compiler (gcc 5+ or Clang 3.8+)
@@ -48,15 +49,17 @@ For running Nominatim:
   * [Python Dotenv](https://github.com/theskumar/python-dotenv)
   * [psutil](https://github.com/giampaolo/psutil)
   * [Jinja2](https://palletsprojects.com/p/jinja/)
-  * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support)
+  * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
   * [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
   * [PyICU](https://pypi.org/project/PyICU/)
   * [PyYaml](https://pyyaml.org/) (5.1+)
   * [datrie](https://github.com/pytries/datrie)
+
+When running the PHP frontend:
+
   * [PHP](https://php.net) (7.3+)
   * PHP-pgsql
   * PHP-intl (bundled with PHP)
-  * PHP-cgi (for running queries from the command line)
 
 For running continuous updates:
 
@@ -83,7 +86,7 @@ Take into account that the OSM database is growing fast.
 Fast disks are essential. Using NVME disks is recommended.
 
 Even on a well configured machine the import of a full planet takes
-around 2 days. On traditional spinning disks, 7-8 days are more realistic.
+around 2 days. When using traditional SSDs, 4-5 days are more realistic.
 
 ## Tuning the PostgreSQL database
 
@@ -115,15 +118,6 @@ you might consider setting:
 and even reduce `autovacuum_work_mem` further. This will reduce the amount
 of memory that autovacuum takes away from the import process.
 
-For the initial import, you should also set:
-
-    fsync = off
-    full_page_writes = off
-
-Don't forget to re-enable them after the initial import or you risk database
-corruption.
-
-
 ## Downloading and building Nominatim
 
 ### Downloading the latest release
index 1ee313a997fe658055cf4cc652ebd6f67fd7d854..325e6f8f22410f3d1c96ccb8b5ea7d1cb26a1ca0 100644 (file)
@@ -60,16 +60,13 @@ to finish the recomputation.
 
 ## Removing large deleted objects
 
+Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
+
 Nominatim refuses to delete very large areas because often these deletions are
 accidental and are reverted within hours. Instead the deletions are logged in
 the `import_polygon_delete` table and left to the administrator to clean up.
 
-There is currently no command to do that. You can use the following SQL
-query to force a deletion on all objects that have been deleted more than
-a certain timespan ago (here: 1 month):
+To run this command, you need to pass a PostgreSQL time interval. For example,
+to delete any objects that have been deleted more than a month ago, run:
+`nominatim admin --clean-deleted '1 month'`
 
-```sql
-SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
-WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
-      and age(p.indexed_date) > '1 month'::interval
-```
index be7d90ff6ea217290560a5b2fa97f5dcc25cd63a..3e62d219817850079f8acb5df286d4f8e812db0b 100644 (file)
@@ -15,7 +15,7 @@ breaking changes. **Please read them before running the migration.**
     If you are migrating from a version <3.6, then you still have to follow
     the manual migration steps up to 3.6.
 
-## 4.1.0 -> master
+## 4.2.0 -> 4.3.0
 
 ### New indexes for reverse lookup
 
@@ -27,7 +27,7 @@ therefore either remove traffic from the machine before attempting a
 version update or create the index manually **before** starting the update
 using the following SQL:
 
-```
+```sql
 CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
   ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
   WHERE rank_address between 4 and 25 AND type != 'postcode'
index 08802f9a9d50d1fe6642304a51503928d3974597..c50378c5a6275cf0be3f0f0da1fd424c32ac1b84 100644 (file)
@@ -2,13 +2,17 @@
 
 Show all details about a single place saved in the database.
 
+This API endpoint is meant for visual inspection of the data in the database,
+mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
+The parameters of the endpoint and the output may change occasionally between
+versions of Nominatim. Do not rely on the output in scripts or applications.
+
 !!! warning
-    The details page exists for debugging only. You may not use it in scripts
-    or to automatically query details about a result.
+    The details endpoint at https://nominatim.openstreetmap.org
+    may not be used in scripts or bots at all.
     See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
 
 
-## Parameters
 
 The details API supports the following two request formats:
 
@@ -35,59 +39,90 @@ for a place is different between Nominatim installation (servers) and
 changes when data gets reimported. Therefore it cannot be used as
 a permanent id and shouldn't be used in bug reports.
 
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/details.php`. This is now deprecated
+    and will be removed in future versions.
 
-Additional optional parameters are explained below.
+
+## Parameters
+
+This section lists additional optional parameters.
 
 ### Output format
 
-* `json_callback=<string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
 
-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
+When set, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
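+
+A minimal JSONP sketch (the callback name `cb` and the OSM id placeholder
+are illustrative):
+
+``` sh
+# the response body arrives as: cb(<json>)
+curl 'https://nominatim.openstreetmap.org/details?osmtype=N&osmid=<id>&json_callback=cb'
+```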
 
-* `pretty=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| pretty    | 0 or 1 | 0 |
 
-Add indentation to make it more human-readable. (Default: 0)
+`[PHP-only]` Add indentation to the output to make it more human-readable.
 
 
 ### Output details
 
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
 
-Include a breakdown of the address into elements. (Default: 0)
+When set to 1, include a breakdown of the address into elements.
 
-* `keywords=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| keywords  | 0 or 1 | 0 |
 
-Include a list of name keywords and address keywords (word ids). (Default: 0)
+When set to 1, include a list of name keywords and address keywords
+in the result.
 
-* `linkedplaces=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| linkedplaces  | 0 or 1 | 1 |
 
-Include details of places that are linked with this one. Places get linked
+Include details of places that are linked with this one. Places get linked
 together when they are different forms of the same physical object. Nominatim
 links two kinds of objects together: place nodes get linked with the
 corresponding administrative boundaries. Waterway relations get linked together with their
 members.
-(Default: 1)
 
-* `hierarchy=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| hierarchy  | 0 or 1 | 0 |
+
+Include details of places lower in the address hierarchy.
+
+`[Python-only]` Only properly parented places are returned. These are address
+or POI-like places that reuse the address of their parent street or place.
 
-Include details of places lower in the address hierarchy. (Default: 0)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| group_hierarchy  | 0 or 1 | 0 |
 
-* `group_hierarchy=[0|1]`
+When set to 1, the output of the address hierarchy will be
+grouped by type.
 
-For JSON output will group the places by type. (Default: 0)
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
 
-* `polygon_geojson=[0|1]`
 
-Include geometry of result. (Default: 0)
+Include geometry of result.
 
 ### Language of results
 
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
 
-Preferred language order for showing result, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
 
 
 ## Examples
index e91c177095a4cd434841f00c34516e57798e1224..a2ba714e117055925571411037ca44cdc6183975 100644 (file)
@@ -3,7 +3,7 @@
 The lookup API allows to query the address and other details of one or
 multiple OSM objects like node, way or relation.
 
-## Parameters
+## Endpoint
 
 The lookup API has the following format:
 
@@ -15,75 +15,129 @@ The lookup API has the following format:
 prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
 can be queried at the same time.
 
-Additional optional parameters are explained below.
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
+    and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional optional parameters.
 
 ### Output format
 
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
+
+See [Place Output Formats](Output.md) for details on each format.
+
 
-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
 
-* `json_callback=<string>`
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
 
-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
 Only has an effect for JSON output formats.
 
+
 ### Output details
 
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
 
-Include a breakdown of the address into elements. (Default: 0)
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.
 
 
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
 
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. a wikipedia link or opening hours.
 
 
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
 
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.
 
 
 ### Language of results
 
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
+
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
+
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+    send the currently chosen browser language.
 
-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
 
 ### Polygon output
 
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |
 
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. The geometry can
+be returned as GeoJSON, KML, SVG or WKT. Only one of these
+options can be used at a time.
 
-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |
 
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+
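+A sketch combining full geometry output with simplification (the id and
+threshold values are illustrative):
+
+``` sh
+curl 'https://nominatim.openstreetmap.org/lookup?osm_ids=R146656&polygon_geojson=1&polygon_threshold=0.005&format=jsonv2'
+```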
 
 ### Other
 
-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |
 
 If you are making large numbers of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+
 
-* `debug=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |
 
 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
 
 
 ## Examples
index a718079d5a8693ad6ee2d4b4acd2ee3db6f6bf33..383eef53650db44f68fb66272b0914319e232f98 100644 (file)
@@ -1,8 +1,16 @@
 ### Nominatim API
 
-Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
+!!! Attention
+    The current version of Nominatim implements two different search frontends:
+    the old PHP frontend and the new Python frontend. They have a very similar
+    API but differ in some implementation details. These are marked in the
+    documentation as `[Python-only]` or `[PHP-only]`.
 
-Its API has the following endpoints for querying the data:
+    `https://nominatim.openstreetmap.org` implements the **Python frontend**.
+    So users should refer to the **`[Python-only]`** comments.
+
+This section describes the API V1 of the Nominatim web service. The
+service offers the following endpoints:
 
  * __[/search](Search.md)__ - search OSM objects by name or type
  * __[/reverse](Reverse.md)__ - search OSM object by their location
@@ -12,3 +20,6 @@ Its API has the following endpoints for querying the data:
                     back in Nominatim in case the deletion was accidental
  * __/polygons__ - list of broken polygons detected by Nominatim
  * __[/details](Details.md)__ - show internal details for an object (for debugging only)
+
+
+
index 56281d06f2420c43990f95a44bd18bb4a1dd3725..ef211b5aec9341c7d430f4c9faa989e98c6924ba 100644 (file)
@@ -1,6 +1,7 @@
 # Reverse Geocoding
 
-Reverse geocoding generates an address from a latitude and longitude.
+Reverse geocoding generates an address from a coordinate given as
+latitude and longitude.
 
 ## How it works
 
@@ -18,8 +19,7 @@ The other issue to be aware of is that the closest OSM object may not always
 have a similar enough address to the coordinate you were requesting. For
 example, in dense city areas it may belong to a completely different street.
 
-
-## Parameters
+## Endpoint
 
 The main format of the reverse API is
 
@@ -31,57 +31,101 @@ where `lat` and `lon` are latitude and longitude of a coordinate in WGS84
 projection. The API returns exactly one result or an error when the coordinate
 is in an area with no OSM data coverage.
 
-Additional parameters are accepted as listed below.
 
-!!! warning "Deprecation warning"
+!!! danger "Deprecation warning"
     The reverse API used to allow address lookup for a single OSM object by
-    its OSM id. This use is now deprecated. Use the [Address Lookup API](Lookup.md)
-    instead.
+    its OSM id (`[PHP-only]`). This use is considered deprecated.
+    Use the [Address Lookup API](Lookup.md) instead.
+
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
+    and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional parameters to further influence the output.
 
 ### Output format
 
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
+
+See [Place Output Formats](Output.md) for details on each format.
+
 
-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
 
-* `json_callback=<string>`
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
 
-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
 Only has an effect for JSON output formats.
 
+
 ### Output details
 
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 1 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
 
-Include a breakdown of the address into elements. (Default: 1)
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.
 
 
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
 
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. a wikipedia link or opening hours.
 
 
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
 
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.
 
 
 ### Language of results
 
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
 
-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
 
-### Result limitation
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+    send the currently chosen browser language.
 
-* `zoom=[0-18]`
 
-Level of detail required for the address. Default: 18. This is a number that
+### Result restriction
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| zoom      | 0-18  | 18      |
+
+Level of detail required for the address. This is a number that
 corresponds roughly to the zoom level used in XYZ tile sources in frameworks
 like Leaflet.js, Openlayers etc.
 In terms of address details the zoom levels are as follows:
@@ -95,41 +139,76 @@ In terms of address details the zoom levels are as follows:
   12  | town / borough
   13  | village / suburb
   14  | neighbourhood
-  15  | locality
+  15  | any settlement
   16  | major streets
   17  | major and minor streets
   18  | building
 
 
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer     | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
+
+The layer filter allows selecting places by theme.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic points
+of interest like restaurants, shops, hotels but also less obvious features
+like recycling bins, guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains, while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
+
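+A sketch restricting reverse lookup to addressable places and POIs
+(coordinates are illustrative):
+
+``` sh
+curl 'https://nominatim.openstreetmap.org/reverse?lat=52.5487&lon=-1.8164&layer=address,poi&format=jsonv2'
+```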
+
 ### Polygon output
 
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |
 
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. The geometry can
+be returned as GeoJSON, KML, SVG or WKT. Only one of these
+options can be used at a time.
 
-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |
 
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+
 
 ### Other
 
-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |
 
-If you are making a large number of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+If you are making large numbers of requests, please include an appropriate email
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
 
 
-* `debug=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |
 
 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
 
 
 ## Examples
index 39864b243aaeeacebdbaa53d6a2825df51ea34a4..3f9d9fc7d881bd827df95f0641f6f6608d91c5a5 100644 (file)
@@ -8,12 +8,12 @@ The search query may also contain
 which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
 This can be used to narrow down the kind of objects to be returned.
 
-!!! warning
+!!! note
     Special phrases are not suitable to query all objects of a certain type in an
     area. Nominatim will always just return a collection of the best matches. To
     download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
 
-## Parameters
+## Endpoint
 
 The search API has the following format:
 
@@ -21,35 +21,62 @@ The search API has the following format:
    https://nominatim.openstreetmap.org/search?<params>
 ```
 
-The search term may be specified with two different sets of parameters:
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/search.php`. This is now deprecated
+    and will be removed in future versions.
+
+The query term can be given in two different forms: free-form or structured.
+
+### Free-form query
+
+| Parameter | Value |
+|-----------| ----- |
+| q         | Free-form query string to search for |
 
-* `q=<query>`
+In this form, the query can be unstructured.
+Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
+[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
+[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
+Commas are optional, but improve performance by reducing the complexity of the search.
 
-    Free-form query string to search for.
-    Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
-    [pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
-    [birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
-    Commas are optional, but improve performance by reducing the complexity of the search.
+The free-form query may also contain special phrases to describe the type of
+place to be returned or a coordinate to search near.
 
-* `amenity=<name and/or type of POI>`
-* `street=<housenumber> <streetname>`
-* `city=<city>`
-* `county=<county>`
-* `state=<state>`
-* `country=<country>`
-* `postalcode=<postalcode>`
+### Structured query
 
-    Alternative query string format split into several parameters for structured requests.
-    Structured requests are faster but are less robust against alternative
-    OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
+| Parameter  | Value |
+|----------- | ----- |
+| amenity    | name and/or type of POI |
+| street     | housenumber and streetname |
+| city       | city |
+| county     | county |
+| state      | state |
+| country    | country |
+| postalcode | postal code |
 
-Both query forms accept the additional parameters listed below.
+The structured form of the search query allows you to look up an address
+that is already split into its components. Each parameter represents a field
+of the address. All parameters are optional. You should only use the ones
+that are relevant for the address you want to geocode.
+
+!!! warning
+    Cannot be combined with the `q=<query>` parameter. Newer versions of
+    the API will return an error if you do so. Older versions simply return
+    unexpected results.
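+
+A sketch of the same kind of lookup in structured form (again assuming the
+`requests` package; the address values are illustrative):
+
+``` python
+import requests
+
+resp = requests.get(
+    'https://nominatim.openstreetmap.org/search',
+    params={'street': '135 Pilkington Avenue',
+            'city': 'Birmingham',
+            'country': 'United Kingdom',
+            'format': 'jsonv2'},
+    headers={'User-Agent': 'my-geocoding-app'},
+    timeout=10,
+)
+results = resp.json()
+if results:
+    print(results[0]['display_name'])
+```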
+
+## Parameters
+
+The following parameters can be used to further restrict the search and
+change the output. They are usable for both forms of the search query.
 
 ### Output format
 
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
 
-See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
+See [Place Output Formats](Output.md) for details on each format.
 
 !!! note
     The Nominatim service at
@@ -57,52 +84,148 @@ See [Place Output Formats](Output.md) for details on each format. (Default: json
     has a different default behaviour for historical reasons. When the
     `format` parameter is omitted, the request will be forwarded to the Web UI.
 
-* `json_callback=<string>`
 
-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
+
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
+
 Only has an effect for JSON output formats.
 
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| limit     | number | 10 |
+
+Limit the maximum number of returned results. Cannot be more than 40.
+Nominatim may decide to return fewer results than requested if additional
+results do not sufficiently match the query.
+
+
 ### Output details
 
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
 
-Include a breakdown of the address into elements. (Default: 0)
+!!! tip
+    If you are interested in a stable classification of address categories
+    (suburb, city, state, etc.), have a look at the `geocodejson` format.
+    All other formats return classifications according to OSM tagging.
+    There is a much larger set of categories and they are not always consistent,
+    which makes them very hard to work with.
 
 
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
 
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.
 
 
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
 
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brands.
 
 
 ### Language of results
 
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
 
-Preferred language order for showing search results, overrides the value
-specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
 
-### Result limitation
+!!! tip
+    First-time users of Nominatim tend to be confused that they get different
+    results when using Nominatim in the browser versus in a command-line tool
+    like wget or curl. The command-line tools
+    usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+    send the currently chosen browser language.
 
-* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
+### Result restriction
 
-Limit search results to one or more countries. `<countrycode>` must be the
-[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
-e.g. `gb` for the United Kingdom, `de` for Germany.
+There are two ways to influence the results. *Filters* exclude certain
+kinds of results completely. *Boost parameters* only change the order of the
+results and thus give a preference to some results over others.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| countrycodes | comma-separated list of country codes | _unset_ |
+
+Filter that limits the search results to one or more countries.
+The country code must be the
+[ISO 3166-1 alpha-2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
+of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
 
 Each place in Nominatim is assigned to one country code based
 on OSM country boundaries. In rare cases a place may not be in any country
-at all, for example, in international waters.
+at all, for example, when it is in international waters. These places are
+also excluded when the filter is set.
+
+!!! note
+    This parameter should not be confused with the 'country' parameter of
+    the structured query. The 'country' parameter contains a search term
+    and will be handled with some fuzziness. The `countrycodes` parameter
+    is a hard filter and as such should be preferred. Having both parameters
+    in the same query will work. If the parameters contradict each other,
+    the search will come up empty.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer     | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
 
-* `exclude_place_ids=<place_id,[place_id],[place_id]`
+The layer filter allows you to select places by theme.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic POIs like
+restaurants, shops and hotels, but also less obvious features like recycling
+bins, guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
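+
+For example, to restrict a search to addresses and points of interest only:
+
+```
+https://nominatim.openstreetmap.org/search?q=birmingham&layer=address,poi&format=jsonv2
+```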
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
+
+The featureType parameter allows a more fine-grained selection of places
+from the address layer. Results can be restricted to places that make up
+the 'state', 'country' or 'city' part of an address. A featureType of
+settlement selects any human-inhabited feature from 'state' down to
+'neighbourhood'.
+
+When featureType is set, results are automatically restricted
+to the address layer (see above).
+
+!!! tip
+    Instead of using the featureType filters `country`, `state` or `city`,
+    you can also use a structured query without the finer-grained parameters
+    amenity or street.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| exclude_place_ids | comma-separated list of place ids | _unset_ |
 
 If you do not want certain OSM objects to appear in the search
 result, give a comma separated list of the `place_id`s you want to skip.
@@ -110,180 +233,212 @@ This can be used to retrieve additional search results. For example, if a
 previous query only returned a few results, then including those here would
 cause the search to return other, less accurate, matches (if possible).
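+
+A sketch of this pagination pattern (assuming the `requests` package; the
+query string is a placeholder):
+
+``` python
+import requests
+
+BASE = 'https://nominatim.openstreetmap.org/search'
+HEADERS = {'User-Agent': 'my-geocoding-app'}
+
+first = requests.get(BASE, params={'q': 'springfield', 'format': 'jsonv2'},
+                     headers=HEADERS, timeout=10).json()
+seen = ','.join(str(p['place_id']) for p in first)
+
+# exclude everything already seen to get the next best matches
+more = requests.get(BASE, params={'q': 'springfield', 'format': 'jsonv2',
+                                  'exclude_place_ids': seen},
+                    headers=HEADERS, timeout=10).json()
+```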
 
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| viewbox   | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
 
-* `limit=<integer>`
-
-Limit the number of returned results. (Default: 10, Maximum: 50)
+Boost parameter which focuses the search on the given area.
+Any two corner points of the box are accepted as long as they span a proper
+box. `x` is longitude, `y` is latitude.
 
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| bounded   | 0 or 1 | 0       |
 
-* `viewbox=<x1>,<y1>,<x2>,<y2>`
+When set to 1, the 'viewbox' parameter (see above) is turned into
+a filter parameter, excluding any results outside the viewbox.
 
-The preferred area to find search results. Any two corner points of the box
-are accepted as long as they span a real box. `x` is longitude,
-`y` is latitude.
-
-
-* `bounded=[0|1]`
-
-When a viewbox is given, restrict the result to items contained within that
-viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
-only search is allowed. Give the special keyword for the amenity in square
+When `bounded=1` is given and the viewbox is small enough, an amenity-only
+search is allowed. Give the special keyword for the amenity in square
 brackets, e.g. `[pub]` and a selection of objects of this type is returned.
-There is no guarantee that the result is complete. (Default: 0)
+There is no guarantee that the result contains all objects in the area.
 
 
 ### Polygon output
 
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml     | 0 or 1 | 0 |
+| polygon_svg     | 0 or 1 | 0 |
+| polygon_text    | 0 or 1 | 0 |
 
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.
 
-* `polygon_threshold=0.0`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| polygon_threshold | floating-point number | 0.0 |
 
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
 tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the result.
 
 ### Other
 
-* `email=<valid email address>`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| email     | valid email address | _unset_ |
 
 If you are making a large number of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
 
-* `dedupe=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| dedupe    | 0 or 1 | 1       |
 
 Sometimes you have several objects in OSM identifying the same place or
 object in reality. The simplest case is a street being split into many
 different OSM ways due to different characteristics. Nominatim will
-attempt to detect such duplicates and only return one match unless
-this parameter is set to 0. (Default: 1)
+attempt to detect such duplicates and only return one match. Setting
+this parameter to 0 disables this deduplication mechanism and
+ensures that all results are returned.
 
-* `debug=[0|1]`
+| Parameter | Value  | Default |
+|-----------| -----  | ------- |
+| debug     | 0 or 1 | 0       |
 
 Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
-
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
 
 
 ## Examples
 
 
-##### XML with kml polygon
+##### XML with KML polygon
 
-* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
+* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
 
 ```xml
-  <searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
-    <place
-      place_id="1620612" osm_type="node" osm_id="452010817"
-      boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
-      lat="52.5487429714954" lon="-1.81602098644987"
-      display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
-      class="place" type="house">
-      <geokml>
-        <Polygon>
-          <outerBoundaryIs>
-            <LinearRing>
-              <coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
-            </LinearRing>
-          </outerBoundaryIs>
-        </Polygon>
-      </geokml>
-      <house_number>135</house_number>
-      <road>Pilkington Avenue</road>
-      <village>Wylde Green</village>
-      <town>Sutton Coldfield</town>
-      <city>City of Birmingham</city>
-      <county>West Midlands (county)</county>
-      <postcode>B72</postcode>
-      <country>United Kingdom</country>
-      <country_code>gb</country_code>
-    </place>
-  </searchresults>
+<?xml version="1.0" encoding="UTF-8" ?>
+<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
+               attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
+               querystring="135 pilkington avenue, birmingham"
+               more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&amp;polygon_kml=1&amp;addressdetails=1&amp;limit=20&amp;exclude_place_ids=125279639&amp;format=xml"
+               exclude_place_ids="125279639">
+  <place place_id="125279639"
+         osm_type="way"
+         osm_id="90394480"
+         lat="52.5487921"
+         lon="-1.8164308"
+         boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
+         place_rank="30"
+         address_rank="30"
+         display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
+         class="building"
+         type="residential"
+         importance="9.999999994736442e-08">
+    <geokml>
+      <Polygon>
+        <outerBoundaryIs>
+          <LinearRing>
+            <coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
+          </LinearRing>
+        </outerBoundaryIs>
+      </Polygon>
+    </geokml>
+    <house_number>135</house_number>
+    <road>Pilkington Avenue</road>
+    <hamlet>Maney</hamlet>
+    <town>Sutton Coldfield</town>
+    <village>Wylde Green</village>
+    <city>Birmingham</city>
+    <ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
+    <state_district>West Midlands Combined Authority</state_district>
+    <state>England</state>
+    <ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
+    <postcode>B72 1LH</postcode>
+    <country>United Kingdom</country>
+    <country_code>gb</country_code>
+  </place>
+</searchresults>
 ```
 
 ##### JSON with SVG polygon
 
-[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
+[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
 
 ```json
-    {
-        "address": {
-            "city": "Berlin",
-            "city_district": "Mitte",
-            "construction": "Unter den Linden",
-            "continent": "European Union",
-            "country": "Deutschland",
-            "country_code": "de",
-            "house_number": "1",
-            "neighbourhood": "Scheunenviertel",
-            "postcode": "10117",
-            "public_building": "Kommandantenhaus",
-            "state": "Berlin",
-            "suburb": "Mitte"
-        },
-        "boundingbox": [
-            "52.5170783996582",
-            "52.5173187255859",
-            "13.3975105285645",
-            "13.3981599807739"
-        ],
-        "class": "amenity",
-        "display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
-        "importance": 0.73606775332943,
-        "lat": "52.51719785",
-        "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
-        "lon": "13.3978352028938",
-        "osm_id": "15976890",
-        "osm_type": "way",
-        "place_id": "30848715",
-        "svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
-        "type": "public_building"
-    }
+[
+  {
+    "address": {
+      "ISO3166-2-lvl4": "DE-BE",
+      "borough": "Mitte",
+      "city": "Berlin",
+      "country": "Deutschland",
+      "country_code": "de",
+      "historic": "Kommandantenhaus",
+      "house_number": "1",
+      "neighbourhood": "Friedrichswerder",
+      "postcode": "10117",
+      "road": "Unter den Linden",
+      "suburb": "Mitte"
+    },
+    "boundingbox": [
+      "52.5170798",
+      "52.5173311",
+      "13.3975116",
+      "13.3981577"
+    ],
+    "class": "historic",
+    "display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
+    "importance": 0.8135042058306902,
+    "lat": "52.51720765",
+    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+    "lon": "13.397834399325466",
+    "osm_id": 15976890,
+    "osm_type": "way",
+    "place_id": 108681845,
+    "svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
+    "type": "house"
+  }
+]
 ```
 
 ##### JSON with address details
 
-[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
+[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
 
 ```json
-    {
-        "address": {
-            "bakery": "B\u00e4cker Kamps",
-            "city_district": "Mitte",
-            "continent": "European Union",
-            "country": "Deutschland",
-            "country_code": "de",
-            "footway": "Bahnsteig U6",
-            "neighbourhood": "Sprengelkiez",
-            "postcode": "13353",
-            "state": "Berlin",
-            "suburb": "Wedding"
-        },
-        "boundingbox": [
-            "52.5460929870605",
-            "52.5460968017578",
-            "13.3591794967651",
-            "13.3591804504395"
-        ],
-        "class": "shop",
-        "display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
-        "icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
-        "importance": 0.201,
-        "lat": "52.5460941",
-        "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
-        "lon": "13.35918",
-        "osm_id": "317179427",
-        "osm_type": "node",
-        "place_id": "1453068",
-        "type": "bakery"
-    }
+[
+  {
+    "address": {
+      "ISO3166-2-lvl4": "DE-BE",
+      "borough": "Mitte",
+      "city": "Berlin",
+      "country": "Deutschland",
+      "country_code": "de",
+      "neighbourhood": "Sprengelkiez",
+      "postcode": "13347",
+      "road": "Lindower Straße",
+      "shop": "Ditsch",
+      "suburb": "Wedding"
+    },
+    "addresstype": "shop",
+    "boundingbox": [
+      "52.5427201",
+      "52.5427654",
+      "13.3668619",
+      "13.3669442"
+    ],
+    "category": "shop",
+    "display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
+    "importance": 9.99999999995449e-06,
+    "lat": "52.54274275",
+    "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
+    "lon": "13.36690305710228",
+    "name": "Ditsch",
+    "osm_id": 437595031,
+    "osm_type": "way",
+    "place_id": 204751033,
+    "place_rank": 30,
+    "type": "bakery"
+  }
+]
 ```
 
 ##### GeoJSON
index 1a5ff0a8465f6699b7eb5ea9460f69522ed67b54..a34c86c1d447e86906c498a4510b0a74e6b4fdee 100644 (file)
@@ -1,35 +1,50 @@
 # Status
 
-Useful for checking if the service and database is running. The JSON output also shows
+Report on the state of the service and database. Useful for checking if the
+service is up and running. The JSON output also reports
 when the database was last updated.
 
+## Endpoint
+
+The status API has the following format:
+
+```
+https://nominatim.openstreetmap.org/status
+```
+
+!!! danger "Deprecation warning"
+    The API can also be used with the URL
+    `https://nominatim.openstreetmap.org/status.php`. This is now deprecated
+    and will be removed in future versions.
+
+
 ## Parameters
 
-* `format=[text|json]` (defaults to 'text')
+The status endpoint takes a single optional parameter:
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format    | one of: `text`, `json` | `text` |
+
+Selects the output format. See below.
 
 
 ## Output
 
 #### Text format
 
-```
-   https://nominatim.openstreetmap.org/status.php
-```
-
-will return HTTP status code 200 and print `OK`.
+When everything is okay, HTTP status code 200 is returned together with the simple message `OK`.
 
-On error it will return HTTP status code 500 and print a message, e.g.
+On error it will return HTTP status code 500 and print a detailed error message, e.g.
 `ERROR: Database connection failed`.
 
 
 
 #### JSON format
 
-```
-   https://nominatim.openstreetmap.org/status.php?format=json
-```
+Always returns HTTP status code 200 when the status call could be executed.
 
-will return HTTP code 200 and a structure
+On success, a JSON dictionary with the following structure is returned:
 
 ```json
   {
@@ -45,8 +60,8 @@ The `software_version` field contains the version of Nominatim used to serve
 the API. The `database_version` field contains the version of the data format
 in the database.
 
-On error will also return HTTP status code 200 and a structure with error
-code and message, e.g.
+On error, a shorter JSON dictionary with only the error message
+and status is returned, e.g.
 
 ```json
    {
@@ -54,14 +69,3 @@ code and message, e.g.
        "message": "Database connection failed"
    }
 ```
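+
+A minimal liveness check in Python (a sketch, assuming the `requests`
+package; field names follow the JSON structure described above):
+
+``` python
+import requests
+
+resp = requests.get('https://nominatim.openstreetmap.org/status',
+                    params={'format': 'json'}, timeout=10)
+data = resp.json()
+if data.get('message') == 'OK':
+    # 'data_updated' is assumed from the structure shown above
+    print('service is up, data updated:', data.get('data_updated'))
+else:
+    print('service reports:', data.get('message'))
+```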
-
-Possible status codes are
-
-|     | message                        | notes                                                             |
-| --- | ------------------------------ | ----------------------------------------------------------------- |
-| 700 | "No database"                  | connection failed                                                 |
-| 701 | "Module failed"                | database could not load nominatim.so                              |
-| 702 | "Module call failed"           | nominatim.so loaded but calling a function failed                 |
-| 703 | "Query failed"                 | test query against a database table failed                        |
-| 704 | "No value"                     | test query worked but returned no results                         |
-| 705 | "Import date is not available" | No import dates were returned (enabling replication can fix this) |
index bb552744ddc3aec13ab3637e4d927e073221f81b..8245e309f1a476f0fba05d1df857002ddd6e06c3 100644 (file)
@@ -91,7 +91,7 @@ The option is only used by the Legacy tokenizer and ignored otherwise.
 | --------------     | --------------------------------------------------- |
 | **Description:**   | Tokenizer used for normalizing and parsing queries and names |
 | **Format:**        | string |
-| **Default:**       | legacy |
+| **Default:**       | icu |
 | **After Changes:** | cannot be changed after import |
 
 Sets the tokenizer type to use for the import. For more information on
@@ -148,29 +148,6 @@ Setting this option to 'yes' means that Nominatim skips reindexing of contained
 objects when the area becomes too large.
 
 
-#### NOMINATIM_UPDATE_FORWARD_DEPENDENCIES
-
-| Summary            |                                                     |
-| --------------     | --------------------------------------------------- |
-| **Description:**   | Forward geometry changes to dependet objects |
-| **Format:**        | bool |
-| **Default:**       | no |
-| **Comment:**       | EXPERT ONLY. Must not be enabled after import. |
-
-The geometry of OSM ways and relations may change when a node that is part
-of the object is moved around. These changes are not propagated per default.
-The geometry of ways/relations is only updated the next time that the object
-itself is touched. When enabling this option, then dependent objects will
-be marked for update when one of its member objects changes.
-
-Enabling this option may slow down updates significantly.
-
-!!! warning
-    If you want to enable this option, it must be set already on import.
-    Do not enable this option on an existing database that was imported with
-    NOMINATIM_UPDATE_FORWARD_DEPENDENCIES=no.
-    Updates will become unusably slow.
-
 #### NOMINATIM_LANGUAGES
 
 | Summary            |                                                     |
@@ -575,6 +552,8 @@ used.
 | **Format:**        | boolean |
 | **Default:**       | no |
 | **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
+
 
 This feature is currently undocumented and potentially broken.
 
@@ -587,6 +566,7 @@ This feature is currently undocumented and potentially broken.
 | **Format:**        | integer |
 | **Default:**       | 500 |
 | **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
 
 This setting defines the threshold over which a name is no longer considered
 as rare. When searching for places with rare names, only the name is used
@@ -627,6 +607,88 @@ with a single query.
 
 Setting this parameter to 0 disables polygon output completely.
 
+
+#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Disable search for elements that are not in the country grid |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:**       | PHP frontend only |
+
+Enable to restrict the search to elements within countries.
+
+When enabled, a result whose coordinates do not fall within the static grid
+of countries is not returned, even if a matching region geometry was found.
+"Unable to geocode" is returned instead.
+
+
+#### NOMINATIM_SERVE_LEGACY_URLS
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable serving via URLs with a .php suffix |
+| **Format:**        | boolean |
+| **Default:**       | yes |
+| **Comment:**       | Python frontend only |
+
+When enabled, endpoints are reachable as `/<name>` as well as `/<name>.php`.
+This can be useful when you want to be backwards-compatible with previous
+versions of Nominatim.
+
+
+#### NOMINATIM_API_POOL_SIZE
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Number of parallel database connections per worker |
+| **Format:**        | number |
+| **Default:**       | 10 |
+| **Comment:**       | Python frontend only |
+
+Sets the maximum number of database connections available for a single instance
+of Nominatim. When configuring the maximum number of connections that your
+PostgreSQL database can handle, you need at least
+`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
+For configuring the number of workers, refer to the section about
+[Deploying the Python frontend](../admin/Deployment-Python.md).
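+
+For example, with the default pool size of 10 and an assumed 4 workers,
+PostgreSQL must allow at least 10 * 4 = 40 connections
+(`max_connections` in `postgresql.conf`).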
+
+#### NOMINATIM_QUERY_TIMEOUT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Timeout for SQL queries to the database |
+| **Format:**        | number (seconds) |
+| **Default:**       | 10 |
+| **Comment:**       | Python frontend only |
+
+When this timeout is set, all SQL queries that run longer than the
+specified number of seconds will be cancelled and the user receives a
+timeout exception. Users of the API see a 503 HTTP error.
+
+The timeout does not apply when using the
+[low-level DB access](../library/Low-Level-DB-Access.md)
+of the library. A timeout can be set manually there, if required.
+
+
+#### NOMINATIM_REQUEST_TIMEOUT
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Timeout for search queries |
+| **Format:**        | number (seconds) |
+| **Default:**       | 60 |
+| **Comment:**       | Python frontend only |
+
+When this timeout is set, a search query will stop sending queries
+to the database after the timeout has passed and immediately return the
+results gathered so far.
+
+Note that under high load you may observe that users receive different results
+than usual without seeing an error. This may cause some confusion.
+
 ### Logging Settings
 
 #### NOMINATIM_LOG_DB
@@ -670,3 +732,20 @@ given in seconds and corresponds to the time the query took executing in PHP.
 type contains the name of the endpoint used.
 
 Can be used at the same time as NOMINATIM_LOG_DB.
+
+#### NOMINATIM_DEBUG_SQL
+
+| Summary            |                                                     |
+| --------------     | --------------------------------------------------- |
+| **Description:**   | Enable printing of raw SQL by SQLAlchemy |
+| **Format:**        | boolean |
+| **Default:**       | no |
+| **Comment:**       | **For developers only.** |
+
+This setting enables
+[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
+by SQLAlchemy. This can be helpful when debugging problems with internal
+query handling. It should only be used together with the CLI query functions.
+Enabling it for server mode may have unintended consequences. Use the `debug`
+parameter instead, which prints information on how the search is executed
+including SQL statements.
index 11c27e38b903ae0683ace099f417ec16b1077bc8..2c7b687834ba7cd53308833d6179586813ba29aa 100644 (file)
@@ -176,58 +176,66 @@ The following is a list of sanitizers that are shipped with Nominatim.
 ##### split-name-list
 
 ::: nominatim.tokenizer.sanitizers.split_name_list
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### strip-brace-terms
 
 ::: nominatim.tokenizer.sanitizers.strip_brace_terms
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### tag-analyzer-by-language
 
 ::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-housenumbers
 
 ::: nominatim.tokenizer.sanitizers.clean_housenumbers
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-postcodes
 
 ::: nominatim.tokenizer.sanitizers.clean_postcodes
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 ##### clean-tiger-tags
 
 ::: nominatim.tokenizer.sanitizers.clean_tiger_tags
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
 
 #### delete-tags
 
 ::: nominatim.tokenizer.sanitizers.delete_tags
-    selection:
+    options:
         members: False
-    rendering:
         heading_level: 6
+        docstring_section_style: spacy
+
+#### tag-japanese
+
+::: nominatim.tokenizer.sanitizers.tag_japanese
+    options:
+        members: False
+        heading_level: 6
+        docstring_section_style: spacy
 
 #### Token Analysis
 
index d0369ea13839e5123128b4360e9fae8b21776807..643064549cae9e3291fc9ecc9dca62ad8a687f0e 100644 (file)
@@ -47,8 +47,8 @@ depending on your choice of webserver framework:
 The documentation is built with mkdocs:
 
 * [mkdocs](https://www.mkdocs.org/) >= 1.1.2
-* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
-* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
+* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
 
 ### Installing prerequisites on Ubuntu/Debian
 
index 2cf30a5699f7863db7db1f7eb04f7fa2ae3bf1b5..63b1c3c1db5833f1f53225252f424d426e0941b8 100644 (file)
@@ -53,8 +53,7 @@ the function.
 ### Sanitizer configuration
 
 ::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
-    rendering:
-        show_source: no
+    options:
         heading_level: 6
 
 ### The main filter function of the sanitizer
@@ -62,12 +61,10 @@ the function.
 The filter function receives a single object of type `ProcessInfo`
 which has three members:
 
- * `place`: read-only information about the place being processed.
+ * `place: PlaceInfo`: read-only information about the place being processed.
    See PlaceInfo below.
- * `names`: The current list of names for the place. Each name is a
-   PlaceName object.
- * `address`: The current list of address names for the place. Each name
-   is a PlaceName object.
+ * `names: List[PlaceName]`: The current list of names for the place.
+ * `address: List[PlaceName]`: The current list of address names for the place.
 
 While the `place` member is provided for information only, the `names` and
 `address` lists are meant to be manipulated by the sanitizer. It may add and
@@ -77,16 +74,14 @@ adding extra attributes) or completely replace the list with a different one.
 #### PlaceInfo - information about the place
 
 ::: nominatim.data.place_info.PlaceInfo
-    rendering:
-        show_source: no
+    options:
         heading_level: 6
 
 
 #### PlaceName - extended naming information
 
 ::: nominatim.data.place_name.PlaceName
-    rendering:
-        show_source: no
+    options:
         heading_level: 6
 
 
@@ -145,14 +140,12 @@ They can be found in the directory
 ## Custom token analysis module
 
 ::: nominatim.tokenizer.token_analysis.base.AnalysisModule
-    rendering:
-        show_source: no
+    options:
         heading_level: 6
 
 
 ::: nominatim.tokenizer.token_analysis.base.Analyzer
-    rendering:
-        show_source: no
+    options:
         heading_level: 6
 
 ### Example: Creating acronym variants for long names
index eb0d4ea2e513b030a9dd3e09262196e5875d83ed..050371771c27eb21ffa7158efbc261e8ae35154d 100644 (file)
@@ -134,14 +134,14 @@ All tokenizers must inherit from `nominatim.tokenizer.base.AbstractTokenizer`
 and implement the abstract functions defined there.
 
 ::: nominatim.tokenizer.base.AbstractTokenizer
-    rendering:
-        heading_level: 4
+    options:
+        heading_level: 6
 
 ### Python Analyzer Class
 
 ::: nominatim.tokenizer.base.AbstractAnalyzer
-    rendering:
-        heading_level: 4
+    options:
+        heading_level: 6
 
 ### PL/pgSQL Functions
 
index 3aecf2ef750e2eae298708eb7e2b8b20cdc9d8ac..155fa1aa71fcaf91bbbd66792a87110b206569a9 100644 (file)
@@ -2,6 +2,10 @@
     display: none!important
 }
 
+.wy-nav-content {
+    max-width: 900px!important
+}
+
 table {
     margin-bottom: 12pt
 }
@@ -16,9 +20,17 @@ th {
 
 .doc-object h6 {
     margin-bottom: 0.8em;
-    font-size: 120%;
+    font-size: 130%;
 }
 
 .doc-object {
     margin-bottom: 1.3em;
 }
+
+.doc-children .doc-contents {
+    margin-left: 3em;
+}
+
+.md-footer__inner {
+    display: none;
+}
index 0ed6c54652e42e48fb693635031f814b78417b6c..0b479a17f7bfacc2b4939049a03876752d147b48 100644 (file)
@@ -1,10 +1,15 @@
-Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
+Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
+address and to generate synthetic addresses of OSM points (reverse geocoding).
+It also has limited capability to search for features by their type
+(pubs, hotels, churches, etc.).
 
-This guide comes in four parts:
+This guide comes in five parts:
 
  * __[API reference](api/Overview.md)__ for users of Nominatim
  * __[Administration Guide](admin/Installation.md)__ for those who want
    to install their own Nominatim server
  * __[Customization Guide](customize/Overview.md)__ for those who want to
    adapt their own installation to their special requirements
+ * __[Library Guide](library/Getting-Started.md)__ for Python developers who
+   want to use Nominatim as a library in their project
  * __[Developer's Guide](develop/overview.md)__ for developers of the software
diff --git a/docs/library/Configuration.md b/docs/library/Configuration.md
new file mode 100644 (file)
index 0000000..b97c2cc
--- /dev/null
@@ -0,0 +1,31 @@
+# Configuration
+
+When using Nominatim through the library, it can be configured in exactly
+the same way as when running as a service. This means that you should have
+created a [project directory](../admin/Import.md#creating-the-project-directory)
+which contains all files belonging to the Nominatim instance. It can also contain
+an `.env` file with configuration options. Setting configuration parameters
+via environment variables works as well.
+
+Configuration options are resolved in the following order:
+
+* from the OS environment (or the dictionary given in `environ`,
+  see NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__)
+* from the .env file in the project directory of the installation
+* from the default installation in the configuration directory
+
+For more information on configuration via dotenv and a list of possible
+configuration parameters, see the [Configuration page](../customize/Settings.md).
+
+
+## `Configuration` class
+
+::: nominatim.config.Configuration
+    options:
+        members:
+            - get_bool
+            - get_int
+            - get_str_list
+            - get_path
+        heading_level: 6
+        show_signature_annotations: True
diff --git a/docs/library/Getting-Started.md b/docs/library/Getting-Started.md
new file mode 100644 (file)
index 0000000..dd16b11
--- /dev/null
@@ -0,0 +1,248 @@
+# Getting Started
+
+The Nominatim search frontend can be used directly as a Python library in
+scripts and applications. When you have imported your own Nominatim database,
+it is no longer necessary to run a full web service for it and access
+the database through HTTP requests. There are
+also fewer constraints on the kinds of data that can be accessed. The library
+gives access to more detailed information about the objects saved
+in the database.
+
+!!! danger
+    The library interface is currently in an experimental stage. There might
+    be some smaller adjustments to the public interface until the next version.
+
+    The library also lacks a proper installation routine, so some manipulation
+    of the PYTHONPATH is required. At the moment, use is only recommended for
+    developers with some experience in Python.
+
+## Installation
+
+To use the Nominatim library, you need access to a local Nominatim database.
+Follow the [installation](../admin/Installation.md) and
+[import](../admin/Import.md) instructions to set up your database.
+
+It is not yet possible to install it in the usual way via pip or inside a
+virtualenv. To get access to the library you need to set an appropriate
+`PYTHONPATH`. With the default installation, the python library can be found
+under `/usr/local/share/nominatim/lib-python`. If you have installed
+Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
+You can also point the `PYTHONPATH` to the Nominatim source code.
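+
+A minimal sketch of the path setup from within a script (the directory
+assumes the default installation prefix):
+
+``` python
+import sys
+
+sys.path.insert(0, '/usr/local/share/nominatim/lib-python')
+
+import nominatim.api as napi  # resolvable once the path is set
+```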
+
+### A simple search example
+
+To query the Nominatim database you first need to set up a connection. This
+is done by creating a Nominatim API object. This object exposes all the
+search functions of Nominatim that are also known from its web API.
+
+This code snippet implements a simple search for the town of 'Brugge':
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'))
+
+            return await api.search(query)
+
+        results = asyncio.run(search('Brugge'))
+        if not results:
+            print('Cannot find Brugge')
+        else:
+            print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        api = napi.NominatimAPI(Path('.'))
+
+        results = api.search('Brugge')
+
+        if not results:
+            print('Cannot find Brugge')
+        else:
+            print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+        ```
+
+The Nominatim library is designed around
+[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
+provides you with an interface of coroutines.
+If you have many requests to make, coroutines can speed up your applications
+significantly.
+
+For smaller scripts there is also a synchronous wrapper around the API. By
+using `NominatimAPI`, you get exactly the same interface using classic functions.
+
+The examples in this chapter will always showcase both
+implementations. The documentation itself will usually refer only to
+'Nominatim API class' when both flavours are meant. If a functionality is
+available only for the synchronous or asynchronous version, this will be
+explicitly mentioned.
+
+### Defining which database to use
+
+The [Configuration](../admin/Import.md#configuration-setup-in-env)
+section explains how Nominatim is configured using the
+[dotenv](https://github.com/theskumar/python-dotenv) library.
+The same configuration mechanism is used with the
+Nominatim API library. You should therefore make sure you are familiar with
+that section.
+
+The constructor of the 'Nominatim API class' takes one mandatory parameter:
+the path to the [project directory](../admin/Import.md#creating-the-project-directory).
+You should have set up this directory as part of the Nominatim import.
+Any configuration found in the `.env` file in this directory will
+automatically be used.
+
+You may also configure Nominatim by setting environment variables.
+Normally, Nominatim will check the operating system environment. This can be
+overridden by giving the constructor a dictionary of configuration parameters.
+
+Let us look up 'Brugge' in the special database named 'belgium' instead of the
+standard 'nominatim' database:
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        config_params = {
+            'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+        }
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
+
+            return await api.search(query)
+
+        results = asyncio.run(search('Brugge'))
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        config_params = {
+            'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+        }
+
+        api = napi.NominatimAPI(Path('.'), environ=config_params)
+
+        results = api.search('Brugge')
+        ```
+
+### Presenting results to humans
+
+All search functions return the raw results from the database. There is no
+full human-readable label. To create such a label, you need two things:
+
+* the address details of the place
+* the result adapted to the language you wish to use for display
+
+Again searching for 'Brugge', this time with a nicely formatted result:
+
+!!! example
+    === "NominatimAPIAsync"
+        ``` python
+        from pathlib import Path
+        import asyncio
+
+        import nominatim.api as napi
+
+        async def search(query):
+            api = napi.NominatimAPIAsync(Path('.'))
+
+            return await api.search(query, address_details=True)
+
+        results = asyncio.run(search('Brugge'))
+
+        locale = napi.Locales(['fr', 'en'])
+        for i, result in enumerate(results):
+            address_parts = result.address_rows.localize(locale)
+            print(f"{i + 1}. {', '.join(address_parts)}")
+        ```
+
+    === "NominatimAPI"
+        ``` python
+        from pathlib import Path
+
+        import nominatim.api as napi
+
+        api = napi.NominatimAPI(Path('.'))
+
+        results = api.search('Brugge', address_details=True)
+
+        locale = napi.Locales(['fr', 'en'])
+        for i, result in enumerate(results):
+            address_parts = result.address_rows.localize(locale)
+            print(f"{i + 1}. {', '.join(address_parts)}")
+        ```
+
+To request information about the address of a result, add the optional
+parameter 'address_details' to your search:
+
+``` python
+>>> results = api.search('Brugge', address_details=True)
+```
+
+An additional field `address_rows` will be set in the results that are returned.
+It contains a list of all places that make up the address of the place. For
+simplicity, this includes name and house number of the place itself. With
+the names in this list it is possible to create a human-readable description
+of the result. To do that, you first need to decide in which language the
+results should be presented. As with the names in the result itself, the
+places in `address_rows` contain all available name translations for each row.
+
+The library has a helper class `Locales` which helps to extract the name of a
+place in the preferred language. It takes a single parameter with a list
+of language codes in the order of preference. So
+
+``` python
+locale = napi.Locales(['fr', 'en'])
+```
+
+creates a helper object that returns the name preferably in French. If that is
+not possible, it tries English and eventually falls back to the default `name`
+or `ref`.
+
+The `Locales` object can be applied to a name dictionary to return the best-matching
+name out of it:
+
+``` python
+>>> print(locale.display_name(results[0].names))
+'Bruges'
+```
+
+The `address_rows` field has a helper function to apply the localization to all
+its members and save the result in the `local_name` field. It also returns
+all the localized names as a convenient simple list. This list can be used
+to create a human-readable output:
+
+``` python
+>>> address_parts = results[0].address_rows.localize(locale)
+>>> print(', '.join(address_parts))
+Bruges, Flandre-Occidentale, Flandre, Belgique
+```
+
+This is a fairly simple way to create a human-readable description. The
+place information in `address_rows` contains further information about each
+place. For example, which OSM `admin_level` was used, what category the place
+belongs to or what rank Nominatim has assigned. Use this to adapt the output
+to local address formats.
+
+For more information on address rows, see
+[detailed address description](Result-Handling.md#detailed-address-description).
diff --git a/docs/library/Input-Parameter-Types.md b/docs/library/Input-Parameter-Types.md
new file mode 100644 (file)
index 0000000..9227dc3
--- /dev/null
@@ -0,0 +1,62 @@
+# Input Parameter Types
+
+This page describes in more detail some of the input parameter types used
+in the query functions of the API object.
+
+## Place identification
+
+The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
+[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
+require references to places in the database. Below the possible
+types for place identification are listed. All types are dataclasses.
+
+### PlaceID
+
+::: nominatim.api.PlaceID
+    options:
+        heading_level: 6
+
+### OsmID
+
+::: nominatim.api.OsmID
+    options:
+        heading_level: 6
+
+## Geometry types
+
+::: nominatim.api.GeometryFormat
+    options:
+        heading_level: 6
+        members_order: source
+
+## Geometry input
+
+### Point
+
+::: nominatim.api.Point
+    options:
+        heading_level: 6
+        show_signature_annotations: True
+
+### Bbox
+
+::: nominatim.api.Bbox
+    options:
+        heading_level: 6
+        show_signature_annotations: True
+        members_order: source
+        group_by_category: False
+
+## Layers
+
+Layers allow restricting the search result to thematic groups. This is
+orthogonal to restriction by address ranks, which groups places by their
+geographic extent.
+
+
+::: nominatim.api.DataLayer
+    options:
+        heading_level: 6
+        members_order: source
+
+
diff --git a/docs/library/Low-Level-DB-Access.md b/docs/library/Low-Level-DB-Access.md
new file mode 100644 (file)
index 0000000..acd93fd
--- /dev/null
@@ -0,0 +1,56 @@
+# Low-level connections
+
+The `NominatimAPIAsync` class allows direct access to the underlying
+database connection to explore the raw data. Nominatim uses
+[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
+refer to the documentation of the library to understand how to write SQL.
+
+To get access to a search connection, use the `begin()` function of your
+API object. This returns a `SearchConnection` object described below
+wrapped in a context manager. Its
+`t` property has definitions for all Nominatim search tables. For an
+overview of available tables, refer to the
+[Development Layout](../develop/Database-Layout.md) in the development
+chapter. Note that only tables that are needed for search are accessible
+as SQLAlchemy tables.
+
+!!! warning
+    The database layout is not part of the API definition and may change
+    without notice. If you play with the low-level access functions, you
+    need to be prepared for such changes.
+
+Here is a simple example, which prints how many places are available in
+the placex table:
+
+``` python
+import asyncio
+from pathlib import Path
+import sqlalchemy as sa
+from nominatim.api import NominatimAPIAsync
+
+async def print_table_size():
+    api = NominatimAPIAsync(Path('.'))
+
+    async with api.begin() as conn:
+        cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
+        print(f'placex table has {cnt} rows.')
+
+asyncio.run(print_table_size())
+```
+
+!!! warning
+    Low-level connections may only be used to read data from the database.
+    Do not use them to add or modify data or you might break Nominatim's
+    normal functions.
+
+## SearchConnection class
+
+::: nominatim.api.SearchConnection
+    options:
+        members:
+            - scalar
+            - execute
+            - get_class_table
+            - get_db_property
+            - get_property
+        heading_level: 6
diff --git a/docs/library/NominatimAPI.md b/docs/library/NominatimAPI.md
new file mode 100644 (file)
index 0000000..0fa9d65
--- /dev/null
@@ -0,0 +1,36 @@
+# The Nominatim API classes
+
+The API classes are the core objects of the search library. Always instantiate
+one of these classes first. The API classes are **not threadsafe**. You need
+to instantiate a separate instance for each thread.
+
+### NominatimAPI
+
+::: nominatim.api.NominatimAPI
+    options:
+        members:
+            - __init__
+            - config
+            - close
+            - status
+            - details
+            - lookup
+            - reverse
+            - search
+            - search_address
+            - search_category
+        heading_level: 6
+        group_by_category: False
+
+
+### NominatimAPIAsync
+
+::: nominatim.api.NominatimAPIAsync
+    options:
+        members:
+            - __init__
+            - setup_database
+            - close
+            - begin
+        heading_level: 6
+        group_by_category: False
diff --git a/docs/library/Result-Handling.md b/docs/library/Result-Handling.md
new file mode 100644 (file)
index 0000000..e2803f9
--- /dev/null
@@ -0,0 +1,58 @@
+# Result handling
+
+The search functions of the Nominatim API always return a result object
+with the raw information about the place that is available in the
+database. This section discusses data types used in the results and utility
+functions that allow further processing of the results.
+
+## Result fields
+
+### Sources
+
+Nominatim takes the result data from multiple sources. The `source_table` field
+in the result describes from which source the result was retrieved.
+
+::: nominatim.api.SourceTable
+    options:
+        heading_level: 6
+        members_order: source
+
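+For example, results from the main OSM data can be singled out like this
+(a sketch; `result` stands for a result object obtained from one of the
+search functions):
+
+```python
+import nominatim.api as napi
+
+# 'result' is assumed to be a result object from one of the search functions.
+if result.source_table == napi.SourceTable.PLACEX:
+    print('Result is a standard OSM object.')
+```
+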
+### Detailed address description
+
+When the `address_details` parameter is set, the functions return not
+only information about the result place itself but also about the places
+that make up its address. This information is almost always required when
+you want to present the user with a human-readable description of the result.
+See also [Localization](#localization) below.
+
+The address details are available in the `address_rows` field as an ordered
+list of `AddressLine` objects with the country information last. The list also
+contains the result place itself and some artificial entries, for example,
+for the house number or the country code. This makes processing and creating
+a full address easier.
+
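+As a sketch of how the list might be flattened into a single label
+(assuming a `result` obtained with `address_details=True`; the fields of
+`AddressLine` are documented below):
+
+```python
+# Build a simple comma-separated address label from the address rows.
+parts = []
+for line in result.address_rows:
+    # Only keep lines that are marked as part of the address.
+    if line.isaddress and line.names:
+        # Pick an arbitrary name; see Localization below for a proper
+        # language-aware selection.
+        parts.append(next(iter(line.names.values())))
+print(', '.join(parts))
+```
+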
+::: nominatim.api.AddressLine
+    options:
+        heading_level: 6
+        members_order: source
+
+### Detailed search terms
+
+When the `keywords` parameter is set, the `details` function can return
+detailed information about which search terms may be used to find a place.
+The search terms are split into terms for the name of the place and terms
+for its address.
+
+::: nominatim.api.WordInfo
+    options:
+        heading_level: 6
+
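+Below is a sketch of how the keywords might be inspected. The OSM way id is
+a placeholder and the `word_token` field is taken from the `WordInfo`
+reference above:
+
+```python
+from pathlib import Path
+
+import nominatim.api as napi
+
+api = napi.NominatimAPI(Path('.'))
+try:
+    # Request keyword information for a (placeholder) OSM way.
+    place = api.details(napi.OsmID('W', 50637691), keywords=True)
+    if place is not None and place.name_keywords is not None:
+        print([w.word_token for w in place.name_keywords])
+finally:
+    api.close()
+```
+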
+## Localization
+
+Results are always returned with the full list of available names.
+
+### Locale
+
+::: nominatim.api.Locales
+    options:
+        heading_level: 6
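+
+For example, a sketch of picking a localised name, assuming that `Locales`
+accepts a list of preferred language codes and that `display_name()` selects
+the best match, as described in the reference above:
+
+```python
+import nominatim.api as napi
+
+locale = napi.Locales(['fr', 'en'])
+# 'result' is assumed to be a result object from one of the search functions.
+print(locale.display_name(result.names))
+```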
index c2a8d393302bb56d93e9397c5cfd697f9b81c7df..3301356d71577f08f16bec798d3539549c4137e8 100644 (file)
@@ -1,5 +1,9 @@
-site_name: Nominatim Documentation
-theme: readthedocs
+site_name: Nominatim Manual
+theme:
+  name: material
+  features:
+    - navigation.tabs
+copyright: Copyright &copy; Nominatim developer community
 docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
 site_url: https://nominatim.org
 repo_url: https://github.com/openstreetmap/Nominatim
@@ -18,7 +22,8 @@ nav:
         - 'Basic Installation': 'admin/Installation.md'
         - 'Import' : 'admin/Import.md'
         - 'Update' : 'admin/Update.md'
-        - 'Deploy' : 'admin/Deployment.md'
+        - 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
+        - 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
         - 'Nominatim UI'  : 'admin/Setup-Nominatim-UI.md'
         - 'Advanced Installations' : 'admin/Advanced-Installations.md'
         - 'Maintenance' : 'admin/Maintenance.md'
@@ -35,6 +40,13 @@ nav:
         - 'Special Phrases': 'customize/Special-Phrases.md'
         - 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
         - 'External data: Postcodes': 'customize/Postcodes.md'
+    - 'Library Guide':
+        - 'Getting Started': 'library/Getting-Started.md'
+        - 'Nominatim API class': 'library/NominatimAPI.md'
+        - 'Configuration': 'library/Configuration.md'
+        - 'Input Parameter Types': 'library/Input-Parameter-Types.md'
+        - 'Result Handling': 'library/Result-Handling.md'
+        - 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
     - 'Developers Guide':
         - 'Architecture Overview' : 'develop/overview.md'
         - 'Database Layout' : 'develop/Database-Layout.md'
@@ -51,6 +63,8 @@ markdown_extensions:
     - codehilite
     - admonition
     - pymdownx.superfences
+    - pymdownx.tabbed:
+        alternate_style: true
     - def_list
     - toc:
         permalink: 
@@ -59,7 +73,8 @@ plugins:
     - search
     - mkdocstrings:
         handlers:
-          python-legacy:
-            rendering:
-              show_source: false
-              show_signature_annotations: false
+          python:
+            paths: ["${PROJECT_SOURCE_DIR}"]
+            options:
+              show_source: False
+              show_bases: False
index 76a093c4a98b79170b3884c4f89c3719f60a247e..895a30dfb8a5890b605a27ce022b86698b2fd912 100644 (file)
@@ -86,7 +86,7 @@ class PlaceLookup
             ($this->bIncludePolygonAsSVG ? 1 : 0);
         if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
             if (CONST_PolygonOutput_MaximumTypes) {
-                userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
+                userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
             } else {
                 userError('Polygon output is disabled');
             }
index fddad60dca19d77b2f1433808b52189d007be34e..f6ea590fb7c6cfa451f75952fde894a39f34dceb 100644 (file)
@@ -85,6 +85,11 @@ class ReverseGeocode
 
     protected function lookupLargeArea($sPointSQL, $iMaxRank)
     {
+        $sCountryCode = $this->getCountryCode($sPointSQL);
+        if (CONST_Search_WithinCountries and $sCountryCode == null) {
+            return  null;
+        }
+
         if ($iMaxRank > 4) {
             $aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
             if ($aPlace) {
@@ -94,12 +99,12 @@ class ReverseGeocode
 
         // If no polygon which contains the searchpoint is found,
         // searches in the country_osm_grid table for a polygon.
-        return  $this->lookupInCountry($sPointSQL, $iMaxRank);
+        return  $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
     }
 
-    protected function lookupInCountry($sPointSQL, $iMaxRank)
+    protected function getCountryCode($sPointSQL)
     {
-        Debug::newFunction('lookupInCountry');
+        Debug::newFunction('getCountryCode');
         // searches for polygon in table country_osm_grid which contains the searchpoint
         // and searches for the nearest place node to the searchpoint in this polygon
         $sSQL = 'SELECT country_code FROM country_osm_grid';
@@ -111,8 +116,12 @@ class ReverseGeocode
             null,
             'Could not determine country polygon containing the point.'
         );
-        Debug::printVar('Country code', $sCountryCode);
+        return $sCountryCode;
+    }
 
+    protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
+    {
+        Debug::newFunction('lookupInCountry');
         if ($sCountryCode) {
             if ($iMaxRank > 4) {
                 // look for place nodes with the given country code
diff --git a/lib-php/admin/export.php b/lib-php/admin/export.php
deleted file mode 100644 (file)
index 887b4be..0000000
+++ /dev/null
@@ -1,190 +0,0 @@
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
-    @define('CONST_LibDir', dirname(dirname(__FILE__)));
-    // Script to extract structured city and street data
-    // from a running nominatim instance as CSV data
-
-
-    require_once(CONST_LibDir.'/init-cmd.php');
-    require_once(CONST_LibDir.'/ParameterParser.php');
-    ini_set('memory_limit', '800M');
-
-    $aCMDOptions = array(
-                    'Export addresses as CSV file from a Nominatim database',
-                    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
-                    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
-                    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
-                    array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
-                    array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
-                    array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
-                    array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
-                    array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
-                    array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
-                    array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
-                    array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
-                    array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
-                    "\nAddress ranks: continent, country, state, county, city, suburb, street, path",
-                    'Additional output types: postcode, placeid (placeid for each object)',
-                    "\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
-                    'can be merged into one column by simply using a comma-separated list.',
-                    "\nDefault output-type: street",
-        'Default output format: street;suburb;city;county;state;country'
-                   );
-    getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
-    loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
-    $aRankmap = array(
-                 'continent' => 1,
-                 'country' => 4,
-                 'state' => 8,
-                 'county' => 12,
-                 'city' => 16,
-                 'suburb' => 20,
-                 'street' => 26,
-                 'path' => 27
-                );
-
-    $oDB = new Nominatim\DB();
-    $oDB->connect();
-
-    if (isset($aCMDResult['output-type'])) {
-        if (!isset($aRankmap[$aCMDResult['output-type']])) {
-            fail('unknown output-type: '.$aCMDResult['output-type']);
-        }
-        $iOutputRank = $aRankmap[$aCMDResult['output-type']];
-    } else {
-        $iOutputRank = $aRankmap['street'];
-    }
-
-
-    // Preferred language
-    $oParams = new Nominatim\ParameterParser();
-    if (!isset($aCMDResult['language'])) {
-        $aCMDResult['language'] = 'xx';
-    }
-    $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
-    $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-
-    // output formatting: build up a lookup table that maps address ranks to columns
-    $aColumnMapping = array();
-    $iNumCol = 0;
-    if (!isset($aCMDResult['output-format'])) {
-        $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
-    }
-    foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
-        $bHasData = false;
-        foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
-            if ($sRank == 'postcode' || $sRank == 'placeid') {
-                $aColumnMapping[$sRank] = $iNumCol;
-                $bHasData = true;
-            } elseif (isset($aRankmap[$sRank])) {
-                $iRank = $aRankmap[$sRank];
-                if ($iRank <= $iOutputRank) {
-                    $aColumnMapping[(string)$iRank] = $iNumCol;
-                    $bHasData = true;
-                }
-            }
-        }
-        if ($bHasData) {
-            $iNumCol++;
-        }
-    }
-
-    // build the query for objects
-    $sPlacexSQL = 'select min(place_id) as place_id, ';
-    $sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
-    $sPlacexSQL .= 'country_code as cc, ';
-    $sPlacexSQL .= 'postcode, ';
-    // get the address places excluding postcodes
-    $sPlacexSQL .= 'array(select address_place_id from place_addressline a';
-    $sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
-    $sPlacexSQL .= '  and address_place_id != placex.place_id';
-    $sPlacexSQL .= '  and not cached_rank_address in (5,11)';
-    $sPlacexSQL .= '  and cached_rank_address > 2 order by cached_rank_address)';
-    $sPlacexSQL .= ' as address';
-    $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
-
-    $sPlacexSQL .= ' and rank_address = '.$iOutputRank;
-
-    if (isset($aCMDResult['restrict-to-country'])) {
-        $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
-    }
-
-    // restriction to parent place id
-    $sParentId = false;
-    $sOsmType = false;
-
-    if (isset($aCMDResult['restrict-to-osm-node'])) {
-        $sOsmType = 'N';
-        $sOsmId = $aCMDResult['restrict-to-osm-node'];
-    }
-    if (isset($aCMDResult['restrict-to-osm-way'])) {
-        $sOsmType = 'W';
-        $sOsmId = $aCMDResult['restrict-to-osm-way'];
-    }
-    if (isset($aCMDResult['restrict-to-osm-relation'])) {
-        $sOsmType = 'R';
-        $sOsmId = $aCMDResult['restrict-to-osm-relation'];
-    }
-    if ($sOsmType) {
-        $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
-        $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
-        if (!$sParentId) {
-            fail('Could not find place '.$sOsmType.' '.$sOsmId);
-        }
-    }
-    if ($sParentId) {
-        $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
-    }
-
-    $sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
-
-    // Iterate over placeids
-    // to get further hierarchical information
-    //var_dump($sPlacexSQL);
-    $oResults = $oDB->getQueryStatement($sPlacexSQL);
-    $fOutstream = fopen('php://output', 'w');
-    while ($aRow = $oResults->fetch()) {
-        $iPlaceID = $aRow['place_id'];
-        $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
-        $sSQL .= ' WHERE isaddress';
-        $sSQL .= ' order by rank_address desc,isaddress desc';
-        $aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
-
-        $aOutput = array_fill(0, $iNumCol, '');
-        // output address parts
-        foreach ($aAddressLines as $aAddress) {
-            if (isset($aColumnMapping[$aAddress['rank_address']])) {
-                $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
-            }
-        }
-        // output postcode
-        if (isset($aColumnMapping['postcode'])) {
-            if ($aCMDResult['output-all-postcodes']) {
-                $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
-                $sSQL .= 'on px.place_id = pa.address_place_id ';
-                $sSQL .= 'where pa.cached_rank_address in (5,11) ';
-                $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
-                $sSQL .= 'group by postcode order by count(*) desc limit 1';
-                $sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
-
-                $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
-            } else {
-                $aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
-            }
-        }
-        if (isset($aColumnMapping['placeid'])) {
-            $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
-        }
-        fputcsv($fOutstream, $aOutput);
-    }
-    fclose($fOutstream);
diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php
deleted file mode 100644 (file)
index 5cbae89..0000000
+++ /dev/null
@@ -1,114 +0,0 @@
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/log.php');
-require_once(CONST_LibDir.'/PlaceLookup.php');
-require_once(CONST_LibDir.'/ReverseGeocode.php');
-
-ini_set('memory_limit', '800M');
-
-$aCMDOptions = array(
-                'Tools to warm nominatim db',
-                array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
-                array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
-                array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-                array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
-                array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
-                array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
-               );
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
-@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
-@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
-@define('CONST_Log_DB', getSettingBool('LOG_DB'));
-@define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
-@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
-@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
-@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
-@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
-@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
-@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
-
-require_once(CONST_LibDir.'/Geocode.php');
-
-$oDB = new Nominatim\DB();
-$oDB->connect();
-
-$bVerbose = $aResult['verbose'];
-
-function print_results($aResults, $bVerbose)
-{
-    if ($bVerbose) {
-        if ($aResults && count($aResults)) {
-            echo $aResults[0]['langaddress']."\n";
-        } else {
-            echo "<not found>\n";
-        }
-    } else {
-        echo '.';
-    }
-}
-
-if (!$aResult['search-only']) {
-    $oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
-    $oReverseGeocode->setZoom(20);
-    $oPlaceLookup = new Nominatim\PlaceLookup($oDB);
-    $oPlaceLookup->setIncludeAddressDetails(true);
-    $oPlaceLookup->setLanguagePreference(array('en'));
-
-    echo 'Warm reverse: ';
-    if ($bVerbose) {
-        echo "\n";
-    }
-    for ($i = 0; $i < 1000; $i++) {
-        $fLat = rand(-9000, 9000) / 100;
-        $fLon = rand(-18000, 18000) / 100;
-        if ($bVerbose) {
-            echo "$fLat, $fLon = ";
-        }
-
-        $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
-        $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
-        print_results($aSearchResults, $bVerbose);
-    }
-    echo "\n";
-}
-
-if (!$aResult['reverse-only']) {
-    $oGeocode = new Nominatim\Geocode($oDB);
-
-    echo 'Warm search: ';
-    if ($bVerbose) {
-        echo "\n";
-    }
-
-    $oTokenizer = new \Nominatim\Tokenizer($oDB);
-
-    $aWords = $oTokenizer->mostFrequentWords(1000);
-
-    $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
-    foreach ($aWords as $sWord) {
-        if ($bVerbose) {
-            echo "$sWord = ";
-        }
-
-        $oGeocode->setLanguagePreference(array('en'));
-        $oGeocode->setQuery($sWord);
-        $aSearchResults = $oGeocode->lookup();
-        print_results($aSearchResults, $bVerbose);
-    }
-    echo "\n";
-}
index 99307bbd9fb229a09e01b278872839f6b7512198..98fb6ef75fdaf9fb6799b0d97c97028544c9395a 100644 (file)
@@ -23,7 +23,7 @@ $aLangPrefOrder = $oParams->getPreferredLanguages();
 
 $sPlaceId = $oParams->getString('place_id');
 $sOsmType = $oParams->getSet('osmtype', array('N', 'W', 'R'));
-$iOsmId = $oParams->getInt('osmid', -1);
+$iOsmId = $oParams->getInt('osmid', 0);
 $sClass = $oParams->getString('class');
 
 $bIncludeKeywords = $oParams->getBool('keywords', false);
@@ -38,7 +38,7 @@ $oDB->connect();
 
 $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
 
-if ($sOsmType && $iOsmId > 0) {
+if ($sOsmType && $iOsmId !== 0) {
     $sSQL = 'SELECT place_id FROM placex WHERE osm_type = :type AND osm_id = :id';
     $aSQLParams = array(':type' => $sOsmType, ':id' => $iOsmId);
     // osm_type and osm_id are not unique enough
index 2bbfcd5c03c6296ff06191a00571c7b11f5da25a..26ce20738d301d4e3b33c43ff1571fd52a4e9a45 100644 (file)
@@ -187,6 +187,7 @@ BEGIN
 
   -- --- Return the record for the base entry.
 
+  current_rank_address := 1000;
   FOR location IN
     SELECT placex.place_id, osm_type, osm_id, name,
            coalesce(extratags->'linked_place', extratags->'place') as place_type,
@@ -261,7 +262,7 @@ BEGIN
       -- If the place had a postcode assigned, take this one only
       -- into consideration when it is an area and the place does not have
       -- a postcode itself.
-      IF location.fromarea AND location.isaddress
+      IF location.fromarea AND location_isaddress
          AND (place.address is null or not place.address ? 'postcode')
       THEN
         place.postcode := null; -- remove the less exact postcode
index 6a52021bb02d40f2a83ce4e71e88f0185b4efe41..f3b6ab2b4e7f5eff2482eb45792a2f032f62b363 100644 (file)
@@ -37,7 +37,7 @@ BEGIN
   -- Remove the place from the list of places to be deleted
   DELETE FROM place_to_be_deleted pdel
     WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id
-          and pdel.class = NEW.class;
+          and pdel.class = NEW.class and pdel.type = NEW.type;
 
   -- Have we already done this place?
   SELECT * INTO existing
@@ -363,57 +363,3 @@ BEGIN
   RETURN NULL;
 END;
 $$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION flush_deleted_places()
-  RETURNS INTEGER
-  AS $$
-BEGIN
-  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
-  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
-    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-
-  -- delete from place table
-  ALTER TABLE place DISABLE TRIGGER place_before_delete;
-  DELETE FROM place USING place_to_be_deleted
-    WHERE place.osm_type = place_to_be_deleted.osm_type
-          and place.osm_id = place_to_be_deleted.osm_id
-          and place.class = place_to_be_deleted.class
-          and place.type = place_to_be_deleted.type
-          and not deferred;
-  ALTER TABLE place ENABLE TRIGGER place_before_delete;
-
-  -- Mark for delete in the placex table
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
-          and placex.osm_id = place_to_be_deleted.osm_id
-          and placex.class = place_to_be_deleted.class
-          and placex.type = place_to_be_deleted.type
-          and not deferred;
-
-   -- Mark for delete in interpolations
-   UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
-    WHERE place_to_be_deleted.osm_type = 'W'
-          and place_to_be_deleted.class = 'place'
-          and place_to_be_deleted.type = 'houses'
-          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
-          and not deferred;
-
-   -- Clear todo list.
-   TRUNCATE TABLE place_to_be_deleted;
-
-   RETURN NULL;
-END;
-$$ LANGUAGE plpgsql;
-
index f5be7b61262532d462044ff7657a4bf69ee7ae58..ff2f037d01dabdb86b40212fff372738727bfb0e 100644 (file)
@@ -273,8 +273,8 @@ BEGIN
   END IF;
 
   RETURN ST_Envelope(ST_Collect(
-                     ST_Project(geom, radius, 0.785398)::geometry,
-                     ST_Project(geom, radius, 3.9269908)::geometry));
+                     ST_Project(geom::geography, radius, 0.785398)::geometry,
+                     ST_Project(geom::geography, radius, 3.9269908)::geometry));
 END;
 $$
 LANGUAGE plpgsql IMMUTABLE;
@@ -487,3 +487,56 @@ BEGIN
 END;
 $$
 LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION flush_deleted_places()
+  RETURNS INTEGER
+  AS $$
+BEGIN
+  -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
+  INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
+    SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
+
+  -- delete from place table
+  ALTER TABLE place DISABLE TRIGGER place_before_delete;
+  DELETE FROM place USING place_to_be_deleted
+    WHERE place.osm_type = place_to_be_deleted.osm_type
+          and place.osm_id = place_to_be_deleted.osm_id
+          and place.class = place_to_be_deleted.class
+          and place.type = place_to_be_deleted.type
+          and not deferred;
+  ALTER TABLE place ENABLE TRIGGER place_before_delete;
+
+  -- Mark for delete in the placex table
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+  UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
+          and placex.osm_id = place_to_be_deleted.osm_id
+          and placex.class = place_to_be_deleted.class
+          and placex.type = place_to_be_deleted.type
+          and not deferred;
+
+   -- Mark for delete in interpolations
+   UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
+    WHERE place_to_be_deleted.osm_type = 'W'
+          and place_to_be_deleted.class = 'place'
+          and place_to_be_deleted.type = 'houses'
+          and location_property_osmline.osm_id = place_to_be_deleted.osm_id
+          and not deferred;
+
+   -- Clear todo list.
+   TRUNCATE TABLE place_to_be_deleted;
+
+   RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
index 34f00a8bbc9fb0658b7243744dc12f4d31647e4e..427bcb63b1d619e53dedf79a714bd96738905f5b 100644 (file)
@@ -7,6 +7,6 @@ sys.path.append('@PROJECT_SOURCE_DIR@')
 from nominatim.cli import get_set_parser
 
 def get_parser():
-    parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@')
+    parser = get_set_parser()
 
     return parser.parser
index 794cd96c054d2f68c2d888b7ec54bc9818ea0a9d..9e3d6a1dcfc9496df468b050b480c5c492c7d6dc 100644 (file)
@@ -16,6 +16,7 @@ import from this file, not from the source files directly.
 
 from .core import (NominatimAPI as NominatimAPI,
                    NominatimAPIAsync as NominatimAPIAsync)
+from .connection import (SearchConnection as SearchConnection)
 from .status import (StatusResult as StatusResult)
 from .types import (PlaceID as PlaceID,
                     OsmID as OsmID,
index 72cabf7814bd4d5402d4b99dada5ce720fd8c852..405213e97659d32fb9ff9d56c2478219690af6a4 100644 (file)
@@ -7,7 +7,9 @@
 """
 Extended SQLAlchemy connection class that also includes access to the schema.
 """
-from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
+from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
+                   Awaitable, Callable, TypeVar
+import asyncio
 
 import sqlalchemy as sa
 from sqlalchemy.ext.asyncio import AsyncConnection
@@ -17,6 +19,8 @@ from nominatim.db.sqlalchemy_schema import SearchTables
 from nominatim.db.sqlalchemy_types import Geometry
 from nominatim.api.logging import log
 
+T = TypeVar('T')
+
 class SearchConnection:
     """ An extended SQLAlchemy connection class, that also contains
         then table definitions. The underlying asynchronous SQLAlchemy
@@ -31,6 +35,14 @@ class SearchConnection:
         self.t = tables # pylint: disable=invalid-name
         self._property_cache = properties
         self._classtables: Optional[Set[str]] = None
+        self.query_timeout: Optional[int] = None
+
+
+    def set_query_timeout(self, timeout: Optional[int]) -> None:
+        """ Set the timeout after which a query over this connection
+            is cancelled.
+        """
+        self.query_timeout = timeout
 
 
     async def scalar(self, sql: sa.sql.base.Executable,
@@ -39,7 +51,7 @@ class SearchConnection:
         """ Execute a 'scalar()' query on the connection.
         """
         log().sql(self.connection, sql, params)
-        return await self.connection.scalar(sql, params)
+        return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)
 
 
     async def execute(self, sql: 'sa.Executable',
@@ -48,7 +60,7 @@ class SearchConnection:
         """ Execute a 'execute()' query on the connection.
         """
         log().sql(self.connection, sql, params)
-        return await self.connection.execute(sql, params)
+        return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)
 
 
     async def get_property(self, name: str, cached: bool = True) -> str:
@@ -61,11 +73,10 @@ class SearchConnection:
 
             Raises a ValueError if the property does not exist.
         """
-        if name.startswith('DB:'):
-            raise ValueError(f"Illegal property value '{name}'.")
+        lookup_name = f'DBPROP:{name}'
 
-        if cached and name in self._property_cache:
-            return cast(str, self._property_cache[name])
+        if cached and lookup_name in self._property_cache:
+            return cast(str, self._property_cache[lookup_name])
 
         sql = sa.select(self.t.properties.c.value)\
             .where(self.t.properties.c.property == name)
@@ -74,7 +85,7 @@ class SearchConnection:
         if value is None:
             raise ValueError(f"Property '{name}' not found in database.")
 
-        self._property_cache[name] = cast(str, value)
+        self._property_cache[lookup_name] = cast(str, value)
 
         return cast(str, value)
 
@@ -92,6 +103,29 @@ class SearchConnection:
         return self._property_cache['DB:server_version']
 
 
+    async def get_cached_value(self, group: str, name: str,
+                               factory: Callable[[], Awaitable[T]]) -> T:
+        """ Access the cache for this Nominatim instance.
+            Each cache value needs to belong to a group and have a name.
+            This function is for internal API use only.
+
+            `factory` is an async callback function that produces
+            the value if it is not already cached.
+
+            Returns the cached value or the result of factory (also caching
+            the result).
+        """
+        full_name = f'{group}:{name}'
+
+        if full_name in self._property_cache:
+            return cast(T, self._property_cache[full_name])
+
+        value = await factory()
+        self._property_cache[full_name] = value
+
+        return value
+
+
     async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
         """ Lookup up if there is a classtype table for the given category
             and return a SQLAlchemy table for it, if it exists.
index 8d503fa5e836cbb9b82ab992530de5cead00bf4b..44ac91606fef90a746bb26d06b2a9fc6da0e61e4 100644 (file)
@@ -9,6 +9,7 @@ Implementation of classes for API access via libraries.
 """
 from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
 import asyncio
+import sys
 import contextlib
 from pathlib import Path
 
@@ -28,22 +29,49 @@ import nominatim.api.types as ntyp
 from nominatim.api.results import DetailedResult, ReverseResult, SearchResults
 
 
-class NominatimAPIAsync:
-    """ API loader asynchornous version.
+class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
+    """ The main frontend to the Nominatim database implements the
+        functions for lookup, forward and reverse geocoding using
+        asynchronous functions.
+
+        This class shares most of the functions with its synchronous
+        version. There are some additional functions or parameters,
+        which are documented below.
     """
     def __init__(self, project_dir: Path,
-                 environ: Optional[Mapping[str, str]] = None) -> None:
+                 environ: Optional[Mapping[str, str]] = None,
+                 loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
+        """ Initiate a new frontend object with synchronous API functions.
+
+            Parameters:
+              project_dir: Path to the
+                  [project directory](../admin/Import.md#creating-the-project-directory)
+                  of the local Nominatim installation.
+              environ: Mapping of [configuration parameters](../customize/Settings.md).
+                  When set, replaces any configuration via environment variables.
+                  Settings in this mapping also have precedence over any
+                  parameters found in the `.env` file of the project directory.
+              loop: The asyncio event loop that will be used when calling
+                  functions. Only needed when a custom event loop is used
+                  and the Python version is 3.9 or earlier.
+        """
         self.config = Configuration(project_dir, environ)
+        self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
+                             if self.config.QUERY_TIMEOUT else None
+        self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
         self.server_version = 0
 
-        self._engine_lock = asyncio.Lock()
+        if sys.version_info >= (3, 10):
+            self._engine_lock = asyncio.Lock()
+        else:
+            self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
         self._engine: Optional[sa_asyncio.AsyncEngine] = None
         self._tables: Optional[SearchTables] = None
         self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
 
 
     async def setup_database(self) -> None:
-        """ Set up the engine and connection parameters.
+        """ Set up the SQL engine and connections.
 
             This function will be implicitly called when the database is
             accessed for the first time. You may also call it explicitly to
@@ -53,21 +81,34 @@ class NominatimAPIAsync:
             if self._engine:
                 return
 
-            dsn = self.config.get_database_params()
+            extra_args: Dict[str, Any] = {'future': True,
+                                          'echo': self.config.get_bool('DEBUG_SQL')}
 
-            query = {k: v for k, v in dsn.items()
-                      if k not in ('user', 'password', 'dbname', 'host', 'port')}
-            if PGCORE_LIB == 'asyncpg':
-                query['prepared_statement_cache_size'] = '0'
+            is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
 
-            dburl = sa.engine.URL.create(
-                       f'postgresql+{PGCORE_LIB}',
-                       database=dsn.get('dbname'),
-                       username=dsn.get('user'), password=dsn.get('password'),
-                       host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
-                       query=query)
-            engine = sa_asyncio.create_async_engine(dburl, future=True,
-                                                    echo=self.config.get_bool('DEBUG_SQL'))
+            if is_sqlite:
+                params = dict((p.split('=', 1)
+                              for p in self.config.DATABASE_DSN[7:].split(';')))
+                dburl = sa.engine.URL.create('sqlite+aiosqlite',
+                                             database=params.get('dbname'))
+
+            else:
+                dsn = self.config.get_database_params()
+                query = {k: v for k, v in dsn.items()
+                         if k not in ('user', 'password', 'dbname', 'host', 'port')}
+
+                dburl = sa.engine.URL.create(
+                           f'postgresql+{PGCORE_LIB}',
+                           database=dsn.get('dbname'),
+                           username=dsn.get('user'),
+                           password=dsn.get('password'),
+                           host=dsn.get('host'),
+                           port=int(dsn['port']) if 'port' in dsn else None,
+                           query=query)
+                extra_args['max_overflow'] = 0
+                extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
+
+            engine = sa_asyncio.create_async_engine(dburl, **extra_args)
 
             try:
                 async with engine.begin() as conn:
@@ -76,7 +117,7 @@ class NominatimAPIAsync:
             except (PGCORE_ERROR, sa.exc.OperationalError):
                 server_version = 0
 
-            if server_version >= 110000:
+            if server_version >= 110000 and not is_sqlite:
                 @sa.event.listens_for(engine.sync_engine, "connect")
                 def _on_connect(dbapi_con: Any, _: Any) -> None:
                     cursor = dbapi_con.cursor()
@@ -85,6 +126,15 @@ class NominatimAPIAsync:
                 # Make sure that all connections get the new settings
                 await self.close()
 
+            if is_sqlite:
+                @sa.event.listens_for(engine.sync_engine, "connect")
+                def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
+                    dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
+                    cursor = dbapi_con.cursor()
+                    cursor.execute("SELECT load_extension('mod_spatialite')")
+                    cursor.execute('SELECT SetDecimalPrecision(7)')
+                    dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
+
             self._property_cache['DB:server_version'] = server_version
 
             self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
@@ -123,6 +173,7 @@ class NominatimAPIAsync:
         """
         try:
             async with self.begin() as conn:
+                conn.set_query_timeout(self.query_timeout)
                 status = await get_status(conn)
         except (PGCORE_ERROR, sa.exc.OperationalError):
             return StatusResult(700, 'Database connection failed')
@@ -137,6 +188,7 @@ class NominatimAPIAsync:
         """
         details = ntyp.LookupDetails.from_kwargs(params)
         async with self.begin() as conn:
+            conn.set_query_timeout(self.query_timeout)
             if details.keywords:
                 await make_query_analyzer(conn)
             return await get_detailed_place(conn, place, details)
@@ -149,6 +201,7 @@ class NominatimAPIAsync:
         """
         details = ntyp.LookupDetails.from_kwargs(params)
         async with self.begin() as conn:
+            conn.set_query_timeout(self.query_timeout)
             if details.keywords:
                 await make_query_analyzer(conn)
             return SearchResults(filter(None,
@@ -168,9 +221,11 @@ class NominatimAPIAsync:
 
         details = ntyp.ReverseDetails.from_kwargs(params)
         async with self.begin() as conn:
+            conn.set_query_timeout(self.query_timeout)
             if details.keywords:
                 await make_query_analyzer(conn)
-            geocoder = ReverseGeocoder(conn, details)
+            geocoder = ReverseGeocoder(conn, details,
+                                       self.reverse_restrict_to_country_area)
             return await geocoder.lookup(coord)
 
 
@@ -182,7 +237,10 @@ class NominatimAPIAsync:
             raise UsageError('Nothing to search for.')
 
         async with self.begin() as conn:
-            geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params))
+            conn.set_query_timeout(self.query_timeout)
+            geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
+                                       self.config.get_int('REQUEST_TIMEOUT') \
+                                         if self.config.REQUEST_TIMEOUT else None)
             phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
             return await geocoder.lookup(phrases)
 
@@ -199,6 +257,7 @@ class NominatimAPIAsync:
         """ Find an address using structured search.
         """
         async with self.begin() as conn:
+            conn.set_query_timeout(self.query_timeout)
             details = ntyp.SearchDetails.from_kwargs(params)
 
             phrases: List[Phrase] = []
@@ -239,7 +298,9 @@ class NominatimAPIAsync:
                 if amenity:
                     details.layers |= ntyp.DataLayer.POI
 
-            geocoder = ForwardGeocoder(conn, details)
+            geocoder = ForwardGeocoder(conn, details,
+                                       self.config.get_int('REQUEST_TIMEOUT') \
+                                         if self.config.REQUEST_TIMEOUT else None)
             return await geocoder.lookup(phrases)
 
 
@@ -255,6 +316,7 @@ class NominatimAPIAsync:
 
         details = ntyp.SearchDetails.from_kwargs(params)
         async with self.begin() as conn:
+            conn.set_query_timeout(self.query_timeout)
             if near_query:
                 phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
             else:
@@ -262,25 +324,41 @@ class NominatimAPIAsync:
                 if details.keywords:
                     await make_query_analyzer(conn)
 
-            geocoder = ForwardGeocoder(conn, details)
+            geocoder = ForwardGeocoder(conn, details,
+                                       self.config.get_int('REQUEST_TIMEOUT') \
+                                         if self.config.REQUEST_TIMEOUT else None)
             return await geocoder.lookup_pois(categories, phrases)
 
 
 
 class NominatimAPI:
-    """ API loader, synchronous version.
+    """ This class provides a thin synchronous wrapper around the asynchronous
+        Nominatim functions. It creates its own event loop and runs each
+        synchronous function call to completion using that loop.
     """
 
     def __init__(self, project_dir: Path,
                  environ: Optional[Mapping[str, str]] = None) -> None:
+        """ Initiate a new frontend object with synchronous API functions.
+
+            Parameters:
+              project_dir: Path to the
+                  [project directory](../admin/Import.md#creating-the-project-directory)
+                  of the local Nominatim installation.
+              environ: Mapping of [configuration parameters](../customize/Settings.md).
+                  When set, replaces any configuration via environment variables.
+                  Settings in this mapping also have precedence over any
+                  parameters found in the `.env` file of the project directory.
+        """
         self._loop = asyncio.new_event_loop()
-        self._async_api = NominatimAPIAsync(project_dir, environ)
+        self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
 
 
     def close(self) -> None:
-        """ Close all active connections to the database. The NominatimAPIAsync
-            object remains usable after closing. If a new API functions is
-            called, new connections are created.
+        """ Close all active connections to the database.
+
+            This function also closes the asynchronous worker loop, making
+            the NominatimAPI object unusable.
         """
         self._loop.run_until_complete(self._async_api.close())
         self._loop.close()
@@ -288,18 +366,109 @@ class NominatimAPI:
 
     @property
     def config(self) -> Configuration:
-        """ Return the configuration used by the API.
+        """ Provide read-only access to the [configuration](#Configuration)
+            used by the API.
         """
         return self._async_api.config
 
     def status(self) -> StatusResult:
-        """ Return the status of the database.
+        """ Return the status of the database as a dataclass object
+            with the fields described below.
+
+            Returns:
+              status(int): A status code as described on the status page.
+              message(str): Either 'OK' or a human-readable message of the
+                  problem encountered.
+              software_version(tuple): A tuple with the version of the
+                  Nominatim library, given as (major, minor, patch, db-patch).
+              database_version(tuple): A tuple with the version of the library
+                  which was used for the import or last migration.
+                  Also consists of (major, minor, patch, db-patch).
+              data_updated(datetime): Timestamp of the last data update.
         """
         return self._loop.run_until_complete(self._async_api.status())
 
 
     def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
         """ Get detailed information about a place in the database.
+
+            The result is a dataclass object with the fields described below
+            or `None` if the place could not be found in the database.
+
+            Parameters:
+              place: Description of the place to look up. See
+                     [Place identification](Input-Parameter-Types.md#place-identification)
+                     for the various ways to reference a place.
+
+            Other parameters:
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place.
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              parent_place_id (Optional[int]): Internal ID of the parent of this
+                  place. Only meaningful for POI-like objects (places with a
+                  rank_address of 30).
+              linked_place_id (Optional[int]): Internal ID of the place this object
+                  links to. When this ID is set, there is no guarantee that
+                  the rest of the result information is complete.
+              admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
+                  for administrative boundary objects.
+              indexed_date (datetime): Timestamp when the place was last updated.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
         """
         return self._loop.run_until_complete(self._async_api.details(place, **params))
 
@@ -308,6 +477,75 @@ class NominatimAPI:
         """ Get simple information about a list of places.
 
             Returns a list of place information for all IDs that were found.
+            Each result is a dataclass with the fields detailed below.
+
+            Parameters:
+              places: List of descriptions of the place to look up. See
+                      [Place identification](Input-Parameter-Types.md#place-identification)
+                      for the various ways to reference a place.
+
+            Other parameters:
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place.
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              bbox (Bbox): Bounding box of the full geometry of the place.
+                   If the place is a single point, then the size of the bounding
+                   box is guessed according to the type of place.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
         """
         return self._loop.run_until_complete(self._async_api.lookup(places, **params))
 
@@ -315,14 +553,180 @@ class NominatimAPI:
     def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
         """ Find a place by its coordinates. Also known as reverse geocoding.
 
-            Returns the closest result that can be found or None if
-            no place matches the given criteria.
+            Returns the closest result that can be found or `None` if
+            no place matches the given criteria. The result is a dataclass
+            with the fields as detailed below.
+
+            Parameters:
+              coord: Coordinate to look up the place for, as a Point
+                     or a tuple (x, y). Must be in WGS84 projection.
+
+            Other parameters:
+              max_rank (int): Highest address rank to return. Can be used to
+                restrict search to streets or settlements.
+              layers (enum): Defines the kind of data to take into account.
+                See description of layers below. (Default: addresses and POIs)
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place. (Default: False)
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              bbox (Bbox): Bounding box of the full geometry of the place.
+                   If the place is a single point, then the size of the bounding
+                   box is guessed according to the type of place.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
+              distance (Optional[float]): Distance in degrees from the input point.
         """
         return self._loop.run_until_complete(self._async_api.reverse(coord, **params))
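
A matching sketch for reverse geocoding under the same assumptions; the
coordinate is a placeholder and, as stated above, must be given as (x, y),
i.e. longitude before latitude, in WGS84:

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('/srv/nominatim-project'))  # placeholder path
    # max_rank=26 caps results at street granularity.
    result = api.reverse((8.4298, 49.0061), max_rank=26)
    if result is not None:
        print(result.category, result.distance)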
 
 
     def search(self, query: str, **params: Any) -> SearchResults:
         """ Find a place by free-text search. Also known as forward geocoding.
+
+            Parameters:
+              query: Free-form text query searching for a place.
+
+            Other parameters:
+              max_results (int): Maximum number of results to return. The
+                actual number of results may be less. (Default: 10)
+              min_rank (int): Lowest permissible rank for the result.
+                For addressable places this is the minimum
+                [address rank](../customize/Ranking.md#address-rank). For all
+                other places the [search rank](../customize/Ranking.md#search-rank)
+                is used.
+              max_rank (int): Highest permissible rank for the result. See min_rank above.
+              layers (enum): Defines the kind of data to take into account.
+                See [layers section](Input-Parameter-Types.md#layers) for details.
+                (Default: addresses and POIs)
+              countries (list[str]): Restrict search to countries with the given
+                ISO 3166-1 alpha-2 country code. An empty list (the default)
+                disables this filter.
+              excluded (list[int]): A list of internal IDs of places to exclude
+                from the search.
+              viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+              bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+                as a filter and return only results within the bounding box.
+              near (Optional[Point]): Focus search around the given point and
+                return results ordered by distance to the given point.
+              near_radius (Optional[float]): Restrict results to those within
+                the given distance in degrees of the `near` point. Ignored
+                when `near` is not set.
+              categories (list[tuple]): Restrict search to places of the given
+                categories. The category is the main OSM tag assigned to each
+                place. An empty list (the default) disables this filter.
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place. (Default: False)
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              bbox (Bbox): Bounding box of the full geometry of the place.
+                   If the place is a single point, then the size of the bounding
+                   box is guessed according to the type of place.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
         """
         return self._loop.run_until_complete(
                    self._async_api.search(query, **params))
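
A free-text search sketch under the same assumptions; the query string and
country filter are arbitrary examples:

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('/srv/nominatim-project'))  # placeholder path
    results = api.search('Alte Brücke, Heidelberg',
                         max_results=5, countries=['de'])
    for result in results:
        print(result.locale_name, result.rank_address)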
@@ -338,6 +742,109 @@ class NominatimAPI:
                        postalcode: Optional[str] = None,
                        **params: Any) -> SearchResults:
         """ Find an address using structured search.
+
+            Parameters:
+              amenity: Name of a POI.
+              street: Street and optionally house number of the address. If the address
+                does not have a street, then the place the house number refers to.
+              city: Postal city of the address.
+              county: County equivalent of the address. Does not exist in all
+                jurisdictions.
+              state: State or province of the address.
+              country: Country with its full name or its ISO 3166-1 alpha-2 country code.
+                Do not use together with the country_code filter.
+              postalcode: Post code or ZIP for the place.
+
+            Other parameters:
+              max_results (int): Maximum number of results to return. The
+                actual number of results may be less. (Default: 10)
+              min_rank (int): Lowest permissible rank for the result.
+                For addressable places this is the minimum
+                [address rank](../customize/Ranking.md#address-rank). For all
+                other places the [search rank](../customize/Ranking.md#search-rank)
+                is used.
+              max_rank (int): Highest permissible rank for the result. See min_rank above.
+              layers (enum): Defines the kind of data to take into account.
+                See [layers section](Input-Parameter-Types.md#layers) for details.
+                (Default: addresses and POIs)
+              countries (list[str]): Restrict search to countries with the given
+                ISO 3166-1 alpha-2 country code. An empty list (the default)
+                disables this filter. Do not use together with the `country`
+                parameter.
+              excluded (list[int]): A list of internal IDs of places to exclude
+                from the search.
+              viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+              bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+                as a filter and return only results within the bounding box.
+              near (Optional[Point]): Focus search around the given point and
+                return results ordered by distance to the given point.
+              near_radius (Optional[float]): Restrict results to those within
+                the given distance in degrees of the `near` point. Ignored
+                when `near` is not set.
+              categories (list[tuple]): Restrict search to places of the given
+                categories. The category is the main OSM tag assigned to each
+                place. An empty list (the default) disables this filter.
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place. (Default: False)
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              bbox (Bbox): Bounding box of the full geometry of the place.
+                   If the place is a single point, then the size of the bounding
+                   box is guessed according to the type of place.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
         """
         return self._loop.run_until_complete(
                    self._async_api.search_address(amenity, street, city, county,
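
A structured-search sketch under the same assumptions; the address parts
are arbitrary examples and unused parts may simply be left out:

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('/srv/nominatim-project'))  # placeholder path
    results = api.search_address(street='Im Neuenheimer Feld 261',
                                 city='Heidelberg', country='de')
    if results:
        print(results[0].centroid, results[0].rank_address)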
@@ -348,9 +855,104 @@ class NominatimAPI:
                         near_query: Optional[str] = None,
                         **params: Any) -> SearchResults:
         """ Find an object of a certain category near another place.
+
             The near place may either be given as an unstructured search
             query in itself or as a geographic area through the
             viewbox or near parameters.
+
+            Parameters:
+              categories: Restrict search to places of the given
+                categories. The category is the main OSM tag assigned to each
+                place.
+              near_query: Optional free-text query to define the area to
+                restrict the search to.
+
+            Other parameters:
+              max_results (int): Maximum number of results to return. The
+                actual number of results may be less. (Default: 10)
+              min_rank (int): Lowest permissible rank for the result.
+                For addressable places this is the minimum
+                [address rank](../customize/Ranking.md#address-rank). For all
+                other places the [search rank](../customize/Ranking.md#search-rank)
+                is used.
+              max_rank (int): Highest permissible rank for the result. See min_rank above.
+              layers (enum): Defines the kind of data to take into account.
+                See [layers section](Input-Parameter-Types.md#layers) for details.
+                (Default: addresses and POIs)
+              countries (list[str]): Restrict search to countries with the given
+                ISO 3166-1 alpha-2 country code. An empty list (the default)
+                disables this filter.
+              excluded (list[int]): A list of internal IDs of places to exclude
+                from the search.
+              viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+              bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+                as a filter and return only results within the bounding box.
+              near (Optional[Point]): Focus search around the given point and
+                return results ordered by distance to the given point.
+              near_radius (Optional[float]): Restrict results to those within
+                the given distance in degrees of the `near` point. Ignored
+                when `near` is not set.
+              geometry_output (enum): Add the full geometry of the place to the result.
+                Multiple formats may be selected. Note that geometries can become
+                quite large. (Default: none)
+              geometry_simplification (float): Simplification factor to use on
+                the geometries before returning them. The factor expresses
+                the tolerance in degrees from which the geometry may differ.
+                Topology is preserved. (Default: 0.0)
+              address_details (bool): Add detailed information about the places
+                that make up the address of the requested object. (Default: False)
+              linked_places (bool): Add detailed information about the places
+                that link to the result. (Default: False)
+              parented_places (bool): Add detailed information about all places
+                for which the requested object is a parent, i.e. all places for
+                which the object provides the address details.
+                Only POI places can have parents. (Default: False)
+              keywords (bool): Add detailed information about the search terms
+                used for this place. (Default: False)
+
+            Returns:
+              source_table (enum): Data source of the place. See below for possible values.
+              category (tuple): A tuple of two strings with the primary OSM tag
+                  and value.
+              centroid (Point): Point position of the place.
+              place_id (Optional[int]): Internal ID of the place. This ID may differ
+                  for the same place between different installations.
+              osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+              names (Optional[dict]): Dictionary of names of the place. Keys are
+                  usually the corresponding OSM tag keys.
+              address (Optional[dict]): Dictionary of address parts directly
+                  attributed to the place. Keys are usually the corresponding
+                  OSM tag keys with the `addr:` prefix removed.
+              extratags (Optional[dict]): Dictionary of additional attributes for
+                  the place. Usually OSM tag keys and values.
+              housenumber (Optional[str]): House number of the place, normalised
+                  for lookup. To get the house number in its original spelling,
+                  use `address['housenumber']`.
+              postcode (Optional[str]): Computed postcode for the place. To get
+                  directly attributed postcodes, use `address['postcode']` instead.
+              wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+                  The string has the format <language code>:<wikipedia title>.
+              rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+              rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+              importance (Optional[float]): Relative importance of the place. This is
+                  a measure of how likely the place will be searched for.
+              country_code (Optional[str]): Country the feature is in as
+                  ISO 3166-1 alpha-2 country code.
+              address_rows (Optional[AddressLines]): List of places that make up the
+                  computed address. `None` when `address_details` parameter was False.
+              linked_rows (Optional[AddressLines]): List of places that link to the object.
+                  `None` when `linked_places` parameter was False.
+              parented_rows (Optional[AddressLines]): List of direct children of the place.
+                  `None` when `parented_places` parameter was False.
+              name_keywords (Optional[WordInfos]): List of search words for the name of
+                   the place. `None` when `keywords` parameter is set to False.
+              address_keywords (Optional[WordInfos]): List of search words for the address of
+                   the place. `None` when `keywords` parameter is set to False.
+              bbox (Bbox): Bounding box of the full geometry of the place.
+                   If the place is a single point, then the size of the bounding
+                   box is guessed according to the type of place.
+              geometry (dict): Dictionary containing the full geometry of the place
+                   in the formats requested in the `geometry_output` parameter.
         """
         return self._loop.run_until_complete(
                    self._async_api.search_category(categories, near_query, **params))
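
A category-search sketch under the same assumptions; the category tuple is
the main OSM tag as described above, and the near query is an arbitrary
example:

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('/srv/nominatim-project'))  # placeholder path
    results = api.search_category([('amenity', 'restaurant')],
                                  near_query='Heidelberg', max_results=5)
    for result in results:
        print(result.names)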
index 3785579073a71df7544518b407c5f34b173664ca..37ae7f5f04464241ad0e81062b56d125555cadff 100644 (file)
@@ -99,6 +99,7 @@ class BaseLogger:
 
         if sa.__version__.startswith('1'):
             try:
+                sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
                 return sqlstr % tuple((repr(params.get(name, None))
                                       for name in compiled.positiontup)) # type: ignore
             except TypeError:
@@ -107,9 +108,9 @@ class BaseLogger:
         # Fixes an odd issue with Python 3.7 where percentages are not
         # quoted correctly.
         sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
+        sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
         return sqlstr % params
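
For context: SQLAlchemy renders expanding bind parameters (for example
those generated by `.in_()`) as `__[POSTCOMPILE_<name>]` markers in the
compiled statement string. The substitutions above rewrite those markers
into printf-style placeholders so that the parameters can be interpolated
for logging. A standalone sketch of the second rewrite (log output only,
not executable SQL):

    import re

    sqlstr = 'SELECT word FROM word WHERE word_id IN (__[POSTCOMPILE_word_ids])'
    sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
    print(sqlstr % {'word_ids': [1, 2, 3]})
    # SELECT word FROM word WHERE word_id IN ([1, 2, 3])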
 
-
 class HTMLLogger(BaseLogger):
     """ Logger that formats messages in HTML.
     """
@@ -234,6 +235,10 @@ class TextLogger(BaseLogger):
         self.buffer = io.StringIO()
 
 
+    def _timestamp(self) -> None:
+        self._write(f'[{dt.datetime.now()}]\n')
+
+
     def get_buffer(self) -> str:
         return self.buffer.getvalue()
 
@@ -246,6 +251,7 @@ class TextLogger(BaseLogger):
 
 
     def section(self, heading: str) -> None:
+        self._timestamp()
         self._write(f"\n# {heading}\n\n")
 
 
@@ -282,6 +288,7 @@ class TextLogger(BaseLogger):
 
 
     def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+        self._timestamp()
         self._write(f'{heading}:\n')
         total = 0
         for rank, res in results:
@@ -297,6 +304,7 @@ class TextLogger(BaseLogger):
 
     def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
             params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
+        self._timestamp()
         sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
         self._write(f"| {sqlstr}\n\n")
 
index 0e1fd9cec6303ed188adf5d8e54223e732e3c165..402b85316853173967c13c73f7f0a30aabc97c92 100644 (file)
@@ -38,6 +38,7 @@ async def find_in_placex(conn: SearchConnection, place: ntyp.PlaceRef,
                     t.c.importance, t.c.wikipedia, t.c.indexed_date,
                     t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                     t.c.linked_place_id,
+                    t.c.geometry.ST_Expand(0).label('bbox'),
                     t.c.centroid)
 
     if isinstance(place, ntyp.PlaceID):
@@ -76,8 +77,8 @@ async def find_in_osmline(conn: SearchConnection, place: ntyp.PlaceRef,
         sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
         if place.osm_class and place.osm_class.isdigit():
             sql = sql.order_by(sa.func.greatest(0,
-                                    sa.func.least(int(place.osm_class) - t.c.endnumber),
-                                           t.c.startnumber - int(place.osm_class)))
+                                                int(place.osm_class) - t.c.endnumber,
+                                                t.c.startnumber - int(place.osm_class)))
     else:
         return None
 
@@ -162,11 +163,10 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
 
     if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
         def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
-            return sql.add_columns(sa.literal_column(f"""
-                      ST_AsGeoJSON(CASE WHEN ST_NPoints({column.name}) > 5000
-                                   THEN ST_SimplifyPreserveTopology({column.name}, 0.0001)
-                                   ELSE {column.name} END)
-                       """).label('geometry_geojson'))
+            return sql.add_columns(sa.func.ST_AsGeoJSON(
+                                    sa.case((sa.func.ST_NPoints(column) > 5000,
+                                             sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
+                                            else_=column), 7).label('geometry_geojson'))
     else:
         def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
             return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
@@ -182,9 +182,9 @@ async def get_detailed_place(conn: SearchConnection, place: ntyp.PlaceRef,
 
     # add missing details
     assert result is not None
-    result.parent_place_id = row.parent_place_id
-    result.linked_place_id = getattr(row, 'linked_place_id', None)
-    result.admin_level = getattr(row, 'admin_level', 15)
+    if 'type' in result.geometry:
+        result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
+                                                        result.geometry['type'])
     indexed_date = getattr(row, 'indexed_date', None)
     if indexed_date is not None:
         result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
@@ -207,16 +207,16 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
         out = []
 
         if details.geometry_simplification > 0.0:
-            col = col.ST_SimplifyPreserveTopology(details.geometry_simplification)
+            col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
 
         if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
-            out.append(col.ST_AsGeoJSON().label('geometry_geojson'))
+            out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
         if details.geometry_output & ntyp.GeometryFormat.TEXT:
-            out.append(col.ST_AsText().label('geometry_text'))
+            out.append(sa.func.ST_AsText(col).label('geometry_text'))
         if details.geometry_output & ntyp.GeometryFormat.KML:
-            out.append(col.ST_AsKML().label('geometry_kml'))
+            out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
         if details.geometry_output & ntyp.GeometryFormat.SVG:
-            out.append(col.ST_AsSVG().label('geometry_svg'))
+            out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
 
         return sql.add_columns(*out)
 
@@ -232,8 +232,20 @@ async def get_simple_place(conn: SearchConnection, place: ntyp.PlaceRef,
 
     # add missing details
     assert result is not None
-    result.bbox = getattr(row, 'bbox', None)
+    if hasattr(row, 'bbox'):
+        result.bbox = ntyp.Bbox.from_wkb(row.bbox)
 
     await nres.add_result_details(conn, [result], details)
 
     return result
+
+
+GEOMETRY_TYPE_MAP = {
+    'POINT': 'ST_Point',
+    'MULTIPOINT': 'ST_MultiPoint',
+    'LINESTRING': 'ST_LineString',
+    'MULTILINESTRING': 'ST_MultiLineString',
+    'POLYGON': 'ST_Polygon',
+    'MULTIPOLYGON': 'ST_MultiPolygon',
+    'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
+}
index 3416fc7a0d7196777c8649d99493469603cfba8f..829008fb3ec316a09a753b39cb720404b1c8d98b 100644 (file)
@@ -11,7 +11,7 @@ Data classes are part of the public API while the functions are for
 internal use only. That's why they are implemented as free-standing functions
 instead of member functions.
 """
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast
 import enum
 import dataclasses
 import datetime as dt
@@ -19,6 +19,7 @@ import datetime as dt
 import sqlalchemy as sa
 
 from nominatim.typing import SaSelect, SaRow
+from nominatim.db.sqlalchemy_types import Geometry
 from nominatim.api.types import Point, Bbox, LookupDetails
 from nominatim.api.connection import SearchConnection
 from nominatim.api.logging import log
@@ -46,32 +47,88 @@ def _mingle_name_tags(names: Optional[Dict[str, str]]) -> Optional[Dict[str, str
 
 
 class SourceTable(enum.Enum):
-    """ Enumeration of kinds of results.
+    """ The `SourceTable` type lists the possible sources a result can have.
     """
     PLACEX = 1
+    """ The placex table is the main source for result usually containing
+        OSM data.
+    """
     OSMLINE = 2
+    """ The osmline table contains address interpolations from OSM data.
+        Interpolation addresses are always approximate. The OSM id in the
+        result refers to the OSM way with the interpolation line object.
+    """
     TIGER = 3
+    """ TIGER address data contains US addresses imported on the side,
+        see [Installing TIGER data](../customize/Tiger.md).
+        TIGER addresses are also interpolations. The addresses always refer
+        to a street from OSM data. The OSM id in the result refers to
+        that street.
+    """
     POSTCODE = 4
+    """ The postcode table contains artificial centroids for postcodes,
+        computed from the postcodes available with address points. Results
+        are always approximate.
+    """
     COUNTRY = 5
+    """ The country table provides a fallback, when country data is missing
+        in the OSM data.
+    """
 
 
 @dataclasses.dataclass
 class AddressLine:
-    """ Detailed information about a related place.
+    """ The `AddressLine` may contain the following fields about a related place
+        and its function as an address object. Most fields are optional.
+        Their presence depends on the kind and function of the address part.
     """
-    place_id: Optional[int]
-    osm_object: Optional[Tuple[str, int]]
     category: Tuple[str, str]
+    """ Main category of the place, described by a key-value pair.
+    """
     names: Dict[str, str]
-    extratags: Optional[Dict[str, str]]
-
-    admin_level: Optional[int]
+    """ All available names for the place including references, alternative
+        names and translations.
+    """
     fromarea: bool
+    """ If true, then the exact area of the place is known. Without area
+        information, Nominatim has to make an educated guess whether an
+        address belongs to one place or another.
+    """
     isaddress: bool
+    """ If true, this place should be considered for the final address display.
+        Nominatim will sometimes include more than one candidate for
+        the address in the list when it cannot reliably determine where the
+        place belongs. It considers the names of all candidates when
+        searching, but when displaying the result only the most likely
+        candidate should be shown.
+    """
     rank_address: int
+    """ [Address rank](../customize/Ranking.md#address-rank) of the place.
+    """
     distance: float
+    """ Distance in degrees between the result place and this address part.
+    """
+    place_id: Optional[int] = None
+    """ Internal ID of the place.
+    """
+    osm_object: Optional[Tuple[str, int]] = None
+    """ OSM type and ID of the place, if such an object exists.
+    """
+    extratags: Optional[Dict[str, str]] = None
+    """ Any extra information available about the place. This is a dictionary
+        that usually contains OSM tag key-value pairs.
+    """
+
+    admin_level: Optional[int] = None
+    """ The administrative level of a boundary as tagged in the input data.
+        This field is only meaningful for places of the category
+        (boundary, administrative).
+    """
 
     local_name: Optional[str] = None
+    """ Place holder for localization of this address part. See
+        [Localization](#localization) below.
+    """
 
 
 class AddressLines(List[AddressLine]):
@@ -80,7 +137,7 @@ class AddressLines(List[AddressLine]):
 
     def localize(self, locales: Locales) -> List[str]:
         """ Set the local name of address parts according to the chosen
-            locale. Return the list of local names without duplications.
+            locale. Return the list of local names without duplicates.
 
             Only address parts that are marked as isaddress are localized
             and returned.
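
A hedged sketch of localization, assuming `Locales` and `PlaceID` are also
importable from `nominatim.api`; the place id and the language preference
list are placeholders:

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('/srv/nominatim-project'))  # placeholder path
    result = api.lookup([napi.PlaceID(12345)], address_details=True)[0]
    print(', '.join(result.address_rows.localize(napi.Locales(['de', 'en']))))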
@@ -99,11 +156,19 @@ class AddressLines(List[AddressLine]):
 
 @dataclasses.dataclass
 class WordInfo:
-    """ Detailed information about a search term.
+    """ Each entry in the list of search terms contains the
+        following detailed information.
     """
     word_id: int
+    """ Internal identifier for the word.
+    """
     word_token: str
+    """ Normalised and transliterated form of the word.
+        This form is used for searching.
+    """
     word: Optional[str] = None
+    """ Untransliterated form, if available.
+    """
 
 
 WordInfos = Sequence[WordInfo]
@@ -120,6 +185,9 @@ class BaseResult:
 
     place_id : Optional[int] = None
     osm_object: Optional[Tuple[str, int]] = None
+    parent_place_id: Optional[int] = None
+    linked_place_id: Optional[int] = None
+    admin_level: int = 15
 
     locale_name: Optional[str] = None
     display_name: Optional[str] = None
@@ -187,9 +255,6 @@ class DetailedResult(BaseResult):
     """ A search result with more internal information from the database
         added.
     """
-    parent_place_id: Optional[int] = None
-    linked_place_id: Optional[int] = None
-    admin_level: int = 15
     indexed_date: Optional[dt.datetime] = None
 
 
@@ -228,12 +293,6 @@ class SearchResults(List[SearchResult]):
         May be empty when no result was found.
     """
 
-    def localize(self, locales: Locales) -> None:
-        """ Apply the given locales to all results.
-        """
-        for result in self:
-            result.localize(locales)
-
 
 def _filter_geometries(row: SaRow) -> Dict[str, str]:
     return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
@@ -253,6 +312,9 @@ def create_from_placex_row(row: Optional[SaRow],
                       place_id=row.place_id,
                       osm_object=(row.osm_type, row.osm_id),
                       category=(row.class_, row.type),
+                      parent_place_id = row.parent_place_id,
+                      linked_place_id = getattr(row, 'linked_place_id', None),
+                      admin_level = getattr(row, 'admin_level', 15),
                       names=_mingle_name_tags(row.name),
                       address=row.address,
                       extratags=row.extratags,
@@ -283,6 +345,7 @@ def create_from_osmline_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.OSMLINE,
                      place_id=row.place_id,
+                     parent_place_id = row.parent_place_id,
                      osm_object=('W', row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      address=row.address,
@@ -302,7 +365,9 @@ def create_from_osmline_row(row: Optional[SaRow],
 
 
 def create_from_tiger_row(row: Optional[SaRow],
-                          class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+                          class_type: Type[BaseResultT],
+                          osm_type: Optional[str] = None,
+                          osm_id: Optional[int] = None) -> Optional[BaseResultT]:
     """ Construct a new result and add the data from the result row
         from the Tiger data interpolation table. 'class_type' defines
         the type of result to return. Returns None if the row is None.
@@ -317,7 +382,8 @@ def create_from_tiger_row(row: Optional[SaRow],
 
     res = class_type(source_table=SourceTable.TIGER,
                      place_id=row.place_id,
-                     osm_object=(row.osm_type, row.osm_id),
+                     parent_place_id = row.parent_place_id,
+                     osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
                      category=('place', 'houses' if hnr is None else 'house'),
                      postcode=row.postcode,
                      country_code='us',
@@ -345,6 +411,7 @@ def create_from_postcode_row(row: Optional[SaRow],
 
     return class_type(source_table=SourceTable.POSTCODE,
                       place_id=row.place_id,
+                      parent_place_id = row.parent_place_id,
                       category=('place', 'postcode'),
                       names={'ref': row.postcode},
                       rank_search=row.rank_search,
@@ -368,7 +435,8 @@ def create_from_country_row(row: Optional[SaRow],
                       centroid=Point.from_wkb(row.centroid),
                       names=row.name,
                       rank_address=4, rank_search=4,
-                      country_code=row.country_code)
+                      country_code=row.country_code,
+                      geometry=_filter_geometries(row))
 
 
 async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
@@ -393,19 +461,24 @@ async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
             log().comment('Query keywords')
             for result in results:
                 await complete_keywords(conn, result)
+        for result in results:
+            result.localize(details.locales)
 
 
-def _result_row_to_address_row(row: SaRow) -> AddressLine:
+def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
     """ Create a new AddressLine from the results of a datbase query.
     """
-    extratags: Dict[str, str] = getattr(row, 'extratags', {})
-    if hasattr(row, 'place_type') and row.place_type:
-        extratags['place'] = row.place_type
+    extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
+    if 'linked_place' in extratags:
+        extratags['place'] = extratags['linked_place']
 
     names = _mingle_name_tags(row.name) or {}
     if getattr(row, 'housenumber', None) is not None:
         names['housenumber'] = row.housenumber
 
+    if isaddress is None:
+        isaddress = getattr(row, 'isaddress', True)
+
     return AddressLine(place_id=row.place_id,
                        osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
                        category=(getattr(row, 'class'), row.type),
@@ -413,81 +486,211 @@ def _result_row_to_address_row(row: SaRow) -> AddressLine:
                        extratags=extratags,
                        admin_level=row.admin_level,
                        fromarea=row.fromarea,
-                       isaddress=getattr(row, 'isaddress', True),
+                       isaddress=isaddress,
                        rank_address=row.rank_address,
                        distance=row.distance)
 
 
+def _get_address_lookup_id(result: BaseResultT) -> int:
+    assert result.place_id
+    if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
+        return result.parent_place_id or result.place_id
+
+    return result.linked_place_id or result.place_id
+
+
+async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
+    assert result.address_rows is not None
+    postcode = result.postcode
+    if not postcode and result.address:
+        postcode = result.address.get('postcode')
+    if postcode and ',' not in postcode and ';' not in postcode:
+        result.address_rows.append(AddressLine(
+            category=('place', 'postcode'),
+            names={'ref': postcode},
+            fromarea=False, isaddress=True, rank_address=5,
+            distance=0.0))
+    if result.country_code:
+        async def _get_country_names() -> Optional[Dict[str, str]]:
+            t = conn.t.country_name
+            sql = sa.select(t.c.name, t.c.derived_name)\
+                    .where(t.c.country_code == result.country_code)
+            for cres in await conn.execute(sql):
+                names = cast(Dict[str, str], cres[0])
+                if cres[1]:
+                    names.update(cast(Dict[str, str], cres[1]))
+                return names
+            return None
+
+        country_names = await conn.get_cached_value('COUNTRY_NAME',
+                                                    result.country_code,
+                                                    _get_country_names)
+        if country_names:
+            result.address_rows.append(AddressLine(
+                category=('place', 'country'),
+                names=country_names,
+                fromarea=False, isaddress=True, rank_address=4,
+                distance=0.0))
+        result.address_rows.append(AddressLine(
+            category=('place', 'country_code'),
+            names={'ref': result.country_code}, extratags = {},
+            fromarea=True, isaddress=False, rank_address=4,
+            distance=0.0))
+
+
+def _setup_address_details(result: BaseResultT) -> None:
+    """ Retrieve information about places that make up the address of the result.
+    """
+    result.address_rows = AddressLines()
+    if result.names:
+        result.address_rows.append(AddressLine(
+            place_id=result.place_id,
+            osm_object=result.osm_object,
+            category=result.category,
+            names=result.names,
+            extratags=result.extratags or {},
+            admin_level=result.admin_level,
+            fromarea=True, isaddress=True,
+            rank_address=result.rank_address or 100, distance=0.0))
+    if result.source_table == SourceTable.PLACEX and result.address:
+        housenumber = result.address.get('housenumber')\
+                      or result.address.get('streetnumber')\
+                      or result.address.get('conscriptionnumber')
+    elif result.housenumber:
+        housenumber = result.housenumber
+    else:
+        housenumber = None
+    if housenumber:
+        result.address_rows.append(AddressLine(
+            category=('place', 'house_number'),
+            names={'ref': housenumber},
+            fromarea=True, isaddress=True, rank_address=28, distance=0))
+    if result.address and '_unlisted_place' in result.address:
+        result.address_rows.append(AddressLine(
+            category=('place', 'locality'),
+            names={'name': result.address['_unlisted_place']},
+            fromarea=False, isaddress=True, rank_address=25, distance=0))
+
+
 async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
     """ Retrieve information about places that make up the address of the result.
     """
-    def get_hnr(result: BaseResult) -> Tuple[int, int]:
-        housenumber = -1
-        if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
-            if result.housenumber is not None:
-                housenumber = int(result.housenumber)
-            elif result.extratags is not None and 'startnumber' in result.extratags:
-                # details requests do not come with a specific house number
-                housenumber = int(result.extratags['startnumber'])
-        assert result.place_id
-        return result.place_id, housenumber
+    for result in results:
+        _setup_address_details(result)
+
+    ### Look up entries from the place_addressline table
 
-    data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]
+    lookup_ids = [{'pid': r.place_id,
+                   'lid': _get_address_lookup_id(r),
+                   'names': list(r.address.values()) if r.address else [],
+                   'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
+                  for r in results if r.place_id]
 
-    if not data:
+    if not lookup_ids:
         return
 
-    values = sa.values(sa.column('place_id', type_=sa.Integer),
-                       sa.column('housenumber', type_=sa.Integer),
-                       name='places',
-                       literal_binds=True).data(data)
-
-    sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
-                .table_valued( # type: ignore[no-untyped-call]
-                    sa.column('place_id', type_=sa.Integer),
-                    'osm_type',
-                    sa.column('osm_id', type_=sa.BigInteger),
-                    sa.column('name', type_=conn.t.types.Composite),
-                    'class', 'type', 'place_type',
-                    sa.column('admin_level', type_=sa.Integer),
-                    sa.column('fromarea', type_=sa.Boolean),
-                    sa.column('isaddress', type_=sa.Boolean),
-                    sa.column('rank_address', type_=sa.SmallInteger),
-                    sa.column('distance', type_=sa.Float),
-                    joins_implicitly=True)
-
-    sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
-            .order_by(values.c.place_id,
-                      sa.column('rank_address').desc(),
-                      sa.column('isaddress').desc())
+    ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
+             .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
+
+    t = conn.t.placex
+    taddr = conn.t.addressline
+
+    sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
+                    t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+                    t.c.class_, t.c.type, t.c.extratags,
+                    t.c.admin_level, taddr.c.fromarea,
+                    sa.case((t.c.rank_address == 11, 5),
+                            else_=t.c.rank_address).label('rank_address'),
+                    taddr.c.distance, t.c.country_code, t.c.postcode)\
+            .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
+                                taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
+            .join(t, taddr.c.address_place_id == t.c.place_id)\
+            .order_by('src_place_id')\
+            .order_by(sa.column('rank_address').desc())\
+            .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
+            .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
+                              (taddr.c.isaddress, 0),
+                              (sa.and_(taddr.c.fromarea,
+                                       t.c.geometry.ST_Contains(
+                                           sa.func.ST_GeomFromEWKT(
+                                               ltab.c.value['c'].as_string()))), 1),
+                              else_=-1).desc())\
+            .order_by(taddr.c.fromarea.desc())\
+            .order_by(taddr.c.distance.desc())\
+            .order_by(t.c.rank_search.desc())
+
 
     current_result = None
+    current_rank_address = -1
     for row in await conn.execute(sql):
-        if current_result is None or row.result_place_id != current_result.place_id:
-            for result in results:
-                if result.place_id == row.result_place_id:
-                    current_result = result
-                    break
+        if current_result is None or row.src_place_id != current_result.place_id:
+            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
+            assert current_result is not None
+            current_rank_address = -1
+
+        location_isaddress = row.rank_address != current_rank_address
+
+        if current_result.country_code is None and row.country_code:
+            current_result.country_code = row.country_code
+
+        if row.type in ('postcode', 'postal_code') and location_isaddress:
+            if not row.fromarea or \
+               (current_result.address and 'postcode' in current_result.address):
+                location_isaddress = False
             else:
-                assert False
-            current_result.address_rows = AddressLines()
-        current_result.address_rows.append(_result_row_to_address_row(row))
+                current_result.postcode = None
+
+        assert current_result.address_rows is not None
+        current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
+        current_rank_address = row.rank_address
+
+    for result in results:
+        await _finalize_entry(conn, result)
+
+
+    ### Finally add the record for the parent entry where necessary.
+
+    parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
+    if parent_lookup_ids:
+        ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
+                 .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
+        sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
+                        t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+                        t.c.class_, t.c.type, t.c.extratags,
+                        t.c.admin_level,
+                        t.c.rank_address)\
+                 .where(t.c.place_id == ltab.c.value['lid'].as_integer())
+
+        for row in await conn.execute(sql):
+            current_result = next((r for r in results if r.place_id == row.src_place_id), None)
+            assert current_result is not None
+            assert current_result.address_rows is not None
+
+            current_result.address_rows.append(AddressLine(
+                    place_id=row.place_id,
+                    osm_object=(row.osm_type, row.osm_id),
+                    category=(row.class_, row.type),
+                    names=row.name, extratags=row.extratags or {},
+                    admin_level=row.admin_level,
+                    fromarea=True, isaddress=True,
+                    rank_address=row.rank_address, distance=0.0))
+
+    ### Now sort everything
+    for result in results:
+        assert result.address_rows is not None
+        result.address_rows.sort(key=lambda a: (-a.rank_address, a.isaddress))
 
 
-# pylint: disable=consider-using-f-string
 def _placex_select_address_row(conn: SearchConnection,
                                centroid: Point) -> SaSelect:
     t = conn.t.placex
     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                      t.c.class_.label('class'), t.c.type,
                      t.c.admin_level, t.c.housenumber,
-                     sa.literal_column("""ST_GeometryType(geometry) in
-                                        ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
+                     t.c.geometry.is_area().label('fromarea'),
                      t.c.rank_address,
-                     sa.literal_column(
-                         """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
-                              'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
-                         """ % centroid).label('distance'))
+                     t.c.geometry.distance_spheroid(
+                       sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
 
 
 async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
@@ -521,10 +724,10 @@ async def complete_keywords(conn: SearchConnection, result: BaseResult) -> None:
     sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
 
     for name_tokens, address_tokens in await conn.execute(sql):
-        for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
+        for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
             result.name_keywords.append(WordInfo(*row))
 
-        for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))):
+        for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
             result.address_keywords.append(WordInfo(*row))
 
 
index 00605d45b3841ad5742e25287bb2d956fd1f10ad..fb4c0b23d0f2fd4790d942b31508126f39a2d379 100644 (file)
@@ -7,11 +7,13 @@
 """
 Implementation of reverse geocoding.
 """
-from typing import Optional, List, Callable, Type, Tuple, Dict, Any
+from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
+import functools
 
 import sqlalchemy as sa
 
-from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind
+from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
+                             SaBind, SaLambdaSelect
 from nominatim.api.connection import SearchConnection
 import nominatim.api.results as nres
 from nominatim.api.logging import log
@@ -27,6 +29,13 @@ RowFunc = Callable[[Optional[SaRow], Type[nres.ReverseResult]], Optional[nres.Re
 WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
 MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
 
+def no_index(expr: SaColumn) -> SaColumn:
+    """ Wrap the given expression, so that the query planner will
+        refrain from using the expression for index lookup.
+    """
+    return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
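Wrapping a column in coalesce(NULL, ...) leaves its value unchanged but makes the expression opaque to the planner, so it cannot be matched against an index. A small sketch of the effect on the generated SQL (illustrative table, not the real schema; printed output is approximate):

    import sqlalchemy as sa

    t = sa.table('placex', sa.column('rank_address', sa.Integer))

    print(t.c.rank_address == 0)
    # placex.rank_address = :rank_address_1               (index-sargable)
    print(sa.func.coalesce(sa.null(), t.c.rank_address) == 0)
    # coalesce(NULL, placex.rank_address) = :coalesce_1   (not sargable)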
 def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
     """ Create a select statement with the columns relevant for reverse
         results.
@@ -47,6 +56,7 @@ def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
                      t.c.importance, t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
                      centroid,
+                     t.c.linked_place_id, t.c.admin_level,
                      distance.label('distance'),
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
@@ -74,12 +84,6 @@ def _locate_interpolation(table: SaFromClause) -> SaLabel:
                    else_=0).label('position')
 
 
-def _is_address_point(table: SaFromClause) -> SaColumn:
-    return sa.and_(table.c.rank_address == 30,
-                   sa.or_(table.c.housenumber != None,
-                          table.c.name.has_key('housename')))
-
-
 def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
     return min(rows, key=lambda row: 1000 if row is None else row.distance)
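Rows may be None when a sub-lookup found nothing; the key maps None to distance 1000, far beyond any plausible value in degrees, so a real row always wins. For example:

    from types import SimpleNamespace

    row_a = SimpleNamespace(distance=0.004)
    row_b = SimpleNamespace(distance=0.0015)

    best = min((None, row_a, row_b),
               key=lambda row: 1000 if row is None else row.distance)
    assert best is row_b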
 
@@ -89,9 +93,11 @@ class ReverseGeocoder:
         coordinate.
     """
 
-    def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None:
+    def __init__(self, conn: SearchConnection, params: ReverseDetails,
+                 restrict_to_country_areas: bool = False) -> None:
         self.conn = conn
         self.params = params
+        self.restrict_to_country_areas = restrict_to_country_areas
 
         self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
 
@@ -127,23 +133,20 @@ class ReverseGeocoder:
         return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
 
 
-    def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect:
-        if not self.has_geometries():
-            return sql
-
+    def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
         out = []
 
         if self.params.geometry_simplification > 0.0:
             col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
 
         if self.params.geometry_output & GeometryFormat.GEOJSON:
-            out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+            out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
         if self.params.geometry_output & GeometryFormat.TEXT:
             out.append(sa.func.ST_AsText(col).label('geometry_text'))
         if self.params.geometry_output & GeometryFormat.KML:
-            out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+            out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
         if self.params.geometry_output & GeometryFormat.SVG:
-            out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+            out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
 
         return sql.add_columns(*out)
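The added numeric arguments to ST_AsGeoJSON, ST_AsKML and ST_AsSVG limit output to 7 decimal digits (roughly centimetre precision), keeping the geometry payload compact. GeometryFormat is a bit flag, so several formats can be requested at once; a minimal stand-in showing the flag checks:

    import enum

    class Fmt(enum.Flag):            # stand-in for GeometryFormat
        GEOJSON = enum.auto()
        TEXT = enum.auto()
        KML = enum.auto()
        SVG = enum.auto()

    requested = Fmt.GEOJSON | Fmt.SVG
    assert requested & Fmt.GEOJSON   # add geometry_geojson column
    assert not requested & Fmt.KML   # skip geometry_kml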
 
@@ -171,31 +174,37 @@ class ReverseGeocoder:
         """
         t = self.conn.t.placex
 
-        sql = _select_from_placex(t)\
-                .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\
-                .where(t.c.indexed_status == 0)\
-                .where(t.c.linked_place_id == None)\
+        # PostgreSQL must not get the distance as a parameter because
+        # there is a danger it won't be able to properly estimate index use
+        # when used with prepared statements
+        diststr = sa.text(f"{distance}")
+
+        sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
+                .where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
+                .where(t.c.indexed_status == 0)
+                .where(t.c.linked_place_id == None)
                 .where(sa.or_(sa.not_(t.c.geometry.is_area()),
-                              t.c.centroid.ST_Distance(WKT_PARAM) < distance))\
-                .order_by('distance')\
-                .limit(1)
+                              t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
+                .order_by('distance')
+                .limit(1))
 
-        sql = self._add_geometry_columns(sql, t.c.geometry)
+        if self.has_geometries():
+            sql = self._add_geometry_columns(sql, t.c.geometry)
 
-        restrict: List[SaColumn] = []
+        restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
 
         if self.layer_enabled(DataLayer.ADDRESS):
-            restrict.append(sa.and_(t.c.rank_address >= 26,
-                                    t.c.rank_address <= min(29, self.max_rank)))
+            max_rank = min(29, self.max_rank)
+            restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
             if self.max_rank == 30:
-                restrict.append(_is_address_point(t))
+                restrict.append(lambda: sa.func.IsAddressPoint(t))
         if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
-            restrict.append(sa.and_(t.c.rank_search == 30,
-                                    t.c.class_.not_in(('place', 'building')),
-                                    sa.not_(t.c.geometry.is_line_like())))
+            restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
+                                            t.c.class_.not_in(('place', 'building')),
+                                            sa.not_(t.c.geometry.is_line_like())))
         if self.has_feature_layers():
-            restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM),
-                                    t.c.rank_address == 0,
+            restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
+                                    no_index(t.c.rank_address) == 0,
                                     self._filter_by_layer(t)))
 
         if not restrict:
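sa.lambda_stmt caches the construction of the Core statement: the lambda body is analysed once and later invocations only rebind parameter values, which is also why the restriction clauses above are supplied as callables. The basic pattern, reduced to an illustrative table:

    import sqlalchemy as sa

    t = sa.table('placex', sa.column('place_id'), sa.column('rank_address'))

    # Built once, cached; subsequent calls only swap bind values.
    stmt = sa.lambda_stmt(lambda: sa.select(t)
                          .where(t.c.rank_address <= sa.bindparam('max_rank')))

    # Lambda statements can be extended incrementally, too.
    stmt += lambda s: s.order_by(t.c.place_id)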
@@ -209,16 +218,17 @@ class ReverseGeocoder:
     async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
         t = self.conn.t.placex
 
-        sql = _select_from_placex(t)\
-                .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\
-                .where(t.c.parent_place_id == parent_place_id)\
-                .where(_is_address_point(t))\
-                .where(t.c.indexed_status == 0)\
-                .where(t.c.linked_place_id == None)\
-                .order_by('distance')\
-                .limit(1)
+        sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
+                .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
+                .where(t.c.parent_place_id == parent_place_id)
+                .where(sa.func.IsAddressPoint(t))
+                .where(t.c.indexed_status == 0)
+                .where(t.c.linked_place_id == None)
+                .order_by('distance')
+                .limit(1))
 
-        sql = self._add_geometry_columns(sql, t.c.geometry)
+        if self.has_geometries():
+            sql = self._add_geometry_columns(sql, t.c.geometry)
 
         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
@@ -227,25 +237,29 @@ class ReverseGeocoder:
                                              distance: float) -> Optional[SaRow]:
         t = self.conn.t.osmline
 
-        sql = sa.select(t,
-                        t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
-                        _locate_interpolation(t))\
-                .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\
-                .where(t.c.startnumber != None)\
-                .order_by('distance')\
-                .limit(1)
+        sql: Any = sa.lambda_stmt(lambda:
+                   sa.select(t,
+                             t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
+                             _locate_interpolation(t))
+                     .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
+                     .where(t.c.startnumber != None)
+                     .order_by('distance')
+                     .limit(1))
 
         if parent_place_id is not None:
-            sql = sql.where(t.c.parent_place_id == parent_place_id)
+            sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
 
-        inner = sql.subquery('ipol')
+        def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
+            inner = base_sql.subquery('ipol')
 
-        sql = sa.select(inner.c.place_id, inner.c.osm_id,
-                        inner.c.parent_place_id, inner.c.address,
-                        _interpolated_housenumber(inner),
-                        _interpolated_position(inner),
-                        inner.c.postcode, inner.c.country_code,
-                        inner.c.distance)
+            return sa.select(inner.c.place_id, inner.c.osm_id,
+                             inner.c.parent_place_id, inner.c.address,
+                             _interpolated_housenumber(inner),
+                             _interpolated_position(inner),
+                             inner.c.postcode, inner.c.country_code,
+                             inner.c.distance)
+
+        sql += _wrap_query
 
         if self.has_geometries():
             sub = sql.subquery('geom')
@@ -254,28 +268,27 @@ class ReverseGeocoder:
         return (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
 
-    async def _find_tiger_number_for_street(self, parent_place_id: int,
-                                            parent_type: str,
-                                            parent_id: int) -> Optional[SaRow]:
+    async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
         t = self.conn.t.tiger
 
-        inner = sa.select(t,
-                          t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
-                          _locate_interpolation(t))\
-                  .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
-                  .where(t.c.parent_place_id == parent_place_id)\
-                  .order_by('distance')\
-                  .limit(1)\
-                  .subquery('tiger')
-
-        sql = sa.select(inner.c.place_id,
-                        inner.c.parent_place_id,
-                        sa.literal(parent_type).label('osm_type'),
-                        sa.literal(parent_id).label('osm_id'),
-                        _interpolated_housenumber(inner),
-                        _interpolated_position(inner),
-                        inner.c.postcode,
-                        inner.c.distance)
+        def _base_query() -> SaSelect:
+            inner = sa.select(t,
+                              t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
+                              _locate_interpolation(t))\
+                      .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
+                      .where(t.c.parent_place_id == parent_place_id)\
+                      .order_by('distance')\
+                      .limit(1)\
+                      .subquery('tiger')
+
+            return sa.select(inner.c.place_id,
+                             inner.c.parent_place_id,
+                             _interpolated_housenumber(inner),
+                             _interpolated_position(inner),
+                             inner.c.postcode,
+                             inner.c.distance)
+
+        sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
 
         if self.has_geometries():
             sub = sql.subquery('geom')
@@ -313,14 +326,15 @@ class ReverseGeocoder:
                     distance = addr_row.distance
                 elif row.country_code == 'us' and parent_place_id is not None:
                     log().comment('Find TIGER housenumber for street')
-                    addr_row = await self._find_tiger_number_for_street(parent_place_id,
-                                                                        row.osm_type,
-                                                                        row.osm_id)
+                    addr_row = await self._find_tiger_number_for_street(parent_place_id)
                     log().var_dump('Result (street Tiger housenumber)', addr_row)
 
                     if addr_row is not None:
+                        row_func = cast(RowFunc,
+                                        functools.partial(nres.create_from_tiger_row,
+                                                          osm_type=row.osm_type,
+                                                          osm_id=row.osm_id))
                         row = addr_row
-                        row_func = nres.create_from_tiger_row
             else:
                 distance = row.distance
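Since the TIGER query no longer selects the parent's OSM reference, it is pre-bound into the row factory with functools.partial, preserving the RowFunc signature of (row, result_type). The pattern in isolation:

    import functools
    from types import SimpleNamespace

    def create_row(row, result_type, osm_type=None, osm_id=None):
        # simplified stand-in for nres.create_from_tiger_row
        return result_type(row=row, osm_object=(osm_type, osm_id))

    row_func = functools.partial(create_row, osm_type='W', osm_id=1234)
    result = row_func({'housenumber': 42}, SimpleNamespace)
    assert result.osm_object == ('W', 1234)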
 
@@ -344,59 +358,58 @@ class ReverseGeocoder:
         log().comment('Reverse lookup by larger address area features')
         t = self.conn.t.placex
 
-        # The inner SQL brings results in the right order, so that
-        # later only a minimum of results needs to be checked with ST_Contains.
-        inner = sa.select(t, sa.literal(0.0).label('distance'))\
-                  .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
-                  .where(t.c.rank_address.between(5, 25))\
-                  .where(t.c.geometry.is_area())\
-                  .where(t.c.geometry.intersects(WKT_PARAM))\
-                  .where(t.c.name != None)\
-                  .where(t.c.indexed_status == 0)\
-                  .where(t.c.linked_place_id == None)\
-                  .where(t.c.type != 'postcode')\
-                  .order_by(sa.desc(t.c.rank_search))\
-                  .limit(50)\
-                  .subquery('area')
+        def _base_query() -> SaSelect:
+            # The inner SQL returns candidates in the right order, so that only
+            # a minimal number of rows needs to be checked with ST_Contains afterwards.
+            inner = sa.select(t, sa.literal(0.0).label('distance'))\
+                      .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
+                      .where(t.c.geometry.intersects(WKT_PARAM))\
+                      .where(sa.func.PlacexGeometryReverseLookuppolygon())\
+                      .order_by(sa.desc(t.c.rank_search))\
+                      .limit(50)\
+                      .subquery('area')
 
-        sql = _select_from_placex(inner, False)\
-                  .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
-                  .order_by(sa.desc(inner.c.rank_search))\
-                  .limit(1)
+            return _select_from_placex(inner, False)\
+                      .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
+                      .order_by(sa.desc(inner.c.rank_search))\
+                      .limit(1)
 
-        sql = self._add_geometry_columns(sql, inner.c.geometry)
+        sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
+        if self.has_geometries():
+            sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
 
         address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
         log().var_dump('Result (area)', address_row)
 
         if address_row is not None and address_row.rank_search < self.max_rank:
             log().comment('Search for better matching place nodes inside the area')
-            inner = sa.select(t,
+
+            address_rank = address_row.rank_search
+            address_id = address_row.place_id
+
+            def _place_inside_area_query() -> SaSelect:
+                inner = \
+                    sa.select(t,
                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
-                      .where(t.c.osm_type == 'N')\
-                      .where(t.c.rank_search > address_row.rank_search)\
+                      .where(t.c.rank_search > address_rank)\
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
-                      .where(t.c.rank_address.between(5, 25))\
-                      .where(t.c.name != None)\
                       .where(t.c.indexed_status == 0)\
-                      .where(t.c.linked_place_id == None)\
-                      .where(t.c.type != 'postcode')\
-                      .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                       .order_by(sa.desc(t.c.rank_search))\
                       .limit(50)\
                       .subquery('places')
 
-            touter = self.conn.t.placex.alias('outer')
-            sql = _select_from_placex(inner, False)\
-                  .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
-                  .where(touter.c.place_id == address_row.place_id)\
-                  .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
-                  .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
-                  .limit(1)
+                touter = t.alias('outer')
+                return _select_from_placex(inner, False)\
+                    .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
+                    .where(touter.c.place_id == address_id)\
+                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
+                    .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
+                    .limit(1)
 
-            sql = self._add_geometry_columns(sql, inner.c.geometry)
+            sql = sa.lambda_stmt(_place_inside_area_query)
+            if self.has_geometries():
+                sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
 
             place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
             log().var_dump('Result (place node)', place_address_row)
@@ -417,10 +430,9 @@ class ReverseGeocoder:
                   .where(t.c.indexed_status == 0)\
                   .where(t.c.linked_place_id == None)\
                   .where(self._filter_by_layer(t))\
-                  .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                  .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
                   .order_by(sa.desc(t.c.rank_search))\
+                  .order_by('distance')\
                   .limit(50)\
                   .subquery()
 
@@ -430,7 +442,8 @@ class ReverseGeocoder:
                   .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
                   .limit(1)
 
-        sql = self._add_geometry_columns(sql, inner.c.geometry)
+        if self.has_geometries():
+            sql = self._add_geometry_columns(sql, inner.c.geometry)
 
         row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
         log().var_dump('Result (non-address feature)', row)
@@ -456,7 +469,7 @@ class ReverseGeocoder:
         return _get_closest(address_row, other_row)
 
 
-    async def lookup_country(self) -> Optional[SaRow]:
+    async def lookup_country_codes(self) -> List[str]:
         """ Lookup the country for the current search.
         """
         log().section('Reverse lookup by country code')
@@ -464,8 +477,16 @@ class ReverseGeocoder:
         sql = sa.select(t.c.country_code).distinct()\
                 .where(t.c.geometry.ST_Contains(WKT_PARAM))
 
-        ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
+        ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
         log().var_dump('Country codes', ccodes)
+        return ccodes
+
+
+    async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
+        """ Lookup the country for the current search.
+        """
+        if not ccodes:
+            ccodes = await self.lookup_country_codes()
 
         if not ccodes:
             return None
@@ -474,30 +495,27 @@ class ReverseGeocoder:
         if self.max_rank > 4:
             log().comment('Search for place nodes in country')
 
-            inner = sa.select(t,
+            def _base_query() -> SaSelect:
+                inner = \
+                    sa.select(t,
                               t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
-                      .where(t.c.osm_type == 'N')\
                       .where(t.c.rank_search > 4)\
                       .where(t.c.rank_search <= MAX_RANK_PARAM)\
-                      .where(t.c.rank_address.between(5, 25))\
-                      .where(t.c.name != None)\
                       .where(t.c.indexed_status == 0)\
-                      .where(t.c.linked_place_id == None)\
-                      .where(t.c.type != 'postcode')\
                       .where(t.c.country_code.in_(ccodes))\
-                      .where(t.c.geometry
-                                .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
-                                .intersects(WKT_PARAM))\
+                      .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
                       .order_by(sa.desc(t.c.rank_search))\
                       .limit(50)\
-                      .subquery()
+                      .subquery('area')
 
-            sql = _select_from_placex(inner, False)\
-                  .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
-                  .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
-                  .limit(1)
+                return _select_from_placex(inner, False)\
+                    .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
+                    .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
+                    .limit(1)
 
-            sql = self._add_geometry_columns(sql, inner.c.geometry)
+            sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
+            if self.has_geometries():
+                sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
 
             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
             log().var_dump('Result (addressable place node)', address_row)
@@ -506,15 +524,16 @@ class ReverseGeocoder:
 
         if address_row is None:
             # Still nothing, then return a country with the appropriate country code.
-            sql = _select_from_placex(t)\
+            sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
                       .where(t.c.country_code.in_(ccodes))\
                       .where(t.c.rank_address == 4)\
                       .where(t.c.rank_search == 4)\
                       .where(t.c.linked_place_id == None)\
                       .order_by('distance')\
-                      .limit(1)
+                      .limit(1))
 
-            sql = self._add_geometry_columns(sql, t.c.geometry)
+            if self.has_geometries():
+                sql = self._add_geometry_columns(sql, t.c.geometry)
 
             address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
 
@@ -537,10 +556,19 @@ class ReverseGeocoder:
             row, tmp_row_func = await self.lookup_street_poi()
             if row is not None:
                 row_func = tmp_row_func
-        if row is None and self.max_rank > 4:
-            row = await self.lookup_area()
-        if row is None and self.layer_enabled(DataLayer.ADDRESS):
-            row = await self.lookup_country()
+
+        if row is None:
+            if self.restrict_to_country_areas:
+                ccodes = await self.lookup_country_codes()
+                if not ccodes:
+                    return None
+            else:
+                ccodes = []
+
+            if self.max_rank > 4:
+                row = await self.lookup_area()
+            if row is None and self.layer_enabled(DataLayer.ADDRESS):
+                row = await self.lookup_country(ccodes)
 
         result = row_func(row, nres.ReverseResult)
         if result is not None:
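With restrict_to_country_areas enabled, the country codes under the point are resolved up front and an empty result short-circuits the whole fallback; otherwise the codes are computed lazily inside lookup_country as before. The fallback order, sketched (not the literal method body):

    async def lookup_fallback(self):
        # sketch of the cascade after street/POI lookup failed
        ccodes = []
        if self.restrict_to_country_areas:
            ccodes = await self.lookup_country_codes()
            if not ccodes:
                return None       # point lies outside all country areas

        row = await self.lookup_area() if self.max_rank > 4 else None
        if row is None and self.layer_enabled(DataLayer.ADDRESS):
            row = await self.lookup_country(ccodes)
        return row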
index 9ff8c03c90c3d6ef4b7f1ff1c038e24bdb165171..c755f2a74f8a16e2d53ca30503549040685d0046 100644 (file)
@@ -7,7 +7,7 @@
 """
 Conversion from token assignment to an abstract DB search.
 """
-from typing import Optional, List, Tuple, Iterator
+from typing import Optional, List, Tuple, Iterator, Dict
 import heapq
 
 from nominatim.api.types import SearchDetails, DataLayer
@@ -15,7 +15,6 @@ from nominatim.api.search.query import QueryStruct, Token, TokenType, TokenRange
 from nominatim.api.search.token_assignment import TokenAssignment
 import nominatim.api.search.db_search_fields as dbf
 import nominatim.api.search.db_searches as dbs
-from nominatim.api.logging import log
 
 
 def wrap_near_search(categories: List[Tuple[str, str]],
@@ -90,12 +89,14 @@ class SearchBuilder:
         if sdata is None:
             return
 
-        categories = self.get_search_categories(assignment)
+        near_items = self.get_near_items(assignment)
+        if near_items is not None and not near_items:
+            return # impossible combination of near items and category parameter
 
         if assignment.name is None:
-            if categories and not sdata.postcodes:
-                sdata.qualifiers = categories
-                categories = None
+            if near_items and not sdata.postcodes:
+                sdata.qualifiers = near_items
+                near_items = None
                 builder = self.build_poi_search(sdata)
             elif assignment.housenumber:
                 hnr_tokens = self.query.get_tokens(assignment.housenumber,
@@ -103,18 +104,23 @@ class SearchBuilder:
                 builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
             else:
                 builder = self.build_special_search(sdata, assignment.address,
-                                                    bool(categories))
+                                                    bool(near_items))
         else:
             builder = self.build_name_search(sdata, assignment.name, assignment.address,
-                                             bool(categories))
+                                             bool(near_items))
 
-        if categories:
-            penalty = min(categories.penalties)
-            categories.penalties = [p - penalty for p in categories.penalties]
+        if near_items:
+            penalty = min(near_items.penalties)
+            near_items.penalties = [p - penalty for p in near_items.penalties]
             for search in builder:
-                yield dbs.NearSearch(penalty, categories, search)
+                search_penalty = search.penalty
+                search.penalty = 0.0
+                yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
+                                     near_items, search)
         else:
-            yield from builder
+            for search in builder:
+                search.penalty += assignment.penalty
+                yield search
 
 
     def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
@@ -156,14 +162,27 @@ class SearchBuilder:
         """ Build a simple address search for special entries where the
             housenumber is the main name token.
         """
-        partial_tokens: List[int] = []
-        for trange in address:
-            partial_tokens.extend(t.token for t in self.query.get_partials_list(trange))
+        sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
+        expected_count = sum(t.count for t in hnrs)
+
+        partials = [t for trange in address
+                       for t in self.query.get_partials_list(trange)]
+
+        if expected_count < 8000:
+            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
+                                                 [t.token for t in partials], 'restrict'))
+        elif len(partials) != 1 or partials[0].count < 10000:
+            sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
+                                                 [t.token for t in partials], 'lookup_all'))
+        else:
+            sdata.lookups.append(
+                dbf.FieldLookup('nameaddress_vector',
+                                [t.token for t
+                                 in self.query.get_tokens(address[0], TokenType.WORD)],
+                                'lookup_any'))
 
-        sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any'),
-                         dbf.FieldLookup('nameaddress_vector', partial_tokens, 'lookup_all')
-                        ]
-        yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
+        sdata.housenumbers = dbf.WeightedStrings([], [])
+        yield dbs.PlaceSearch(0.05, sdata, expected_count)
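The thresholds pick the cheapest index strategy for the address terms: 'restrict' when few housenumber matches are expected, 'lookup_all' while a lone address partial is still selective, and full-word tokens with 'lookup_any' otherwise. The branch logic as a hypothetical standalone helper:

    def address_lookup_strategy(expected_count, partial_counts):
        # mirrors the branches of build_housenumber_search above
        if expected_count < 8000:
            return 'restrict'      # few housenumbers: cheap filter
        if len(partial_counts) != 1 or partial_counts[0] < 10000:
            return 'lookup_all'    # address partials still selective
        return 'lookup_any'        # fall back to full-word tokens

    assert address_lookup_strategy(500, [20000]) == 'restrict'
    assert address_lookup_strategy(9000, [50000]) == 'lookup_any'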
 
 
     def build_name_search(self, sdata: dbf.SearchData,
@@ -187,65 +206,44 @@ class SearchBuilder:
             be searched for. This takes into account how frequent the terms
             are and tries to find a lookup that optimizes index use.
         """
-        penalty = 0.0 # extra penalty currently unused
-
+        penalty = 0.0 # extra penalty
         name_partials = self.query.get_partials_list(name)
-        exp_name_count = min(t.count for t in name_partials)
-        addr_partials = []
-        for trange in address:
-            addr_partials.extend(self.query.get_partials_list(trange))
+        name_tokens = [t.token for t in name_partials]
+
+        addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
         addr_tokens = [t.token for t in addr_partials]
+
         partials_indexed = all(t.is_indexed for t in name_partials) \
                            and all(t.is_indexed for t in addr_partials)
+        exp_count = min(t.count for t in name_partials) / (2**(len(name_partials) - 1))
 
-        if (len(name_partials) > 3 or exp_name_count < 1000) and partials_indexed:
-            # Lookup by name partials, use address partials to restrict results.
-            lookup = [dbf.FieldLookup('name_vector',
-                                  [t.token for t in name_partials], 'lookup_all')]
-            if addr_tokens:
-                lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
-            yield penalty, exp_name_count, lookup
-            return
-
-        exp_addr_count = min(t.count for t in addr_partials) if addr_partials else exp_name_count
-        if exp_addr_count < 1000 and partials_indexed:
-            # Lookup by address partials and restrict results through name terms.
-            yield penalty, exp_addr_count,\
-                  [dbf.FieldLookup('name_vector', [t.token for t in name_partials], 'restrict'),
-                   dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
+        if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
+            yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
             return
 
         # Partial term too frequent. Try looking up by rare full names first.
         name_fulls = self.query.get_tokens(name, TokenType.WORD)
-        rare_names = list(filter(lambda t: t.count < 1000, name_fulls))
-        # At this point drop unindexed partials from the address.
-        # This might yield wrong results, nothing we can do about that.
-        if not partials_indexed:
-            addr_tokens = [t.token for t in addr_partials if t.is_indexed]
-            log().var_dump('before', penalty)
-            penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
-            log().var_dump('after', penalty)
-        if rare_names:
+        if name_fulls:
+            fulls_count = sum(t.count for t in name_fulls)
+            # At this point drop unindexed partials from the address.
+            # This might yield wrong results; nothing we can do about that.
+            if not partials_indexed:
+                addr_tokens = [t.token for t in addr_partials if t.is_indexed]
+                penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
             # Any one of the full names may match, combined with all address partials
-            lookup = [dbf.FieldLookup('name_vector', [t.token for t in rare_names], 'lookup_any')]
-            if addr_tokens:
-                lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
-            yield penalty, sum(t.count for t in rare_names), lookup
+            yield penalty, fulls_count / (2**len(addr_partials)),\
+                  dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
+                                         'restrict' if fulls_count < 10000 else 'lookup_all')
 
         # To catch remaining results, lookup by name and address
-        if all(t.is_indexed for t in name_partials):
-            lookup = [dbf.FieldLookup('name_vector',
-                                      [t.token for t in name_partials], 'lookup_all')]
-        else:
-            # we don't have the partials, try with the non-rare names
-            non_rare_names = [t.token for t in name_fulls if t.count >= 1000]
-            if not non_rare_names:
-                return
-            lookup = [dbf.FieldLookup('name_vector', non_rare_names, 'lookup_any')]
-        if addr_tokens:
-            lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
-        yield penalty + 0.1 * max(0, 5 - len(name_partials) - len(addr_tokens)),\
-              min(exp_name_count, exp_addr_count), lookup
+        # We only do this if a reasonable number of results is expected.
+        exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
+        if exp_count < 10000 and all(t.is_indexed for t in name_partials):
+            lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
+            if addr_tokens:
+                lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
+            penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
+            yield penalty, exp_count, lookup
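The expected-count heuristic halves the rarest partial's frequency once per additional term, assuming every extra name or address partial roughly halves the candidate set. A worked example:

    # three name partials, the rarest occurring 12000 times:
    exp_count = 12000 / (2 ** (3 - 1))   # -> 3000.0

    # two address partials halve it twice more:
    exp_count = exp_count / (2 ** 2)     # -> 750.0  (< 10000, so the
                                         #    final name+address lookup runs)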
 
 
     def get_name_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
@@ -333,8 +331,15 @@ class SearchBuilder:
                               self.query.get_tokens(assignment.postcode,
                                                     TokenType.POSTCODE))
         if assignment.qualifier:
-            sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
-                                                       TokenType.QUALIFIER))
+            tokens = self.query.get_tokens(assignment.qualifier, TokenType.QUALIFIER)
+            if self.details.categories:
+                tokens = [t for t in tokens if t.get_category() in self.details.categories]
+                if not tokens:
+                    return None
+            sdata.set_qualifiers(tokens)
+        elif self.details.categories:
+            sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
+                                                      [0.0] * len(self.details.categories))
 
         if assignment.address:
             sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
@@ -344,23 +349,22 @@ class SearchBuilder:
         return sdata
 
 
-    def get_search_categories(self,
-                              assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
-        """ Collect tokens for category search or use the categories
+    def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
+        """ Collect tokens for near items search or use the categories
             requested per parameter.
             Returns None if no category search is requested.
         """
-        if assignment.category:
-            tokens = [t for t in self.query.get_tokens(assignment.category,
-                                                       TokenType.CATEGORY)
-                      if not self.details.categories
-                         or t.get_category() in self.details.categories]
-            return dbf.WeightedCategories([t.get_category() for t in tokens],
-                                          [t.penalty for t in tokens])
-
-        if self.details.categories:
-            return dbf.WeightedCategories(self.details.categories,
-                                          [0.0] * len(self.details.categories))
+        if assignment.near_item:
+            tokens: Dict[Tuple[str, str], float] = {}
+            for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
+                cat = t.get_category()
+                # The category of a near search will be that of near_item.
+                # Thus, if the search is restricted by a category parameter,
+                # the two sets must intersect.
+                if (not self.details.categories or cat in self.details.categories)\
+                   and t.penalty < tokens.get(cat, 1000.0):
+                    tokens[cat] = t.penalty
+            return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
 
         return None
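Tokens mapping to the same (class, type) pair are collapsed to the lowest penalty seen, so each category enters the WeightedCategories exactly once. The dictionary pattern on its own:

    tokens = [(('amenity', 'cafe'), 0.3), (('amenity', 'cafe'), 0.1),
              (('shop', 'bakery'), 0.2)]

    best = {}
    for cat, penalty in tokens:
        if penalty < best.get(cat, 1000.0):  # keep the cheapest per category
            best[cat] = penalty

    assert best == {('amenity', 'cafe'): 0.1, ('shop', 'bakery'): 0.2}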
 
index 13f1c56eb09493261a10ece0e728c9e7cf0f252d..59af826086db86027f2c808dee51824fb17e72ff 100644 (file)
@@ -7,7 +7,7 @@
 """
 Data structures for more complex fields in abstract search descriptions.
 """
-from typing import List, Tuple, Iterator, cast
+from typing import List, Tuple, Iterator, cast, Dict
 import dataclasses
 
 import sqlalchemy as sa
@@ -195,10 +195,17 @@ class SearchData:
         """ Set the qulaifier field from the given tokens.
         """
         if tokens:
-            min_penalty = min(t.penalty for t in tokens)
+            categories: Dict[Tuple[str, str], float] = {}
+            min_penalty = 1000.0
+            for t in tokens:
+                if t.penalty < min_penalty:
+                    min_penalty = t.penalty
+                cat = t.get_category()
+                if t.penalty < categories.get(cat, 1000.0):
+                    categories[cat] = t.penalty
             self.penalty += min_penalty
-            self.qualifiers = WeightedCategories([t.get_category() for t in tokens],
-                                                 [t.penalty - min_penalty for t in tokens])
+            self.qualifiers = WeightedCategories(list(categories.keys()),
+                                                 list(categories.values()))
 
 
     def set_ranking(self, rankings: List[FieldRanking]) -> None:
@@ -211,3 +218,35 @@ class SearchData:
                 self.rankings.append(ranking)
             else:
                 self.penalty += ranking.default
+
+
+def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
+    """ Create a lookup list where name tokens are looked up via index
+        and potential address tokens are used to restrict the search further.
+    """
+    lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
+    if addr_tokens:
+        lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
+
+    return lookup
+
+
+def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
+                       lookup_type: str) -> List[FieldLookup]:
+    """ Create a lookup list where name tokens are looked up via index
+        and only one of the name tokens must be present.
+        Potential address tokens are used to restrict the search further.
+    """
+    lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
+    if addr_tokens:
+        lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
+
+    return lookup
+
+
+def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
+    """ Create a lookup list where address tokens are looked up via index
+        and the name tokens are only used to restrict the search further.
+    """
+    return [FieldLookup('name_vector', name_tokens, 'restrict'),
+            FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
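Together the three helpers encode which index drives the query: lookup_by_names when the name terms are selective, lookup_by_any_name when only rare full names are, and lookup_by_addr when the address side is the cheaper entry point. Usage, assuming the helpers above are imported (token ids illustrative):

    lookup_by_names([101, 102], [201, 202])            # names lead, address restricts
    lookup_by_any_name([301], [201, 202], 'restrict')  # any one full name suffices
    lookup_by_addr([101, 102], [201, 202])             # address leads, names restrict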
index cea19c852836ac7490b33e0511cc574e86ef317a..232f816ef89609f050ea15e79f3651410222ef86 100644 (file)
@@ -7,14 +7,14 @@
 """
 Implementation of the actual database accesses for forward search.
 """
-from typing import List, Tuple, AsyncIterator, Dict, Any
+from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
 import abc
 
 import sqlalchemy as sa
 from sqlalchemy.dialects.postgresql import ARRAY, array_agg
 
 from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
-                             SaExpression, SaSelect, SaRow, SaBind
+                             SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
 from nominatim.api.connection import SearchConnection
 from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
 import nominatim.api.results as nres
@@ -24,6 +24,13 @@ from nominatim.db.sqlalchemy_types import Geometry
 #pylint: disable=singleton-comparison,not-callable
 #pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
 
+def no_index(expr: SaColumn) -> SaColumn:
+    """ Wrap the given expression, so that the query planner will
+        refrain from using the expression for index lookup.
+    """
+    return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
 def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
     """ Create a dictionary from search parameters that can be used
         as bind parameter for SQL execute.
@@ -46,37 +53,40 @@ VIEWBOX_PARAM: SaBind = sa.bindparam('viewbox', type_=Geometry)
 VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
 NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
 NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
-EXCLUDED_PARAM: SaBind = sa.bindparam('excluded')
 COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
 
+def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
+    return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
+
+def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
+    return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
+
 def _select_placex(t: SaFromClause) -> SaSelect:
     return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
                      t.c.class_, t.c.type,
                      t.c.address, t.c.extratags,
                      t.c.housenumber, t.c.postcode, t.c.country_code,
-                     t.c.importance, t.c.wikipedia,
+                     t.c.wikipedia,
                      t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
+                     t.c.linked_place_id, t.c.admin_level,
                      t.c.centroid,
                      t.c.geometry.ST_Expand(0).label('bbox'))
 
 
-def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
-    if not details.geometry_output:
-        return sql
-
+def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
     out = []
 
     if details.geometry_simplification > 0.0:
         col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
 
     if details.geometry_output & GeometryFormat.GEOJSON:
-        out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+        out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
     if details.geometry_output & GeometryFormat.TEXT:
         out.append(sa.func.ST_AsText(col).label('geometry_text'))
     if details.geometry_output & GeometryFormat.KML:
-        out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+        out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
     if details.geometry_output & GeometryFormat.SVG:
-        out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+        out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
 
     return sql.add_columns(*out)
 
@@ -96,7 +106,7 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
                   for n in numerals)))
 
     if details.excluded:
-        sql = sql.where(table.c.place_id.not_in(EXCLUDED_PARAM))
+        sql = sql.where(_exclude_places(table))
 
     return sql.scalar_subquery()
 
@@ -104,14 +114,14 @@ def _make_interpolation_subquery(table: SaFromClause, inner: SaFromClause,
 def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
     orexpr: List[SaExpression] = []
     if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
-        orexpr.append(table.c.rank_address.between(1, 30))
+        orexpr.append(no_index(table.c.rank_address).between(1, 30))
     elif layers & DataLayer.ADDRESS:
-        orexpr.append(table.c.rank_address.between(1, 29))
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(no_index(table.c.rank_address).between(1, 29))
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                               sa.or_(table.c.housenumber != None,
-                                     table.c.address.has_key('housename'))))
+                                     table.c.address.has_key('addr:housename'))))
     elif layers & DataLayer.POI:
-        orexpr.append(sa.and_(table.c.rank_address == 30,
+        orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
                               table.c.class_.not_in(('place', 'building'))))
 
     if layers & DataLayer.MANMADE:
@@ -121,7 +131,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
         if not layers & DataLayer.NATURAL:
             exclude.extend(('natural', 'water', 'waterway'))
         orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
     else:
         include = []
         if layers & DataLayer.RAILWAY:
@@ -129,7 +139,7 @@ def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
         if layers & DataLayer.NATURAL:
             include.extend(('natural', 'water', 'waterway'))
         orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
-                              table.c.rank_address == 0))
+                              no_index(table.c.rank_address) == 0))
 
     if len(orexpr) == 1:
         return orexpr[0]
@@ -148,9 +158,11 @@ async def _get_placex_housenumbers(conn: SearchConnection,
                                    place_ids: List[int],
                                    details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
     t = conn.t.placex
-    sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
+    sql = _select_placex(t).add_columns(t.c.importance)\
+                           .where(t.c.place_id.in_(place_ids))
 
-    sql = _add_geometry_columns(sql, t.c.geometry, details)
+    if details.geometry_output:
+        sql = _add_geometry_columns(sql, t.c.geometry, details)
 
     for row in await conn.execute(sql):
         result = nres.create_from_placex_row(row, nres.SearchResult)
@@ -244,9 +256,20 @@ class NearSearch(AbstractSearch):
 
         base.sort(key=lambda r: (r.accuracy, r.rank_search))
         max_accuracy = base[0].accuracy + 0.5
+        if base[0].rank_address == 0:
+            min_rank = 0
+            max_rank = 0
+        elif base[0].rank_address < 26:
+            min_rank = 1
+            max_rank = min(25, base[0].rank_address + 4)
+        else:
+            min_rank = 26
+            max_rank = 30
         base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
                                                      and r.accuracy <= max_accuracy
-                                                     and r.bbox and r.bbox.area < 20)
+                                                     and r.bbox and r.bbox.area < 20
+                                                     and r.rank_address >= min_rank
+                                                     and r.rank_address <= max_rank)
 
         if base:
             baseids = [b.place_id for b in base[:5] if b.place_id]
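The bracketing keeps near-search results at a specificity compatible with the anchor place: rank-0 anchors (non-address features) only pair with rank 0, area anchors allow up to four ranks more detail (capped at 25), and street-level anchors pair with street and POI ranks. The three branches checked in isolation:

    def rank_window(anchor_rank):
        # mirrors the min/max rank selection above
        if anchor_rank == 0:
            return 0, 0
        if anchor_rank < 26:
            return 1, min(25, anchor_rank + 4)
        return 26, 30

    assert rank_window(0) == (0, 0)     # country-level anchor
    assert rank_window(16) == (1, 20)   # city anchor
    assert rank_window(26) == (26, 30)  # street anchor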
@@ -268,33 +291,43 @@ class NearSearch(AbstractSearch):
         """
         table = await conn.get_class_table(*category)
 
-        t = conn.t.placex.alias('p')
         tgeom = conn.t.placex.alias('pgeom')
 
-        sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
-                               .where(t.c.class_ == category[0])\
-                               .where(t.c.type == category[1])
-
         if table is None:
             # No classtype table available; do a simplified lookup in placex.
-            sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
-                     .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
+            table = conn.t.placex.alias('inner')
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
+                    .where(table.c.class_ == category[0])\
+                    .where(table.c.type == category[1])
         else:
             # Use classtype table. We can afford to use a larger
             # radius for the lookup.
-            sql = sql.join(table, t.c.place_id == table.c.place_id)\
-                     .join(tgeom,
-                           sa.case((sa.and_(tgeom.c.rank_address < 9,
-                                            tgeom.c.geometry.is_area()),
-                                    tgeom.c.geometry.ST_Contains(table.c.centroid)),
-                                   else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\
-                     .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
-
-        sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
+            sql = sa.select(table.c.place_id,
+                            sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+                              .label('dist'))\
+                    .join(tgeom,
+                          table.c.centroid.ST_CoveredBy(
+                              sa.case((sa.and_(tgeom.c.rank_address > 9,
+                                                tgeom.c.geometry.is_area()),
+                                       tgeom.c.geometry),
+                                      else_ = tgeom.c.centroid.ST_Expand(0.05))))
+
+        inner = sql.where(tgeom.c.place_id.in_(ids))\
+                   .group_by(table.c.place_id).subquery()
+
+        t = conn.t.placex
+        sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
+                               .join(inner, inner.c.place_id == t.c.place_id)\
+                               .order_by(inner.c.dist)
+
+        sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
         if details.countries:
             sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
         if details.layers is not None:
             sql = sql.where(_filter_by_layer(t, details.layers))
 
@@ -313,7 +346,7 @@ class PoiSearch(AbstractSearch):
     """
     def __init__(self, sdata: SearchData) -> None:
         super().__init__(sdata.penalty)
-        self.categories = sdata.qualifiers
+        self.qualifiers = sdata.qualifiers
         self.countries = sdata.countries
 
 
@@ -328,10 +361,24 @@ class PoiSearch(AbstractSearch):
 
         if details.near and details.near_radius is not None and details.near_radius < 0.2:
             # simply search in placex table
-            sql = _select_placex(t) \
-                      .where(t.c.linked_place_id == None) \
-                      .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
-                      .order_by(t.c.centroid.ST_Distance(NEAR_PARAM))
+            def _base_query() -> SaSelect:
+                return _select_placex(t) \
+                           .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
+                                         .label('importance'))\
+                           .where(t.c.linked_place_id == None) \
+                           .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
+                           .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
+                           .limit(LIMIT_PARAM)
+
+            classtype = self.qualifiers.values
+            if len(classtype) == 1:
+                cclass, ctype = classtype[0]
+                sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
+                                                 .where(t.c.class_ == cclass)
+                                                 .where(t.c.type == ctype))
+            else:
+                sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
+                                                   for cls, typ in classtype)))
 
             if self.countries:
                 sql = sql.where(t.c.country_code.in_(self.countries.values))
@@ -339,22 +386,14 @@ class PoiSearch(AbstractSearch):
             if details.viewbox is not None and details.bounded_viewbox:
                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
 
-            classtype = self.categories.values
-            if len(classtype) == 1:
-                sql = sql.where(t.c.class_ == classtype[0][0]) \
-                         .where(t.c.type == classtype[0][1])
-            else:
-                sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
-                                         for cls, typ in classtype)))
-
-            sql = sql.limit(LIMIT_PARAM)
             rows.extend(await conn.execute(sql, bind_params))
         else:
             # use the class type tables
-            for category in self.categories.values:
+            for category in self.qualifiers.values:
                 table = await conn.get_class_table(*category)
                 if table is not None:
                     sql = _select_placex(t)\
+                               .add_columns(t.c.importance)\
                                .join(table, t.c.place_id == table.c.place_id)\
                                .where(t.c.class_ == category[0])\
                                .where(t.c.type == category[1])
@@ -377,7 +416,7 @@ class PoiSearch(AbstractSearch):
         for row in rows:
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
-            result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+            result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
             result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
@@ -398,26 +437,30 @@ class CountrySearch(AbstractSearch):
         """
         t = conn.t.placex
 
+        ccodes = self.countries.values
         sql = _select_placex(t)\
-                .where(t.c.country_code.in_(self.countries.values))\
+                .add_columns(t.c.importance)\
+                .where(t.c.country_code.in_(ccodes))\
                 .where(t.c.rank_address == 4)
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
 
         if details.viewbox is not None and details.bounded_viewbox:
-            sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
+            sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
 
         if details.near is not None and details.near_radius is not None:
-            sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+            sql = sql.where(_within_near(t))
 
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
             result = nres.create_from_placex_row(row, nres.SearchResult)
             assert result
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
+            result.bbox = Bbox.from_wkb(row.bbox)
             results.append(result)
 
         return results or await self.lookup_in_country_table(conn, details)
@@ -438,14 +481,15 @@ class CountrySearch(AbstractSearch):
 
         sql = sa.select(tgrid.c.country_code,
                         tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
-                              .label('centroid'))\
+                              .label('centroid'),
+                        tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
                 .where(tgrid.c.country_code.in_(self.countries.values))\
                 .group_by(tgrid.c.country_code)
 
         if details.viewbox is not None and details.bounded_viewbox:
             sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
         if details.near is not None and details.near_radius is not None:
-            sql = sql.where(tgrid.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+            sql = sql.where(_within_near(tgrid))
 
         sub = sql.subquery('grid')
 
@@ -454,13 +498,17 @@ class CountrySearch(AbstractSearch):
                          + sa.func.coalesce(t.c.derived_name,
                                             sa.cast('', type_=conn.t.types.Composite))
                         ).label('name'),
-                        sub.c.centroid)\
+                        sub.c.centroid, sub.c.bbox)\
                 .join(sub, t.c.country_code == sub.c.country_code)
 
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, sub.c.centroid, details)
+
         results = nres.SearchResults()
         for row in await conn.execute(sql, _details_to_bind_params(details)):
             result = nres.create_from_country_row(row, nres.SearchResult)
             assert result
+            result.bbox = Bbox.from_wkb(row.bbox)
             result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
             results.append(result)
 
@@ -484,14 +532,16 @@ class PostcodeSearch(AbstractSearch):
         """ Find results for the search in the database.
         """
         t = conn.t.postcode
+        pcs = self.postcodes.values
 
         sql = sa.select(t.c.place_id, t.c.parent_place_id,
                         t.c.rank_search, t.c.rank_address,
                         t.c.postcode, t.c.country_code,
                         t.c.geometry.label('centroid'))\
-                .where(t.c.postcode.in_(self.postcodes.values))
+                .where(t.c.postcode.in_(pcs))
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         penalty: SaExpression = sa.literal(self.penalty)
 
@@ -500,19 +550,19 @@ class PostcodeSearch(AbstractSearch):
                 sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
-                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
-                                   else_=2.0)
+                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+                                   else_=1.0)
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                sql = sql.where(_within_near(t))
             sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
 
         if self.countries:
             sql = sql.where(t.c.country_code.in_(self.countries.values))
 
         if details.excluded:
-            sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+            sql = sql.where(_exclude_places(t))
 
         if self.lookups:
             assert len(self.lookups) == 1
@@ -562,21 +612,15 @@ class PlaceSearch(AbstractSearch):
                      details: SearchDetails) -> nres.SearchResults:
         """ Find results for the search in the database.
         """
-        t = conn.t.placex.alias('p')
-        tsearch = conn.t.search_name.alias('s')
+        t = conn.t.placex
+        tsearch = conn.t.search_name
 
-        sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
-                        t.c.class_, t.c.type,
-                        t.c.address, t.c.extratags,
-                        t.c.housenumber, t.c.postcode, t.c.country_code,
-                        t.c.wikipedia,
-                        t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
-                        t.c.centroid,
-                        t.c.geometry.ST_Expand(0).label('bbox'))\
-                .where(t.c.place_id == tsearch.c.place_id)
+        sql: SaLambdaSelect = sa.lambda_stmt(lambda:
+                  _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
 
 
-        sql = _add_geometry_columns(sql, t.c.geometry, details)
+        if details.geometry_output:
+            sql = _add_geometry_columns(sql, t.c.geometry, details)
 
         penalty: SaExpression = sa.literal(self.penalty)
         for ranking in self.rankings:
@@ -592,48 +636,66 @@ class PlaceSearch(AbstractSearch):
             # if a postcode is given, don't search for state or country level objects
             sql = sql.where(tsearch.c.address_rank > 9)
             tpc = conn.t.postcode
+            pcs = self.postcodes.values
             if self.expected_count > 1000:
                 # Many results expected. Restrict by postcode.
                 sql = sql.where(sa.select(tpc.c.postcode)
-                                  .where(tpc.c.postcode.in_(self.postcodes.values))
+                                  .where(tpc.c.postcode.in_(pcs))
                                   .where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
                                   .exists())
 
             # Fewer results, so only have a preference for close postcodes
             pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
-                      .where(tpc.c.postcode.in_(self.postcodes.values))\
+                      .where(tpc.c.postcode.in_(pcs))\
                       .scalar_subquery()
-            penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0),
+            penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
                                else_=sa.func.coalesce(pc_near, 2.0))
 
         if details.viewbox is not None:
             if details.bounded_viewbox:
-                sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                if details.viewbox.area < 0.2:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
+            elif self.expected_count >= 10000:
+                if details.viewbox.area < 0.5:
+                    sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
             else:
                 penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
-                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
-                                   else_=2.0)
+                                   (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+                                   else_=1.0)
 
         if details.near is not None:
             if details.near_radius is not None:
-                sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
-            sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
+                if details.near_radius < 0.1:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+                else:
+                    sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+                                                                           NEAR_RADIUS_PARAM))
+            sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
                                       .label('importance'))
             sql = sql.order_by(sa.desc(sa.text('importance')))
         else:
-            sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
-                                  else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
+            if self.expected_count < 10000\
+               or (details.viewbox is not None and details.viewbox.area < 0.5):
+                sql = sql.order_by(
+                        penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
+                                    else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
             sql = sql.add_columns(t.c.importance)
 
 
-        sql = sql.add_columns(penalty.label('accuracy'))\
-                 .order_by(sa.text('accuracy'))
+        sql = sql.add_columns(penalty.label('accuracy'))
+
+        if self.expected_count < 10000:
+            sql = sql.order_by(sa.text('accuracy'))
 
         if self.housenumbers:
             hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
             sql = sql.where(tsearch.c.address_rank.between(16, 30))\
                      .where(sa.or_(tsearch.c.address_rank < 30,
-                                  t.c.housenumber.op('~*')(hnr_regexp)))
+                                   t.c.housenumber.op('~*')(hnr_regexp)))
 
             # Cross-check for housenumbers; this needs to be done on a rather
             # large set. Worst case there are 40,000 main streets in OSM.
@@ -649,13 +711,14 @@ class PlaceSearch(AbstractSearch):
                           .where(thnr.c.indexed_status == 0)
 
             if details.excluded:
-                place_sql = place_sql.where(thnr.c.place_id.not_in(EXCLUDED_PARAM))
+                place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
             if self.qualifiers:
                 place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
 
-            numerals = [int(n) for n in self.housenumbers.values if n.isdigit()]
-            interpol_sql: SaExpression
-            tiger_sql: SaExpression
+            numerals = [int(n) for n in self.housenumbers.values
+                        if n.isdigit() and len(n) < 8]
+            interpol_sql: SaColumn
+            tiger_sql: SaColumn
             if numerals and \
                (not self.qualifiers or ('place', 'house') in self.qualifiers.values):
                 # Housenumbers from interpolations
@@ -667,8 +730,8 @@ class PlaceSearch(AbstractSearch):
                                                                   numerals, details)
                                     ), else_=None)
             else:
-                interpol_sql = sa.literal_column('NULL')
-                tiger_sql = sa.literal_column('NULL')
+                interpol_sql = sa.null()
+                tiger_sql = sa.null()
 
             unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
                                interpol_sql.label('interpol_hnr'),
@@ -685,7 +748,7 @@ class PlaceSearch(AbstractSearch):
             if self.qualifiers:
                 sql = sql.where(self.qualifiers.sql_restrict(t))
             if details.excluded:
-                sql = sql.where(tsearch.c.place_id.not_in(EXCLUDED_PARAM))
+                sql = sql.where(_exclude_places(tsearch))
             if details.min_rank > 0:
                 sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
                                        tsearch.c.search_rank >= MIN_RANK_PARAM))
@@ -703,9 +766,6 @@ class PlaceSearch(AbstractSearch):
             assert result
             result.bbox = Bbox.from_wkb(row.bbox)
             result.accuracy = row.accuracy
-            if not details.excluded or not result.place_id in details.excluded:
-                results.append(result)
-
             if self.housenumbers and row.rank_address < 30:
                 if row.placex_hnr:
                     subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
@@ -725,6 +785,14 @@ class PlaceSearch(AbstractSearch):
                             sub.accuracy += 0.6
                         results.append(sub)
 
-                result.accuracy += 1.0 # penalty for missing housenumber
+                # Only add the street as a result if it meets all the
+                # other filter conditions.
+                if (not details.excluded or result.place_id not in details.excluded)\
+                   and (not self.qualifiers or result.category in self.qualifiers.values)\
+                   and result.rank_address >= details.min_rank:
+                    result.accuracy += 1.0 # penalty for missing housenumber
+                    results.append(result)
+            else:
+                results.append(result)
 
         return results
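
Both PoiSearch and PlaceSearch above now build their base SELECT through sa.lambda_stmt, which lets SQLAlchemy compile the statement once and reuse the cached SQL with fresh bind values on every request. A minimal standalone sketch of that pattern, assuming SQLAlchemy 1.4+ and a stand-in table (not Nominatim's real schema):

    import sqlalchemy as sa

    meta = sa.MetaData()
    placex = sa.Table('placex', meta,
                      sa.Column('place_id', sa.Integer, primary_key=True),
                      sa.Column('class_', sa.String),
                      sa.Column('type', sa.String))

    def poi_query(cclass: str, ctype: str) -> 'sa.sql.lambdas.StatementLambdaElement':
        # The lambda body is compiled to SQL only once; on later calls the
        # closure variables cclass and ctype are extracted as bind parameters.
        return sa.lambda_stmt(lambda: sa.select(placex.c.place_id)
                                        .where(placex.c.class_ == cclass)
                                        .where(placex.c.type == ctype))

    engine = sa.create_engine('sqlite://')
    meta.create_all(engine)
    with engine.connect() as conn:
        print(conn.execute(poi_query('amenity', 'restaurant')).all())
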
index 0ef649d99ab8857924ef7d86bc3aa96865608666..bb3c6a1c86d1a8ae2271dd8426adb2518108a55e 100644 (file)
@@ -7,12 +7,15 @@
 """
 Public interface to the search code.
 """
-from typing import List, Any, Optional, Iterator, Tuple
+from typing import List, Any, Optional, Iterator, Tuple, Dict
 import itertools
+import re
+import datetime as dt
+import difflib
 
 from nominatim.api.connection import SearchConnection
 from nominatim.api.types import SearchDetails
-from nominatim.api.results import SearchResults, add_result_details
+from nominatim.api.results import SearchResult, SearchResults, add_result_details
 from nominatim.api.search.token_assignment import yield_token_assignments
 from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
 from nominatim.api.search.db_searches import AbstractSearch
@@ -24,9 +27,11 @@ class ForwardGeocoder:
     """ Main class responsible for place search.
     """
 
-    def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
+    def __init__(self, conn: SearchConnection,
+                 params: SearchDetails, timeout: Optional[int]) -> None:
         self.conn = conn
         self.params = params
+        self.timeout = dt.timedelta(seconds=timeout or 1000000)
         self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
 
 
@@ -55,8 +60,9 @@ class ForwardGeocoder:
             num_searches = 0
             for assignment in yield_token_assignments(query):
                 searches.extend(search_builder.build(assignment))
-                log().table_dump('Searches for assignment',
-                                 _dump_searches(searches, query, num_searches))
+                if num_searches < len(searches):
+                    log().table_dump('Searches for assignment',
+                                     _dump_searches(searches, query, num_searches))
                 num_searches = len(searches)
             searches.sort(key=lambda s: s.penalty)
 
@@ -69,39 +75,89 @@ class ForwardGeocoder:
             is found.
         """
         log().section('Execute database searches')
-        results = SearchResults()
+        results: Dict[Any, SearchResult] = {}
+
+        end_time = dt.datetime.now() + self.timeout
 
-        num_results = 0
-        min_ranking = 1000.0
+        min_ranking = searches[0].penalty + 2.0
         prev_penalty = 0.0
         for i, search in enumerate(searches):
             if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
                 break
             log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
-            for result in await search.lookup(self.conn, self.params):
-                results.append(result)
-                min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
-            log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
-            num_results = len(results)
+            lookup_results = await search.lookup(self.conn, self.params)
+            for result in lookup_results:
+                rhash = (result.source_table, result.place_id,
+                         result.housenumber, result.country_code)
+                prevresult = results.get(rhash)
+                if prevresult:
+                    prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
+                else:
+                    results[rhash] = result
+                min_ranking = min(min_ranking, result.accuracy * 1.2)
+            log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
             prev_penalty = search.penalty
+            if dt.datetime.now() >= end_time:
+                break
 
+        return SearchResults(results.values())
+
+
+    def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
+        """ Remove badly matching results, sort by ranking and
+            limit to the configured number of results.
+        """
         if results:
             min_ranking = min(r.ranking for r in results)
             results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
+            results.sort(key=lambda r: r.ranking)
 
         if results:
-            min_rank = min(r.rank_search for r in results)
-
+            min_rank = results[0].rank_search
             results = SearchResults(r for r in results
                                     if r.ranking + 0.05 * (r.rank_search - min_rank)
                                        < min_ranking + 0.5)
 
-            results.sort(key=lambda r: r.accuracy - r.calculated_importance())
             results = SearchResults(results[:self.limit])
 
         return results
 
 
+    def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
+        """ Adjust the accuracy of the localized results according to how well
+            they match the original query.
+        """
+        assert self.query_analyzer is not None
+        qwords = [word for phrase in query.source
+                       for word in re.split('[, ]+', phrase.text) if word]
+        if not qwords:
+            return
+
+        for result in results:
+            # Negative importance indicates ordering by distance, which is
+            # more important than word matching.
+            if not result.display_name\
+               or (result.importance is not None and result.importance < 0):
+                continue
+            distance = 0.0
+            norm = self.query_analyzer.normalize_text(result.display_name)
+            words = set((w for w in norm.split(' ') if w))
+            if not words:
+                continue
+            for qword in qwords:
+                wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
+                if wdist < 0.5:
+                    distance += len(qword)
+                else:
+                    distance += (1.0 - wdist) * len(qword)
+            # Compensate for the fact that country names do not yet get a
+            # match penalty from the tokenizer.
+            # Temporary hack that needs to be removed!
+            if result.rank_address == 4:
+                distance *= 2
+            result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
+
+
     async def lookup_pois(self, categories: List[Tuple[str, str]],
                           phrases: List[Phrase]) -> SearchResults:
         """ Look up places by category. If phrase is given, a place search
@@ -116,13 +172,16 @@ class ForwardGeocoder:
             if query:
                 searches = [wrap_near_search(categories, s) for s in searches[:50]]
                 results = await self.execute_searches(query, searches)
+                await add_result_details(self.conn, results, self.params)
+                log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
+                results = self.sort_and_cut_results(results)
             else:
                 results = SearchResults()
         else:
             search = build_poi_search(categories, self.params.countries)
             results = await search.lookup(self.conn, self.params)
+            await add_result_details(self.conn, results, self.params)
 
-        await add_result_details(self.conn, results, self.params)
         log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 
         return results
@@ -143,6 +202,10 @@ class ForwardGeocoder:
             # Execute SQL until an appropriate result is found.
             results = await self.execute_searches(query, searches[:50])
             await add_result_details(self.conn, results, self.params)
+            log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
+            self.rerank_by_query(query, results)
+            log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
+            results = self.sort_and_cut_results(results)
             log().result_dump('Final Results', ((r.accuracy, r) for r in results))
 
         return results
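
The reworked execute_searches above merges results from consecutive searches through a dictionary keyed by the result's identity, so a place found by several searches keeps only its best accuracy. Roughly this pattern, with Result and merge_lookups as illustrative stand-ins for the Nominatim classes:

    from dataclasses import dataclass
    from typing import Dict, Iterable, List, Tuple

    @dataclass
    class Result:                       # stand-in for Nominatim's SearchResult
        source_table: str
        place_id: int
        accuracy: float                 # lower is better

    def merge_lookups(batches: Iterable[List[Result]]) -> List[Result]:
        merged: Dict[Tuple[str, int], Result] = {}
        for batch in batches:
            for result in batch:
                rhash = (result.source_table, result.place_id)
                prev = merged.get(rhash)
                if prev:
                    # same place found by two searches: keep the better score
                    prev.accuracy = min(prev.accuracy, result.accuracy)
                else:
                    merged[rhash] = result
        return list(merged.values())

    print(merge_lookups([[Result('placex', 1, 0.5)], [Result('placex', 1, 0.2)]]))
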
@@ -151,7 +214,8 @@ class ForwardGeocoder:
 # pylint: disable=invalid-name,too-many-locals
 def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
                    start: int = 0) -> Iterator[Optional[List[Any]]]:
-    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+    yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+           'Qualifier', 'Category', 'Rankings']
 
     def tk(tl: List[int]) -> str:
         tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
@@ -181,11 +245,18 @@ def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
 
     for search in searches[start:]:
         fields = ('lookups', 'rankings', 'countries', 'housenumbers',
-                  'postcodes', 'qualifier')
-        iters = itertools.zip_longest([f"{search.penalty:.3g}"],
-                                      *(getattr(search, attr, []) for attr in fields),
-                                      fillvalue= '')
-        for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+                  'postcodes', 'qualifiers')
+        if hasattr(search, 'search'):
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search.search, attr, []) for attr in fields),
+                                          getattr(search, 'categories', []),
+                                          fillvalue='')
+        else:
+            iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+                                          *(getattr(search, attr, []) for attr in fields),
+                                          [],
+                                          fillvalue='')
+        for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
             yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
-                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+                   fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
         yield None
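
rerank_by_query above compares every query word against the words of the localized display name and adds a penalty proportional to how badly the best match fits, using difflib's quick_ratio as a cheap similarity measure. A reduced sketch, with the tokenizer's normalization simplified to lower-casing and query_distance as a hypothetical name:

    import difflib
    import re

    def query_distance(query_text: str, display_name: str) -> float:
        qwords = [w for w in re.split('[, ]+', query_text) if w]
        words = set(w for w in re.split('[, ]+', display_name.lower()) if w)
        distance = 0.0
        for qword in qwords:
            # best similarity of this query word against any result word
            wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio()
                        for w in words)
            # unmatched words count fully, partial matches proportionally
            distance += len(qword) if wdist < 0.5 else (1.0 - wdist) * len(qword)
        return distance * 0.4 / sum(len(w) for w in qwords)

    print(query_distance('birmingham', 'Birmingham, West Midlands, England'))
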
index f259995db112bbbe537aaa3855f2d4d78e36f5e2..fceec2df522feb5105936204b099e9a8a7a2ad96 100644 (file)
@@ -83,7 +83,7 @@ class ICUToken(qmod.Token):
         seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
         distance = 0
         for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
-            if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+            if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
                 distance += 1
             elif tag == 'replace':
                 distance += max((ato-afrom), (bto-bfrom))
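
The widened condition in the hunk above treats insertions at the start or end of the lookup word the same as deletions, so abbreviated tokens ('hauptstr' for 'hauptstrasse') stay cheap. The rating loop, reduced to the two branches visible here (the full method handles further opcode types):

    import difflib

    def rated_distance(lookup_word: str, norm: str) -> int:
        distance = 0
        seq = difflib.SequenceMatcher(a=lookup_word, b=norm)
        for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
            if tag in ('delete', 'insert') and (afrom == 0 or ato == len(lookup_word)):
                distance += 1          # cheap: truncation at either end
            elif tag == 'replace':
                distance += max(ato - afrom, bto - bfrom)
        return distance

    print(rated_distance('hauptstr', 'hauptstrasse'))   # trailing insert -> 1
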
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
         penalty = 0.0
         if row.type == 'w':
             penalty = 0.3
+        elif row.type == 'W':
+            if len(row.word_token) == 1 and row.word_token == row.word:
+                penalty = 0.2 if row.word.isdigit() else 0.3
         elif row.type == 'H':
             penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
             if all(not c.isdigit() for c in row.word_token):
                 penalty += 0.2 * (len(row.word_token) - 1)
+        elif row.type == 'C':
+            if len(row.word_token) == 1:
+                penalty = 0.3
 
         if row.info is None:
             lookup_word = row.word
@@ -133,10 +139,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
     async def setup(self) -> None:
         """ Set up static data structures needed for the analysis.
         """
-        rules = await self.conn.get_property('tokenizer_import_normalisation')
-        self.normalizer = Transliterator.createFromRules("normalization", rules)
-        rules = await self.conn.get_property('tokenizer_import_transliteration')
-        self.transliterator = Transliterator.createFromRules("transliteration", rules)
+        async def _make_normalizer() -> Any:
+            rules = await self.conn.get_property('tokenizer_import_normalisation')
+            return Transliterator.createFromRules("normalization", rules)
+
+        self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
+                                                           _make_normalizer)
+
+        async def _make_transliterator() -> Any:
+            rules = await self.conn.get_property('tokenizer_import_transliteration')
+            return Transliterator.createFromRules("transliteration", rules)
+
+        self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
+                                                               _make_transliterator)
 
         if 'word' not in self.conn.t.meta.tables:
             sa.Table('word', self.conn.t.meta,
@@ -169,13 +184,13 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
                 if row.type == 'S':
                     if row.info['op'] in ('in', 'near'):
                         if trange.start == 0:
-                            query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                     else:
                         query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                         if trange.start == 0 or trange.end == query.num_token_slots():
                             token = copy(token)
                             token.penalty += 0.1 * (query.num_token_slots())
-                            query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 else:
                     query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
 
index 3346584ccd1b35b4e74e4725ee079cb54e45a905..e7984ee41832909fe608edd69dfc2dc6ec635a50 100644 (file)
@@ -107,15 +107,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
         for row in await self.lookup_in_db(lookup_words):
             for trange in words[row.word_token.strip()]:
                 token, ttype = self.make_token(row)
-                if ttype == qmod.TokenType.CATEGORY:
+                if ttype == qmod.TokenType.NEAR_ITEM:
                     if trange.start == 0:
-                        query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 elif ttype == qmod.TokenType.QUALIFIER:
                     query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                     if trange.start == 0 or trange.end == query.num_token_slots():
                         token = copy(token)
                         token.penalty += 0.1 * (query.num_token_slots())
-                        query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
                     query.add_token(trange, ttype, token)
 
@@ -127,6 +127,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
         return query
 
 
+    def normalize_text(self, text: str) -> str:
+        """ Bring the given text into a normalized form.
+
+            This only removes letter case, so some differences from the full
+            normalization of phrases remain.
+        """
+        return text.lower()
+
+
     def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
                                                             Dict[str, List[qmod.TokenRange]]]:
         """ Transliterate the phrases and split them into tokens.
@@ -186,7 +195,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
                 ttype = qmod.TokenType.POSTCODE
                 lookup_word = row.word_token[1:]
             else:
-                ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
+                ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
                         else qmod.TokenType.QUALIFIER
                 lookup_word = row.word
         elif row.word_token.startswith(' '):
index f2b18f873a8121fbdac79ea3c67b682826316e6b..ad1b69ef521dfd303ae1d5b95aa8629859feb10b 100644 (file)
@@ -7,7 +7,7 @@
 """
 Datastructures for a tokenized query.
 """
-from typing import List, Tuple, Optional, NamedTuple, Iterator
+from typing import List, Tuple, Optional, Iterator
 from abc import ABC, abstractmethod
 import dataclasses
 import enum
@@ -46,7 +46,7 @@ class TokenType(enum.Enum):
     """ Country name or reference. """
     QUALIFIER = enum.auto()
     """ Special term used together with name (e.g. _Hotel_ Bellevue). """
-    CATEGORY = enum.auto()
+    NEAR_ITEM = enum.auto()
     """ Special term used as searchable object(e.g. supermarket in ...). """
 
 
@@ -70,14 +70,16 @@ class PhraseType(enum.Enum):
     COUNTRY = enum.auto()
     """ Contains the country name or code. """
 
-    def compatible_with(self, ttype: TokenType) -> bool:
+    def compatible_with(self, ttype: TokenType,
+                        is_full_phrase: bool) -> bool:
         """ Check if the given token type can be used with the phrase type.
         """
         if self == PhraseType.NONE:
-            return True
+            return not is_full_phrase or ttype != TokenType.QUALIFIER
         if self == PhraseType.AMENITY:
-            return ttype in (TokenType.WORD, TokenType.PARTIAL,
-                             TokenType.QUALIFIER, TokenType.CATEGORY)
+            return ttype in (TokenType.WORD, TokenType.PARTIAL)\
+                   or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
+                   or (not is_full_phrase and ttype == TokenType.QUALIFIER)
         if self == PhraseType.STREET:
             return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
         if self == PhraseType.POSTCODE:
@@ -107,13 +109,29 @@ class Token(ABC):
             category objects.
         """
 
-
-class TokenRange(NamedTuple):
+@dataclasses.dataclass
+class TokenRange:
     """ Indexes of query nodes over which a token spans.
     """
     start: int
     end: int
 
+    def __lt__(self, other: 'TokenRange') -> bool:
+        return self.end <= other.start
+
+
+    def __le__(self, other: 'TokenRange') -> bool:
+        return NotImplemented
+
+
+    def __gt__(self, other: 'TokenRange') -> bool:
+        return self.start >= other.end
+
+
+    def __ge__(self, other: 'TokenRange') -> bool:
+        return NotImplemented
+
+
     def replace_start(self, new_start: int) -> 'TokenRange':
         """ Return a new token range with the new start.
         """
@@ -228,7 +246,9 @@ class QueryStruct:
             be added to, then the token is silently dropped.
         """
         snode = self.nodes[trange.start]
-        if snode.ptype.compatible_with(ttype):
+        full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
+                      and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
+        if snode.ptype.compatible_with(ttype, full_phrase):
             tlist = snode.get_tokens(trange.end, ttype)
             if tlist is None:
                 snode.starting.append(TokenList(trange.end, ttype, [token]))
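
Turning TokenRange into a dataclass above lets it define strict interval ordering: a range compares "less than" another only when it ends before the other starts, overlapping ranges compare False in both directions, and `<=`/`>=` return NotImplemented to forbid accidental use. A standalone illustration of the same semantics:

    import dataclasses

    @dataclasses.dataclass
    class TokenRange:
        start: int
        end: int

        def __lt__(self, other: 'TokenRange') -> bool:
            return self.end <= other.start     # strictly before

        def __gt__(self, other: 'TokenRange') -> bool:
            return self.start >= other.end     # strictly after

    assert TokenRange(0, 2) < TokenRange(2, 4)           # disjoint, in order
    assert not (TokenRange(0, 3) < TokenRange(2, 4))     # overlapping: neither...
    assert not (TokenRange(0, 3) > TokenRange(2, 4))     # ...before nor after
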
index 35649d0ffe4cb544daf5a07a0df17ebbfe159d81..bbc1eb6b1d787c483fc8086912279afda0a53b1a 100644 (file)
@@ -30,6 +30,15 @@ class AbstractQueryAnalyzer(ABC):
         """
 
 
+    @abstractmethod
+    def normalize_text(self, text: str) -> str:
+        """ Bring the given text into a normalized form. This is the
+            standardized form that the search works with. Any information
+            removed at this stage is irrevocably lost.
+        """
+
+
+
 async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
     """ Create a query analyzer for the tokenizer used by the database.
     """
index 11da23594880f9f4353630e69e6e26dbee6f0f32..d94d69039f0f602ab8c0aeda9e5c971a881353fd 100644 (file)
@@ -46,7 +46,7 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
     housenumber: Optional[qmod.TokenRange] = None
     postcode: Optional[qmod.TokenRange] = None
     country: Optional[qmod.TokenRange] = None
-    category: Optional[qmod.TokenRange] = None
+    near_item: Optional[qmod.TokenRange] = None
     qualifier: Optional[qmod.TokenRange] = None
 
 
@@ -64,8 +64,8 @@ class TokenAssignment: # pylint: disable=too-many-instance-attributes
                 out.postcode = token.trange
             elif token.ttype == qmod.TokenType.COUNTRY:
                 out.country = token.trange
-            elif token.ttype == qmod.TokenType.CATEGORY:
-                out.category = token.trange
+            elif token.ttype == qmod.TokenType.NEAR_ITEM:
+                out.near_item = token.trange
             elif token.ttype == qmod.TokenType.QUALIFIER:
                 out.qualifier = token.trange
         return out
@@ -109,7 +109,7 @@ class _TokenSequence:
         """
         # Country and near-item must be the final term for left-to-right
         return len(self.seq) > 1 and \
-               self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
+               self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
 
 
     def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
@@ -165,22 +165,22 @@ class _TokenSequence:
         if ttype == qmod.TokenType.COUNTRY:
             return None if self.direction == -1 else 1
 
-        if ttype == qmod.TokenType.CATEGORY:
+        if ttype == qmod.TokenType.NEAR_ITEM:
             return self.direction
 
         if ttype == qmod.TokenType.QUALIFIER:
             if self.direction == 1:
                 if (len(self.seq) == 1
-                    and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
+                    and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
                    or (len(self.seq) == 2
-                       and self.seq[0].ttype == qmod.TokenType.CATEGORY
+                       and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
                        and self.seq[1].ttype == qmod.TokenType.PARTIAL):
                     return 1
                 return None
             if self.direction == -1:
                 return -1
 
-            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
+            tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
             if len(tempseq) == 0:
                 return 1
             if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
@@ -253,10 +253,103 @@ class _TokenSequence:
                 priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
                 if not self._adapt_penalty_from_priors(priors, 1):
                     return False
+            if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
+                self.penalty += 1.0
 
         return True
 
 
+    def _get_assignments_postcode(self, base: TokenAssignment,
+                                  query_len: int)  -> Iterator[TokenAssignment]:
+        """ Yield possible assignments of Postcode searches with an
+            address component.
+        """
+        assert base.postcode is not None
+
+        if (base.postcode.start == 0 and self.direction != -1)\
+           or (base.postcode.end == query_len and self.direction != 1):
+            log().comment('postcode search')
+            # <address>,<postcode> should give preference to address search
+            if base.postcode.start == 0:
+                penalty = self.penalty
+                self.direction = -1 # name searches are only possible backwards
+            else:
+                penalty = self.penalty + 0.1
+                self.direction = 1 # name searches are only possible forwards
+            yield dataclasses.replace(base, penalty=penalty)
+
+
+    def _get_assignments_address_forward(self, base: TokenAssignment,
+                                         query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
+        """ Yield possible assignments of address searches with
+            left-to-right reading.
+        """
+        first = base.address[0]
+
+        log().comment('first word = name')
+        yield dataclasses.replace(base, penalty=self.penalty,
+                                  name=first, address=base.address[1:])
+
+        # No splitting of the first term is possible when:
+        #  * another name term comes after the first one and before the
+        #    housenumber,
+        #  * a qualifier comes after the name, or
+        #  * the containing phrase is strictly typed.
+        if (base.housenumber and first.end < base.housenumber.start)\
+           or (base.qualifier and base.qualifier > first)\
+           or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
+            return
+
+        penalty = self.penalty
+
+        # Penalty for:
+        #  * <name>, <street>, <housenumber> , ...
+        #  * queries that are comma-separated
+        if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
+            penalty += 0.25
+
+        for i in range(first.start + 1, first.end):
+            name, addr = first.split(i)
+            log().comment(f'split first word = name ({i - first.start})')
+            yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
+                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
+
+
+    def _get_assignments_address_backward(self, base: TokenAssignment,
+                                          query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
+        """ Yield possible assignments of address searches with
+            right-to-left reading.
+        """
+        last = base.address[-1]
+
+        if self.direction == -1 or len(base.address) > 1:
+            log().comment('last word = name')
+            yield dataclasses.replace(base, penalty=self.penalty,
+                                      name=last, address=base.address[:-1])
+
+        # No splitting of the last term is possible when:
+        #  * another name term comes before the last one and after the
+        #    housenumber,
+        #  * a qualifier comes before the name, or
+        #  * the containing phrase is strictly typed.
+        if (base.housenumber and last.start > base.housenumber.end)\
+           or (base.qualifier and base.qualifier < last)\
+           or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
+            return
+
+        penalty = self.penalty
+        if base.housenumber and base.housenumber < last:
+            penalty += 0.4
+        if len(query.source) > 1:
+            penalty += 0.25
+
+        for i in range(last.start + 1, last.end):
+            addr, name = last.split(i)
+            log().comment(f'split last word = name ({i - last.start})')
+            yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
+                                      penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
+
+
     def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
         """ Yield possible assignments for the current sequence.
 
@@ -265,70 +358,36 @@ class _TokenSequence:
         """
         base = TokenAssignment.from_ranges(self.seq)
 
+        num_addr_tokens = sum(t.end - t.start for t in base.address)
+        if num_addr_tokens > 50:
+            return
+
         # Postcode search (postcode-only search is covered in next case)
         if base.postcode is not None and base.address:
-            if (base.postcode.start == 0 and self.direction != -1)\
-               or (base.postcode.end == query.num_token_slots() and self.direction != 1):
-                log().comment('postcode search')
-                # <address>,<postcode> should give preference to address search
-                if base.postcode.start == 0:
-                    penalty = self.penalty
-                else:
-                    penalty = self.penalty + 0.1
-                yield dataclasses.replace(base, penalty=penalty)
+            yield from self._get_assignments_postcode(base, query.num_token_slots())
 
         # Postcode or country-only search
         if not base.address:
-            if not base.housenumber and (base.postcode or base.country or base.category):
+            if not base.housenumber and (base.postcode or base.country or base.near_item):
                 log().comment('postcode/country search')
                 yield dataclasses.replace(base, penalty=self.penalty)
         else:
             # <postcode>,<address> should give preference to postcode search
             if base.postcode and base.postcode.start == 0:
                 self.penalty += 0.1
-            # Use entire first word as name
+
+            # Right-to-left reading of the address
             if self.direction != -1:
-                log().comment('first word = name')
-                yield dataclasses.replace(base, name=base.address[0],
-                                          penalty=self.penalty,
-                                          address=base.address[1:])
-
-            # Use entire last word as name
-            if self.direction == -1 or (self.direction == 0 and len(base.address) > 1):
-                log().comment('last word = name')
-                yield dataclasses.replace(base, name=base.address[-1],
-                                          penalty=self.penalty,
-                                          address=base.address[:-1])
+                yield from self._get_assignments_address_forward(base, query)
+
+            # Left-to-right reading of the address
+            if self.direction != 1:
+                yield from self._get_assignments_address_backward(base, query)
 
             # variant for special housenumber searches
             if base.housenumber:
                 yield dataclasses.replace(base, penalty=self.penalty)
 
-            # Use beginning of first word as name
-            if self.direction != -1:
-                first = base.address[0]
-                if (not base.housenumber or first.end >= base.housenumber.start)\
-                   and (not base.qualifier or first.start >= base.qualifier.end):
-                    for i in range(first.start + 1, first.end):
-                        name, addr = first.split(i)
-                        penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
-                        log().comment(f'split first word = name ({i - first.start})')
-                        yield dataclasses.replace(base, name=name, penalty=penalty,
-                                                  address=[addr] + base.address[1:])
-
-            # Use end of last word as name
-            if self.direction != 1:
-                last = base.address[-1]
-                if (not base.housenumber or last.start <= base.housenumber.end)\
-                   and (not base.qualifier or last.end <= base.qualifier.start):
-                    for i in range(last.start + 1, last.end):
-                        addr, name = last.split(i)
-                        penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
-                        log().comment(f'split last word = name ({i - last.start})')
-                        yield dataclasses.replace(base, name=name, penalty=penalty,
-                                                  address=base.address[:-1] + [addr])
-
-
 
 def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
     """ Return possible word type assignments to word positions.
index 61e36cc36488c097f58dd77d604b3975879500b4..adccf7a55409ef240f040c83a87c2f112f73af1a 100644 (file)
@@ -36,6 +36,9 @@ async def get_status(conn: SearchConnection) -> StatusResult:
     sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
     status.data_updated = await conn.scalar(sql)
 
+    if status.data_updated is not None:
+        status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
+
     # Database version
     try:
         verstr = await conn.get_property('database_version')
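
The added replace() call above attaches the UTC timezone to the naive timestamp read from the database; replace() merely labels the value, it does not shift it. For example:

    import datetime as dt

    naive = dt.datetime(2023, 12, 7, 8, 24, 32)        # as stored by PostgreSQL
    aware = naive.replace(tzinfo=dt.timezone.utc)      # declare it UTC, no shifting
    print(aware.isoformat())                           # 2023-12-07T08:24:32+00:00
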
index 43e83c1f689dfadda882a85aa79fe496cf0cfecf..5767fe1604a7d31e5b1adffdf3699adf8420a439 100644 (file)
@@ -17,23 +17,46 @@ from struct import unpack
 from binascii import unhexlify
 
 from nominatim.errors import UsageError
+from nominatim.api.localization import Locales
 
 # pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
 
 @dataclasses.dataclass
 class PlaceID:
-    """ Reference an object by Nominatim's internal ID.
+    """ Reference a place by Nominatim's internal ID.
+
+        A PlaceID may reference a place from the main table placex, from
+        the interpolation tables or from the postcode tables. Place IDs are
+        not stable between installations. You may therefore only use this
+        type with place IDs obtained from the same database.
     """
     place_id: int
+    """
+    The internal ID of the place to reference.
+    """
 
 
 @dataclasses.dataclass
 class OsmID:
-    """ Reference by the OSM ID and potentially the basic category.
+    """ Reference a place by its OSM ID and potentially the basic category.
+
+        The OSM ID may refer to places in the main table placex and OSM
+        interpolation lines.
     """
     osm_type: str
+    """ OSM type of the object. Must be one of `N` (node), `W` (way) or
+        `R` (relation).
+    """
     osm_id: int
+    """ The OSM ID of the object.
+    """
     osm_class: Optional[str] = None
+    """ The same OSM object may appear multiple times in the database under
+        different categories. The optional class parameter makes it possible
+        to distinguish between these categories; it corresponds to the key
+        part of the category.
+        If there are multiple objects in the database and `osm_class` is
+        left out, then one of the objects is returned at random.
+    """
 
     def __post_init__(self) -> None:
         if self.osm_type not in ('N', 'W', 'R'):
@@ -135,12 +158,15 @@ WKB_BBOX_HEADER_LE = b'\x01\x03\x00\x00\x20\xE6\x10\x00\x00\x01\x00\x00\x00\x05\
 WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
 
 class Bbox:
-    """ A bounding box in WSG84 projection.
+    """ A bounding box in WGS84 projection.
 
         The coordinates are available as an array in the 'coords'
         property in the order (minx, miny, maxx, maxy).
     """
     def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
+        """ Create a new bounding box with the given coordinates in WGS84
+            projection.
+        """
         self.coords = (minx, miny, maxx, maxy)
 
 
@@ -197,7 +223,7 @@ class Bbox:
     @staticmethod
     def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
         """ Create a Bbox from a bounding box polygon as returned by
-            the database. Return s None if the input value is None.
+            the database. Returns `None` if the input value is None.
         """
         if wkb is None:
             return None
@@ -247,9 +273,10 @@ class Bbox:
         except ValueError as exc:
             raise UsageError('Bounding box parameter needs to be numbers.') from exc
 
-        if x1 < -180.0 or x1 > 180.0 or y1 < -90.0 or y1 > 90.0 \
-           or x2 < -180.0 or x2 > 180.0 or y2 < -90.0 or y2 > 90.0:
-            raise UsageError('Bounding box coordinates invalid.')
+        x1 = min(180, max(-180, x1))
+        x2 = min(180, max(-180, x2))
+        y1 = min(90, max(-90, y1))
+        y2 = min(90, max(-90, y2))
 
         if x1 == x2 or y1 == y2:
             raise UsageError('Bounding box with invalid parameters.')
@@ -258,23 +285,60 @@ class Bbox:
 
 
 class GeometryFormat(enum.Flag):
-    """ Geometry output formats supported by Nominatim.
+    """ All search functions support returning the full geometry of a place in
+        various formats. The internal geometry is converted by PostGIS to
+        the desired format and then returned as a string. It is possible to
+        request multiple formats at the same time.
     """
     NONE = 0
+    """ No geometry requested. Alias for an empty flag.
+    """
     GEOJSON = enum.auto()
+    """
+    [GeoJSON](https://geojson.org/) format
+    """
     KML = enum.auto()
+    """
+    [KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
+    """
     SVG = enum.auto()
+    """
+    [SVG](http://www.w3.org/TR/SVG/paths.html) format
+    """
     TEXT = enum.auto()
+    """
+    [WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
+    """
 
 
 class DataLayer(enum.Flag):
-    """ Layer types that can be selected for reverse and forward search.
+    """ The `DataLayer` flag type defines the layers that can be selected
+        for reverse and forward search.
     """
-    POI = enum.auto()
     ADDRESS = enum.auto()
+    """ The address layer contains all places relevant for addresses:
+        fully qualified addresses with a house number (or, for some
+        addresses, an equivalent house name) and places that can be part
+        of an address, like roads, cities and states.
+    """
+    POI = enum.auto()
+    """ Layer for points of interest like shops, restaurants but also
+        recycling bins or postboxes.
+    """
     RAILWAY = enum.auto()
-    MANMADE = enum.auto()
+    """ Layer with railway features including tracks and other infrastructure.
+        Note that in Nominatim's standard configuration, only very few railway
+        features are imported into the database. Thus a custom configuration
+        is required to make full use of this layer.
+    """
     NATURAL = enum.auto()
+    """ Layer with natural features like rivers, lakes and mountains.
+    """
+    MANMADE = enum.auto()
+    """ Layer with other human-made features and boundaries. This layer is
+        the catch-all and includes all features not covered by the other
+        layers. Typical examples for this layer are national park boundaries.
+    """
 
 
 def format_country(cc: Any) -> List[str]:
@@ -323,7 +387,7 @@ TParam = TypeVar('TParam', bound='LookupDetails') # pylint: disable=invalid-name
 
 @dataclasses.dataclass
 class LookupDetails:
-    """ Collection of parameters that define the amount of details
+    """ Collection of parameters that define which kind of details are
         returned with a lookup or details result.
     """
     geometry_output: GeometryFormat = GeometryFormat.NONE
@@ -350,6 +414,9 @@ class LookupDetails:
         0.0 means the original geometry is kept. The higher the value, the
         more the geometry gets simplified.
     """
+    locales: Locales = Locales()
+    """ Preferred languages for localization of results.
+    """
 
     @classmethod
     def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
@@ -418,7 +485,7 @@ class SearchDetails(LookupDetails):
     """
     excluded: List[int] = dataclasses.field(default_factory=list,
                                             metadata={'transform': format_excluded})
-    """ List of OSM objects to exclude from the results. Currenlty only
+    """ List of OSM objects to exclude from the results. Currently only
         works when the internal place ID is given.
         An empty list (the default) will disable this filter.
     """
@@ -471,7 +538,9 @@ class SearchDetails(LookupDetails):
                 or (self.bounded_viewbox
                     and self.viewbox is not None and self.near is not None
                     and self.viewbox.contains(self.near))
-                or self.layers is not None and not self.layers)
+                or (self.layers is not None and not self.layers)
+                or (self.max_rank <= 4 and
+                    self.layers is not None and not self.layers & DataLayer.ADDRESS))
 
 
     def layer_enabled(self, layer: DataLayer) -> bool:
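
Bbox.from_param above now clamps out-of-range coordinates to the world bounds instead of raising, while a degenerate box is still rejected. The same behaviour as a standalone sketch (UsageError swapped for ValueError here):

    from typing import Tuple

    def clamp_bbox(x1: float, y1: float, x2: float, y2: float
                  ) -> Tuple[float, float, float, float]:
        # out-of-range coordinates are clamped instead of rejected ...
        x1 = min(180, max(-180, x1))
        x2 = min(180, max(-180, x2))
        y1 = min(90, max(-90, y1))
        y2 = min(90, max(-90, y2))
        # ... but a degenerate box still counts as a usage error
        if x1 == x2 or y1 == y2:
            raise ValueError('Bounding box with invalid parameters.')
        return x1, y1, x2, y2

    print(clamp_bbox(-200.0, 47.0, 9.0, 48.0))   # (-180.0, 47.0, 9.0, 48.0)
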
index ad635e39cdbf1097e057b2ecd56152bbb1263a27..53156fdd41132ada43139546d36ec0e470590858 100644 (file)
@@ -7,8 +7,9 @@
 """
 Output formatters for API version v1.
 """
-from typing import Mapping, Any
+from typing import List, Dict, Mapping, Any
 import collections
+import datetime as dt
 
 import nominatim.api as napi
 from nominatim.api.result_formatting import FormatDispatcher
@@ -16,6 +17,10 @@ from nominatim.api.v1.classtypes import ICONS
 from nominatim.api.v1 import format_json, format_xml
 from nominatim.utils.json_writer import JsonWriter
 
+class RawDataList(List[Dict[str, Any]]):
+    """ Data type for formatting raw data lists 'as is' in json.
+    """
+
 dispatch = FormatDispatcher()
 
 @dispatch.format_func(napi.StatusResult, 'text')
@@ -136,7 +141,7 @@ def _format_details_json(result: napi.DetailedResult, options: Mapping[str, Any]
     if result.address_rows is not None:
         _add_address_rows(out, 'address', result.address_rows, locales)
 
-    if result.linked_rows is not None:
+    if result.linked_rows:
         _add_address_rows(out, 'linked_places', result.linked_rows, locales)
 
     if result.name_keywords is not None or result.address_keywords is not None:
@@ -232,3 +237,20 @@ def _format_search_jsonv2(results: napi.SearchResults,
                            options: Mapping[str, Any]) -> str:
     return format_json.format_base_json(results, options, False,
                                         class_label='category')
+
+@dispatch.format_func(RawDataList, 'json')
+def _format_raw_data_json(results: RawDataList,  _: Mapping[str, Any]) -> str:
+    out = JsonWriter()
+    out.start_array()
+    for res in results:
+        out.start_object()
+        for k, v in res.items():
+            if isinstance(v, dt.datetime):
+                out.keyval(k, v.isoformat(sep=' ', timespec='seconds'))
+            else:
+                out.keyval(k, v)
+        out.end_object().next()
+
+    out.end_array()
+
+    return out()
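
`RawDataList` gives the format dispatcher a distinct type to register the
raw JSON formatter against. A short sketch of how the function above is
reached, using only names defined in this file:

    from nominatim.api.v1.format import dispatch, RawDataList

    raw = RawDataList([{'place_id': 1234, 'name': 'Example'}])
    # The dispatcher selects _format_raw_data_json via the (type, format) pair.
    output = dispatch.format_result(raw, 'json', {})
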
index 99a3f182cb2a6b72cd76fe1e3b5d383457476183..80560c95278da93f5175dbe5e0ceb232abeb6d51 100644 (file)
@@ -86,8 +86,8 @@ def format_base_json(results: Union[napi.ReverseResults, napi.SearchResults],
 
         _write_osm_id(out, result.osm_object)
 
-        out.keyval('lat', result.centroid.lat)\
-             .keyval('lon', result.centroid.lon)\
+        out.keyval('lat', f"{result.centroid.lat}")\
+             .keyval('lon', f"{result.centroid.lon}")\
              .keyval(class_label, result.category[0])\
              .keyval('type', result.category[1])\
              .keyval('place_rank', result.rank_search)\
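
Note the effect of the change above: `lat` and `lon` are now emitted as JSON
strings (e.g. `"lat": "51.50732"` rather than `"lat": 51.50732`), preserving
the exact decimal representation and matching the string output of the
existing PHP frontend.
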
index 325e5bc629911dc446476c1499499eb641e741dd..896a131cc8c98da4eb982e69068e4f3df6cee2a8 100644 (file)
@@ -37,7 +37,7 @@ def zoom_to_rank(zoom: int) -> int:
     return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
 
 
-FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
+FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
     'country': (4, 4),
     'state': (8, 8),
     'city': (14, 16),
@@ -108,15 +108,18 @@ def deduplicate_results(results: SearchResults, max_results: int) -> SearchResul
             assert result.names and 'ref' in result.names
             if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
                 continue
-        classification = (result.osm_object[0] if result.osm_object else None,
-                          result.category,
-                          result.display_name,
-                          result.rank_address)
-        if result.osm_object not in osm_ids_done \
-           and classification not in classification_done:
+        if result.source_table == SourceTable.PLACEX:
+            classification = (result.osm_object[0] if result.osm_object else None,
+                              result.category,
+                              result.display_name,
+                              result.rank_address)
+            if result.osm_object not in osm_ids_done \
+               and classification not in classification_done:
+                deduped.append(result)
+            osm_ids_done.add(result.osm_object)
+            classification_done.add(classification)
+        else:
             deduped.append(result)
-        osm_ids_done.add(result.osm_object)
-        classification_done.add(classification)
         if len(deduped) >= max_results:
             break
 
@@ -136,10 +139,10 @@ def _deg(axis:str) -> str:
     return f"(?P<{axis}_deg>\\d+\\.\\d+)°?"
 
 def _deg_min(axis: str) -> str:
-    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)?[′']*"
+    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)[′']*"
 
 def _deg_min_sec(axis: str) -> str:
-    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)?[\"″]*"
+    return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)[\"″]*"
 
 COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
     r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
index 865e13318c61732f2f29f04a77fee3f1f4c28344..70f7dc40611f9ba552d8e8397922b1c0fc78e61a 100644 (file)
@@ -15,24 +15,28 @@ import dataclasses
 import math
 from urllib.parse import urlencode
 
+import sqlalchemy as sa
+
 from nominatim.errors import UsageError
 from nominatim.config import Configuration
 import nominatim.api as napi
 import nominatim.api.logging as loglib
 from nominatim.api.v1.format import dispatch as formatting
+from nominatim.api.v1.format import RawDataList
 from nominatim.api.v1 import helpers
 
-CONTENT_TYPE = {
-  'text': 'text/plain; charset=utf-8',
-  'xml': 'text/xml; charset=utf-8',
-  'debug': 'text/html; charset=utf-8'
-}
+CONTENT_TEXT = 'text/plain; charset=utf-8'
+CONTENT_XML = 'text/xml; charset=utf-8'
+CONTENT_HTML = 'text/html; charset=utf-8'
+CONTENT_JSON = 'application/json; charset=utf-8'
+
+CONTENT_TYPE = {'text': CONTENT_TEXT, 'xml': CONTENT_XML, 'debug': CONTENT_HTML}
 
 class ASGIAdaptor(abc.ABC):
     """ Adapter class for the different ASGI frameworks.
         Wraps functionality over concrete requests and responses.
     """
-    content_type: str = 'text/plain; charset=utf-8'
+    content_type: str = CONTENT_TEXT
 
     @abc.abstractmethod
     def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
@@ -55,7 +59,7 @@ class ASGIAdaptor(abc.ABC):
 
 
     @abc.abstractmethod
-    def create_response(self, status: int, output: str) -> Any:
+    def create_response(self, status: int, output: str, num_results: int) -> Any:
         """ Create a response from the given parameters. The result will
             be returned by the endpoint functions. The adaptor may also
             return None when the response is created internally with some
@@ -66,6 +70,11 @@ class ASGIAdaptor(abc.ABC):
             body of the response to 'output'.
         """
 
+    @abc.abstractmethod
+    def base_uri(self) -> str:
+        """ Return the URI of the original request.
+        """
+
 
     @abc.abstractmethod
     def config(self) -> Configuration:
@@ -73,19 +82,19 @@ class ASGIAdaptor(abc.ABC):
         """
 
 
-    def build_response(self, output: str, status: int = 200) -> Any:
+    def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any:
         """ Create a response from the given output. Wraps a JSONP function
             around the response, if necessary.
         """
-        if self.content_type == 'application/json' and status == 200:
+        if self.content_type == CONTENT_JSON and status == 200:
             jsonp = self.get('json_callback')
             if jsonp is not None:
                 if any(not part.isidentifier() for part in jsonp.split('.')):
                     self.raise_error('Invalid json_callback value')
                 output = f"{jsonp}({output})"
-                self.content_type = 'application/javascript'
+                self.content_type = 'application/javascript; charset=utf-8'
 
-        return self.create_response(status, output)
+        return self.create_response(status, output, num_results)
 
 
     def raise_error(self, msg: str, status: int = 400) -> NoReturn:
@@ -93,16 +102,16 @@ class ASGIAdaptor(abc.ABC):
             message. The message will be formatted according to the
             output format chosen by the request.
         """
-        if self.content_type == 'text/xml; charset=utf-8':
+        if self.content_type == CONTENT_XML:
             msg = f"""<?xml version="1.0" encoding="UTF-8" ?>
                       <error>
                         <code>{status}</code>
                         <message>{msg}</message>
                       </error>
                    """
-        elif self.content_type == 'application/json':
+        elif self.content_type == CONTENT_JSON:
             msg = f"""{{"error":{{"code":{status},"message":"{msg}"}}}}"""
-        elif self.content_type == 'text/html; charset=utf-8':
+        elif self.content_type == CONTENT_HTML:
             loglib.log().section('Execution error')
             loglib.log().var_dump('Status', status)
             loglib.log().var_dump('Message', msg)
@@ -196,7 +205,7 @@ class ASGIAdaptor(abc.ABC):
         """
         if self.get_bool('debug', False):
             loglib.set_log_output('html')
-            self.content_type = 'text/html; charset=utf-8'
+            self.content_type = CONTENT_HTML
             return True
 
         return False
@@ -226,7 +235,7 @@ class ASGIAdaptor(abc.ABC):
             self.raise_error("Parameter 'format' must be one of: " +
                               ', '.join(formatting.list_formats(result_type)))
 
-        self.content_type = CONTENT_TYPE.get(fmt, 'application/json')
+        self.content_type = CONTENT_TYPE.get(fmt, CONTENT_JSON)
         return fmt
 
 
@@ -250,7 +259,7 @@ class ASGIAdaptor(abc.ABC):
                 numgeoms += 1
 
         if numgeoms > self.config().get_int('POLYGON_OUTPUT_MAX_TYPES'):
-            self.raise_error('Too many polgyon output options selected.')
+            self.raise_error('Too many polygon output options selected.')
 
         return {'address_details': True,
                 'geometry_simplification': self.get_float('polygon_threshold', 0.0),
@@ -294,12 +303,13 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
 
     result = await api.details(place,
                                address_details=params.get_bool('addressdetails', False),
-                               linked_places=params.get_bool('linkedplaces', False),
+                               linked_places=params.get_bool('linkedplaces', True),
                                parented_places=params.get_bool('hierarchy', False),
                                keywords=params.get_bool('keywords', False),
                                geometry_output = napi.GeometryFormat.GEOJSON
                                                  if params.get_bool('polygon_geojson', False)
-                                                 else napi.GeometryFormat.NONE
+                                                 else napi.GeometryFormat.NONE,
+                               locales=locales
                               )
 
     if debug:
@@ -308,14 +318,12 @@ async def details_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
     if result is None:
         params.raise_error('No place with that OSM ID found.', status=404)
 
-    result.localize(locales)
-
     output = formatting.format_result(result, fmt,
                  {'locales': locales,
                   'group_hierarchy': params.get_bool('group_hierarchy', False),
                   'icon_base_url': params.config().MAPICON_URL})
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=1)
 
 
 async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
@@ -328,11 +336,12 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
     details = params.parse_geometry_details(fmt)
     details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
     details['layers'] = params.get_layers()
+    details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
 
     result = await api.reverse(coord, **details)
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=1 if result else 0)
 
     if fmt == 'xml':
         queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'}
@@ -348,13 +357,10 @@ async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) ->
                    'namedetails': params.get_bool('namedetails', False),
                    'addressdetails': params.get_bool('addressdetails', True)}
 
-    if result:
-        result.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
     output = formatting.format_result(napi.ReverseResults([result] if result else []),
                                       fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=1 if result else 0)
 
 
 async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
@@ -363,12 +369,13 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
     fmt = params.parse_format(napi.SearchResults, 'xml')
     debug = params.setup_debugging()
     details = params.parse_geometry_details(fmt)
+    details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
 
     places = []
     for oid in (params.get('osm_ids') or '').split(','):
         oid = oid.strip()
         if len(oid) > 1 and oid[0] in 'RNWrnw' and oid[1:].isdigit():
-            places.append(napi.OsmID(oid[0], int(oid[1:])))
+            places.append(napi.OsmID(oid[0].upper(), int(oid[1:])))
 
     if len(places) > params.config().get_int('LOOKUP_MAX_COUNT'):
         params.raise_error('Too many object IDs.')
@@ -379,17 +386,15 @@ async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
         results = napi.SearchResults()
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=len(results))
 
     fmt_options = {'extratags': params.get_bool('extratags', False),
                    'namedetails': params.get_bool('namedetails', False),
                    'addressdetails': params.get_bool('addressdetails', True)}
 
-    results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
     output = formatting.format_result(results, fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=len(results))
 
 
 async def _unstructured_search(query: str, api: napi.NominatimAPIAsync,
@@ -444,31 +449,40 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
         helpers.feature_type_to_rank(params.get('featureType', ''))
     if params.get('featureType', None) is not None:
         details['layers'] = napi.DataLayer.ADDRESS
+    else:
+        details['layers'] = params.get_layers()
 
+    details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
+
+    # unstructured query parameters
     query = params.get('q', None)
+    # structured query parameters
     queryparts = {}
+    for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
+        details[key] = params.get(key, None)
+        if details[key]:
+            queryparts[key] = details[key]
+
     try:
         if query is not None:
+            if queryparts:
+                params.raise_error("Structured query parameters"
+                                   "(amenity, street, city, county, state, postalcode, country)"
+                                   " cannot be used together with 'q' parameter.")
             queryparts['q'] = query
             results = await _unstructured_search(query, api, details)
         else:
-            for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
-                details[key] = params.get(key, None)
-                if details[key]:
-                    queryparts[key] = details[key]
             query = ', '.join(queryparts.values())
 
             results = await api.search_address(**details)
     except UsageError as err:
         params.raise_error(str(err))
 
-    results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
     if details['dedupe'] and len(results) > 1:
         results = helpers.deduplicate_results(results, max_results)
 
     if debug:
-        return params.build_response(loglib.get_and_disable())
+        return params.build_response(loglib.get_and_disable(), num_results=len(results))
 
     if fmt == 'xml':
         helpers.extend_query_parts(queryparts, details,
@@ -478,7 +492,7 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
                                    (str(r.place_id) for r in results if r.place_id))
         queryparts['format'] = fmt
 
-        moreurl = urlencode(queryparts)
+        moreurl = params.base_uri() + '/search?' + urlencode(queryparts)
     else:
         moreurl = ''
 
@@ -491,7 +505,59 @@ async def search_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> A
 
     output = formatting.format_result(results, fmt, fmt_options)
 
-    return params.build_response(output)
+    return params.build_response(output, num_results=len(results))
+
+
+async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
+    """ Server glue for /deletable endpoint.
+        This is a special endpoint that shows polygons that have been
+        deleted or are broken in the OSM data but are kept in the
+        Nominatim database to minimize disruption.
+    """
+    fmt = params.parse_format(RawDataList, 'json')
+
+    async with api.begin() as conn:
+        sql = sa.text(""" SELECT p.place_id, country_code,
+                                 name->'name' as name, i.*
+                          FROM placex p, import_polygon_delete i
+                          WHERE p.osm_id = i.osm_id AND p.osm_type = i.osm_type
+                                AND p.class = i.class AND p.type = i.type
+                      """)
+        results = RawDataList(r._asdict() for r in await conn.execute(sql))
+
+    return params.build_response(formatting.format_result(results, fmt, {}))
+
+
+async def polygons_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
+    """ Server glue for /polygons endpoint.
+        This is a special endpoint that shows polygons that have changed
+        their size but are kept in the Nominatim database with their
+        old area to minimize disruption.
+    """
+    fmt = params.parse_format(RawDataList, 'json')
+    sql_params: Dict[str, Any] = {
+        'days': params.get_int('days', -1),
+        'cls': params.get('class')
+    }
+    reduced = params.get_bool('reduced', False)
+
+    async with api.begin() as conn:
+        sql = sa.select(sa.text("""osm_type, osm_id, class, type,
+                                   name->'name' as name,
+                                   country_code, errormessage, updated"""))\
+                .select_from(sa.text('import_polygon_error'))
+        if sql_params['days'] > 0:
+            sql = sql.where(sa.text("updated > 'now'::timestamp - make_interval(days => :days)"))
+        if reduced:
+            sql = sql.where(sa.text("errormessage like 'Area reduced%'"))
+        if sql_params['cls'] is not None:
+            sql = sql.where(sa.text("class = :cls"))
+
+        sql = sql.order_by(sa.literal_column('updated').desc()).limit(1000)
+
+        results = RawDataList(r._asdict() for r in await conn.execute(sql, sql_params))
+
+    return params.build_response(formatting.format_result(results, fmt, {}))
 
 
 EndpointFunc = Callable[[napi.NominatimAPIAsync, ASGIAdaptor], Any]
@@ -501,5 +567,7 @@ ROUTES = [
     ('details', details_endpoint),
     ('reverse', reverse_endpoint),
     ('lookup', lookup_endpoint),
-    ('search', search_endpoint)
+    ('search', search_endpoint),
+    ('deletable', deletable_endpoint),
+    ('polygons', polygons_endpoint),
 ]
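
The two maintenance endpoints registered above replace the corresponding PHP
scripts. An illustrative request against a local instance (host and port are
placeholders):

    import urllib.parse
    import urllib.request

    # /polygons reads the parameters parsed above: days, class, reduced.
    params = urllib.parse.urlencode({'days': 7, 'class': 'boundary', 'reduced': 1})
    with urllib.request.urlopen(f'http://localhost:8088/polygons?{params}') as resp:
        print(resp.read().decode('utf-8'))
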
index 836f9037fd4b9b8916cf7d1563583d4d563e2f56..88a6078284424b4dc3beacf7d45757eddc1af3bb 100644 (file)
@@ -2,13 +2,13 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Command-line interface to the Nominatim functions for import, update,
 database administration and querying.
 """
-from typing import Optional, Any, List, Union
+from typing import Optional, Any
 import importlib
 import logging
 import os
@@ -17,7 +17,7 @@ import argparse
 from pathlib import Path
 
 from nominatim.config import Configuration
-from nominatim.tools.exec_utils import run_legacy_script, run_php_server
+from nominatim.tools.exec_utils import run_php_server
 from nominatim.errors import UsageError
 from nominatim import clicmd
 from nominatim import version
@@ -101,7 +101,6 @@ class CommandlineParser:
             self.parser.print_help()
             return 1
 
-        args.phpcgi_path = Path(kwargs['phpcgi_path'])
         args.project_dir = Path(args.project_dir).resolve()
 
         if 'cli_args' not in kwargs:
@@ -140,60 +139,6 @@ class CommandlineParser:
 #
 # No need to document the functions each time.
 # pylint: disable=C0111
-class QueryExport:
-    """\
-    Export addresses as CSV file from the database.
-    """
-
-    def add_args(self, parser: argparse.ArgumentParser) -> None:
-        group = parser.add_argument_group('Output arguments')
-        group.add_argument('--output-type', default='street',
-                           choices=('continent', 'country', 'state', 'county',
-                                    'city', 'suburb', 'street', 'path'),
-                           help='Type of places to output (default: street)')
-        group.add_argument('--output-format',
-                           default='street;suburb;city;county;state;country',
-                           help=("Semicolon-separated list of address types "
-                                 "(see --output-type). Multiple ranks can be "
-                                 "merged into one column by simply using a "
-                                 "comma-separated list."))
-        group.add_argument('--output-all-postcodes', action='store_true',
-                           help=("List all postcodes for address instead of "
-                                 "just the most likely one"))
-        group.add_argument('--language',
-                           help=("Preferred language for output "
-                                 "(use local name, if omitted)"))
-        group = parser.add_argument_group('Filter arguments')
-        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
-                           help='Export only objects within country')
-        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
-                           help='Export only children of this OSM node')
-        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
-                           help='Export only children of this OSM way')
-        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
-                           help='Export only children of this OSM relation')
-
-
-    def run(self, args: NominatimArgs) -> int:
-        params: List[Union[int, str]] = [
-                             '--output-type', args.output_type,
-                             '--output-format', args.output_format]
-        if args.output_all_postcodes:
-            params.append('--output-all-postcodes')
-        if args.language:
-            params.extend(('--language', args.language))
-        if args.restrict_to_country:
-            params.extend(('--restrict-to-country', args.restrict_to_country))
-        if args.restrict_to_osm_node:
-            params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node))
-        if args.restrict_to_osm_way:
-            params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way))
-        if args.restrict_to_osm_relation:
-            params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
-
-        return run_legacy_script('export.php', *params, config=args.config)
-
-
 class AdminServe:
     """\
     Start a simple web server for serving the API.
@@ -260,7 +205,8 @@ def get_set_parser() -> CommandlineParser:
 
     parser.add_subcommand('admin', clicmd.AdminFuncs())
 
-    parser.add_subcommand('export', QueryExport())
+    parser.add_subcommand('export', clicmd.QueryExport())
+    parser.add_subcommand('convert', clicmd.ConvertDB())
     parser.add_subcommand('serve', AdminServe())
 
     parser.add_subcommand('search', clicmd.APISearch())
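
With the legacy `export.php` wrapper removed, both subcommands now dispatch
to the Python implementations in `nominatim.clicmd`. Illustrative
invocations:

    nominatim export --output-type city --restrict-to-country de
    nominatim convert --format sqlite --output nominatim.sqlite
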
index bdd9bafe7c4d9c5f965dcc8675a0c54544a5d72c..c8de68c144eb53704d0a96e05bd82f44f209d2df 100644 (file)
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Subcommand definitions for the command-line tool.
@@ -24,3 +24,5 @@ from nominatim.clicmd.add_data import UpdateAddData as UpdateAddData
 from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
 from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
 from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
+from nominatim.clicmd.export import QueryExport as QueryExport
+from nominatim.clicmd.convert import ConvertDB as ConvertDB
index 0c7739603e94c7a73bef52c5e556506f34f6bc50..9557dc988c94712f446f8c21c8f167d69639747f 100644 (file)
@@ -9,9 +9,11 @@ Implementation of the 'admin' subcommand.
 """
 import logging
 import argparse
+import random
 
-from nominatim.tools.exec_utils import run_legacy_script
+from nominatim.db.connection import connect
 from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
 
 # Do not repeat documentation of subcommand classes.
 # pylint: disable=C0111
@@ -39,6 +41,8 @@ class AdminFuncs:
                           help='Print performance analysis of the indexing process')
         objs.add_argument('--collect-os-info', action="store_true",
                           help="Generate a report about the host system information")
+        objs.add_argument('--clean-deleted', action='store', metavar='AGE',
+                          help='Clean up deleted relations older than the given age')
         group = parser.add_argument_group('Arguments for cache warming')
         group.add_argument('--search-only', action='store_const', dest='target',
                            const='search',
@@ -53,7 +57,9 @@ class AdminFuncs:
         mgroup.add_argument('--place-id', type=int,
                             help='Analyse indexing of the given Nominatim object')
 
+
     def run(self, args: NominatimArgs) -> int:
+        # pylint: disable=too-many-return-statements
         if args.warm:
             return self._warm(args)
 
@@ -79,13 +85,39 @@ class AdminFuncs:
             collect_os_info.report_system_information(args.config)
             return 0
 
+        if args.clean_deleted:
+            LOG.warning('Cleaning up deleted relations')
+            from ..tools import admin
+            admin.clean_deleted_relations(args.config, age=args.clean_deleted)
+            return 0
+
         return 1
 
+
     def _warm(self, args: NominatimArgs) -> int:
         LOG.warning('Warming database caches')
-        params = ['warm.php']
-        if args.target == 'reverse':
-            params.append('--reverse-only')
-        if args.target == 'search':
-            params.append('--search-only')
-        return run_legacy_script(*params, config=args.config)
+
+        api = napi.NominatimAPI(args.project_dir)
+
+        try:
+            if args.target != 'search':
+                for _ in range(1000):
+                    api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
+                                address_details=True)
+
+            if args.target != 'reverse':
+                from ..tokenizer import factory as tokenizer_factory
+
+                tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+                with connect(args.config.get_libpq_dsn()) as conn:
+                    if conn.table_exists('search_name'):
+                        words = tokenizer.most_frequent_words(conn, 1000)
+                    else:
+                        words = []
+
+                for word in words:
+                    api.search(word)
+        finally:
+            api.close()
+
+        return 0
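
The rewritten `_warm` no longer shells out to `warm.php`: it issues 1000
random reverse queries and, where a search index exists, replays the most
frequent words through the search API. The new `--clean-deleted` switch takes
an age threshold that is handed through to `clean_deleted_relations`, e.g.
(illustrative value):

    nominatim admin --clean-deleted '1 month'
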
index f2f1826b11b8d28cf174fb69d92b9feccfcfa8ed..e8450e6ba9890aef096f5911e606277038eab02a 100644 (file)
@@ -7,14 +7,12 @@
 """
 Subcommand definitions for API calls from the command line.
 """
-from typing import Mapping, Dict, Any
+from typing import Dict, Any
 import argparse
 import logging
 import json
 import sys
 
-from nominatim.tools.exec_utils import run_api_script
-from nominatim.errors import UsageError
 from nominatim.clicmd.args import NominatimArgs
 import nominatim.api as napi
 import nominatim.api.v1 as api_output
@@ -62,18 +60,6 @@ def _add_api_output_arguments(parser: argparse.ArgumentParser) -> None:
                              "Parameter is difference tolerance in degrees."))
 
 
-def _run_api(endpoint: str, args: NominatimArgs, params: Mapping[str, object]) -> int:
-    script_file = args.project_dir / 'website' / (endpoint + '.php')
-
-    if not script_file.exists():
-        LOG.error("Cannot find API script file.\n\n"
-                  "Make sure to run 'nominatim' from the project directory \n"
-                  "or use the option --project-dir.")
-        raise UsageError("API script not found.")
-
-    return run_api_script(endpoint, args.project_dir,
-                          phpcgi_bin=args.phpcgi_path, params=params)
-
 class APISearch:
     """\
     Execute a search query.
@@ -123,7 +109,8 @@ class APISearch:
                                   'countries': args.countrycodes,
                                   'excluded': args.exclude_place_ids,
                                   'viewbox': args.viewbox,
-                                  'bounded_viewbox': args.bounded
+                                  'bounded_viewbox': args.bounded,
+                                  'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
                                  }
 
         if args.query:
@@ -138,9 +125,6 @@ class APISearch:
                                          country=args.country,
                                          **params)
 
-        for result in results:
-            result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
-
         if args.dedupe and len(results) > 1:
             results = deduplicate_results(results, args.limit)
 
@@ -201,14 +185,14 @@ class APIReverse:
                              layers=args.get_layers(napi.DataLayer.ADDRESS | napi.DataLayer.POI),
                              address_details=True, # needed for display name
                              geometry_output=args.get_geometry_output(),
-                             geometry_simplification=args.polygon_threshold)
+                             geometry_simplification=args.polygon_threshold,
+                             locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
 
         if args.format == 'debug':
             print(loglib.get_and_disable())
             return 0
 
         if result:
-            result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
             output = api_output.format_result(
                         napi.ReverseResults([result]),
                         args.format,
@@ -263,10 +247,8 @@ class APILookup:
         results = api.lookup(places,
                              address_details=True, # needed for display name
                              geometry_output=args.get_geometry_output(),
-                             geometry_simplification=args.polygon_threshold or 0.0)
-
-        for result in results:
-            result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
+                             geometry_simplification=args.polygon_threshold or 0.0,
+                             locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
 
         output = api_output.format_result(
                     results,
@@ -340,6 +322,7 @@ class APIDetails:
 
         api = napi.NominatimAPI(args.project_dir)
 
+        locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
         result = api.details(place,
                              address_details=args.addressdetails,
                              linked_places=args.linkedplaces,
@@ -347,13 +330,11 @@ class APIDetails:
                              keywords=args.keywords,
                              geometry_output=napi.GeometryFormat.GEOJSON
                                              if args.polygon_geojson
-                                             else napi.GeometryFormat.NONE)
+                                             else napi.GeometryFormat.NONE,
+                             locales=locales)
 
 
         if result:
-            locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
-            result.localize(locales)
-
             output = api_output.format_result(
                         result,
                         'json',
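
All query subcommands now hand the preferred languages to the API call via
`locales=` instead of calling `result.localize()` afterwards. A condensed
sketch of the new pattern (the project directory is a placeholder):

    from pathlib import Path
    import nominatim.api as napi

    api = napi.NominatimAPI(Path('.'))           # placeholder project directory
    result = api.details(napi.OsmID('W', 1234),  # any OSM object id
                         address_details=True,
                         locales=napi.Locales(['en']))
    if result is not None:
        print(result.place_id)                   # names are already localized
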
index 10316165d191a01883e7d9ede0d151afbf9b6dc5..433435bc6f37dd1735078f807e599a42d05fdaa7 100644 (file)
@@ -44,7 +44,6 @@ class NominatimArgs:
     # Basic environment set by root program.
     config: Configuration
     project_dir: Path
-    phpcgi_path: Path
 
     # Global switches
     version: bool
@@ -73,6 +72,7 @@ class NominatimArgs:
     check_database: bool
     migrate: bool
     collect_os_info: bool
+    clean_deleted: str
     analyse_indexing: bool
     target: Optional[str]
     osm_id: Optional[str]
@@ -87,6 +87,7 @@ class NominatimArgs:
     offline: bool
     ignore_errors: bool
     index_noanalyse: bool
+    prepare_database: bool
 
     # Arguments to 'index'
     boundaries_only: bool
@@ -100,9 +101,9 @@ class NominatimArgs:
     output_all_postcodes: bool
     language: Optional[str]
     restrict_to_country: Optional[str]
-    restrict_to_osm_node: Optional[int]
-    restrict_to_osm_way: Optional[int]
-    restrict_to_osm_relation: Optional[int]
+
+    # Arguments to 'convert'
+    output: Path
 
     # Arguments to 'refresh'
     postcodes: bool
diff --git a/nominatim/clicmd/convert.py b/nominatim/clicmd/convert.py
new file mode 100644 (file)
index 0000000..26b3fb1
--- /dev/null
@@ -0,0 +1,95 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'convert' subcommand.
+"""
+from typing import Set, Any, Union, Optional, Sequence
+import argparse
+import asyncio
+from pathlib import Path
+
+from nominatim.clicmd.args import NominatimArgs
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+
+class WithAction(argparse.Action):
+    """ Special action that saves a list of flags, given on the command-line
+        as `--with-foo` or `--without-foo`.
+    """
+    def __init__(self, option_strings: Sequence[str], dest: Any,
+                 default: bool = True, **kwargs: Any) -> None:
+        if 'nargs' in kwargs:
+            raise ValueError("nargs not allowed.")
+        if option_strings is None:
+            raise ValueError("Positional parameter not allowed.")
+
+        self.dest_set = kwargs.pop('dest_set')
+        full_option_strings = []
+        for opt in option_strings:
+            if not opt.startswith('--'):
+                raise ValueError("short-form options not allowed")
+            if default:
+                self.dest_set.add(opt[2:])
+            full_option_strings.append(f"--with-{opt[2:]}")
+            full_option_strings.append(f"--without-{opt[2:]}")
+
+        super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
+
+
+    def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
+                 values: Union[str, Sequence[Any], None],
+                 option_string: Optional[str] = None) -> None:
+        assert option_string
+        if option_string.startswith('--with-'):
+            self.dest_set.add(option_string[7:])
+        if option_string.startswith('--without-'):
+            self.dest_set.discard(option_string[10:])
+
+
+class ConvertDB:
+    """ Convert an existing database into a different format. (EXPERIMENTAL)
+
+        Dump a read-only version of the database in a different format.
+        At the moment only a SQLite database suitable for reverse lookup
+        can be created.
+    """
+
+    def __init__(self) -> None:
+        self.options: Set[str] = set()
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        parser.add_argument('--format', default='sqlite',
+                            choices=('sqlite', ),
+                            help='Format of the output database (must be sqlite currently)')
+        parser.add_argument('--output', '-o', required=True, type=Path,
+                            help='File to write the database to.')
+        group = parser.add_argument_group('Switches to define database layout '
+                                          '(currently no effect)')
+        group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
+                           help='Enable/disable support for reverse and lookup API'
+                                ' (default: enabled)')
+        group.add_argument('--search', action=WithAction, dest_set=self.options, default=False,
+                           help='Enable/disable support for search API (default: disabled)')
+        group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
+                           help='Enable/disable support for details API (default: enabled)')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        if args.output.exists():
+            raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
+
+        if args.format == 'sqlite':
+            from ..tools import convert_sqlite
+
+            asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
+            return 0
+
+        return 1
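
`WithAction` generates paired `--with-X`/`--without-X` flags that toggle
membership in a shared set. A minimal, self-contained sketch of the
behaviour using the class exactly as defined above:

    import argparse

    from nominatim.clicmd.convert import WithAction

    opts: set = set()
    parser = argparse.ArgumentParser()
    parser.add_argument('--search', action=WithAction, dest_set=opts, default=False)

    parser.parse_args(['--with-search'])      # adds 'search' to the set
    assert opts == {'search'}
    parser.parse_args(['--without-search'])   # removes it again
    assert opts == set()
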
diff --git a/nominatim/clicmd/export.py b/nominatim/clicmd/export.py
new file mode 100644 (file)
index 0000000..f935a55
--- /dev/null
@@ -0,0 +1,200 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'export' subcommand.
+"""
+from typing import Optional, List, cast
+import logging
+import argparse
+import asyncio
+import csv
+import sys
+
+import sqlalchemy as sa
+
+from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
+from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
+from nominatim.api.types import LookupDetails
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+# Needed for SQLAlchemy
+# pylint: disable=singleton-comparison
+
+LOG = logging.getLogger()
+
+RANK_RANGE_MAP = {
+    'country': (4, 4),
+    'state': (5, 9),
+    'county': (10, 12),
+    'city': (13, 16),
+    'suburb': (17, 21),
+    'street': (26, 26),
+    'path': (27, 27)
+}
+
+RANK_TO_OUTPUT_MAP = {
+    4: 'country',
+    5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
+    10: 'county', 11: 'county', 12: 'county',
+    13: 'city', 14: 'city', 15: 'city', 16: 'city',
+    17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
+    26: 'street', 27: 'path'}
+
+class QueryExport:
+    """\
+    Export places as a CSV file from the database.
+    """
+
+    def add_args(self, parser: argparse.ArgumentParser) -> None:
+        group = parser.add_argument_group('Output arguments')
+        group.add_argument('--output-type', default='street',
+                           choices=('country', 'state', 'county',
+                                    'city', 'suburb', 'street', 'path'),
+                           help='Type of places to output (default: street)')
+        group.add_argument('--output-format',
+                           default='street;suburb;city;county;state;country',
+                           help=("Semicolon-separated list of address types "
+                                 "(see --output-type). Additionally accepts:"
+                                 "placeid,postcode"))
+        group.add_argument('--language',
+                           help=("Preferred language for output "
+                                 "(use local name, if omitted)"))
+        group = parser.add_argument_group('Filter arguments')
+        group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
+                           help='Export only objects within country')
+        group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
+                           dest='node',
+                           help='Export only children of this OSM node')
+        group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
+                           dest='way',
+                           help='Export only children of this OSM way')
+        group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
+                           dest='relation',
+                           help='Export only children of this OSM relation')
+
+
+    def run(self, args: NominatimArgs) -> int:
+        return asyncio.run(export(args))
+
+
+async def export(args: NominatimArgs) -> int:
+    """ The actual export as a asynchronous function.
+    """
+
+    api = napi.NominatimAPIAsync(args.project_dir)
+
+    try:
+        output_range = RANK_RANGE_MAP[args.output_type]
+
+        writer = init_csv_writer(args.output_format)
+
+        async with api.begin() as conn, api.begin() as detail_conn:
+            t = conn.t.placex
+
+            sql = sa.select(t.c.place_id, t.c.parent_place_id,
+                        t.c.osm_type, t.c.osm_id, t.c.name,
+                        t.c.class_, t.c.type, t.c.admin_level,
+                        t.c.address, t.c.extratags,
+                        t.c.housenumber, t.c.postcode, t.c.country_code,
+                        t.c.importance, t.c.wikipedia, t.c.indexed_date,
+                        t.c.rank_address, t.c.rank_search,
+                        t.c.centroid)\
+                     .where(t.c.linked_place_id == None)\
+                     .where(t.c.rank_address.between(*output_range))
+
+            parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
+            if parent_place_id:
+                taddr = conn.t.addressline
+
+                sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
+                         .where(taddr.c.address_place_id == parent_place_id)\
+                         .where(taddr.c.isaddress)
+
+            if args.restrict_to_country:
+                sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
+
+            results = []
+            for row in await conn.execute(sql):
+                result = create_from_placex_row(row, ReverseResult)
+                if result is not None:
+                    results.append(result)
+
+                if len(results) == 1000:
+                    await dump_results(detail_conn, results, writer, args.language)
+                    results = []
+
+            if results:
+                await dump_results(detail_conn, results, writer, args.language)
+    finally:
+        await api.close()
+
+    return 0
+
+
+def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
+    fields = output_format.split(';')
+    writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
+    writer.writeheader()
+
+    return writer
+
+
+async def dump_results(conn: napi.SearchConnection,
+                       results: List[ReverseResult],
+                       writer: 'csv.DictWriter[str]',
+                       lang: Optional[str]) -> None:
+    locale = napi.Locales([lang] if lang else None)
+    await add_result_details(conn, results,
+                             LookupDetails(address_details=True, locales=locale))
+
+
+    for result in results:
+        data = {'placeid': result.place_id,
+                'postcode': result.postcode}
+
+        for line in (result.address_rows or []):
+            if line.isaddress and line.local_name:
+                if line.category[1] == 'postcode':
+                    data['postcode'] = line.local_name
+                elif line.rank_address in RANK_TO_OUTPUT_MAP:
+                    data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
+
+        writer.writerow(data)
+
+
+async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
+                        way_id: Optional[int],
+                        relation_id: Optional[int]) -> Optional[int]:
+    """ Get the place ID for the given OSM object.
+    """
+    if node_id is not None:
+        osm_type, osm_id = 'N', node_id
+    elif way_id is not None:
+        osm_type, osm_id = 'W', way_id
+    elif relation_id is not None:
+        osm_type, osm_id = 'R', relation_id
+    else:
+        return None
+
+    t = conn.t.placex
+    sql = sa.select(t.c.place_id).limit(1)\
+            .where(t.c.osm_type == osm_type)\
+            .where(t.c.osm_id == osm_id)\
+            .where(t.c.rank_address > 0)\
+            .order_by(t.c.rank_address)
+
+    for result in await conn.execute(sql):
+        return cast(int, result[0])
+
+    raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
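
The rewritten export streams matching placex rows through the new Python API
in batches of 1000 and resolves the address parts per batch. An illustrative
invocation (output goes to stdout as before):

    nominatim export --output-type city \
                     --output-format 'placeid;city;state;country' \
                     --restrict-to-country de > cities_de.csv
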
index ea605ea09e5d970996f676a51a6ebd6f44cd989b..5e1b044e734336bf305f1c69c649bf18fda0fddc 100644 (file)
@@ -128,7 +128,7 @@ class UpdateRefresh:
             LOG.warning('Import secondary importance raster data from %s', args.project_dir)
             if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
                                                 args.project_dir) > 0:
-                LOG.fatal('FATAL: Cannot update sendary importance raster data')
+                LOG.fatal('FATAL: Cannot update secondary importance raster data')
                 return 1
 
         if args.functions:
@@ -141,10 +141,10 @@ class UpdateRefresh:
         if args.wiki_data:
             data_path = Path(args.config.WIKIPEDIA_DATA_PATH
                              or args.project_dir)
-            LOG.warning('Import wikipdia article importance from %s', data_path)
+            LOG.warning('Import wikipedia article importance from %s', data_path)
             if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
                                                  data_path) > 0:
-                LOG.fatal('FATAL: Wikipedia importance dump file not found')
+                LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
                 return 1
 
         # Attention: importance MUST come after wiki data import.
index 8464e151f4f1534034c442446a44bc7c27bce22c..3d212ff980994d7b6511bc234215e1d3201cdd5e 100644 (file)
@@ -40,13 +40,15 @@ class SetupAll:
 
     def add_args(self, parser: argparse.ArgumentParser) -> None:
         group_name = parser.add_argument_group('Required arguments')
-        group1 = group_name.add_mutually_exclusive_group(required=True)
+        group1 = group_name.add_argument_group()
         group1.add_argument('--osm-file', metavar='FILE', action='append',
                            help='OSM file to be imported'
-                                ' (repeat for importing multiple files)')
+                                ' (repeat for importing multiple files)',
+                                default=None)
         group1.add_argument('--continue', dest='continue_at',
-                           choices=['load-data', 'indexing', 'db-postprocess'],
-                           help='Continue an import that was interrupted')
+                           choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
+                           help='Continue an import that was interrupted',
+                           default=None)
         group2 = parser.add_argument_group('Optional arguments')
         group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
                            help='Size of cache to be used by osm2pgsql (in MB)')
@@ -65,9 +67,11 @@ class SetupAll:
                            help='Continue import even when errors in SQL are present')
         group3.add_argument('--index-noanalyse', action='store_true',
                            help='Do not perform analyse operations during index (expert only)')
+        group3.add_argument('--prepare-database', action='store_true',
+                            help='Create the database but do not import any data')
 
 
-    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements
+    def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
         from ..data import country_info
         from ..tools import database_import, refresh, postcodes, freeze
         from ..indexer.indexer import Indexer
@@ -76,43 +80,61 @@ class SetupAll:
 
         country_info.setup_country_config(args.config)
 
-        if args.continue_at is None:
+        if args.osm_file is None and args.continue_at is None and not args.prepare_database:
+            raise UsageError("No input files (use --osm-file).")
+
+        if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
+            raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
+
+        if args.continue_at is not None and args.prepare_database:
+            raise UsageError(
+                "Cannot use --continue and --prepare-database together."
+            )
+
+
+        if args.prepare_database or args.continue_at is None:
+            LOG.warning('Creating database')
+            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
+                                                        rouser=args.config.DATABASE_WEBUSER)
+            if args.prepare_database:
+                return 0
+
+        if args.continue_at in (None, 'import-from-file'):
             files = args.get_osm_file_list()
             if not files:
                 raise UsageError("No input files (use --osm-file).")
 
-            LOG.warning('Creating database')
-            database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
-                                                    rouser=args.config.DATABASE_WEBUSER)
-
-            LOG.warning('Setting up country tables')
-            country_info.setup_country_tables(args.config.get_libpq_dsn(),
-                                              args.config.lib_dir.data,
-                                              args.no_partitions)
-
-            LOG.warning('Importing OSM data file')
-            database_import.import_osm_data(files,
-                                            args.osm2pgsql_options(0, 1),
-                                            drop=args.no_updates,
-                                            ignore_errors=args.ignore_errors)
-
-            LOG.warning('Importing wikipedia importance data')
-            data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
-            if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
-                                                 data_path) > 0:
-                LOG.error('Wikipedia importance dump file not found. '
-                          'Calculating importance values of locations will not '
-                          'use Wikipedia importance data.')
-
-            LOG.warning('Importing secondary importance raster data')
-            if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
-                                                   args.project_dir) != 0:
-                LOG.error('Secondary importance file not imported. '
-                          'Falling back to default ranking.')
-
-            self._setup_tables(args.config, args.reverse_only)
-
-        if args.continue_at is None or args.continue_at == 'load-data':
+            if args.continue_at in ('import-from-file', None):
+                # Check if the correct plugins are installed
+                database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
+                LOG.warning('Setting up country tables')
+                country_info.setup_country_tables(args.config.get_libpq_dsn(),
+                                                args.config.lib_dir.data,
+                                                args.no_partitions)
+
+                LOG.warning('Importing OSM data file')
+                database_import.import_osm_data(files,
+                                                args.osm2pgsql_options(0, 1),
+                                                drop=args.no_updates,
+                                                ignore_errors=args.ignore_errors)
+
+                LOG.warning('Importing wikipedia importance data')
+                data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
+                if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+                                                    data_path) > 0:
+                    LOG.error('Wikipedia importance dump file not found. '
+                            'Calculating importance values of locations will not '
+                            'use Wikipedia importance data.')
+
+                LOG.warning('Importing secondary importance raster data')
+                if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+                                                    args.project_dir) != 0:
+                    LOG.error('Secondary importance file not imported. '
+                            'Falling back to default ranking.')
+
+                self._setup_tables(args.config, args.reverse_only)
+
+        if args.continue_at in ('import-from-file', 'load-data', None):
             LOG.warning('Initialise tables')
             with connect(args.config.get_libpq_dsn()) as conn:
                 database_import.truncate_data_tables(conn)
@@ -123,12 +145,13 @@ class SetupAll:
         LOG.warning("Setting up tokenizer")
         tokenizer = self._get_tokenizer(args.continue_at, args.config)
 
-        if args.continue_at is None or args.continue_at == 'load-data':
+        if args.continue_at in ('import-from-file', 'load-data', None):
             LOG.warning('Calculate postcodes')
             postcodes.update_postcodes(args.config.get_libpq_dsn(),
                                        args.project_dir, tokenizer)
 
-        if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
+        if args.continue_at in ('import-from-file', 'load-data', 'indexing', None):
             LOG.warning('Indexing places')
             indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
             indexer.index_full(analyse=not args.index_noanalyse)
@@ -185,7 +208,7 @@ class SetupAll:
         """
         from ..tokenizer import factory as tokenizer_factory
 
-        if continue_at is None or continue_at == 'load-data':
+        if continue_at in ('import-from-file', 'load-data', None):
             # (re)initialise the tokenizer data
             return tokenizer_factory.create_tokenizer(config)
 
index 3a4c3a6bed038ec9bb03f27eeb90979fbf675ca9..3344a425a5667d58f6e2cd79e916f0b9bbb8c839 100644 (file)
@@ -47,17 +47,15 @@ def flatten_config_list(content: Any, section: str = '') -> List[Any]:
 
 
 class Configuration:
-    """ Load and manage the project configuration.
-
-        Nominatim uses dotenv to configure the software. Configuration options
-        are resolved in the following order:
-
-         * from the OS environment (or the dictionary given in `environ`)
-         * from the .env file in the project directory of the installation
-         * from the default installation in the configuration directory
+    """ This class wraps access to the configuration settings
+        for the Nominatim instance in use.
 
         All Nominatim configuration options are prefixed with 'NOMINATIM_' to
-        avoid conflicts with other environment variables.
+        avoid conflicts with other environment variables. All settings can
+        be accessed as properties of the class under the same name as the
+        setting but with the `NOMINATIM_` prefix removed. In addition, there
+        are accessor functions that convert the setting values to types
+        other than string.
     """
 
     def __init__(self, project_dir: Optional[Path],
@@ -99,14 +97,29 @@ class Configuration:
 
     def get_bool(self, name: str) -> bool:
         """ Return the given configuration parameter as a boolean.
-            Values of '1', 'yes' and 'true' are accepted as truthy values,
-            everything else is interpreted as false.
+
+            Parameters:
+              name: Name of the configuration parameter with the NOMINATIM_
+                prefix removed.
+
+            Returns:
+              `True` for values of '1', 'yes' and 'true', `False` otherwise.
         """
         return getattr(self, name).lower() in ('1', 'yes', 'true')
 
 
     def get_int(self, name: str) -> int:
         """ Return the given configuration parameter as an int.
+
+            Parameters:
+              name: Name of the configuration parameter with the NOMINATIM_
+                prefix removed.
+
+            Returns:
+              The configuration value converted to int.
+
+            Raises:
+              ValueError: when the value is not a number.
         """
         try:
             return int(getattr(self, name))
@@ -118,8 +131,17 @@ class Configuration:
     def get_str_list(self, name: str) -> Optional[List[str]]:
         """ Return the given configuration parameter as a list of strings.
             The values are assumed to be given as a comma-separated list and
-            will be stripped before returning them. On empty values None
-            is returned.
+            will be stripped before returning them.
+
+            Parameters:
+              name: Name of the configuration parameter with the NOMINATIM_
+                prefix removed.
+
+            Returns:
+              (List[str]): The comma-split parameter as a list. The
+                elements are stripped of leading and final spaces before
+                being returned.
+              (None): The configuration parameter was unset or empty.
         """
         raw = getattr(self, name)
 
@@ -128,9 +150,16 @@ class Configuration:
 
     def get_path(self, name: str) -> Optional[Path]:
         """ Return the given configuration parameter as a Path.
-            If a relative path is configured, then the function converts this
-            into an absolute path with the project directory as root path.
-            If the configuration is unset, None is returned.
+
+            Parameters:
+              name: Name of the configuration parameter with the NOMINATIM_
+                prefix removed.
+
+            Returns:
+              (Path): A Path object of the parameter value.
+                  If a relative path is configured, then the function converts this
+                  into an absolute path with the project directory as root path.
+              (None): The configuration parameter was unset or empty.
         """
         value = getattr(self, name)
         if not value:
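Taken together, the accessors read like this in use; a hedged sketch where `config` is a Configuration instance and the setting names are examples only:

    if config.get_bool('CORS_NOACCESSCONTROL'):   # NOMINATIM_CORS_NOACCESSCONTROL
        pass
    timeout = config.get_int('REQUEST_TIMEOUT')   # raises ValueError if not a number
    langs = config.get_str_list('LANGUAGES')      # list of strings, or None when unset
    logfile = config.get_path('LOG_FILE')         # absolute Path, or None when unset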
index dad35b7a9965c6a6d4c90149c5879c764ff8e5cc..132dd41fe97df9ff9c86fa8ca37869e7d1aeb76b 100644 (file)
@@ -25,7 +25,7 @@ class CountryPostcodeMatcher:
 
         pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
 
-        self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
+        self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
         self.pattern = re.compile(pc_pattern)
 
         self.output = config.get('output', r'\g<0>')
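The effect of the fix: the normalisation pattern now captures only text that matches the country's postcode pattern instead of swallowing arbitrary trailing input. A small illustration with an invented five-digit country pattern:

    import re

    pc_pattern = 'ddddd'.replace('d', '[0-9]').replace('l', '[A-Z]')
    norm_pattern = re.compile(f'\\s*(?:DE[ -]?)?({pc_pattern})\\s*')

    match = norm_pattern.fullmatch('DE 12345')
    assert match is not None and match.group(1) == '12345'
    assert norm_pattern.fullmatch('DE not-a-postcode') is None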
index d1e542f572472ec60a6abd3be276d2094ed1458f..07767e48ad8934c5dc831c16e9c0040e9a3a6d01 100644 (file)
@@ -69,8 +69,8 @@ class DBConnection:
         self.current_params: Optional[Sequence[Any]] = None
         self.ignore_sql_errors = ignore_sql_errors
 
-        self.conn: Optional['psycopg2.connection'] = None
-        self.cursor: Optional['psycopg2.cursor'] = None
+        self.conn: Optional['psycopg2._psycopg.connection'] = None
+        self.cursor: Optional['psycopg2._psycopg.cursor'] = None
         self.connect(cursor_factory=cursor_factory)
 
     def close(self) -> None:
@@ -78,7 +78,7 @@ class DBConnection:
         """
         if self.conn is not None:
             if self.cursor is not None:
-                self.cursor.close() # type: ignore[no-untyped-call]
+                self.cursor.close()
                 self.cursor = None
             self.conn.close()
 
index 77d463d81d8e4dc0522bf1b2702aff0db66f21df..82801ae7995c9d1e5527baec0d9dd89c85e70e4d 100644 (file)
@@ -31,7 +31,7 @@ class Cursor(psycopg2.extras.DictCursor):
         """ Query execution that logs the SQL query when debugging is enabled.
         """
         if LOG.isEnabledFor(logging.DEBUG):
-            LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[no-untyped-call]
+            LOG.debug(self.mogrify(query, args).decode('utf-8'))
 
         super().execute(query, args)
 
@@ -174,6 +174,15 @@ class Connection(psycopg2.extensions.connection):
 
         return (int(version_parts[0]), int(version_parts[1]))
 
+
+    def extension_loaded(self, extension_name: str) -> bool:
+        """ Return True if the hstore extension is loaded in the database.
+        """
+        with self.cursor() as cur:
+            cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
+            return cur.rowcount > 0
+
+
 class ConnectionContext(ContextManager[Connection]):
     """ Context manager of the connection that also provides direct access
         to the underlying connection.
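A hedged usage sketch for the new `extension_loaded()` helper (the DSN comes from the application configuration):

    from nominatim.db.connection import connect

    with connect(dsn) as conn:
        if not conn.extension_loaded('postgis'):
            raise RuntimeError('PostGIS extension is missing.')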
diff --git a/nominatim/db/sqlalchemy_functions.py b/nominatim/db/sqlalchemy_functions.py
new file mode 100644 (file)
index 0000000..cb04f76
--- /dev/null
@@ -0,0 +1,212 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Custom functions and expressions for SQLAlchemy.
+"""
+from __future__ import annotations
+from typing import Any
+
+import sqlalchemy as sa
+from sqlalchemy.ext.compiler import compiles
+
+from nominatim.typing import SaColumn
+
+# pylint: disable=all
+
+class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
+    """ Check for conditions that allow partial index use on
+        'idx_placex_geometry_reverse_lookupPolygon'.
+
+        Needs to be constant, so that the query planner picks it up
+        correctly in prepared statements.
+    """
+    name = 'PlacexGeometryReverseLookuppolygon'
+    inherit_cache = True
+
+
+@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
+def _default_reverse_lookup_polygon(element: SaColumn,
+                                    compiler: 'sa.Compiled', **kw: Any) -> str:
+    return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
+            " AND placex.rank_address between 4 and 25"
+            " AND placex.type != 'postcode'"
+            " AND placex.name is not null"
+            " AND placex.indexed_status = 0"
+            " AND placex.linked_place_id is null)")
+
+
+@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_reverse_lookup_polygon(element: SaColumn,
+                                   compiler: 'sa.Compiled', **kw: Any) -> str:
+    return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
+            " AND placex.rank_address between 4 and 25"
+            " AND placex.type != 'postcode'"
+            " AND placex.name is not null"
+            " AND placex.indexed_status = 0"
+            " AND placex.linked_place_id is null)")
+
+
+class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
+    name = 'IntersectsReverseDistance'
+    inherit_cache = True
+
+    def __init__(self, table: sa.Table, geom: SaColumn) -> None:
+        super().__init__(table.c.geometry, # type: ignore[no-untyped-call]
+                         table.c.rank_search, geom)
+        self.tablename = table.name
+
+
+@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
+def default_reverse_place_diameter(element: SaColumn,
+                                   compiler: 'sa.Compiled', **kw: Any) -> str:
+    table = element.tablename
+    return f"({table}.rank_address between 4 and 25"\
+           f" AND {table}.type != 'postcode'"\
+           f" AND {table}.name is not null"\
+           f" AND {table}.linked_place_id is null"\
+           f" AND {table}.osm_type = 'N'" + \
+           " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
+               tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
+
+
+@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_reverse_place_diameter(element: SaColumn,
+                                  compiler: 'sa.Compiled', **kw: Any) -> str:
+    geom1, rank, geom2 = list(element.clauses)
+    table = element.tablename
+
+    return (f"({table}.rank_address between 4 and 25"\
+            f" AND {table}.type != 'postcode'"\
+            f" AND {table}.name is not null"\
+            f" AND {table}.linked_place_id is null"\
+            f" AND {table}.osm_type = 'N'"\
+             " AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))"\
+            f" AND {table}.place_id IN"\
+             " (SELECT place_id FROM placex_place_node_areas"\
+             "  WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
+             "  WHERE f_table_name = 'placex_place_node_areas'"\
+             "  AND search_frame = %s)))") % (
+                compiler.process(geom1, **kw),
+                compiler.process(geom2, **kw),
+                compiler.process(rank, **kw),
+                compiler.process(geom2, **kw))
+
+
+class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
+    name = 'IsBelowReverseDistance'
+    inherit_cache = True
+
+
+@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
+def default_is_below_reverse_distance(element: SaColumn,
+                                      compiler: 'sa.Compiled', **kw: Any) -> str:
+    dist, rank = list(element.clauses)
+    return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
+                                                compiler.process(rank, **kw))
+
+
+@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_is_below_reverse_distance(element: SaColumn,
+                                     compiler: 'sa.Compiled', **kw: Any) -> str:
+    dist, rank = list(element.clauses)
+    return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
+                                                  compiler.process(rank, **kw))
+
+
+def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
+    """ Create an expression with the necessary conditions over a placex
+        table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
+        can be used.
+    """
+    return sa.text(f"{table}.rank_address between 4 and 25"
+                   f" AND {table}.type != 'postcode'"
+                   f" AND {table}.name is not null"
+                   f" AND {table}.linked_place_id is null"
+                   f" AND {table}.osm_type = 'N'")
+
+
+class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
+    name = 'IsAddressPoint'
+    inherit_cache = True
+
+    def __init__(self, table: sa.Table) -> None:
+        super().__init__(table.c.rank_address, # type: ignore[no-untyped-call]
+                         table.c.housenumber, table.c.name)
+
+
+@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
+def default_is_address_point(element: SaColumn,
+                             compiler: 'sa.Compiled', **kw: Any) -> str:
+    rank, hnr, name = list(element.clauses)
+    return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
+                compiler.process(rank, **kw),
+                compiler.process(hnr, **kw),
+                compiler.process(name, **kw))
+
+
+@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_is_address_point(element: SaColumn,
+                            compiler: 'sa.Compiled', **kw: Any) -> str:
+    rank, hnr, name = list(element.clauses)
+    return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
+                compiler.process(rank, **kw),
+                compiler.process(hnr, **kw),
+                compiler.process(name, **kw))
+
+
+class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
+    """ Check if in the given list of names in parameters 1 any of the names
+        from the JSON array in parameter 2 are contained.
+    """
+    name = 'CrosscheckNames'
+    inherit_cache = True
+
+@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
+def compile_crosscheck_names(element: SaColumn,
+                             compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
+            compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
+def compile_sqlite_crosscheck_names(element: SaColumn,
+                                    compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "EXISTS(SELECT *"\
+           " FROM json_each(%s) as name, json_each(%s) as match_name"\
+           " WHERE name.value = match_name.value)"\
+           % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
+    """ Return elements of a json array as a set.
+    """
+    name = 'JsonArrayEach'
+    inherit_cache = True
+
+
+@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
+def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
+
+
+@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "json_each(%s)" % compiler.process(element.clauses, **kw)
+
+
+class Greatest(sa.sql.functions.GenericFunction[Any]):
+    """ Function to compute maximum of all its input parameters.
+    """
+    name = 'greatest'
+    inherit_cache = True
+
+
+@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "max(%s)" % compiler.process(element.clauses, **kw)
index 7af3d44cd65dbe62ea8bb6b6af0ac0f1690d9773..7dd1e0ce0b046182b6224eab7b5ec16769719b96 100644 (file)
@@ -10,9 +10,10 @@ SQLAlchemy definitions for all tables used by the frontend.
 from typing import Any
 
 import sqlalchemy as sa
-from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB
+from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
 from sqlalchemy.dialects.sqlite import JSON as sqlite_json
 
+import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
 from nominatim.db.sqlalchemy_types import Geometry
 
 class PostgresTypes:
@@ -21,6 +22,7 @@ class PostgresTypes:
     Composite = HSTORE
     Json = JSONB
     IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
+    to_array = array
 
 
 class SqliteTypes:
@@ -30,10 +32,19 @@ class SqliteTypes:
     Json = sqlite_json
     IntArray = sqlite_json
 
+    @staticmethod
+    def to_array(arr: Any) -> Any:
+        """ Sqlite has no special conversion for arrays.
+        """
+        return arr
+
 
 #pylint: disable=too-many-instance-attributes
 class SearchTables:
     """ Data class that holds the tables of the Nominatim database.
+
+        This schema strictly reflects the read-access view of the database.
+        Any data used for updates only will not be visible.
     """
 
     def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
@@ -56,14 +67,13 @@ class SearchTables:
             sa.Column('value', sa.Text))
 
         self.placex = sa.Table('placex', meta,
-            sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
+            sa.Column('place_id', sa.BigInteger, nullable=False),
             sa.Column('parent_place_id', sa.BigInteger),
             sa.Column('linked_place_id', sa.BigInteger),
             sa.Column('importance', sa.Float),
             sa.Column('indexed_date', sa.DateTime),
             sa.Column('rank_address', sa.SmallInteger),
             sa.Column('rank_search', sa.SmallInteger),
-            sa.Column('partition', sa.SmallInteger),
             sa.Column('indexed_status', sa.SmallInteger),
             sa.Column('osm_type', sa.String(1), nullable=False),
             sa.Column('osm_id', sa.BigInteger, nullable=False),
@@ -81,33 +91,31 @@ class SearchTables:
             sa.Column('centroid', Geometry))
 
         self.addressline = sa.Table('place_addressline', meta,
-            sa.Column('place_id', sa.BigInteger, index=True),
-            sa.Column('address_place_id', sa.BigInteger, index=True),
+            sa.Column('place_id', sa.BigInteger),
+            sa.Column('address_place_id', sa.BigInteger),
             sa.Column('distance', sa.Float),
-            sa.Column('cached_rank_address', sa.SmallInteger),
             sa.Column('fromarea', sa.Boolean),
             sa.Column('isaddress', sa.Boolean))
 
         self.postcode = sa.Table('location_postcode', meta,
-            sa.Column('place_id', sa.BigInteger, unique=True),
+            sa.Column('place_id', sa.BigInteger),
             sa.Column('parent_place_id', sa.BigInteger),
             sa.Column('rank_search', sa.SmallInteger),
             sa.Column('rank_address', sa.SmallInteger),
             sa.Column('indexed_status', sa.SmallInteger),
             sa.Column('indexed_date', sa.DateTime),
             sa.Column('country_code', sa.String(2)),
-            sa.Column('postcode', sa.Text, index=True),
+            sa.Column('postcode', sa.Text),
             sa.Column('geometry', Geometry))
 
         self.osmline = sa.Table('location_property_osmline', meta,
-            sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
+            sa.Column('place_id', sa.BigInteger, nullable=False),
             sa.Column('osm_id', sa.BigInteger),
             sa.Column('parent_place_id', sa.BigInteger),
             sa.Column('indexed_date', sa.DateTime),
             sa.Column('startnumber', sa.Integer),
             sa.Column('endnumber', sa.Integer),
             sa.Column('step', sa.SmallInteger),
-            sa.Column('partition', sa.SmallInteger),
             sa.Column('indexed_status', sa.SmallInteger),
             sa.Column('linegeo', Geometry),
             sa.Column('address', self.types.Composite),
@@ -118,7 +126,6 @@ class SearchTables:
             sa.Column('country_code', sa.String(2)),
             sa.Column('name', self.types.Composite),
             sa.Column('derived_name', self.types.Composite),
-            sa.Column('country_default_language_code', sa.Text),
             sa.Column('partition', sa.Integer))
 
         self.country_grid = sa.Table('country_osm_grid', meta,
@@ -128,12 +135,12 @@ class SearchTables:
 
         # The following tables are not necessarily present.
         self.search_name = sa.Table('search_name', meta,
-            sa.Column('place_id', sa.BigInteger, index=True),
+            sa.Column('place_id', sa.BigInteger),
             sa.Column('importance', sa.Float),
             sa.Column('search_rank', sa.SmallInteger),
             sa.Column('address_rank', sa.SmallInteger),
-            sa.Column('name_vector', self.types.IntArray, index=True),
-            sa.Column('nameaddress_vector', self.types.IntArray, index=True),
+            sa.Column('name_vector', self.types.IntArray),
+            sa.Column('nameaddress_vector', self.types.IntArray),
             sa.Column('country_code', sa.String(2)),
             sa.Column('centroid', Geometry))
 
@@ -143,6 +150,5 @@ class SearchTables:
             sa.Column('startnumber', sa.Integer),
             sa.Column('endnumber', sa.Integer),
             sa.Column('step', sa.SmallInteger),
-            sa.Column('partition', sa.SmallInteger),
             sa.Column('linegeo', Geometry),
             sa.Column('postcode', sa.Text))
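Because index and partition details now live only in the database itself, the same definition instantiates cleanly for either dialect; a small sketch:

    import sqlalchemy as sa
    from nominatim.db.sqlalchemy_schema import SearchTables

    meta = sa.MetaData()
    tables = SearchTables(meta, 'postgresql')   # or 'sqlite'
    print([col.name for col in tables.placex.columns])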
index ed4aef1f15ec088d9336efc6072e24837d96ab3a..a36e8c462acfce3b4cc5e730b2eb5c008f1dfa14 100644 (file)
 """
 Custom types for SQLAlchemy.
 """
+from __future__ import annotations
 from typing import Callable, Any, cast
 import sys
 
 import sqlalchemy as sa
+from sqlalchemy.ext.compiler import compiles
 from sqlalchemy import types
 
 from nominatim.typing import SaColumn, SaBind
 
 #pylint: disable=all
 
+class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
+    """ Function to compute the spherical distance in meters.
+    """
+    type = sa.Float()
+    name = 'Geometry_DistanceSpheroid'
+    inherit_cache = True
+
+
+@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
+def _default_distance_spheroid(element: SaColumn,
+                               compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_DistanceSpheroid(%s,"\
+           " 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
+             % compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _spatialite_distance_spheroid(element: SaColumn,
+                                  compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
+
+
+class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
+    """ Check if the geometry is a line or multiline.
+    """
+    name = 'Geometry_IsLineLike'
+    inherit_cache = True
+
+
+@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
+def _default_is_line_like(element: SaColumn,
+                          compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
+               compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_is_line_like(element: SaColumn,
+                         compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
+               compiler.process(element.clauses, **kw)
+
+
+class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
+    """ Check if the geometry is a polygon or multipolygon.
+    """
+    name = 'Geometry_IsAreaLike'
+    inherit_cache = True
+
+
+@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
+def _default_is_area_like(element: SaColumn,
+                          compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
+               compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_is_area_like(element: SaColumn,
+                         compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
+               compiler.process(element.clauses, **kw)
+
+
+class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
+    """ Check if the bounding boxes of the given geometries intersect.
+    """
+    name = 'Geometry_IntersectsBbox'
+    inherit_cache = True
+
+
+@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
+def _default_intersects(element: SaColumn,
+                        compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_intersects(element: SaColumn,
+                       compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
+
+
+class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
+    """ Check if the bounding box of the geometry intersects with the
+        given table column, using the spatial index for the column.
+
+        The index must exist or the query may return nothing.
+    """
+    name = 'Geometry_ColumnIntersectsBbox'
+    inherit_cache = True
+
+
+@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
+def default_intersects_column(element: SaColumn,
+                              compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
+def spatialite_intersects_column(element: SaColumn,
+                                 compiler: 'sa.Compiled', **kw: Any) -> str:
+    arg1, arg2 = list(element.clauses)
+    return "MbrIntersects(%s, %s) = 1 and "\
+           "%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
+                        "WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
+                        "AND search_frame = %s)" %(
+              compiler.process(arg1, **kw),
+              compiler.process(arg2, **kw),
+              arg1.table.name, arg1.table.name, arg1.name,
+              compiler.process(arg2, **kw))
+
+
+class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
+    """ Check if the geometry is within the distance of the
+        given table column, using the spatial index for the column.
+
+        The index must exist or the query may return nothing.
+    """
+    name = 'Geometry_ColumnDWithin'
+    inherit_cache = True
+
+
+@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
+def default_dwithin_column(element: SaColumn,
+                           compiler: 'sa.Compiled', **kw: Any) -> str:
+    return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
+
+@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
+def spatialite_dwithin_column(element: SaColumn,
+                              compiler: 'sa.Compiled', **kw: Any) -> str:
+    geom1, geom2, dist = list(element.clauses)
+    return "ST_Distance(%s, %s) < %s and "\
+           "%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
+                        "WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
+                        "AND search_frame = ST_Expand(%s, %s))" %(
+              compiler.process(geom1, **kw),
+              compiler.process(geom2, **kw),
+              compiler.process(dist, **kw),
+              geom1.table.name, geom1.table.name, geom1.name,
+              compiler.process(geom2, **kw),
+              compiler.process(dist, **kw))
+
+
 class Geometry(types.UserDefinedType): # type: ignore[type-arg]
     """ Simplified type decorator for PostGIS geometry. This type
         only supports geometries in 4326 projection.
@@ -34,9 +183,9 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
     def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
         def process(value: Any) -> str:
             if isinstance(value, str):
-                return 'SRID=4326;' + value
+                return value
 
-            return 'SRID=4326;' + cast(str, value.to_wkt())
+            return cast(str, value.to_wkt())
         return process
 
 
@@ -47,26 +196,45 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
         return process
 
 
+    def column_expression(self, col: SaColumn) -> SaColumn:
+        return sa.func.ST_AsEWKB(col)
+
+
     def bind_expression(self, bindvalue: SaBind) -> SaColumn:
-        return sa.func.ST_GeomFromText(bindvalue, type_=self)
+        return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
 
 
     class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
 
         def intersects(self, other: SaColumn) -> 'sa.Operators':
-            return self.op('&&')(other)
+            if isinstance(self.expr, sa.Column):
+                return Geometry_ColumnIntersectsBbox(self.expr, other)
+
+            return Geometry_IntersectsBbox(self.expr, other)
+
 
         def is_line_like(self) -> SaColumn:
-            return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString',
-                                                                       'ST_MultiLineString'))
+            return Geometry_IsLineLike(self)
+
 
         def is_area(self) -> SaColumn:
-            return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon',
-                                                                       'ST_MultiPolygon'))
+            return Geometry_IsAreaLike(self)
 
 
         def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
-            return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
+            if isinstance(self.expr, sa.Column):
+                return Geometry_ColumnDWithin(self.expr, other, distance)
+
+            return sa.func.ST_DWithin(self.expr, other, distance)
+
+
+        def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+            return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
+                                      other, distance)
+
+
+        def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
+            return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self), other)
 
 
         def ST_Distance(self, other: SaColumn) -> SaColumn:
@@ -74,11 +242,16 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
 
         def ST_Contains(self, other: SaColumn) -> SaColumn:
-            return sa.func.ST_Contains(self, other, type_=sa.Float)
+            return sa.func.ST_Contains(self, other, type_=sa.Boolean)
+
+
+        def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
+            return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
 
 
         def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
-            return sa.func.ST_ClosestPoint(self, other, type_=Geometry)
+            return sa.func.coalesce(sa.func.ST_ClosestPoint(self, other, type_=Geometry),
+                                    other)
 
 
         def ST_Buffer(self, other: SaColumn) -> SaColumn:
@@ -103,3 +276,55 @@ class Geometry(types.UserDefinedType): # type: ignore[type-arg]
 
         def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
             return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
+
+
+        def distance_spheroid(self, other: SaColumn) -> SaColumn:
+            return Geometry_DistanceSpheroid(self, other)
+
+
+@compiles(Geometry, 'sqlite') # type: ignore[no-untyped-call]
+def get_col_spec(self, *args, **kwargs): # type: ignore[no-untyped-def]
+    return 'GEOMETRY'
+
+
+SQLITE_FUNCTION_ALIAS = (
+    ('ST_AsEWKB', sa.Text, 'AsEWKB'),
+    ('ST_GeomFromEWKT', Geometry, 'GeomFromEWKT'),
+    ('ST_AsGeoJSON', sa.Text, 'AsGeoJSON'),
+    ('ST_AsKML', sa.Text, 'AsKML'),
+    ('ST_AsSVG', sa.Text, 'AsSVG'),
+    ('ST_LineLocatePoint', sa.Float, 'ST_Line_Locate_Point'),
+    ('ST_LineInterpolatePoint', sa.Float, 'ST_Line_Interpolate_Point'),
+)
+
+def _add_function_alias(func: str, ftype: type, alias: str) -> None:
+    _FuncDef = type(func, (sa.sql.functions.GenericFunction, ), {
+        "type": ftype(),
+        "name": func,
+        "identifier": func,
+        "inherit_cache": True})
+
+    func_templ = f"{alias}(%s)"
+
+    def _sqlite_impl(element: Any, compiler: Any, **kw: Any) -> Any:
+        return func_templ % compiler.process(element.clauses, **kw)
+
+    compiles(_FuncDef, 'sqlite')(_sqlite_impl) # type: ignore[no-untyped-call]
+
+for alias in SQLITE_FUNCTION_ALIAS:
+    _add_function_alias(*alias)
+
+
+class ST_DWithin(sa.sql.functions.GenericFunction[Any]):
+    name = 'ST_DWithin'
+    inherit_cache = True
+
+
+@compiles(ST_DWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_st_dwithin(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+    geom1, geom2, dist = list(element.clauses)
+    return "(MbrIntersects(%s, ST_Expand(%s, %s)) = 1 AND ST_Distance(%s, %s) <= %s)" % (
+        compiler.process(geom1, **kw), compiler.process(geom2, **kw),
+        compiler.process(dist, **kw),
+        compiler.process(geom1, **kw), compiler.process(geom2, **kw),
+        compiler.process(dist, **kw))
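As in the functions module, these `FunctionElement` classes let one comparator method render dialect-appropriate SQL. For example (expected output in comments):

    import sqlalchemy as sa
    from sqlalchemy.dialects import postgresql, sqlite
    from nominatim.db.sqlalchemy_types import Geometry_IsAreaLike

    expr = Geometry_IsAreaLike(sa.column('geometry'))
    print(expr.compile(dialect=postgresql.dialect()))
    # ST_GeometryType(geometry) IN ('ST_Polygon', 'ST_MultiPolygon')
    print(expr.compile(dialect=sqlite.dialect()))
    # ST_GeometryType(geometry) IN ('POLYGON', 'MULTIPOLYGON')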
index 9a7b4f164787b8abb03831477fe3b876357e9b25..e3f0712a11b6f53551bc90e4734798e441f33142 100644 (file)
@@ -118,4 +118,4 @@ class CopyBuffer:
         """
         if self.buffer.tell() > 0:
             self.buffer.seek(0)
-            cur.copy_from(self.buffer, table, columns=columns) # type: ignore[no-untyped-call]
+            cur.copy_from(self.buffer, table, columns=columns)
index c11cf4a845de734f1f68319990ebb6b179ca40d1..5ec418a6f3b4aa5ad4c1f287506cb843b3b03504 100644 (file)
@@ -7,8 +7,9 @@
 """
 Server implementation using the falcon webserver framework.
 """
-from typing import Optional, Mapping, cast, Any
+from typing import Optional, Mapping, cast, Any, List
 from pathlib import Path
+import datetime as dt
 
 from falcon.asgi import App, Request, Response
 
@@ -36,6 +37,17 @@ async def nominatim_error_handler(req: Request, resp: Response, #pylint: disable
     resp.content_type = exception.content_type
 
 
+async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
+                                exception: TimeoutError, #pylint: disable=unused-argument
+                                _: Any) -> None:
+    """ Special error handler that passes message and content type as
+        per exception info.
+    """
+    resp.status = 503
+    resp.text = "Query took too long to process."
+    resp.content_type = 'text/plain; charset=utf-8'
+
+
 class ParamWrapper(api_impl.ASGIAdaptor):
     """ Adaptor class for server glue to Falcon framework.
     """
@@ -59,12 +71,16 @@ class ParamWrapper(api_impl.ASGIAdaptor):
         return HTTPNominatimError(msg, status, self.content_type)
 
 
-    def create_response(self, status: int, output: str) -> None:
+    def create_response(self, status: int, output: str, num_results: int) -> None:
+        self.response.context.num_results = num_results
         self.response.status = status
         self.response.text = output
         self.response.content_type = self.content_type
 
 
+    def base_uri(self) -> str:
+        return cast(str, self.request.forwarded_prefix)
+
+
     def config(self) -> Configuration:
         return self._config
 
@@ -73,7 +89,8 @@ class EndpointWrapper:
     """ Converter for server glue endpoint functions to Falcon request handlers.
     """
 
-    def __init__(self, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+    def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+        self.name = name
         self.func = func
         self.api = api
 
@@ -84,18 +101,73 @@ class EndpointWrapper:
         await self.func(self.api, ParamWrapper(req, resp, self.api.config))
 
 
+class FileLoggingMiddleware:
+    """ Middleware to log selected requests into a file.
+    """
+
+    def __init__(self, file_name: str):
+        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+
+    async def process_request(self, req: Request, _: Response) -> None:
+        """ Callback before the request starts timing.
+        """
+        req.context.start = dt.datetime.now(tz=dt.timezone.utc)
+
+
+    async def process_response(self, req: Request, resp: Response,
+                               resource: Optional[EndpointWrapper],
+                               req_succeeded: bool) -> None:
+        """ Callback after requests writes to the logfile. It only
+            writes logs for sucessful requests for search, reverse and lookup.
+        """
+        if not req_succeeded or resource is None or resp.status != 200\
+            or resource.name not in ('reverse', 'search', 'lookup', 'details'):
+            return
+
+        finish = dt.datetime.now(tz=dt.timezone.utc)
+        duration = (finish - req.context.start).total_seconds()
+        params = req.scope['query_string'].decode('utf8')
+        start = req.context.start.replace(tzinfo=None)\
+                                 .isoformat(sep=' ', timespec='milliseconds')
+
+        self.fd.write(f"[{start}] "
+                      f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} "
+                      f'{resource.name} "{params}"\n')
+
+
+class APIShutdown:
+    """ Middleware that closes any open database connections.
+    """
+
+    def __init__(self, api: NominatimAPIAsync) -> None:
+        self.api = api
+
+    async def process_shutdown(self, *_: Any) -> None:
+        """Process the ASGI lifespan shutdown event.
+        """
+        await self.api.close()
+
+
 def get_application(project_dir: Path,
                     environ: Optional[Mapping[str, str]] = None) -> App:
     """ Create a Nominatim Falcon ASGI application.
     """
     api = NominatimAPIAsync(project_dir, environ)
 
-    app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'))
+    middleware: List[object] = [APIShutdown(api)]
+    log_file = api.config.LOG_FILE
+    if log_file:
+        middleware.append(FileLoggingMiddleware(log_file))
+
+    app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'),
+              middleware=middleware)
     app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
+    app.add_error_handler(TimeoutError, timeout_error_handler)
 
     legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
     for name, func in api_impl.ROUTES:
-        endpoint = EndpointWrapper(func, api)
+        endpoint = EndpointWrapper(name, func, api)
         app.add_route(f"/{name}", endpoint)
         if legacy_urls:
             app.add_route(f"/{name}.php", endpoint)
index f81b122f274e17ddf0e0565139b09e003663a05e..33ab22c7bcee7c2b1994ab56496acec886f38d30 100644 (file)
@@ -7,15 +7,17 @@
 """
 Server implementation using the starlette webserver framework.
 """
-from typing import Any, Optional, Mapping, Callable, cast, Coroutine
+from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
 from pathlib import Path
+import datetime as dt
 
 from starlette.applications import Starlette
 from starlette.routing import Route
 from starlette.exceptions import HTTPException
-from starlette.responses import Response
+from starlette.responses import Response, PlainTextResponse
 from starlette.requests import Request
 from starlette.middleware import Middleware
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
 from starlette.middleware.cors import CORSMiddleware
 
 from nominatim.api import NominatimAPIAsync
@@ -43,10 +45,24 @@ class ParamWrapper(api_impl.ASGIAdaptor):
                              headers={'content-type': self.content_type})
 
 
-    def create_response(self, status: int, output: str) -> Response:
+    def create_response(self, status: int, output: str, num_results: int) -> Response:
+        self.request.state.num_results = num_results
         return Response(output, status_code=status, media_type=self.content_type)
 
 
+    def base_uri(self) -> str:
+        scheme = self.request.url.scheme
+        host = self.request.url.hostname
+        port = self.request.url.port
+        root = self.request.scope['root_path']
+        if (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
+            port = None
+        if port is not None:
+            return f"{scheme}://{host}:{port}{root}"
+
+        return f"{scheme}://{host}{root}"
+
+
     def config(self) -> Configuration:
         return cast(Configuration, self.request.app.state.API.config)
 
@@ -59,6 +75,48 @@ def _wrap_endpoint(func: api_impl.EndpointFunc)\
     return _callback
 
 
+class FileLoggingMiddleware(BaseHTTPMiddleware):
+    """ Middleware to log selected requests into a file.
+    """
+
+    def __init__(self, app: Starlette, file_name: str = ''):
+        super().__init__(app)
+        self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+    async def dispatch(self, request: Request,
+                       call_next: RequestResponseEndpoint) -> Response:
+        start = dt.datetime.now(tz=dt.timezone.utc)
+        response = await call_next(request)
+
+        if response.status_code != 200:
+            return response
+
+        finish = dt.datetime.now(tz=dt.timezone.utc)
+
+        for endpoint in ('reverse', 'search', 'lookup', 'details'):
+            if request.url.path.startswith('/' + endpoint):
+                qtype = endpoint
+                break
+        else:
+            return response
+
+        duration = (finish - start).total_seconds()
+        params = request.scope['query_string'].decode('utf8')
+
+        self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] "
+                      f"{duration:.4f} {getattr(request.state, 'num_results', 0)} "
+                      f'{qtype} "{params}"\n')
+
+        return response
+
+
+async def timeout_error(request: Request, #pylint: disable=unused-argument
+                        _: Exception) -> Response:
+    """ Error handler for query timeouts.
+    """
+    return PlainTextResponse("Query took too long to process.", status_code=503)
+
+
 def get_application(project_dir: Path,
                     environ: Optional[Mapping[str, str]] = None,
                     debug: bool = True) -> Starlette:
@@ -76,12 +134,24 @@ def get_application(project_dir: Path,
 
     middleware = []
     if config.get_bool('CORS_NOACCESSCONTROL'):
-        middleware.append(Middleware(CORSMiddleware, allow_origins=['*']))
+        middleware.append(Middleware(CORSMiddleware,
+                                     allow_origins=['*'],
+                                     allow_methods=['GET', 'OPTIONS'],
+                                     max_age=86400))
+
+    log_file = config.LOG_FILE
+    if log_file:
+        middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
+
+    exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
+        TimeoutError: timeout_error
+    }
 
     async def _shutdown() -> None:
         await app.state.API.close()
 
     app = Starlette(debug=debug, routes=routes, middleware=middleware,
+                    exception_handlers=exceptions,
                     on_shutdown=[_shutdown])
 
     app.state.API = NominatimAPIAsync(project_dir, environ)
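A hedged sketch for serving the Starlette variant with uvicorn (project path, host and port invented):

    from pathlib import Path

    import uvicorn

    from nominatim.server.starlette.server import get_application

    app = get_application(Path('/srv/nominatim-project'))
    uvicorn.run(app, host='127.0.0.1', port=8088)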
index afbd1914b35d84219812afdd64f3061d306944f5..061cff36b99f22273e55e350d410d4291c425b91 100644 (file)
@@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable
 from pathlib import Path
 
 from nominatim.config import Configuration
+from nominatim.db.connection import Connection
 from nominatim.data.place_info import PlaceInfo
 from nominatim.typing import Protocol
 
@@ -52,8 +53,8 @@ class AbstractAnalyzer(ABC):
 
             Returns:
                 The function returns the list of all tuples that could be
-                found for the given words. Each list entry is a tuple of
-                (original word, word token, word id).
+                    found for the given words. Each list entry is a tuple of
+                    (original word, word token, word id).
         """
 
 
@@ -117,7 +118,7 @@ class AbstractAnalyzer(ABC):
 
             Returns:
                 A JSON-serialisable structure that will be handed into
-                the database via the `token_info` field.
+                    the database via the `token_info` field.
         """
 
 
@@ -143,8 +144,6 @@ class AbstractTokenizer(ABC):
                 tables should be skipped. This option is only required for
                 migration purposes and can be safely ignored by custom
                 tokenizers.
-
-            TODO: can we move the init_db parameter somewhere else?
         """
 
 
@@ -196,8 +195,8 @@ class AbstractTokenizer(ABC):
 
             Returns:
               If an issue was found, return an error message with the
-              description of the issue as well as hints for the user on
-              how to resolve the issue. If everything is okay, return `None`.
+                  description of the issue as well as hints for the user on
+                  how to resolve the issue. If everything is okay, return `None`.
         """
 
 
@@ -233,6 +232,17 @@ class AbstractTokenizer(ABC):
         """
 
 
+    @abstractmethod
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the most frequent full words in the database.
+
+            Arguments:
+              conn: Open connection to the database which may be used to
+                    retrieve the words.
+              num: Maximum number of words to return.
+        """
+
+
 class TokenizerModule(Protocol):
     """ Interface that must be exported by modules that implement their
         own tokenizer.
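A short usage sketch for the new abstract method, assuming `config` and `tokenizer` were set up as elsewhere in this changeset:

    from nominatim.db.connection import connect

    with connect(config.get_libpq_dsn()) as conn:
        words = tokenizer.most_frequent_words(conn, 50)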
index b6e646377b0e7f9d6dea691cf1ffcc6e1295632d..799ff559b94599c43e4f66270f82ec94ac0138cc 100644 (file)
@@ -183,6 +183,18 @@ class ICUTokenizer(AbstractTokenizer):
                                self.loader.make_token_analysis())
 
 
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the `num` most frequent full words
+            in the database.
+        """
+        with conn.cursor() as cur:
+            cur.execute("""SELECT word, sum((info->>'count')::int) as count
+                             FROM word WHERE type = 'W'
+                             GROUP BY word
+                             ORDER BY count DESC LIMIT %s""", (num,))
+            return list(s[0].split('@')[0] for s in cur)
+
+
     def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
         """ Install the php script for the tokenizer.
         """
index e09700d9ddb8856a8d52fc6f7de1e9b748b9bbd7..1b68a494383bb72804d662b8bcdff8456b87c6db 100644 (file)
@@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer):
         return LegacyNameAnalyzer(self.dsn, normalizer)
 
 
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the `num` most frequent full words
+            in the database.
+        """
+        with conn.cursor() as cur:
+            cur.execute(""" SELECT word FROM word WHERE word is not null
+                              ORDER BY search_name_count DESC LIMIT %s""", (num,))
+            return list(s[0] for s in cur)
+
+
     def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
         """ Install the php script for the tokenizer.
         """
index 9b4f763ac81780508efbef1c9a66df14fd013cd0..79396a75a6893fe968e6c613873097301a2cedc0 100644 (file)
@@ -41,9 +41,9 @@ class SanitizerConfig(_BaseUserDict):
 
             Returns:
                 If the parameter value is a simple string, it is returned as a
-                one-item list. If the parameter value does not exist, the given
-                default is returned. If the parameter value is a list, it is
-                checked to contain only strings before being returned.
+                    one-item list. If the parameter value does not exist, the given
+                    default is returned. If the parameter value is a list, it is
+                    checked to contain only strings before being returned.
         """
         values = self.data.get(param, None)
 
@@ -94,10 +94,10 @@ class SanitizerConfig(_BaseUserDict):
 
             Returns:
                 A regular expression pattern which can be used to
-                split a string. The regular expression makes sure that the
-                resulting names are stripped and that repeated delimiters
-                are ignored. It may still create empty fields on occasion. The
-                code needs to filter those.
+                    split a string. The regular expression makes sure that the
+                    resulting names are stripped and that repeated delimiters
+                    are ignored. It may still create empty fields on occasion. The
+                    code needs to filter those.
         """
         delimiter_set = set(self.data.get('delimiters', default))
         if not delimiter_set:
@@ -133,8 +133,8 @@ class SanitizerConfig(_BaseUserDict):
 
             Returns:
                 A filter function that takes a target string as the argument and
-                returns True if it fully matches any of the regular expressions
-                otherwise returns False.
+                    returns True if it fully matches any of the regular expressions
+                    otherwise returns False.
         """
         filters = self.get_string_list(param) or default
 
diff --git a/nominatim/tokenizer/sanitizers/tag_japanese.py b/nominatim/tokenizer/sanitizers/tag_japanese.py
new file mode 100644 (file)
index 0000000..3663860
--- /dev/null
@@ -0,0 +1,117 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+This sanitizer maps OSM data to Japanese block addresses.
+It combines blocknumber and housenumber into a single housenumber
+and merges quarter and neighbourhood into place.
+"""
+
+
+from typing import Callable
+from typing import List, Optional
+
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
+from nominatim.data.place_name import PlaceName
+
+def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
+    """Set up the sanitizer
+    """
+    return tag_japanese
+
+def recombine_housenumber(
+    new_address: List[PlaceName],
+    tmp_housenumber: Optional[str],
+    tmp_blocknumber: Optional[str]
+) -> List[PlaceName]:
+    """ Recombine the tag of housenumber by using housenumber and blocknumber
+    """
+    if tmp_blocknumber and tmp_housenumber:
+        new_address.append(
+            PlaceName(
+                kind='housenumber',
+                name=f'{tmp_blocknumber}-{tmp_housenumber}',
+                suffix=''
+            )
+        )
+    elif tmp_blocknumber:
+        new_address.append(
+            PlaceName(
+                kind='housenumber',
+                name=tmp_blocknumber,
+                suffix=''
+            )
+        )
+    elif tmp_housenumber:
+        new_address.append(
+            PlaceName(
+                kind='housenumber',
+                name=tmp_housenumber,
+                suffix=''
+            )
+        )
+    return new_address
+
+def recombine_place(
+    new_address: List[PlaceName],
+    tmp_neighbourhood: Optional[str],
+    tmp_quarter: Optional[str]
+) -> List[PlaceName]:
+    """ Recombine the tag of place by using neighbourhood and quarter
+    """
+    if tmp_neighbourhood and tmp_quarter:
+        new_address.append(
+            PlaceName(
+                kind='place',
+                name=f'{tmp_quarter}{tmp_neighbourhood}',
+                suffix=''
+            )
+        )
+    elif tmp_neighbourhood:
+        new_address.append(
+            PlaceName(
+                kind='place',
+                name=tmp_neighbourhood,
+                suffix=''
+            )
+        )
+    elif tmp_quarter:
+        new_address.append(
+            PlaceName(
+                kind='place',
+                name=tmp_quarter,
+                suffix=''
+            )
+        )
+    return new_address
+
+
+def tag_japanese(obj: ProcessInfo) -> None:
+    """ Sort the address parts of a Japanese place and recombine them.
+    """
+    if obj.place.country_code != 'jp':
+        return
+    tmp_housenumber = None
+    tmp_blocknumber = None
+    tmp_neighbourhood = None
+    tmp_quarter = None
+
+    new_address = []
+    for item in obj.address:
+        if item.kind == 'housenumber':
+            tmp_housenumber = item.name
+        elif item.kind == 'block_number':
+            tmp_blocknumber = item.name
+        elif item.kind == 'neighbourhood':
+            tmp_neighbourhood = item.name
+        elif item.kind == 'quarter':
+            tmp_quarter = item.name
+        else:
+            new_address.append(item)
+
+    new_address = recombine_housenumber(new_address, tmp_housenumber, tmp_blocknumber)
+    new_address = recombine_place(new_address, tmp_neighbourhood, tmp_quarter)
+
+    obj.address = [item for item in new_address if item.name is not None]
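A quick illustration of the recombination (values invented):

    from nominatim.tokenizer.sanitizers.tag_japanese import recombine_housenumber

    result = recombine_housenumber([], tmp_housenumber='2', tmp_blocknumber='3')
    print(result[0].kind, result[0].name)   # housenumber 3-2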
index 68046f9621306b0341366702ce81b43b640e922e..c7ec61c9fd683c7b105b2ccb3c750995dd435e09 100644 (file)
@@ -28,8 +28,8 @@ class Analyzer(Protocol):
 
             Returns:
                 ID string with a canonical form of the name. The string may
-                be empty, when the analyzer cannot analyze the name at all,
-                for example because the character set in use does not match.
+                    be empty, when the analyzer cannot analyze the name at all,
+                    for example because the character set in use does not match.
         """
 
     def compute_variants(self, canonical_id: str) -> List[str]:
@@ -42,13 +42,13 @@ class Analyzer(Protocol):
 
             Returns:
                 A list of possible spelling variants. All strings must have
-                been transformed with the global normalizer and
-                transliterator ICU rules. Otherwise they cannot be matched
-                against the input by the query frontend.
-                The list may be empty, when there are no useful
-                spelling variants. This may happen when an analyzer only
-                usually outputs additional variants to the canonical spelling
-                and there are no such variants.
+                    been transformed with the global normalizer and
+                    transliterator ICU rules. Otherwise they cannot be matched
+                    against the input by the query frontend.
+                    The list may be empty, when there are no useful
+                    spelling variants. This may happen when an analyzer only
+                    usually outputs additional variants to the canonical spelling
+                    and there are no such variants.
         """
 
 
@@ -74,8 +74,8 @@ class AnalysisModule(Protocol):
 
             Returns:
                 A data object with configuration data. This will be handed
-                as is into the `create()` function and may be
-                used freely by the analysis module as needed.
+                    as is into the `create()` function and may be
+                    used freely by the analysis module as needed.
         """
 
     def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
@@ -92,5 +92,5 @@ class AnalysisModule(Protocol):
 
             Returns:
                 A new analyzer instance. This must be an object that implements
-                the Analyzer protocol.
+                    the Analyzer protocol.
         """
index da7845ebc949696145588b393664d4be2077eb37..c37d0811ff84b0ebb7276c4b90293f8bf71369d6 100644 (file)
@@ -11,6 +11,7 @@ from typing import Optional, Tuple, Any, cast
 import logging
 
 from psycopg2.extras import Json, register_hstore
+from psycopg2 import DataError
 
 from nominatim.config import Configuration
 from nominatim.db.connection import connect, Cursor
@@ -87,3 +88,19 @@ def analyse_indexing(config: Configuration, osm_id: Optional[str] = None,
 
         for msg in conn.notices:
             print(msg)
+
+
+def clean_deleted_relations(config: Configuration, age: str) -> None:
+    """ Clean deleted relations older than a given age
+    """
+    with connect(config.get_libpq_dsn()) as conn:
+        with conn.cursor() as cur:
+            try:
+                cur.execute("""SELECT place_force_delete(p.place_id)
+                            FROM import_polygon_delete d, placex p
+                            WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
+                            AND age(p.indexed_date) > %s::interval""",
+                            (age, ))
+            except DataError as exc:
+                raise UsageError('Invalid PostgreSQL time interval format') from exc
+        conn.commit()
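The age argument accepts any PostgreSQL interval literal; a hedged usage sketch where `config` is the application's Configuration instance:

    clean_deleted_relations(config, age='1 month')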
index 29e1cd535672c768daaecc6151a343166a29a522..c8fda908c731324e28eb074e9fc0a31bd99f574e 100644 (file)
@@ -12,14 +12,13 @@ import os
 import subprocess
 import sys
 from pathlib import Path
-from typing import List, Optional, Tuple, Union, cast
+from typing import List, Optional, Tuple, Union
 
 import psutil
 from psycopg2.extensions import make_dsn, parse_dsn
 
 from nominatim.config import Configuration
 from nominatim.db.connection import connect
-from nominatim.typing import DictCursorResults
 from nominatim.version import NOMINATIM_VERSION
 
 
@@ -107,15 +106,15 @@ def report_system_information(config: Configuration) -> None:
         postgresql_ver: str = convert_version(conn.server_version_tuple())
 
         with conn.cursor() as cur:
-            cur.execute(f"""
-            SELECT datname FROM pg_catalog.pg_database 
-            WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
-            nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
-            if nominatim_db_exists:
-                with connect(config.get_libpq_dsn()) as conn:
-                    postgis_ver: str = convert_version(conn.postgis_version_tuple())
-            else:
-                postgis_ver = "Unable to connect to database"
+            num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
+                             (parse_dsn(config.get_libpq_dsn())['dbname'], ))
+            nominatim_db_exists = num == 1 if isinstance(num, int) else False
+
+    if nominatim_db_exists:
+        with connect(config.get_libpq_dsn()) as conn:
+            postgis_ver: str = convert_version(conn.postgis_version_tuple())
+    else:
+        postgis_ver = "Unable to connect to database"
 
     postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))
 
diff --git a/nominatim/tools/convert_sqlite.py b/nominatim/tools/convert_sqlite.py
new file mode 100644 (file)
index 0000000..0702e5d
--- /dev/null
@@ -0,0 +1,156 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Exporting a Nominatim database to SQLite.
+"""
+from typing import Set
+import logging
+from pathlib import Path
+
+import sqlalchemy as sa
+
+from nominatim.typing import SaSelect
+from nominatim.db.sqlalchemy_types import Geometry
+import nominatim.api as napi
+
+LOG = logging.getLogger()
+
+async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
+    """ Export an existing database to sqlite. The resulting database
+        will be usable against the Python frontend of Nominatim.
+    """
+    api = napi.NominatimAPIAsync(project_dir)
+
+    try:
+        outapi = napi.NominatimAPIAsync(project_dir,
+                                        {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}"})
+
+        async with api.begin() as src, outapi.begin() as dest:
+            writer = SqliteWriter(src, dest, options)
+            await writer.write()
+    finally:
+        await api.close()
+
+
+class SqliteWriter:
+    """ Worker class which creates a new SQLite database.
+    """
+
+    def __init__(self, src: napi.SearchConnection,
+                 dest: napi.SearchConnection, options: Set[str]) -> None:
+        self.src = src
+        self.dest = dest
+        self.options = options
+
+
+    async def write(self) -> None:
+        """ Create the database structure and copy the data from
+            the source database to the destination.
+        """
+        await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))
+
+        await self.create_tables()
+        await self.copy_data()
+        await self.create_indexes()
+
+
+    async def create_tables(self) -> None:
+        """ Set up the database tables.
+        """
+        if 'search' not in self.options:
+            self.dest.t.meta.remove(self.dest.t.search_name)
+
+        await self.dest.connection.run_sync(self.dest.t.meta.create_all)
+
+        # Convert all Geometry columns to Spatialite geometries
+        for table in self.dest.t.meta.sorted_tables:
+            for col in table.c:
+                if isinstance(col.type, Geometry):
+                    await self.dest.execute(sa.select(
+                        sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
+                                                      col.type.subtype.upper(), 'XY')))
+
+
+    async def copy_data(self) -> None:
+        """ Copy data for all registered tables.
+        """
+        for table in self.dest.t.meta.sorted_tables:
+            LOG.warning("Copying '%s'", table.name)
+            async_result = await self.src.connection.stream(self.select_from(table.name))
+
+            async for partition in async_result.partitions(10000):
+                data = [{('class_' if k == 'class' else k): getattr(r, k) for k in r._fields}
+                        for r in partition]
+                await self.dest.execute(table.insert(), data)
+
+
+    async def create_indexes(self) -> None:
+        """ Add indexes necessary for the frontend.
+        """
+        # reverse place node lookup needs an extra table to simulate a
+        # partial index with adaptive buffering.
+        await self.dest.execute(sa.text(
+            """ CREATE TABLE placex_place_node_areas AS
+                  SELECT place_id, ST_Expand(geometry,
+                                             14.0 * exp(-0.2 * rank_search) - 0.03) as geometry
+                  FROM placex
+                  WHERE rank_address between 5 and 25
+                        and osm_type = 'N'
+                        and linked_place_id is NULL """))
+        await self.dest.execute(sa.select(
+            sa.func.RecoverGeometryColumn('placex_place_node_areas', 'geometry',
+                                          4326, 'GEOMETRY', 'XY')))
+        await self.dest.execute(sa.select(sa.func.CreateSpatialIndex(
+                                             'placex_place_node_areas', 'geometry')))
+
+        # Remaining indexes.
+        await self.create_spatial_index('country_grid', 'geometry')
+        await self.create_spatial_index('placex', 'geometry')
+        await self.create_spatial_index('osmline', 'linegeo')
+        await self.create_spatial_index('tiger', 'linegeo')
+        await self.create_index('placex', 'place_id')
+        await self.create_index('placex', 'parent_place_id')
+        await self.create_index('placex', 'rank_address')
+        await self.create_index('addressline', 'place_id')
+
+
+    async def create_spatial_index(self, table: str, column: str) -> None:
+        """ Create a spatial index on the given table and column.
+        """
+        await self.dest.execute(sa.select(
+                  sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))
+
+
+    async def create_index(self, table_name: str, column: str) -> None:
+        """ Create a simple index on the given table and column.
+        """
+        table = getattr(self.dest.t, table_name)
+        await self.dest.connection.run_sync(
+            sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
+
+
+    def select_from(self, table: str) -> SaSelect:
+        """ Create the SQL statement to select the source columns and rows.
+        """
+        columns = self.src.t.meta.tables[table].c
+
+        if table == 'placex':
+            # SQLite struggles with Geometries that are larger than 5MB,
+            # so simplify those.
+            return sa.select(*(c for c in columns if not isinstance(c.type, Geometry)),
+                             sa.func.ST_AsText(columns.centroid).label('centroid'),
+                             sa.func.ST_AsText(
+                               sa.case((sa.func.ST_MemSize(columns.geometry) < 5000000,
+                                        columns.geometry),
+                                       else_=sa.func.ST_SimplifyPreserveTopology(
+                                                columns.geometry, 0.0001)
+                                )).label('geometry'))
+
+        sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
+                             if isinstance(c.type, Geometry) else c for c in columns))
+
+        return sql
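
The new exporter can be driven with a few lines; a minimal sketch with illustrative paths and option set ('search' keeps the search_name table, see create_tables() above):

    import asyncio
    from pathlib import Path

    from nominatim.tools.convert_sqlite import convert

    # Without 'search' the result is a smaller, reverse/lookup-only database.
    asyncio.run(convert(Path('.'), Path('nominatim.sqlite'),
                        options={'reverse', 'search'}))
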
index cb620d41fb8f31126fe69a622bf14130e38494d1..de7e6a4aa2018c06e7284b4120973351b8a04ea5 100644 (file)
@@ -23,7 +23,8 @@ from nominatim.db.async_connection import DBConnection
 from nominatim.db.sql_preprocessor import SQLPreprocessor
 from nominatim.tools.exec_utils import run_osm2pgsql
 from nominatim.errors import UsageError
-from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \
+                              POSTGIS_REQUIRED_VERSION
 
 LOG = logging.getLogger()
 
@@ -38,6 +39,25 @@ def _require_version(module: str, actual: Tuple[int, int], expected: Tuple[int,
         raise UsageError(f'{module} is too old.')
 
 
+def _require_loaded(extension_name: str, conn: Connection) -> None:
+    """ Check that the given extension is loaded. """
+    if not conn.extension_loaded(extension_name):
+        LOG.fatal('Required module %s is not loaded.', extension_name)
+        raise UsageError(f'{extension_name} is not loaded.')
+
+
+def check_existing_database_plugins(dsn: str) -> None:
+    """ Check that the database has the required plugins installed."""
+    with connect(dsn) as conn:
+        _require_version('PostgreSQL server',
+                         conn.server_version_tuple(),
+                         POSTGRESQL_REQUIRED_VERSION)
+        _require_version('PostGIS',
+                         conn.postgis_version_tuple(),
+                         POSTGIS_REQUIRED_VERSION)
+        _require_loaded('hstore', conn)
+
+
 def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
     """ Create a new database for Nominatim and populate it with the
         essential extensions.
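
The new plugin check can be run up front when reusing an existing database; a sketch with an illustrative libpq DSN:

    from nominatim.tools.database_import import check_existing_database_plugins

    # Raises UsageError when PostgreSQL or PostGIS are too old
    # or when the hstore extension is not loaded.
    check_existing_database_plugins('dbname=nominatim')
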
index 566ac06edc57d4374b4979095866c64c1e959d60..c742e3e0061d1d8778d55c8ab975b52873f3fc91 100644 (file)
 """
 Helper functions for executing external programs.
 """
-from typing import Any, Union, Optional, Mapping, IO
-from pathlib import Path
+from typing import Any, Mapping, IO
 import logging
 import os
 import subprocess
 import urllib.request as urlrequest
-from urllib.parse import urlencode
 
-from nominatim.config import Configuration
 from nominatim.typing import StrPath
 from nominatim.version import NOMINATIM_VERSION
 from nominatim.db.connection import get_pg_env
 
 LOG = logging.getLogger()
 
-def run_legacy_script(script: StrPath, *args: Union[int, str],
-                      config: Configuration,
-                      throw_on_fail: bool = False) -> int:
-    """ Run a Nominatim PHP script with the given arguments.
-
-        Returns the exit code of the script. If `throw_on_fail` is True
-        then throw a `CalledProcessError` on a non-zero exit.
-    """
-    cmd = ['/usr/bin/env', 'php', '-Cq',
-           str(config.lib_dir.php / 'admin' / script)]
-    cmd.extend([str(a) for a in args])
-
-    env = config.get_os_env()
-    env['NOMINATIM_DATADIR'] = str(config.lib_dir.data)
-    env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql)
-    env['NOMINATIM_CONFIGDIR'] = str(config.config_dir)
-    env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module)
-    if not env['NOMINATIM_OSM2PGSQL_BINARY']:
-        env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)
-
-    proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env,
-                          check=throw_on_fail)
-
-    return proc.returncode
-
-def run_api_script(endpoint: str, project_dir: Path,
-                   extra_env: Optional[Mapping[str, str]] = None,
-                   phpcgi_bin: Optional[Path] = None,
-                   params: Optional[Mapping[str, Any]] = None) -> int:
-    """ Execute a Nominatim API function.
-
-        The function needs a project directory that contains the website
-        directory with the scripts to be executed. The scripts will be run
-        using php_cgi. Query parameters can be added as named arguments.
-
-        Returns the exit code of the script.
-    """
-    log = logging.getLogger()
-    webdir = str(project_dir / 'website')
-    query_string = urlencode(params or {})
-
-    env = dict(QUERY_STRING=query_string,
-               SCRIPT_NAME=f'/{endpoint}.php',
-               REQUEST_URI=f'/{endpoint}.php?{query_string}',
-               CONTEXT_DOCUMENT_ROOT=webdir,
-               SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
-               HTTP_HOST='localhost',
-               HTTP_USER_AGENT='nominatim-tool',
-               REMOTE_ADDR='0.0.0.0',
-               DOCUMENT_ROOT=webdir,
-               REQUEST_METHOD='GET',
-               SERVER_PROTOCOL='HTTP/1.1',
-               GATEWAY_INTERFACE='CGI/1.1',
-               REDIRECT_STATUS='CGI')
-
-    if extra_env:
-        env.update(extra_env)
-
-    if phpcgi_bin is None:
-        cmd = ['/usr/bin/env', 'php-cgi']
-    else:
-        cmd = [str(phpcgi_bin)]
-
-    proc = subprocess.run(cmd, cwd=str(project_dir), env=env,
-                          stdout=subprocess.PIPE,
-                          stderr=subprocess.PIPE,
-                          check=False)
-
-    if proc.returncode != 0 or proc.stderr:
-        if proc.stderr:
-            log.error(proc.stderr.decode('utf-8').replace('\\n', '\n'))
-        else:
-            log.error(proc.stdout.decode('utf-8').replace('\\n', '\n'))
-        return proc.returncode or 1
-
-    result = proc.stdout.decode('utf-8')
-    content_start = result.find('\r\n\r\n')
-
-    print(result[content_start + 4:].replace('\\n', '\n'))
-
-    return 0
-
-
 def run_php_server(server_address: str, base_dir: StrPath) -> None:
     """ Run the built-in server from the given directory.
     """
@@ -143,6 +57,11 @@ def run_osm2pgsql(options: Mapping[str, Any]) -> None:
         if options['tablespaces'][key]:
             cmd.extend((param, options['tablespaces'][key]))
 
+    if options['tablespaces']['main_data']:
+        env['NOMINATIM_TABLESPACE_PLACE_DATA'] = options['tablespaces']['main_data']
+    if options['tablespaces']['main_index']:
+        env['NOMINATIM_TABLESPACE_PLACE_INDEX'] = options['tablespaces']['main_index']
+
     if options.get('disable_jit', False):
         env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
 
index c6df9982bd6109f4a8ba907989ed96e0ea165604..43e5b1eb387a4641ef49274bc63eaa485c171eda 100644 (file)
@@ -120,6 +120,7 @@ PHP_CONST_DEFS = (
     ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
     ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
     ('MapIcon_URL', 'MAPICON_URL', str),
+    ('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool),
 )
 
 
index ebb5e1e9d56f05fe91e52dda6c73d59365afee81..7274f1d396f8159b714c80fff14fd25b3455b345 100644 (file)
@@ -62,6 +62,7 @@ if TYPE_CHECKING:
 else:
     TypeAlias = str
 
+SaLambdaSelect: TypeAlias = 'Union[sa.Select[Any], sa.StatementLambdaElement]'
 SaSelect: TypeAlias = 'sa.Select[Any]'
 SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]'
 SaRow: TypeAlias = 'sa.Row[Any]'
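
The new alias admits both plain selects and SQLAlchemy's cached lambda statements; a sketch with a made-up helper name, assuming SQLAlchemy 1.4+ where sa.lambda_stmt() is available:

    import sqlalchemy as sa
    from nominatim.typing import SaLambdaSelect

    def select_all(table: sa.Table, cached: bool) -> SaLambdaSelect:
        # Either branch satisfies the alias: a cached lambda statement
        # or an ordinary select.
        if cached:
            return sa.lambda_stmt(lambda: sa.select(table))
        return sa.select(table)
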
index beec32a53c366f573ff835fc555934e58ce50cf3..95420b341f33f6c8dd3a2a95efd20b06ec785b29 100644 (file)
@@ -2,7 +2,7 @@
 #
 # This file is part of Nominatim. (https://nominatim.org)
 #
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
 # For a full list of authors see the git log.
 """
 Version information for Nominatim.
@@ -34,7 +34,7 @@ class NominatimVersion(NamedTuple):
         return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
 
 
-NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 2)
+NOMINATIM_VERSION = NominatimVersion(4, 3, 0, 0)
 
 POSTGRESQL_REQUIRED_VERSION = (9, 6)
 POSTGIS_REQUIRED_VERSION = (2, 2)
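
For reference, the bumped tuple renders as the 4.3.0 release string, assuming the f-string shown in the hunk context above is the tuple's string formatter:

    from nominatim.version import NOMINATIM_VERSION

    assert str(NOMINATIM_VERSION) == '4.3.0-0'
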
index 4facd1aea451cea220261c361698b8e5f18a9327..415de9abdf2d003a5c0a0abe8e8fc139acacc2b5 160000 (submodule)
--- a/osm2pgsql
+++ b/osm2pgsql
@@ -1 +1 @@
-Subproject commit 4facd1aea451cea220261c361698b8e5f18a9327
+Subproject commit 415de9abdf2d003a5c0a0abe8e8fc139acacc2b5
index 9c2f7cac18da3636b445663627dd400a7683848c..64a160c7197b9c7593a10670c439d66de3c7fab1 100644 (file)
@@ -77,20 +77,10 @@ NOMINATIM_HTTP_PROXY_PASSWORD=
 # EXPERT ONLY. You should usually use the supplied osm2pgsql.
 NOMINATIM_OSM2PGSQL_BINARY=
 
-# Directory where to find US Tiger data files to import.
-# OBSOLETE: use `nominatim add-data --tiger-data <dir>` to explicitly state
-#           the directory on import
-NOMINATIM_TIGER_DATA_PATH=
-
 # Directory where to find pre-computed Wikipedia importance files.
 # When unset, the data is expected to be located in the project directory.
 NOMINATIM_WIKIPEDIA_DATA_PATH=
 
-# Configuration file for special phrase import.
-# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
-#           a custom phrase-settings.json into your project directory.
-NOMINATIM_PHRASE_CONFIG=
-
 # Configuration file for rank assignments.
 NOMINATIM_ADDRESS_LEVEL_CONFIG=address-levels.json
 
@@ -209,6 +199,27 @@ NOMINATIM_POLYGON_OUTPUT_MAX_TYPES=1
 # under <endpoint>.php
 NOMINATIM_SERVE_LEGACY_URLS=yes
 
+# Maximum number of connections a single API object can use. (Python API only)
+# When running Nominatim as a server, this is the maximum number
+# of connections _per worker_.
+NOMINATIM_API_POOL_SIZE=10
+
+# Timeout in seconds after which a single query to the database is cancelled.
+# The user receives a 503 response when a query times out.
+# When empty, timeouts are disabled.
+NOMINATIM_QUERY_TIMEOUT=10
+
+# Maximum time in seconds a single request is allowed to take. When the
+# timeout is exceeded, the available results are returned.
+# When empty, timeouts are disabled.
+NOMINATIM_REQUEST_TIMEOUT=60
+
+# Restrict results to places within a country.
+# If no point within the static grid of countries can be found but a
+# geometry of a region is matched nonetheless, do not return that geometry.
+# Return "Unable to geocode" instead.
+NOMINATIM_SEARCH_WITHIN_COUNTRIES=False
+
 ### Log settings
 #
 # The following options allow to enable logging of API requests.
@@ -224,5 +235,5 @@ NOMINATIM_LOG_DB=no
 NOMINATIM_LOG_FILE=
 
 # Echo raw SQL from SQLAlchemy statements.
-# Works only in command line/library use.
+# EXPERT: Works only in command line/library use.
 NOMINATIM_DEBUG_SQL=no
index fbfb4d54620decaf791550ebc76e0302c2792896..dc2c12eef1f2d4bb4f2c67b075376ebcd2f4323c 100644 (file)
@@ -30,6 +30,8 @@ local place_table = osm2pgsql.define_table{
         { column = 'extratags', type = 'hstore' },
         { column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
     },
+    data_tablespace = os.getenv("NOMINATIM_TABLESPACE_PLACE_DATA"),
+    index_tablespace = os.getenv("NOMINATIM_TABLESPACE_PLACE_INDEX"),
     indexes = {}
 }
 
index 55b3274a3a9b9f682982ceb464aff947495cef2c..db6c8420869849ac555a952bd027b32297bbe0e8 100644 (file)
@@ -1,14 +1,14 @@
-- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰] > 0"
-- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵] > 1"
-- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶] > 2"
-- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷] > 3"
-- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸] > 4"
-- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹] > 5"
-- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺] > 6"
-- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻] > 7"
-- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼] > 8"
-- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽] > 9"
-- "[𑜺⑩⑽⒑❿➉➓⓾] > '10'"
+- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰] > 0"
+- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵] > 1"
+- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶] > 2"
+- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷] > 3"
+- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸] > 4"
+- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹] > 5"
+- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺] > 6"
+- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻] > 7"
+- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼] > 8"
+- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽] > 9"
+- "[𑜺⑩⑽⒑❿➉➓⓾] > '10'"
 - "[⑪⑾⒒⓫] > '11'"
 - "[⑫⑿⒓⓬] > '12'"
 - "[⑬⒀⒔⓭] > '13'"
index 1fa467befebfa4c5977103d6731064dd760791ff..c5a809c68319f3095f2d9b4bf06c6456ff4b2b05 100644 (file)
@@ -45,6 +45,7 @@ sanitizers:
       whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
       use-defaults: all
       mode: append
+    - step: tag-japanese
 token-analysis:
     - analyzer: generic
     - id: "@housenumber"
index 5fca4002a6e867ad2592625e0acda3da502da447..78eac5f5221d77ed69d9234f8593ffc4ae7ec952 100644 (file)
@@ -2,12 +2,38 @@ local flex = require('flex-base')
 
 flex.set_main_tags{
     boundary = {administrative = 'named'},
-    landuse = 'fallback',
-    place = 'always'
+    landuse = {residential = 'fallback',
+               farm = 'fallback',
+               farmyard = 'fallback',
+               industrial = 'fallback',
+               commercial = 'fallback',
+               allotments = 'fallback',
+               retail = 'fallback'},
+    place = {county = 'always',
+             district = 'always',
+             municipality = 'always',
+             city = 'always',
+             town = 'always',
+             borough = 'always',
+             village = 'always',
+             suburb = 'always',
+             hamlet = 'always',
+             croft = 'always',
+             subdivision = 'always',
+             allotments = 'always',
+             neighbourhood = 'always',
+             quarter = 'always',
+             isolated_dwelling = 'always',
+             farm = 'always',
+             city_block = 'always',
+             mountain_pass = 'always',
+             square = 'always',
+             locality = 'always'}
 }
 
 flex.set_prefilters{delete_keys = {'building', 'source', 'highway',
                                    'addr:housenumber', 'addr:street', 'addr:city',
+                                   'addr:interpolation',
                                    'source', '*source', 'type',
                                    'is_in:postcode', '*:wikidata', '*:wikipedia',
                                    '*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
index 6611c81b527df0f791a1078ba660c61a22741c13..5351ce417185a5a813e9ee5ffcf1ce18491828a1 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Localization of search results
 
index 3d5635de126c6136c575a2f7e860e6f7492d7234..0fb641712446a268a6a877239c83fcd6bc2c1db9 100644 (file)
@@ -2,6 +2,7 @@
 Feature: Object details
     Testing different parameter options for details API.
 
+    @SQLITE
     Scenario: JSON Details
         When sending json details query for W297699560
         Then the result is valid json
@@ -11,6 +12,7 @@ Feature: Object details
             | type  |
             | Point |
 
+    @SQLITE
     Scenario: JSON Details with pretty printing
         When sending json details query for W297699560
             | pretty |
@@ -19,6 +21,7 @@ Feature: Object details
         And result has attributes geometry
         And result has not attributes keywords,address,linked_places,parentof
 
+    @SQLITE
      Scenario: JSON Details with addressdetails
         When sending json details query for W297699560
             | addressdetails |
@@ -26,6 +29,7 @@ Feature: Object details
         Then the result is valid json
         And result has attributes address
 
+    @SQLITE
     Scenario: JSON Details with linkedplaces
         When sending json details query for R123924
             | linkedplaces |
@@ -33,6 +37,7 @@ Feature: Object details
         Then the result is valid json
         And result has attributes linked_places
 
+    @SQLITE
     Scenario: JSON Details with hierarchy
         When sending json details query for W297699560
             | hierarchy |
@@ -40,6 +45,7 @@ Feature: Object details
         Then the result is valid json
         And result has attributes hierarchy
 
+    @SQLITE
     Scenario: JSON Details with grouped hierarchy
         When sending json details query for W297699560
             | hierarchy | group_hierarchy |
@@ -69,6 +75,7 @@ Feature: Object details
         Then the result is valid json
         And result has attributes keywords
 
+    @SQLITE
     Scenario Outline: JSON details with full geometry
         When sending json details query for <osmid>
             | polygon_geojson |
index 4c54b0d67c0fe8d6a8e78153da771638003dc450..99d3422334b3baf36b18040dc883727b72c76bd7 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Object details
     Check details page for correctness
index 8a8e656113cf81daf6e8801a5007ed0f1ee58b2c..e279a8fa9e12978b8c93622538b2bffa640c8360 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Places by osm_type and osm_id Tests
     Simple tests for errors in various response formats.
index 9ea388122868c54b53d9b2211b942bfb33ecdab2..1e5b8ee77744672571a0603d9b7b0e8981f0f032 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Places by osm_type and osm_id Tests
     Simple tests for response format.
index 2c14dd5f2a5d19d40b6b7264dc41debd5074afff..33fadbbdff5a106d26c8679f8c0fbf31d270b6be 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Geometries for reverse geocoding
     Tests for returning geometries with reverse
@@ -9,7 +10,7 @@ Feature: Geometries for reverse geocoding
           | 1            |
         Then results contain
           | geotext |
-          | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226143 47.1379257,9.522615 47.137917,9.5226225 47.1379098,9.5226334 47.1379052,9.5226461 47.1379037,9.5226588 47.1379056,9.5226693 47.1379107,9.5226762 47.1379181,9.5226762 47.1379268,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) |
+          | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5226142 47.1379294, ?9.5226143 47.1379257, ?9.522615 47.137917, ?9.5226225 47.1379098, ?9.5226334 47.1379052, ?9.5226461 47.1379037, ?9.5226588 47.1379056, ?9.5226693 47.1379107, ?9.5226762 47.1379181, ?9.5226762 47.1379268, ?9.5226761 47.1379308, ?9.5227366 47.1379317, ?9.5227352 47.1379753, ?9.5227608 47.1379757, ?9.5227595 47.1380148, ?9.5227355 47.1380145, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
 
 
     Scenario: Polygons can be slightly simplified
@@ -18,7 +19,7 @@ Feature: Geometries for reverse geocoding
           | 1            | 0.00001            |
         Then results contain
           | geotext |
-          | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226225 47.1379098,9.5226588 47.1379056,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) |
+          | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5226142 47.1379294, ?9.5226225 47.1379098, ?9.5226588 47.1379056, ?9.5226761 47.1379308, ?9.5227366 47.1379317, ?9.5227352 47.1379753, ?9.5227608 47.1379757, ?9.5227595 47.1380148, ?9.5227355 47.1380145, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
 
 
     Scenario: Polygons can be much simplified
@@ -27,7 +28,7 @@ Feature: Geometries for reverse geocoding
           | 1            | 0.9               |
         Then results contain
           | geotext |
-          | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5227608 47.1379757,9.5227337 47.1380692,9.5225302 47.138066)) |
+          | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5227608 47.1379757, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
 
 
     Scenario: For polygons return the centroid as center point
index e42689f73d12a9aeb0df1bd7e783398e274e93b4..69f84ebc4a030cdbe4cd89d47307d90b9b86c046 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Localization of reverse search results
 
index d4c334a54e170fa5ff28e908767851835458b7e2..ef02886478cbc81aae2358b99f15f2a6692c8b55 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Layer parameter in reverse geocoding
     Testing correct function of layer selection while reverse geocoding
@@ -57,7 +58,7 @@ Feature: Layer parameter in reverse geocoding
 
     @v1-api-python-only
     Scenario Outline: Search for mountain peaks begins at level 12
-        When sending v1/reverse at 47.08221,9.56769
+        When sending v1/reverse at 47.08293,9.57109
           | layer   | zoom   |
           | natural | <zoom> |
         Then results contain
@@ -71,7 +72,7 @@ Feature: Layer parameter in reverse geocoding
 
 
     @v1-api-python-only
-     Scenario Outline: Reverse serach with manmade layers
+     Scenario Outline: Reverse search with manmade layers
         When sending v1/reverse at 32.46904,-86.44439
           | layer   |
           | <layer> |
@@ -84,5 +85,5 @@ Feature: Layer parameter in reverse geocoding
           | manmade         | leisure  | park        |
           | address         | highway  | residential |
           | poi             | leisure  | pitch       |
-          | natural         | waterway | stream      |
+          | natural         | waterway | river       |
           | natural,manmade | leisure  | park        |
index d51378d6443dab6e2a0254dc7a23bf969daba2b6..37abb22d4095317a397c2bf35fff71924efdbd63 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Reverse geocoding
     Testing the reverse function
index c9112b94c591439779163a0b4f8626b71b125326..56b85e2009d2618c0db9a071eff528ab031dc7dd 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Geocodejson for Reverse API
     Testing correctness of geocodejson output (API version v1).
index 0b6ad0d3a3ff6744a277855abcec1a88d7a1b577..e705529d38be7a6e8730dc71128c1adb2dbbb6ae 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Geojson for Reverse API
     Testing correctness of geojson output (API version v1).
index ac3c799ed8d71c9c9956ad6bc450a1f8fa39f57a..1f629c0fa110941d1e4ea70e6c3b3b6fbce8d00b 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Json output for Reverse API
     Testing correctness of json and jsonv2 output (API version v1).
@@ -93,7 +94,7 @@ Feature: Json output for Reverse API
           | polygon_text | 1     |
        Then results contain
           | geotext |
-          | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) |
+          | ^LINESTRING\(9.5039353 47.0657546, ?9.5040437 47.0657781, ?9.5040808 47.065787, ?9.5054298 47.0661407\) |
 
        Examples:
           | format |
index 70a6505bfa7730e042348860d23bc58512dcb606..a1f08afd37d6919e20af7f00741012ff07ba0bae 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: v1/reverse Parameter Tests
     Tests for parameter inputs for the v1 reverse endpoint.
index 75f27220497009eb65c2d7e0daee3b0b426eed50..95e7478ca6daadf2bcbd54f23cccab3187ae0ef6 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: XML output for Reverse API
     Testing correctness of xml output (API version v1).
@@ -66,7 +67,7 @@ Feature: XML output for Reverse API
           | polygon_text | 1     |
        Then results contain
           | geotext |
-          | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) |
+          | ^LINESTRING\(9.5039353 47.0657546, ?9.5040437 47.0657781, ?9.5040808 47.065787, ?9.5054298 47.0661407\) |
 
 
     Scenario: Output of SVG
index f0474460db1deed73df3e09d6bb0ca5c12cdc01e..847f1dbf02823aff211fdfa073b65be4a042380a 100644 (file)
@@ -2,6 +2,14 @@
 Feature: Search queries
     Generic search result correctness
 
+    Scenario: Search for natural object
+        When sending json search query "Samina"
+          | accept-language |
+          | en |
+        Then results contain
+          | ID | class    | type  | display_name    |
+          | 0  | waterway | river | Samina, Austria |
+
     Scenario: House number search for non-street address
         When sending json search query "6 Silum, Liechtenstein" with address
           | accept-language |
index f8044dad18db1e249f0438758ad86d0cb4e95d15..517c0eddd229c16e2a2d33057783d9f280a3dad7 100644 (file)
@@ -56,7 +56,7 @@ Feature: Structured search queries
           | Liechtenstein |
         And  results contain
           | class   | type |
-          | amenity | ^(pub)\|(bar) |
+          | amenity | ^(pub)\|(bar)\|(restaurant) |
 
     #176
     Scenario: Structured search restricts rank
index 1323caa161ec581b2700401edb3e3f5c95b913b8..993fa1ecb48e31bcbc876831476957df3f65ab40 100644 (file)
@@ -1,3 +1,4 @@
+@SQLITE
 @APIDB
 Feature: Status queries
     Testing status query
diff --git a/test/bdd/db/query/japanese.feature b/test/bdd/db/query/japanese.feature
new file mode 100644 (file)
index 0000000..4960c50
--- /dev/null
@@ -0,0 +1,30 @@
+@DB
+Feature: Searches in Japan
+    Tests specifically for searches of Japanese addresses and in the Japanese language.
+    @fail-legacy
+    Scenario: A block house-number is parented to the neighbourhood
+        Given the grid with origin JP
+          | 1 |   |   |   | 2 |
+          |   | 3 |   |   |   |
+          |   |   | 9 |   |   |
+          |   |   |   | 6 |   |
+        And the places
+          | osm | class   | type        | name       | geometry |
+          | W1  | highway | residential | 雉子橋通り | 1,2      |
+        And the places
+          | osm | class   | type       | housenr | addr+block_number | addr+neighbourhood | geometry |
+          | N3  | amenity | restaurant | 2       | 6                 | 2丁目              | 3        |
+        And the places
+          | osm | class | type          | name  | geometry |
+          | N9  | place | neighbourhood | 2丁目 | 9        |
+        And the places
+          | osm | class | type    | name | geometry |
+          | N6  | place | quarter | 加瀬 | 6        |
+        When importing
+        Then placex contains
+          | object | parent_place_id |
+          | N3     | N9              |
+        When sending search query "2丁目 6-2"
+        Then results contain
+          | osm |
+          | N3  |
index a3ca70352a33ca0883d71a7abaeeb112db3f6127..78a26a90f5dfd723a2d3511e23a03a0293023536 100644 (file)
@@ -11,7 +11,7 @@ Feature: Querying fo postcode variants
         When sending search query "399174"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | 399174       |
+            | 0  | postcode | 399174, Singapore |
 
 
     @fail-legacy
@@ -25,11 +25,11 @@ Feature: Querying fo postcode variants
         When sending search query "3993 DX"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | 3993 DX      |
+            | 0  | postcode | 3993 DX, Nederland      |
         When sending search query "3993dx"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | 3993 DX      |
+            | 0  | postcode | 3993 DX, Nederland      |
 
         Examples:
             | postcode |
@@ -49,7 +49,7 @@ Feature: Querying fo postcode variants
         When sending search query "399174"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | 399174       |
+            | 0  | postcode | 399174, Singapore       |
 
 
     @fail-legacy
@@ -63,11 +63,11 @@ Feature: Querying fo postcode variants
         When sending search query "675"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | AD675        |
+            | 0  | postcode | AD675, Andorra |
         When sending search query "AD675"
         Then results contain
             | ID | type     | display_name |
-            | 0  | postcode | AD675        |
+            | 0  | postcode | AD675, Andorra |
 
         Examples:
             | postcode |
@@ -89,9 +89,9 @@ Feature: Querying fo postcode variants
         When sending search query "EH4 7EA"
         Then results contain
            | type     | display_name |
-           | postcode | EH4 7EA      |
+           | postcode | EH4 7EA, United Kingdom |
         When sending search query "E4 7EA"
         Then results contain
            | type     | display_name |
-           | postcode | E4 7EA       |
+           | postcode | E4 7EA, United Kingdom |
 
index afaa51512a7c1041e659725e2bc6ca9bfaaae77e..664b5ac79e7d2013182ebff5036f04889870f586 100644 (file)
@@ -46,7 +46,10 @@ def before_all(context):
 
 
 def before_scenario(context, scenario):
-    if 'DB' in context.tags:
+    if 'SQLITE' not in context.tags \
+       and context.config.userdata['API_TEST_DB'].startswith('sqlite:'):
+        context.scenario.skip("Not usable with SQLite database.")
+    elif 'DB' in context.tags:
         context.nominatim.setup_db(context)
     elif 'APIDB' in context.tags:
         context.nominatim.setup_api_db()
index 3d0830405082fc9d6087e747da4813a41355e173..e2fd665aa34799517536dc669124aec70aa6e5b6 100644 (file)
@@ -488,3 +488,26 @@ Feature: Tag evaluation
         Then placex contains exactly
           | object       | type     | admin_level |
           | R10:boundary | informal | 4           |
+
+
+    Scenario: Main tag and geometry is changed
+        When loading osm data
+          """
+          n1 x40 y40
+          n2 x40.0001 y40
+          n3 x40.0001 y40.0001
+          n4 x40 y40.0001
+          w5 Tbuilding=house,name=Foo Nn1,n2,n3,n4,n1
+          """
+        Then place contains exactly
+          | object      | type  |
+          | W5:building | house |
+
+        When updating osm data
+          """
+          n1 x39.999 y40
+          w5 Tbuilding=terrace,name=Bar Nn1,n2,n3,n4,n1
+          """
+        Then place contains exactly
+          | object      | type    |
+          | W5:building | terrace |
index 572c571a1318e18097311a8fa7c61f7aeedf8ae3..11dede3049854a323388fceae13fa61b428fb689 100644 (file)
@@ -86,7 +86,10 @@ class NominatimEnvironment:
             be picked up by dotenv and creates a project directory with the
             appropriate website scripts.
         """
-        dsn = 'pgsql:dbname={}'.format(dbname)
+        if dbname.startswith('sqlite:'):
+            dsn = 'sqlite:dbname={}'.format(dbname[7:])
+        else:
+            dsn = 'pgsql:dbname={}'.format(dbname)
         if self.db_host:
             dsn += ';host=' + self.db_host
         if self.db_port:
@@ -197,6 +200,9 @@ class NominatimEnvironment:
         """
         self.write_nominatim_config(self.api_test_db)
 
+        if self.api_test_db.startswith('sqlite:'):
+            return
+
         if not self.api_db_done:
             self.api_db_done = True
 
@@ -305,7 +311,6 @@ class NominatimEnvironment:
         cli.nominatim(module_dir='',
                       osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
                       cli_args=cmdline,
-                      phpcgi_path='',
                       environ=self.test_env)
 
 
index 1b5b88ed5f15bfee12e964a4be6aaf63cc1df98c..cb7f324a393fa24e2ddb097e710b92662fc96bd2 100644 (file)
@@ -16,6 +16,7 @@ import sqlalchemy as sa
 
 import nominatim.api as napi
 from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools import convert_sqlite
 import nominatim.api.logging as loglib
 
 class APITester:
@@ -178,7 +179,6 @@ def apiobj(temp_db_with_extensions, temp_db_conn, monkeypatch):
     testapi.async_to_sync(testapi.create_tables())
 
     proc = SQLPreprocessor(temp_db_conn, testapi.api.config)
-    proc.run_sql_file(temp_db_conn, 'functions/address_lookup.sql')
     proc.run_sql_file(temp_db_conn, 'functions/ranking.sql')
 
     loglib.set_log_output('text')
@@ -186,3 +186,21 @@ def apiobj(temp_db_with_extensions, temp_db_conn, monkeypatch):
     print(loglib.get_and_disable())
 
     testapi.api.close()
+
+
+@pytest.fixture(params=['postgres_db', 'sqlite_db'])
+def frontend(request, event_loop, tmp_path):
+    if request.param == 'sqlite_db':
+        db = str(tmp_path / 'test_nominatim_python_unittest.sqlite')
+
+        def mkapi(apiobj, options={'reverse'}):
+            event_loop.run_until_complete(convert_sqlite.convert(Path('/invalid'),
+                                                                 db, options))
+            return napi.NominatimAPI(Path('/invalid'),
+                                     {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={db}",
+                                      'NOMINATIM_USE_US_TIGER_DATA': 'yes'})
+    elif request.param == 'postgres_db':
+        def mkapi(apiobj, options=None):
+            return apiobj.api
+
+    return mkapi
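
A sketch of a test consuming the parametrized fixture above, so the body runs once against PostgreSQL and once against the SQLite conversion; the coordinates are illustrative:

    def test_runs_on_both_frontends(apiobj, frontend):
        api = frontend(apiobj, options={'reverse'})
        # NominatimAPI.reverse() is part of the public Python API;
        # coordinates are (x, y), i.e. longitude first.
        result = api.reverse((9.52, 47.07))
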
diff --git a/test/python/api/fake_adaptor.py b/test/python/api/fake_adaptor.py
new file mode 100644 (file)
index 0000000..d886d34
--- /dev/null
@@ -0,0 +1,55 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Provides dummy implementations of ASGIAdaptor for testing.
+"""
+from collections import namedtuple
+
+import nominatim.api.v1.server_glue as glue
+from nominatim.config import Configuration
+
+class FakeError(BaseException):
+
+    def __init__(self, msg, status):
+        self.msg = msg
+        self.status = status
+
+    def __str__(self):
+        return f'{self.status} -- {self.msg}'
+
+FakeResponse = namedtuple('FakeResponse', ['status', 'output', 'content_type'])
+
+class FakeAdaptor(glue.ASGIAdaptor):
+
+    def __init__(self, params=None, headers=None, config=None):
+        self.params = params or {}
+        self.headers = headers or {}
+        self._config = config or Configuration(None)
+
+
+    def get(self, name, default=None):
+        return self.params.get(name, default)
+
+
+    def get_header(self, name, default=None):
+        return self.headers.get(name, default)
+
+
+    def error(self, msg, status=400):
+        return FakeError(msg, status)
+
+
+    def create_response(self, status, output, num_results):
+        return FakeResponse(status, output, self.content_type)
+
+
+    def base_uri(self) -> str:
+        return 'http://test'
+
+    def config(self):
+        return self._config
+
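
A minimal sketch of the dummy adaptor in use; all values are made up:

    adaptor = FakeAdaptor(params={'q': 'Berlin'},
                          headers={'accept-language': 'de'})

    assert adaptor.get('q') == 'Berlin'
    assert adaptor.get_header('accept-language') == 'de'
    assert isinstance(adaptor.error('bad request'), FakeError)
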
index f8c9c2dc865ba9f8ca527014c1d292dfbba14313..fe850ce902930a817981bd42c6c549fc5bd91ec3 100644 (file)
@@ -28,12 +28,12 @@ def mktoken(tid: int):
                                          ('COUNTRY', 'COUNTRY'),
                                          ('POSTCODE', 'POSTCODE')])
 def test_phrase_compatible(ptype, ttype):
-    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype])
+    assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype], False)
 
 
 @pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
 def test_phrase_incompatible(ptype):
-    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL)
+    assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL, True)
 
 
 def test_query_node_empty():
@@ -99,3 +99,36 @@ def test_query_struct_incompatible_token():
 
     assert q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == []
     assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1
+
+
+def test_query_struct_amenity_single_word():
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2))
+    q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0
+
+
+def test_query_struct_amenity_two_words():
+    q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
+    q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY)
+    q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+    for trange in [(0, 1), (1, 2)]:
+        q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
+        q.add_token(query.TokenRange(*trange), query.TokenType.NEAR_ITEM, mktoken(2))
+        q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))
+
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 0
+    assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1
+
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.NEAR_ITEM)) == 0
+    assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1
+
index 9631850e1b284fcf760c3171b5cf2c09a78bed64..87d75261528283574aae5d6a83b09d5645ac406e 100644 (file)
@@ -21,21 +21,18 @@ class MyToken(Token):
 
 
 def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(PhraseType.NONE, '')])
 
-    for tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(PhraseType.NONE, '')])
-        else:
-            q.add_node(BreakType.WORD, PhraseType.NONE)
+    for _ in range(max(inner[0] for tlist in args for inner in tlist)):
+        q.add_node(BreakType.WORD, PhraseType.NONE)
+    q.add_node(BreakType.END, PhraseType.NONE)
 
-        start = len(q.nodes) - 1
+    for start, tlist in enumerate(args):
         for end, ttype, tinfo in tlist:
             for tid, word in tinfo:
                 q.add_token(TokenRange(start, end), ttype,
                             MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True))
 
-    q.add_node(BreakType.END, PhraseType.NONE)
 
     return q
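
For orientation, the call shape the rewritten helper expects: one list per query phrase, each entry being (end node, token type, [(token id, word), ...]). Both examples below appear verbatim in the tests that follow:

    q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')]),
                    (2, TokenType.WORD, [(100, 'a')])])
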
 
@@ -68,7 +65,7 @@ def test_country_search_with_country_restriction():
     assert set(search.countries.values) == {'en'}
 
 
-def test_country_search_with_confllicting_country_restriction():
+def test_country_search_with_conflicting_country_restriction():
     q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
 
@@ -150,27 +147,27 @@ def test_postcode_with_address_with_full_word():
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
                                     {'near': '10,10'}])
-def test_category_only(kwargs):
-    q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_only(kwargs):
+    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
-    searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
 
     assert len(searches) == 1
 
     search = searches[0]
 
     assert isinstance(search, dbs.PoiSearch)
-    assert search.categories.values == [('this', 'that')]
+    assert search.qualifiers.values == [('this', 'that')]
 
 
 @pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
                                     {}])
-def test_category_skipped(kwargs):
-    q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_skipped(kwargs):
+    q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
     builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
 
-    searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+    searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
 
     assert len(searches) == 0
 
@@ -287,13 +284,13 @@ def test_name_and_complex_address():
 
 
 def test_name_only_near_search():
-    q = make_query([(1, TokenType.CATEGORY, [(88, 'g')])],
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
                    [(2, TokenType.PARTIAL, [(1, 'a')]),
                     (2, TokenType.WORD, [(100, 'a')])])
     builder = SearchBuilder(q, SearchDetails())
 
     searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
-                                                  category=TokenRange(0, 1))))
+                                                  near_item=TokenRange(0, 1))))
 
     assert len(searches) == 1
     search = searches[0]
@@ -312,10 +309,68 @@ def test_name_only_search_with_category():
     assert len(searches) == 1
     search = searches[0]
 
+    assert isinstance(search, dbs.PlaceSearch)
+    assert search.qualifiers.values == [('foo', 'bar')]
+
+
+def test_name_with_near_item_search_with_category_mismatch():
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  near_item=TokenRange(0, 1))))
+
+    assert len(searches) == 0
+
+
+def test_name_with_near_item_search_with_category_match():
+    q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+                                                                         ('this', 'that')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  near_item=TokenRange(0, 1))))
+
+    assert len(searches) == 1
+    search = searches[0]
+
     assert isinstance(search, dbs.NearSearch)
     assert isinstance(search.search, dbs.PlaceSearch)
 
 
+def test_name_with_qualifier_search_with_category_mismatch():
+    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  qualifier=TokenRange(0, 1))))
+
+    assert len(searches) == 0
+
+
+def test_name_with_qualifier_search_with_category_match():
+    q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+                   [(2, TokenType.PARTIAL, [(1, 'a')]),
+                    (2, TokenType.WORD, [(100, 'a')])])
+    builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+                                                                         ('this', 'that')]}))
+
+    searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+                                                  qualifier=TokenRange(0, 1))))
+
+    assert len(searches) == 1
+    search = searches[0]
+
+    assert isinstance(search, dbs.PlaceSearch)
+    assert search.qualifiers.values == [('this', 'that')]
+
+
 def test_name_only_search_with_countries():
     q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
                     (1, TokenType.WORD, [(100, 'a')])])
@@ -332,9 +387,10 @@ def test_name_only_search_with_countries():
     assert not search.housenumbers.values
 
 
-def make_counted_searches(name_part, name_full, address_part, address_full):
+def make_counted_searches(name_part, name_full, address_part, address_full,
+                          num_address_parts=1):
     q = QueryStruct([Phrase(PhraseType.NONE, '')])
-    for i in range(2):
+    for i in range(1 + num_address_parts):
         q.add_node(BreakType.WORD, PhraseType.NONE)
     q.add_node(BreakType.END, PhraseType.NONE)
 
@@ -342,15 +398,16 @@ def make_counted_searches(name_part, name_full, address_part, address_full):
                 MyToken(0.5, 1, name_part, 'name_part', True))
     q.add_token(TokenRange(0, 1), TokenType.WORD,
                 MyToken(0, 101, name_full, 'name_full', True))
-    q.add_token(TokenRange(1, 2), TokenType.PARTIAL,
-                MyToken(0.5, 2, address_part, 'address_part', True))
-    q.add_token(TokenRange(1, 2), TokenType.WORD,
-                MyToken(0, 102, address_full, 'address_full', True))
+    for i in range(num_address_parts):
+        q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
+                    MyToken(0.5, 2, address_part, 'address_part', True))
+        q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
+                    MyToken(0, 102, address_full, 'address_full', True))
 
     builder = SearchBuilder(q, SearchDetails())
 
     return list(builder.build(TokenAssignment(name=TokenRange(0, 1),
-                                              address=[TokenRange(1, 2)])))
+                                              address=[TokenRange(1, 1 + num_address_parts)])))
 
 
 def test_infrequent_partials_in_name():
@@ -367,29 +424,27 @@ def test_infrequent_partials_in_name():
             {('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')}
 
 
-def test_frequent_partials_in_name_but_not_in_address():
-    searches = make_counted_searches(10000, 1, 1, 1)
+def test_frequent_partials_in_name_and_address():
+    searches = make_counted_searches(9999, 1, 9999, 1)
 
-    assert len(searches) == 1
-    search = searches[0]
+    assert len(searches) == 2
 
-    assert isinstance(search, dbs.PlaceSearch)
-    assert len(search.lookups) == 2
-    assert len(search.rankings) == 2
+    assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
+    searches.sort(key=lambda s: s.penalty)
 
-    assert set((l.column, l.lookup_type) for l in search.lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'restrict')}
+    assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
+            {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
+    assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
+            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
 
 
-def test_frequent_partials_in_name_and_address():
-    searches = make_counted_searches(10000, 1, 10000, 1)
+def test_too_frequent_partials_in_name_and_address():
+    searches = make_counted_searches(20000, 1, 10000, 1)
 
-    assert len(searches) == 2
+    assert len(searches) == 1
 
     assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
     searches.sort(key=lambda s: s.penalty)
 
     assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
             {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
-    assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
-            {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
index faf8137526106a0e5db77b325c0fa73f30b94a05..a88ca8b82e4facc800aa23c507f309f57c4c8311 100644 (file)
@@ -134,7 +134,7 @@ async def test_category_words_only_at_beginning(conn):
 
     assert query.num_token_slots() == 3
     assert len(query.nodes[0].starting) == 1
-    assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY
+    assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
     assert not query.nodes[2].starting
 
 
@@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
     query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
 
     assert query.num_token_slots() == 5
-    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
     assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
-    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
 
 
 @pytest.mark.asyncio
index cdea6ede7cd842fa0c0f25b6915b8c0fcb2f0fe9..507afaeceee8023eade86f5c2f1db7d382d44842 100644 (file)
@@ -212,7 +212,7 @@ async def test_category_words_only_at_beginning(conn):
 
     assert query.num_token_slots() == 3
     assert len(query.nodes[0].starting) == 1
-    assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY
+    assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
     assert not query.nodes[2].starting
 
 
@@ -226,9 +226,9 @@ async def test_qualifier_words(conn):
     query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
 
     assert query.num_token_slots() == 5
-    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
     assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
-    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
 
 
 @pytest.mark.asyncio
diff --git a/test/python/api/search/test_query.py b/test/python/api/search/test_query.py
new file mode 100644 (file)
index 0000000..a4b3282
--- /dev/null
@@ -0,0 +1,48 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Test data types for search queries.
+"""
+import pytest
+
+import nominatim.api.search.query as nq
+
+def test_token_range_equal():
+    assert nq.TokenRange(2, 3) == nq.TokenRange(2, 3)
+    assert not (nq.TokenRange(2, 3) != nq.TokenRange(2, 3))
+
+
+@pytest.mark.parametrize('lop,rop', [((1, 2), (3, 4)),
+                                    ((3, 4), (3, 5)),
+                                    ((10, 12), (11, 12))])
+def test_token_range_unequal(lop, rop):
+    assert not (nq.TokenRange(*lop) == nq.TokenRange(*rop))
+    assert nq.TokenRange(*lop) != nq.TokenRange(*rop)
+
+
+def test_token_range_lt():
+    assert nq.TokenRange(1, 3) < nq.TokenRange(10, 12)
+    assert nq.TokenRange(5, 6) < nq.TokenRange(7, 8)
+    assert nq.TokenRange(1, 4) < nq.TokenRange(4, 5)
+    assert not (nq.TokenRange(5, 6) < nq.TokenRange(5, 6))
+    assert not (nq.TokenRange(10, 11) < nq.TokenRange(4, 5))
+
+
+def test_token_range_gt():
+    assert nq.TokenRange(3, 4) > nq.TokenRange(1, 2)
+    assert nq.TokenRange(100, 200) > nq.TokenRange(10, 11)
+    assert nq.TokenRange(10, 11) > nq.TokenRange(4, 10)
+    assert not (nq.TokenRange(5, 6) > nq.TokenRange(5, 6))
+    assert not (nq.TokenRange(1, 2) > nq.TokenRange(3, 4))
+    assert not (nq.TokenRange(4, 10) > nq.TokenRange(3, 5))
+
+
+def test_token_range_unimplemented_ops():
+    with pytest.raises(TypeError):
+        nq.TokenRange(1, 3) <= nq.TokenRange(10, 12)
+    with pytest.raises(TypeError):
+        nq.TokenRange(1, 3) >= nq.TokenRange(10, 12)
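
The new test module fully specifies TokenRange's comparison contract: equality is field-wise, '<' and '>' mean strictly-before and strictly-after, and '<=' / '>=' raise TypeError. A standalone sketch consistent with those assertions; the real class lives in nominatim/api/search/query.py and may differ in detail:

    from dataclasses import dataclass

    @dataclass(frozen=True)
    class TokenRange:
        start: int
        end: int

        def __lt__(self, other: 'TokenRange') -> bool:
            return self.end <= other.start    # ends before the other begins

        def __gt__(self, other: 'TokenRange') -> bool:
            return self.start >= other.end    # begins after the other ends

        def __le__(self, other):
            return NotImplemented             # unsupported, as the tests demand

        def __ge__(self, other):
            return NotImplemented             # unsupported, as the tests demand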
index bb0abc39d88f78e57755c446cf4c918978e76780..82b1d37fe30ba52c4d1bd90b0415a10099815893 100644 (file)
@@ -59,3 +59,70 @@ def test_find_from_fallback_countries(apiobj):
 
 def test_find_none(apiobj):
     assert len(run_search(apiobj, 0.0, ['xx'])) == 0
+
+
+@pytest.mark.parametrize('coord,numres', [((0.5, 1), 1), ((10, 10), 0)])
+def test_find_near(apiobj, coord, numres):
+    apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
+    apiobj.add_country_name('ro', {'name': 'România'})
+
+    results = run_search(apiobj, 0.0, ['ro'],
+                         details=SearchDetails(near=napi.Point(*coord),
+                                               near_radius=0.1))
+
+    assert len(results) == numres
+
+
+class TestCountryParameters:
+
+    @pytest.fixture(autouse=True)
+    def fill_database(self, apiobj):
+        apiobj.add_placex(place_id=55, class_='boundary', type='administrative',
+                          rank_search=4, rank_address=4,
+                          name={'name': 'Lolaland'},
+                          country_code='yw',
+                          centroid=(10, 10),
+                          geometry='POLYGON((9.5 9.5, 9.5 10.5, 10.5 10.5, 10.5 9.5, 9.5 9.5))')
+        apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
+        apiobj.add_country_name('ro', {'name': 'România'})
+
+
+    @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+                                      napi.GeometryFormat.KML,
+                                      napi.GeometryFormat.SVG,
+                                      napi.GeometryFormat.TEXT])
+    @pytest.mark.parametrize('cc', ['yw', 'ro'])
+    def test_return_geometries(self, apiobj, geom, cc):
+        results = run_search(apiobj, 0.5, [cc],
+                             details=SearchDetails(geometry_output=geom))
+
+        assert len(results) == 1
+        assert geom.name.lower() in results[0].geometry
+
+
+    @pytest.mark.parametrize('pid,rids', [(76, [55]), (55, [])])
+    def test_exclude_place_id(self, apiobj, pid, rids):
+        results = run_search(apiobj, 0.5, ['yw', 'ro'],
+                             details=SearchDetails(excluded=[pid]))
+
+        assert [r.place_id for r in results] == rids
+
+
+    @pytest.mark.parametrize('viewbox,rids', [((9, 9, 11, 11), [55]),
+                                              ((-10, -10, -3, -3), [])])
+    def test_bounded_viewbox_in_placex(self, apiobj, viewbox, rids):
+        results = run_search(apiobj, 0.5, ['yw'],
+                             details=SearchDetails.from_kwargs({'viewbox': viewbox,
+                                                                'bounded_viewbox': True}))
+
+        assert [r.place_id for r in results] == rids
+
+
+    @pytest.mark.parametrize('viewbox,numres', [((0, 0, 1, 1), 1),
+                                              ((-10, -10, -3, -3), 0)])
+    def test_bounded_viewbox_in_fallback(self, apiobj, viewbox, numres):
+        results = run_search(apiobj, 0.5, ['ro'],
+                             details=SearchDetails.from_kwargs({'viewbox': viewbox,
+                                                                'bounded_viewbox': True}))
+
+        assert len(results) == numres
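
For reference, the SearchDetails variants these country tests exercise, shown standalone (a sketch; SearchDetails, napi and the coordinate values are taken from the tests above):

    # keep only results within a radius around a point
    details = SearchDetails(near=napi.Point(0.5, 1.0), near_radius=0.1)
    # hard-filter results to a bounding box
    details = SearchDetails.from_kwargs({'viewbox': (9, 9, 11, 11),
                                         'bounded_viewbox': True})
    # drop specific place ids from the result set
    details = SearchDetails(excluded=[76])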
index cfbdadb2a551f23d565096df80ebe70ff12bcd5e..2a0acb745969a777a75856f8cc002ea7e33da91f 100644 (file)
@@ -16,18 +16,21 @@ from nominatim.api.search.db_search_fields import WeightedStrings, WeightedCateg
                                                   FieldLookup, FieldRanking, RankedTokens
 
 
-def run_search(apiobj, global_penalty, cat, cat_penalty=None,
+def run_search(apiobj, global_penalty, cat, cat_penalty=None, ccodes=[],
                details=SearchDetails()):
 
     class PlaceSearchData:
         penalty = 0.0
         postcodes = WeightedStrings([], [])
-        countries = WeightedStrings([], [])
+        countries = WeightedStrings(ccodes, [0.0] * len(ccodes))
         housenumbers = WeightedStrings([], [])
         qualifiers = WeightedStrings([], [])
         lookups = [FieldLookup('name_vector', [56], 'lookup_all')]
         rankings = []
 
+    if ccodes:
+        details.countries = ccodes
+
     place_search = PlaceSearch(0.0, PlaceSearchData(), 2)
 
     if cat_penalty is None:
@@ -49,6 +52,18 @@ def test_no_results_inner_query(apiobj):
     assert not run_search(apiobj, 0.4, [('this', 'that')])
 
 
+def test_no_appropriate_results_inner_query(apiobj):
+    apiobj.add_placex(place_id=100, country_code='us',
+                      centroid=(5.6, 4.3),
+                      geometry='POLYGON((0.0 0.0, 10.0 0.0, 10.0 2.0, 0.0 2.0, 0.0 0.0))')
+    apiobj.add_search_name(100, names=[56], country_code='us',
+                           centroid=(5.6, 4.3))
+    apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+                      centroid=(5.6001, 4.2994))
+
+    assert not run_search(apiobj, 0.4, [('amenity', 'bank')])
+
+
 class TestNearSearch:
 
     @pytest.fixture(autouse=True)
@@ -100,3 +115,51 @@ class TestNearSearch:
 
         assert [r.place_id for r in results] == [22]
 
+
+    @pytest.mark.parametrize('cc,rid', [('us', 22), ('mx', 23)])
+    def test_restrict_by_country(self, apiobj, cc, rid):
+        apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+                          centroid=(5.6001, 4.2994),
+                          country_code='us')
+        apiobj.add_placex(place_id=122, class_='amenity', type='bank',
+                          centroid=(5.6001, 4.2994),
+                          country_code='mx')
+        apiobj.add_placex(place_id=23, class_='amenity', type='bank',
+                          centroid=(-10.3001, 56.9),
+                          country_code='mx')
+        apiobj.add_placex(place_id=123, class_='amenity', type='bank',
+                          centroid=(-10.3001, 56.9),
+                          country_code='us')
+
+        results = run_search(apiobj, 0.1, [('amenity', 'bank')], ccodes=[cc, 'fr'])
+
+        assert [r.place_id for r in results] == [rid]
+
+
+    @pytest.mark.parametrize('excluded,rid', [(22, 122), (122, 22)])
+    def test_exclude_place_by_id(self, apiobj, excluded, rid):
+        apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+                          centroid=(5.6001, 4.2994),
+                          country_code='us')
+        apiobj.add_placex(place_id=122, class_='amenity', type='bank',
+                          centroid=(5.6001, 4.2994),
+                          country_code='us')
+
+
+        results = run_search(apiobj, 0.1, [('amenity', 'bank')],
+                             details=SearchDetails(excluded=[excluded]))
+
+        assert [r.place_id for r in results] == [rid]
+
+
+    @pytest.mark.parametrize('layer,rids', [(napi.DataLayer.POI, [22]),
+                                            (napi.DataLayer.MANMADE, [])])
+    def test_with_layer(self, apiobj, layer, rids):
+        apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+                          centroid=(5.6001, 4.2994),
+                          country_code='us')
+
+        results = run_search(apiobj, 0.1, [('amenity', 'bank')],
+                             details=SearchDetails(layers=layer))
+
+        assert [r.place_id for r in results] == rids
index df369b81e1d36f77c65d0c66ea0fb9182a5051e3..8a363e97735b585aee1372ea6d87d05a3a12a17e 100644 (file)
@@ -7,6 +7,8 @@
 """
 Tests for running the generic place searcher.
 """
+import json
+
 import pytest
 
 import nominatim.api as napi
@@ -130,23 +132,48 @@ class TestNameOnlySearches:
         assert geom.name.lower() in results[0].geometry
 
 
+    @pytest.mark.parametrize('factor,npoints', [(0.0, 3), (1.0, 2)])
+    def test_return_simplified_geometry(self, apiobj, factor, npoints):
+        apiobj.add_placex(place_id=333, country_code='us',
+                          centroid=(9.0, 9.0),
+                          geometry='LINESTRING(8.9 9.0, 9.0 9.0, 9.1 9.0)')
+        apiobj.add_search_name(333, names=[55], country_code='us',
+                               centroid=(5.6, 4.3))
+
+        lookup = FieldLookup('name_vector', [55], 'lookup_all')
+        ranking = FieldRanking('name_vector', 0.9, [RankedTokens(0.0, [21])])
+
+        results = run_search(apiobj, 0.1, [lookup], [ranking],
+                             details=SearchDetails(geometry_output=napi.GeometryFormat.GEOJSON,
+                                                   geometry_simplification=factor))
+
+        assert len(results) == 1
+        result = results[0]
+        geom = json.loads(result.geometry['geojson'])
+
+        assert result.place_id == 333
+        assert len(geom['coordinates']) == npoints
+
+
     @pytest.mark.parametrize('viewbox', ['5.0,4.0,6.0,5.0', '5.7,4.0,6.0,5.0'])
-    def test_prefer_viewbox(self, apiobj, viewbox):
+    @pytest.mark.parametrize('wcount,rids', [(2, [100, 101]), (20000, [100])])
+    def test_prefer_viewbox(self, apiobj, viewbox, wcount, rids):
         lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
-        ranking = FieldRanking('name_vector', 0.9, [RankedTokens(0.0, [21])])
+        ranking = FieldRanking('name_vector', 0.2, [RankedTokens(0.0, [21])])
 
         results = run_search(apiobj, 0.1, [lookup], [ranking])
         assert [r.place_id for r in results] == [101, 100]
 
-        results = run_search(apiobj, 0.1, [lookup], [ranking],
+        results = run_search(apiobj, 0.1, [lookup], [ranking], count=wcount,
                              details=SearchDetails.from_kwargs({'viewbox': viewbox}))
-        assert [r.place_id for r in results] == [100, 101]
+        assert [r.place_id for r in results] == rids
 
 
-    def test_force_viewbox(self, apiobj):
+    @pytest.mark.parametrize('viewbox', ['5.0,4.0,6.0,5.0', '5.55,4.27,5.62,4.31'])
+    def test_force_viewbox(self, apiobj, viewbox):
         lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
 
-        details=SearchDetails.from_kwargs({'viewbox': '5.0,4.0,6.0,5.0',
+        details=SearchDetails.from_kwargs({'viewbox': viewbox,
                                            'bounded_viewbox': True})
 
         results = run_search(apiobj, 0.1, [lookup], [], details=details)
@@ -166,11 +193,12 @@ class TestNameOnlySearches:
         assert [r.place_id for r in results] == [100, 101]
 
 
-    def test_force_near(self, apiobj):
+    @pytest.mark.parametrize('radius', [0.09, 0.11])
+    def test_force_near(self, apiobj, radius):
         lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
 
         details=SearchDetails.from_kwargs({'near': '5.6,4.3',
-                                           'near_radius': 0.11})
+                                           'near_radius': radius})
 
         results = run_search(apiobj, 0.1, [lookup], [], details=details)
 
@@ -253,6 +281,37 @@ class TestStreetWithHousenumber:
         assert [r.place_id for r in results] == [2, 92, 2000]
 
 
+    def test_lookup_only_house_qualifier(self, apiobj):
+        lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
+        ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+        results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+                             quals=[('place', 'house')])
+
+        assert [r.place_id for r in results] == [2, 92]
+
+
+    def test_lookup_only_street_qualifier(self, apiobj):
+        lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
+        ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+        results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+                             quals=[('highway', 'residential')])
+
+        assert [r.place_id for r in results] == [1000, 2000]
+
+
+    @pytest.mark.parametrize('rank,found', [(26, True), (27, False), (30, False)])
+    def test_lookup_min_rank(self, apiobj, rank, found):
+        lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
+        ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+        results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+                             details=SearchDetails(min_rank=rank))
+
+        assert [r.place_id for r in results] == ([2, 92, 1000, 2000] if found else [2, 92])
+
+
     @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
                                       napi.GeometryFormat.KML,
                                       napi.GeometryFormat.SVG,
@@ -267,6 +326,54 @@ class TestStreetWithHousenumber:
         assert all(geom.name.lower() in r.geometry for r in results)
 
 
+def test_very_large_housenumber(apiobj):
+    apiobj.add_placex(place_id=93, class_='place', type='house',
+                      parent_place_id=2000,
+                      housenumber='2467463524544', country_code='pt')
+    apiobj.add_placex(place_id=2000, class_='highway', type='residential',
+                      rank_search=26, rank_address=26,
+                      country_code='pt')
+    apiobj.add_search_name(2000, names=[1,2],
+                           search_rank=26, address_rank=26,
+                           country_code='pt')
+
+    lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
+
+    results = run_search(apiobj, 0.1, [lookup], [], hnrs=['2467463524544'],
+                         details=SearchDetails())
+
+    assert results
+    assert [r.place_id for r in results] == [93, 2000]
+
+
+@pytest.mark.parametrize('wcount,rids', [(2, [990, 991]), (30000, [990])])
+def test_name_and_postcode(apiobj, wcount, rids):
+    apiobj.add_placex(place_id=990, class_='highway', type='service',
+                      rank_search=27, rank_address=27,
+                      postcode='11225',
+                      centroid=(10.0, 10.0),
+                      geometry='LINESTRING(9.995 10, 10.005 10)')
+    apiobj.add_search_name(990, names=[111], centroid=(10.0, 10.0),
+                           search_rank=27, address_rank=27)
+    apiobj.add_placex(place_id=991, class_='highway', type='service',
+                      rank_search=27, rank_address=27,
+                      postcode='11221',
+                      centroid=(10.1, 10.1),
+                      geometry='LINESTRING(9.995 10.1, 10.005 10.1)')
+    apiobj.add_search_name(991, names=[111], centroid=(10.1, 10.1),
+                           search_rank=27, address_rank=27)
+    apiobj.add_postcode(place_id=100, country_code='ch', postcode='11225',
+                        geometry='POINT(10 10)')
+
+    lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+    results = run_search(apiobj, 0.1, [lookup], [], pcs=['11225'], count=wcount,
+                         details=SearchDetails())
+
+    assert results
+    assert [r.place_id for r in results] == rids
+
+
 class TestInterpolations:
 
     @pytest.fixture(autouse=True)
@@ -298,6 +405,21 @@ class TestInterpolations:
         assert [r.place_id for r in results] == res + [990]
 
 
+    @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+                                      napi.GeometryFormat.KML,
+                                      napi.GeometryFormat.SVG,
+                                      napi.GeometryFormat.TEXT])
+    def test_osmline_with_geometries(self, apiobj, geom):
+        lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+        results = run_search(apiobj, 0.1, [lookup], [], hnrs=['21'],
+                             details=SearchDetails(geometry_output=geom))
+
+        assert results[0].place_id == 992
+        assert geom.name.lower() in results[0].geometry
+
+
+
 class TestTiger:
 
     @pytest.fixture(autouse=True)
@@ -331,6 +453,20 @@ class TestTiger:
         assert [r.place_id for r in results] == res + [990]
 
 
+    @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+                                      napi.GeometryFormat.KML,
+                                      napi.GeometryFormat.SVG,
+                                      napi.GeometryFormat.TEXT])
+    def test_tiger_with_geometries(self, apiobj, geom):
+        lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+        results = run_search(apiobj, 0.1, [lookup], [], hnrs=['21'],
+                             details=SearchDetails(geometry_output=geom))
+
+        assert results[0].place_id == 992
+        assert geom.name.lower() in results[0].geometry
+
+
 class TestLayersRank30:
 
     @pytest.fixture(autouse=True)
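
The simplification test above also documents how geometries come back: the 'geojson' entry holds a JSON string, not a parsed object. Reading it back, as the test does (json is imported at the top of the module):

    geom = json.loads(results[0].geometry['geojson'])
    npoints = len(geom['coordinates'])   # 3 with factor 0.0, 2 with factor 1.0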
index a43bc8975585801e875999d70805bc8aaff9ffbe..e7153f38bf8b6147a268d5b051422d0d8d415680 100644 (file)
@@ -62,9 +62,11 @@ class TestPostcodeSearchWithAddress:
     @pytest.fixture(autouse=True)
     def fill_database(self, apiobj):
         apiobj.add_postcode(place_id=100, country_code='ch',
-                            parent_place_id=1000, postcode='12345')
+                            parent_place_id=1000, postcode='12345',
+                            geometry='POINT(17 5)')
         apiobj.add_postcode(place_id=101, country_code='pl',
-                            parent_place_id=2000, postcode='12345')
+                            parent_place_id=2000, postcode='12345',
+                            geometry='POINT(-45 7)')
         apiobj.add_placex(place_id=1000, class_='place', type='village',
                           rank_search=22, rank_address=22,
                           country_code='ch')
@@ -95,3 +97,64 @@ class TestPostcodeSearchWithAddress:
 
         assert [r.place_id for r in results] == [100]
 
+
+    @pytest.mark.parametrize('coord,place_id', [((16.5, 5), 100),
+                                                ((-45.1, 7.004), 101)])
+    def test_lookup_near(self, apiobj, coord, place_id):
+        lookup = FieldLookup('name_vector', [1,2], 'restrict')
+        ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+        results = run_search(apiobj, 0.1, ['12345'],
+                             lookup=[lookup], ranking=[ranking],
+                             details=SearchDetails(near=napi.Point(*coord),
+                                                   near_radius=0.6))
+
+        assert [r.place_id for r in results] == [place_id]
+
+
+    @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+                                      napi.GeometryFormat.KML,
+                                      napi.GeometryFormat.SVG,
+                                      napi.GeometryFormat.TEXT])
+    def test_return_geometries(self, apiobj, geom):
+        results = run_search(apiobj, 0.1, ['12345'],
+                             details=SearchDetails(geometry_output=geom))
+
+        assert results
+        assert all(geom.name.lower() in r.geometry for r in results)
+
+
+    @pytest.mark.parametrize('viewbox, rids', [('-46,6,-44,8', [101,100]),
+                                               ('16,4,18,6', [100,101])])
+    def test_prefer_viewbox(self, apiobj, viewbox, rids):
+        results = run_search(apiobj, 0.1, ['12345'],
+                             details=SearchDetails.from_kwargs({'viewbox': viewbox}))
+
+        assert [r.place_id for r in results] == rids
+
+
+    @pytest.mark.parametrize('viewbox, rid', [('-46,6,-44,8', 101),
+                                               ('16,4,18,6', 100)])
+    def test_restrict_to_viewbox(self, apiobj, viewbox, rid):
+        results = run_search(apiobj, 0.1, ['12345'],
+                             details=SearchDetails.from_kwargs({'viewbox': viewbox,
+                                                                'bounded_viewbox': True}))
+
+        assert [r.place_id for r in results] == [rid]
+
+
+    @pytest.mark.parametrize('coord,rids', [((17.05, 5), [100, 101]),
+                                            ((-45, 7.1), [101, 100])])
+    def test_prefer_near(self, apiobj, coord, rids):
+        results = run_search(apiobj, 0.1, ['12345'],
+                             details=SearchDetails(near=napi.Point(*coord)))
+
+        assert [r.place_id for r in results] == rids
+
+
+    @pytest.mark.parametrize('pid,rid', [(100, 101), (101, 100)])
+    def test_exclude(self, apiobj, pid, rid):
+        results = run_search(apiobj, 0.1, ['12345'],
+                             details=SearchDetails(excluded=[pid]))
+
+        assert [r.place_id for r in results] == [rid]
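
Two of the postcode tests draw the line between a soft and a hard viewbox; side by side (sketch, values from the tests above):

    # 'viewbox' alone only re-ranks: both postcodes are still returned
    soft = SearchDetails.from_kwargs({'viewbox': '16,4,18,6'})
    # 'bounded_viewbox' filters: only the postcode inside the box survives
    hard = SearchDetails.from_kwargs({'viewbox': '16,4,18,6',
                                      'bounded_viewbox': True})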
index dc123403ab24185aa78e59d842cecb0bce48e296..2ed55a0f80afb06372ef27f6a49fcf840e3854cc 100644 (file)
@@ -18,21 +18,17 @@ class MyToken(Token):
 
 
 def make_query(*args):
-    q = None
+    q = QueryStruct([Phrase(args[0][1], '')])
     dummy = MyToken(3.0, 45, 1, 'foo', True)
 
-    for btype, ptype, tlist in args:
-        if q is None:
-            q = QueryStruct([Phrase(ptype, '')])
-        else:
-            q.add_node(btype, ptype)
+    for btype, ptype, _ in args[1:]:
+        q.add_node(btype, ptype)
+    q.add_node(BreakType.END, PhraseType.NONE)
 
-        start = len(q.nodes) - 1
-        for end, ttype in tlist:
+    for start, t in enumerate(args):
+        for end, ttype in t[2]:
             q.add_token(TokenRange(start, end), ttype, dummy)
 
-    q.add_node(BreakType.END, PhraseType.NONE)
-
     return q
 
 
@@ -80,11 +76,11 @@ def test_single_country_name():
 
 def test_single_word_poi_search():
     q = make_query((BreakType.START, PhraseType.NONE,
-                    [(1, TokenType.CATEGORY),
+                    [(1, TokenType.NEAR_ITEM),
                      (1, TokenType.QUALIFIER)]))
 
     res = list(yield_token_assignments(q))
-    assert res == [TokenAssignment(category=TokenRange(0, 1))]
+    assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
 
 
 @pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
@@ -186,7 +182,7 @@ def test_country_housenumber_postcode():
 
 
 @pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
-                                   TokenType.CATEGORY, TokenType.QUALIFIER])
+                                   TokenType.NEAR_ITEM, TokenType.QUALIFIER])
 def test_housenumber_with_only_special_terms(ttype):
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
                    (BreakType.WORD, PhraseType.NONE, [(2, ttype)]))
@@ -270,27 +266,27 @@ def test_postcode_with_designation_backwards():
                                       address=[TokenRange(0, 1)]))
 
 
-def test_category_at_beginning():
-    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.CATEGORY)]),
+def test_near_item_at_beginning():
+    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
                    (BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
-                                      category=TokenRange(0, 1)))
+                                      near_item=TokenRange(0, 1)))
 
 
-def test_category_at_end():
+def test_near_item_at_end():
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
-                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]))
+                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
 
     check_assignments(yield_token_assignments(q),
                       TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
-                                      category=TokenRange(1, 2)))
+                                      near_item=TokenRange(1, 2)))
 
 
-def test_category_in_middle():
+def test_near_item_in_middle():
     q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
-                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]),
+                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
                    (BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
 
     check_assignments(yield_token_assignments(q))
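
The rewritten make_query above expects one (break type, phrase type, token list) triple per query node and appends the END node itself. For example (types as imported in the test module):

    q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
                   (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
    # nodes: START, WORD, END; tokens: PARTIAL on (0, 1), NEAR_ITEM on (1, 2)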
diff --git a/test/python/api/test_api_deletable_v1.py b/test/python/api/test_api_deletable_v1.py
new file mode 100644 (file)
index 0000000..4c5d96b
--- /dev/null
@@ -0,0 +1,67 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for the deletable v1 API call.
+"""
+import json
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+import psycopg2.extras
+
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
+import nominatim.api.v1.server_glue as glue
+import nominatim.api as napi
+
+@pytest_asyncio.fixture
+async def api():
+    api = napi.NominatimAPIAsync(Path('/invalid'))
+    yield api
+    await api.close()
+
+
+class TestDeletableEndPoint:
+
+    @pytest.fixture(autouse=True)
+    def setup_deletable_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
+        psycopg2.extras.register_hstore(temp_db_cursor)
+        table_factory('import_polygon_delete',
+                      definition='osm_id bigint, osm_type char(1), class text, type text',
+                      content=[(345, 'N', 'boundary', 'administrative'),
+                               (781, 'R', 'landuse', 'wood'),
+                               (781, 'R', 'landcover', 'grass')])
+        table_factory('placex',
+                      definition="""place_id bigint, osm_id bigint, osm_type char(1),
+                                    class text, type text, name HSTORE, country_code char(2)""",
+                      content=[(1, 345, 'N', 'boundary', 'administrative', {'old_name': 'Former'}, 'ab'),
+                               (2, 781, 'R', 'landuse', 'wood', {'name': 'Wood'}, 'cd'),
+                               (3, 781, 'R', 'landcover', 'grass', None, 'cd')])
+
+
+
+    @pytest.mark.asyncio
+    async def test_deletable(self, api):
+        a = FakeAdaptor()
+
+        resp = await glue.deletable_endpoint(api, a)
+        results = json.loads(resp.output)
+
+        results.sort(key=lambda r: r['place_id'])
+
+        assert results == [{'place_id': 1, 'country_code': 'ab', 'name': None,
+                            'osm_id': 345, 'osm_type': 'N',
+                            'class': 'boundary', 'type': 'administrative'},
+                           {'place_id': 2, 'country_code': 'cd', 'name': 'Wood',
+                            'osm_id': 781, 'osm_type': 'R',
+                            'class': 'landuse', 'type': 'wood'},
+                           {'place_id': 3, 'country_code': 'cd', 'name': None,
+                            'osm_id': 781, 'osm_type': 'R',
+                            'class': 'landcover', 'type': 'grass'}]
+
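The deletable endpoint is driven through the same FakeAdaptor pattern as the other server-glue tests; stripped to its core it is (sketch, to be run inside an async test, imports as in the file above):

    a = FakeAdaptor()
    resp = await glue.deletable_endpoint(api, a)
    rows = json.loads(resp.output)   # one dict per deletion candidate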
index 101dfd13429439e87212013a15e6308a595477c4..596876d4503dac42d94ab8a06d4cd3f74154a41c 100644 (file)
@@ -15,7 +15,7 @@ import nominatim.api as napi
 
 @pytest.mark.parametrize('idobj', (napi.PlaceID(332), napi.OsmID('W', 4),
                                    napi.OsmID('W', 4, 'highway')))
-def test_lookup_in_placex(apiobj, idobj):
+def test_lookup_in_placex(apiobj, frontend, idobj):
     import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',
@@ -31,7 +31,8 @@ def test_lookup_in_placex(apiobj, idobj):
                      indexed_date=import_date,
                      geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
 
-    result = apiobj.api.details(idobj)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(idobj)
 
     assert result is not None
 
@@ -69,7 +70,7 @@ def test_lookup_in_placex(apiobj, idobj):
     assert result.geometry == {'type': 'ST_LineString'}
 
 
-def test_lookup_in_placex_minimal_info(apiobj):
+def test_lookup_in_placex_minimal_info(apiobj, frontend):
     import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',
@@ -79,7 +80,8 @@ def test_lookup_in_placex_minimal_info(apiobj):
                      indexed_date=import_date,
                      geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
 
-    result = apiobj.api.details(napi.PlaceID(332))
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332))
 
     assert result is not None
 
@@ -117,16 +119,17 @@ def test_lookup_in_placex_minimal_info(apiobj):
     assert result.geometry == {'type': 'ST_LineString'}
 
 
-def test_lookup_in_placex_with_geometry(apiobj):
+def test_lookup_in_placex_with_geometry(apiobj, frontend):
     apiobj.add_placex(place_id=332,
                       geometry='LINESTRING(23 34, 23.1 34)')
 
-    result = apiobj.api.details(napi.PlaceID(332), geometry_output=napi.GeometryFormat.GEOJSON)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), geometry_output=napi.GeometryFormat.GEOJSON)
 
     assert result.geometry == {'geojson': '{"type":"LineString","coordinates":[[23,34],[23.1,34]]}'}
 
 
-def test_lookup_placex_with_address_details(apiobj):
+def test_lookup_placex_with_address_details(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',  name='Street',
                      country_code='pl',
@@ -143,24 +146,28 @@ def test_lookup_placex_with_address_details(apiobj):
                               country_code='pl',
                               rank_search=17, rank_address=16)
 
-    result = apiobj.api.details(napi.PlaceID(332), address_details=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), address_details=True)
 
     assert result.address_rows == [
                napi.AddressLine(place_id=332, osm_object=('W', 4),
                                 category=('highway', 'residential'),
                                 names={'name': 'Street'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=26, distance=0.0),
+                                rank_address=26, distance=0.0,
+                                local_name='Street'),
                napi.AddressLine(place_id=1000, osm_object=('N', 3333),
                                 category=('place', 'suburb'),
                                 names={'name': 'Smallplace'}, extratags={},
                                 admin_level=13, fromarea=False, isaddress=True,
-                                rank_address=23, distance=0.0034),
+                                rank_address=23, distance=0.0034,
+                                local_name='Smallplace'),
                napi.AddressLine(place_id=1001, osm_object=('N', 3334),
                                 category=('place', 'city'),
                                 names={'name': 'Bigplace'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=16, distance=0.0),
+                                rank_address=16, distance=0.0,
+                                local_name='Bigplace'),
                napi.AddressLine(place_id=None, osm_object=None,
                                 category=('place', 'country_code'),
                                 names={'ref': 'pl'}, extratags={},
@@ -169,18 +176,19 @@ def test_lookup_placex_with_address_details(apiobj):
            ]
 
 
-def test_lookup_place_with_linked_places_none_existing(apiobj):
+def test_lookup_place_with_linked_places_none_existing(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',  name='Street',
                      country_code='pl', linked_place_id=45,
                      rank_search=27, rank_address=26)
 
-    result = apiobj.api.details(napi.PlaceID(332), linked_places=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), linked_places=True)
 
     assert result.linked_rows == []
 
 
-def test_lookup_place_with_linked_places_existing(apiobj):
+def test_lookup_place_with_linked_places_existing(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',  name='Street',
                      country_code='pl', linked_place_id=45,
@@ -194,7 +202,8 @@ def test_lookup_place_with_linked_places_existing(apiobj):
                      country_code='pl', linked_place_id=332,
                      rank_search=27, rank_address=26)
 
-    result = apiobj.api.details(napi.PlaceID(332), linked_places=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), linked_places=True)
 
     assert result.linked_rows == [
                napi.AddressLine(place_id=1001, osm_object=('W', 5),
@@ -210,18 +219,19 @@ def test_lookup_place_with_linked_places_existing(apiobj):
     ]
 
 
-def test_lookup_place_with_parented_places_not_existing(apiobj):
+def test_lookup_place_with_parented_places_not_existing(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',  name='Street',
                      country_code='pl', parent_place_id=45,
                      rank_search=27, rank_address=26)
 
-    result = apiobj.api.details(napi.PlaceID(332), parented_places=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), parented_places=True)
 
     assert result.parented_rows == []
 
 
-def test_lookup_place_with_parented_places_existing(apiobj):
+def test_lookup_place_with_parented_places_existing(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',  name='Street',
                      country_code='pl', parent_place_id=45,
@@ -235,7 +245,8 @@ def test_lookup_place_with_parented_places_existing(apiobj):
                      country_code='pl', parent_place_id=332,
                      rank_search=27, rank_address=26)
 
-    result = apiobj.api.details(napi.PlaceID(332), parented_places=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(332), parented_places=True)
 
     assert result.parented_rows == [
                napi.AddressLine(place_id=1001, osm_object=('N', 5),
@@ -247,7 +258,7 @@ def test_lookup_place_with_parented_places_existing(apiobj):
 
 
 @pytest.mark.parametrize('idobj', (napi.PlaceID(4924), napi.OsmID('W', 9928)))
-def test_lookup_in_osmline(apiobj, idobj):
+def test_lookup_in_osmline(apiobj, frontend, idobj):
     import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
     apiobj.add_osmline(place_id=4924, osm_id=9928,
                        parent_place_id=12,
@@ -257,7 +268,8 @@ def test_lookup_in_osmline(apiobj, idobj):
                        indexed_date=import_date,
                        geometry='LINESTRING(23 34, 23 35)')
 
-    result = apiobj.api.details(idobj)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(idobj)
 
     assert result is not None
 
@@ -295,7 +307,7 @@ def test_lookup_in_osmline(apiobj, idobj):
     assert result.geometry == {'type': 'ST_LineString'}
 
 
-def test_lookup_in_osmline_split_interpolation(apiobj):
+def test_lookup_in_osmline_split_interpolation(apiobj, frontend):
     apiobj.add_osmline(place_id=1000, osm_id=9,
                        startnumber=2, endnumber=4, step=1)
     apiobj.add_osmline(place_id=1001, osm_id=9,
@@ -303,18 +315,19 @@ def test_lookup_in_osmline_split_interpolation(apiobj):
     apiobj.add_osmline(place_id=1002, osm_id=9,
                        startnumber=11, endnumber=20, step=1)
 
+    api = frontend(apiobj, options={'details'})
     for i in range(1, 6):
-        result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+        result = api.details(napi.OsmID('W', 9, str(i)))
         assert result.place_id == 1000
     for i in range(7, 11):
-        result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+        result = api.details(napi.OsmID('W', 9, str(i)))
         assert result.place_id == 1001
     for i in range(12, 22):
-        result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+        result = api.details(napi.OsmID('W', 9, str(i)))
         assert result.place_id == 1002
 
 
-def test_lookup_osmline_with_address_details(apiobj):
+def test_lookup_osmline_with_address_details(apiobj, frontend):
     apiobj.add_osmline(place_id=9000, osm_id=9,
                        startnumber=2, endnumber=4, step=1,
                        parent_place_id=332)
@@ -334,29 +347,28 @@ def test_lookup_osmline_with_address_details(apiobj):
                               country_code='pl',
                               rank_search=17, rank_address=16)
 
-    result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(9000), address_details=True)
 
     assert result.address_rows == [
-               napi.AddressLine(place_id=None, osm_object=None,
-                                category=('place', 'house_number'),
-                                names={'ref': '2'}, extratags={},
-                                admin_level=None, fromarea=True, isaddress=True,
-                                rank_address=28, distance=0.0),
                napi.AddressLine(place_id=332, osm_object=('W', 4),
                                 category=('highway', 'residential'),
                                 names={'name': 'Street'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=26, distance=0.0),
+                                rank_address=26, distance=0.0,
+                                local_name='Street'),
                napi.AddressLine(place_id=1000, osm_object=('N', 3333),
                                 category=('place', 'suburb'),
                                 names={'name': 'Smallplace'}, extratags={},
                                 admin_level=13, fromarea=False, isaddress=True,
-                                rank_address=23, distance=0.0034),
+                                rank_address=23, distance=0.0034,
+                                local_name='Smallplace'),
                napi.AddressLine(place_id=1001, osm_object=('N', 3334),
                                 category=('place', 'city'),
                                 names={'name': 'Bigplace'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=16, distance=0.0),
+                                rank_address=16, distance=0.0,
+                                local_name='Bigplace'),
                napi.AddressLine(place_id=None, osm_object=None,
                                 category=('place', 'country_code'),
                                 names={'ref': 'pl'}, extratags={},
@@ -365,7 +377,7 @@ def test_lookup_osmline_with_address_details(apiobj):
            ]
 
 
-def test_lookup_in_tiger(apiobj):
+def test_lookup_in_tiger(apiobj, frontend):
     apiobj.add_tiger(place_id=4924,
                      parent_place_id=12,
                      startnumber=1, endnumber=4, step=1,
@@ -376,7 +388,8 @@ def test_lookup_in_tiger(apiobj):
                       osm_type='W', osm_id=6601223,
                       geometry='LINESTRING(23 34, 23 35)')
 
-    result = apiobj.api.details(napi.PlaceID(4924))
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(4924))
 
     assert result is not None
 
@@ -414,7 +427,7 @@ def test_lookup_in_tiger(apiobj):
     assert result.geometry == {'type': 'ST_LineString'}
 
 
-def test_lookup_tiger_with_address_details(apiobj):
+def test_lookup_tiger_with_address_details(apiobj, frontend):
     apiobj.add_tiger(place_id=9000,
                      startnumber=2, endnumber=4, step=1,
                      parent_place_id=332)
@@ -434,29 +447,28 @@ def test_lookup_tiger_with_address_details(apiobj):
                               country_code='us',
                               rank_search=17, rank_address=16)
 
-    result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(9000), address_details=True)
 
     assert result.address_rows == [
-               napi.AddressLine(place_id=None, osm_object=None,
-                                category=('place', 'house_number'),
-                                names={'ref': '2'}, extratags={},
-                                admin_level=None, fromarea=True, isaddress=True,
-                                rank_address=28, distance=0.0),
                napi.AddressLine(place_id=332, osm_object=('W', 4),
                                 category=('highway', 'residential'),
                                 names={'name': 'Street'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=26, distance=0.0),
+                                rank_address=26, distance=0.0,
+                                local_name='Street'),
                napi.AddressLine(place_id=1000, osm_object=('N', 3333),
                                 category=('place', 'suburb'),
                                 names={'name': 'Smallplace'}, extratags={},
                                 admin_level=13, fromarea=False, isaddress=True,
-                                rank_address=23, distance=0.0034),
+                                rank_address=23, distance=0.0034,
+                                local_name='Smallplace'),
                napi.AddressLine(place_id=1001, osm_object=('N', 3334),
                                 category=('place', 'city'),
                                 names={'name': 'Bigplace'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=16, distance=0.0),
+                                rank_address=16, distance=0.0,
+                                local_name='Bigplace'),
                napi.AddressLine(place_id=None, osm_object=None,
                                 category=('place', 'country_code'),
                                 names={'ref': 'us'}, extratags={},
@@ -465,7 +477,7 @@ def test_lookup_tiger_with_address_details(apiobj):
            ]
 
 
-def test_lookup_in_postcode(apiobj):
+def test_lookup_in_postcode(apiobj, frontend):
     import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
     apiobj.add_postcode(place_id=554,
                         parent_place_id=152,
@@ -475,7 +487,8 @@ def test_lookup_in_postcode(apiobj):
                         indexed_date=import_date,
                         geometry='POINT(-9.45 5.6)')
 
-    result = apiobj.api.details(napi.PlaceID(554))
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(554))
 
     assert result is not None
 
@@ -513,7 +526,7 @@ def test_lookup_in_postcode(apiobj):
     assert result.geometry == {'type': 'ST_Point'}
 
 
-def test_lookup_postcode_with_address_details(apiobj):
+def test_lookup_postcode_with_address_details(apiobj, frontend):
     apiobj.add_postcode(place_id=9000,
                         parent_place_id=332,
                         postcode='34 425',
@@ -529,24 +542,28 @@ def test_lookup_postcode_with_address_details(apiobj):
                               country_code='gb',
                               rank_search=17, rank_address=16)
 
-    result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+    api = frontend(apiobj, options={'details'})
+    result = api.details(napi.PlaceID(9000), address_details=True)
 
     assert result.address_rows == [
+               napi.AddressLine(place_id=9000, osm_object=None,
+                                category=('place', 'postcode'),
+                                names={'ref': '34 425'}, extratags={},
+                                admin_level=15, fromarea=True, isaddress=True,
+                                rank_address=25, distance=0.0,
+                                local_name='34 425'),
                napi.AddressLine(place_id=332, osm_object=('N', 3333),
                                 category=('place', 'suburb'),
                                 names={'name': 'Smallplace'}, extratags={},
                                 admin_level=13, fromarea=True, isaddress=True,
-                                rank_address=23, distance=0.0),
+                                rank_address=23, distance=0.0,
+                                local_name='Smallplace'),
                napi.AddressLine(place_id=1001, osm_object=('N', 3334),
                                 category=('place', 'city'),
                                 names={'name': 'Bigplace'}, extratags={},
                                 admin_level=15, fromarea=True, isaddress=True,
-                                rank_address=16, distance=0.0),
-               napi.AddressLine(place_id=None, osm_object=None,
-                                category=('place', 'postcode'),
-                                names={'ref': '34 425'}, extratags={},
-                                admin_level=None, fromarea=False, isaddress=True,
-                                rank_address=5, distance=0.0),
+                                rank_address=16, distance=0.0,
+                                local_name='Bigplace'),
                napi.AddressLine(place_id=None, osm_object=None,
                                 category=('place', 'country_code'),
                                 names={'ref': 'gb'}, extratags={},
@@ -557,18 +574,20 @@ def test_lookup_postcode_with_address_details(apiobj):
 @pytest.mark.parametrize('objid', [napi.PlaceID(1736),
                                    napi.OsmID('W', 55),
                                    napi.OsmID('N', 55, 'amenity')])
-def test_lookup_missing_object(apiobj, objid):
+def test_lookup_missing_object(apiobj, frontend, objid):
     apiobj.add_placex(place_id=1, osm_type='N', osm_id=55,
                       class_='place', type='suburb')
 
-    assert apiobj.api.details(objid) is None
+    api = frontend(apiobj, options={'details'})
+    assert api.details(objid) is None
 
 
 @pytest.mark.parametrize('gtype', (napi.GeometryFormat.KML,
                                     napi.GeometryFormat.SVG,
                                     napi.GeometryFormat.TEXT))
-def test_lookup_unsupported_geometry(apiobj, gtype):
+def test_lookup_unsupported_geometry(apiobj, frontend, gtype):
     apiobj.add_placex(place_id=332)
 
+    api = frontend(apiobj, options={'details'})
     with pytest.raises(ValueError):
-        apiobj.api.details(napi.PlaceID(332), geometry_output=gtype)
+        api.details(napi.PlaceID(332), geometry_output=gtype)
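
Two changes recur through this file: every test now builds its API through the frontend fixture instead of using apiobj.api directly, and address rows carry a display-ready local_name. The recurring pattern (sketch; frontend and apiobj are pytest fixtures from the suite):

    api = frontend(apiobj, options={'details'})   # enable the details endpoint
    result = api.details(napi.PlaceID(332), address_details=True)
    result.address_rows[0].local_name             # e.g. 'Street'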
index 619bc74710df52a8b241d50c7f908264f26c79ad..48b0777667c8199934e4cd7994e82feffde54570 100644 (file)
@@ -7,22 +7,26 @@
 """
 Tests for lookup API call.
 """
+import json
+
 import pytest
 
 import nominatim.api as napi
 
-def test_lookup_empty_list(apiobj):
-    assert apiobj.api.lookup([]) == []
+def test_lookup_empty_list(apiobj, frontend):
+    api = frontend(apiobj, options={'details'})
+    assert api.lookup([]) == []
 
 
-def test_lookup_non_existing(apiobj):
-    assert apiobj.api.lookup((napi.PlaceID(332), napi.OsmID('W', 4),
-                              napi.OsmID('W', 4, 'highway'))) == []
+def test_lookup_non_existing(apiobj, frontend):
+    api = frontend(apiobj, options={'details'})
+    assert api.lookup((napi.PlaceID(332), napi.OsmID('W', 4),
+                       napi.OsmID('W', 4, 'highway'))) == []
 
 
 @pytest.mark.parametrize('idobj', (napi.PlaceID(332), napi.OsmID('W', 4),
                                    napi.OsmID('W', 4, 'highway')))
-def test_lookup_single_placex(apiobj, idobj):
+def test_lookup_single_placex(apiobj, frontend, idobj):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',
                      name={'name': 'Road'}, address={'city': 'Barrow'},
@@ -36,7 +40,8 @@ def test_lookup_single_placex(apiobj, idobj):
                      centroid=(23, 34),
                      geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
 
-    result = apiobj.api.lookup([idobj])
+    api = frontend(apiobj, options={'details'})
+    result = api.lookup([idobj])
 
     assert len(result) == 1
 
@@ -72,7 +77,7 @@ def test_lookup_single_placex(apiobj, idobj):
     assert result.geometry == {}
 
 
-def test_lookup_multiple_places(apiobj):
+def test_lookup_multiple_places(apiobj, frontend):
     apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
                      class_='highway', type='residential',
                      name={'name': 'Road'}, address={'city': 'Barrow'},
@@ -93,10 +98,66 @@ def test_lookup_multiple_places(apiobj):
                        geometry='LINESTRING(23 34, 23 35)')
 
 
-    result = apiobj.api.lookup((napi.OsmID('W', 1),
-                                napi.OsmID('W', 4),
-                                napi.OsmID('W', 9928)))
+    api = frontend(apiobj, options={'details'})
+    result = api.lookup((napi.OsmID('W', 1),
+                         napi.OsmID('W', 4),
+                         napi.OsmID('W', 9928)))
 
     assert len(result) == 2
 
     assert set(r.place_id for r in result) == {332, 4924}
+
+
+@pytest.mark.parametrize('gtype', list(napi.GeometryFormat))
+def test_simple_place_with_geometry(apiobj, frontend, gtype):
+    apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+                     class_='highway', type='residential',
+                     name={'name': 'Road'}, address={'city': 'Barrow'},
+                     extratags={'surface': 'paved'},
+                     parent_place_id=34, linked_place_id=55,
+                     admin_level=15, country_code='gb',
+                     housenumber='4',
+                     postcode='34425', wikipedia='en:Faa',
+                     rank_search=27, rank_address=26,
+                     importance=0.01,
+                     centroid=(23, 34),
+                     geometry='POLYGON((23 34, 23.1 34, 23.1 34.1, 23 34))')
+
+    api = frontend(apiobj, options={'details'})
+    result = api.lookup([napi.OsmID('W', 4)], geometry_output=gtype)
+
+    assert len(result) == 1
+    assert result[0].place_id == 332
+
+    if gtype == napi.GeometryFormat.NONE:
+        assert list(result[0].geometry.keys()) == []
+    else:
+        assert list(result[0].geometry.keys()) == [gtype.name.lower()]
+
+
+def test_simple_place_with_geometry_simplified(apiobj, frontend):
+    apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+                     class_='highway', type='residential',
+                     name={'name': 'Road'}, address={'city': 'Barrow'},
+                     extratags={'surface': 'paved'},
+                     parent_place_id=34, linked_place_id=55,
+                     admin_level=15, country_code='gb',
+                     housenumber='4',
+                     postcode='34425', wikipedia='en:Faa',
+                     rank_search=27, rank_address=26,
+                     importance=0.01,
+                     centroid=(23, 34),
+                     geometry='POLYGON((23 34, 22.999 34, 23.1 34, 23.1 34.1, 23 34))')
+
+    api = frontend(apiobj, options={'details'})
+    result = api.lookup([napi.OsmID('W', 4)],
+                        geometry_output=napi.GeometryFormat.GEOJSON,
+                        geometry_simplification=0.1)
+
+    assert len(result) == 1
+    assert result[0].place_id == 332
+
+    geom = json.loads(result[0].geometry['geojson'])
+
+    assert geom['type'] == 'Polygon'
+    assert geom['coordinates'] == [[[23, 34], [23.1, 34], [23.1, 34.1], [23, 34]]]
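
As the parametrized geometry test above asserts, lookup() stores each requested geometry under the lower-cased format name (sketch; gtype is any napi.GeometryFormat other than NONE):

    result = api.lookup([napi.OsmID('W', 4)], geometry_output=gtype)
    geom_str = result[0].geometry[gtype.name.lower()]   # 'geojson', 'kml', ...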
diff --git a/test/python/api/test_api_polygons_v1.py b/test/python/api/test_api_polygons_v1.py
new file mode 100644 (file)
index 0000000..6842f79
--- /dev/null
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for the polygons v1 API call.
+"""
+import json
+import datetime as dt
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+import psycopg2.extras
+
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
+import nominatim.api.v1.server_glue as glue
+import nominatim.api as napi
+
+@pytest_asyncio.fixture
+async def api():
+    api = napi.NominatimAPIAsync(Path('/invalid'))
+    yield api
+    await api.close()
+
+
+class TestPolygonsEndPoint:
+
+    @pytest.fixture(autouse=True)
+    def setup_polygons_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
+        psycopg2.extras.register_hstore(temp_db_cursor)
+
+        self.now = dt.datetime.now()
+        self.recent = dt.datetime.now() - dt.timedelta(days=3)
+
+        table_factory('import_polygon_error',
+                      definition="""osm_id bigint,
+                                    osm_type character(1),
+                                    class text,
+                                    type text,
+                                    name hstore,
+                                    country_code character varying(2),
+                                    updated timestamp without time zone,
+                                    errormessage text,
+                                    prevgeometry geometry(Geometry,4326),
+                                    newgeometry geometry(Geometry,4326)""",
+                    content=[(345, 'N', 'boundary', 'administrative',
+                              {'name': 'Foo'}, 'xx', self.recent,
+                              'some text', None, None),
+                             (781, 'R', 'landuse', 'wood',
+                              None, 'ds', self.now,
+                              'Area reduced by lots', None, None)])
+
+
+    @pytest.mark.asyncio
+    async def test_polygons_simple(self, api):
+        a = FakeAdaptor()
+
+        resp = await glue.polygons_endpoint(api, a)
+        results = json.loads(resp.output)
+
+        results.sort(key=lambda r: (r['osm_type'], r['osm_id']))
+
+        assert results == [{'osm_type': 'N', 'osm_id': 345,
+                            'class': 'boundary', 'type': 'administrative',
+                            'name': 'Foo', 'country_code': 'xx',
+                            'errormessage': 'some text',
+                            'updated': self.recent.isoformat(sep=' ', timespec='seconds')},
+                           {'osm_type': 'R', 'osm_id': 781,
+                            'class': 'landuse', 'type': 'wood',
+                            'name': None, 'country_code': 'ds',
+                            'errormessage': 'Area reduced by lots',
+                            'updated': self.now.isoformat(sep=' ', timespec='seconds')}]
+
+
+    @pytest.mark.asyncio
+    async def test_polygons_days(self, api):
+        a = FakeAdaptor()
+        a.params['days'] = '2'
+
+        resp = await glue.polygons_endpoint(api, a)
+        results = json.loads(resp.output)
+
+        assert [r['osm_id'] for r in results] == [781]
+
+
+    @pytest.mark.asyncio
+    async def test_polygons_class(self, api):
+        a = FakeAdaptor()
+        a.params['class'] = 'landuse'
+
+        resp = await glue.polygons_endpoint(api, a)
+        results = json.loads(resp.output)
+
+        assert [r['osm_id'] for r in results] == [781]
+
+
+    @pytest.mark.asyncio
+    async def test_polygons_reduced(self, api):
+        a = FakeAdaptor()
+        a.params['reduced'] = '1'
+
+        resp = await glue.polygons_endpoint(api, a)
+        results = json.loads(resp.output)
+
+        assert [r['osm_id'] for r in results] == [781]
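
The three filters above are exercised one at a time; nothing in the tests suggests they are exclusive, so a combined query would presumably look like this (hypothetical, reusing the same FakeAdaptor machinery inside an async test):

    # Hypothetical combination of the filters tested individually above;
    # the parameter names come from the tests, the combination is untested.
    a = FakeAdaptor()
    a.params['days'] = '2'         # only errors updated in the last 2 days
    a.params['class'] = 'landuse'  # restrict to a single class
    a.params['reduced'] = '1'      # only geometries that shrank

    resp = await glue.polygons_endpoint(api, a)
    results = json.loads(resp.output)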
index 3296e98fdd6055ad0beb2571e4da2877b6d37f7d..414115e113783a575c6a8d52453e1e04f1a80a98 100644 (file)
@@ -16,20 +16,23 @@ import pytest
 
 import nominatim.api as napi
 
-def test_reverse_rank_30(apiobj):
+API_OPTIONS = {'reverse'}
+
+def test_reverse_rank_30(apiobj, frontend):
     apiobj.add_placex(place_id=223, class_='place', type='house',
                       housenumber='1',
                       centroid=(1.3, 0.7),
                       geometry='POINT(1.3 0.7)')
 
-    result = apiobj.api.reverse((1.3, 0.7))
+    api = frontend(apiobj, options=API_OPTIONS)
+    result = api.reverse((1.3, 0.7))
 
     assert result is not None
     assert result.place_id == 223
 
 
 @pytest.mark.parametrize('country', ['de', 'us'])
-def test_reverse_street(apiobj, country):
+def test_reverse_street(apiobj, frontend, country):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -37,17 +40,19 @@ def test_reverse_street(apiobj, country):
                       country_code=country,
                       geometry='LINESTRING(9.995 10, 10.005 10)')
 
-    assert apiobj.api.reverse((9.995, 10)).place_id == 990
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((9.995, 10)).place_id == 990
 
 
-def test_reverse_ignore_unindexed(apiobj):
+def test_reverse_ignore_unindexed(apiobj, frontend):
     apiobj.add_placex(place_id=223, class_='place', type='house',
                       housenumber='1',
                       indexed_status=2,
                       centroid=(1.3, 0.7),
                       geometry='POINT(1.3 0.7)')
 
-    result = apiobj.api.reverse((1.3, 0.7))
+    api = frontend(apiobj, options=API_OPTIONS)
+    result = api.reverse((1.3, 0.7))
 
     assert result is None
 
@@ -60,8 +65,9 @@ def test_reverse_ignore_unindexed(apiobj):
                                               (0.7, napi.DataLayer.RAILWAY, 226),
                                               (0.7, napi.DataLayer.NATURAL, 227),
                                               (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
-                                              (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225)])
-def test_reverse_rank_30_layers(apiobj, y, layer, place_id):
+                                              (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225),
+                                              (5, napi.DataLayer.ADDRESS, 229)])
+def test_reverse_rank_30_layers(apiobj, frontend, y, layer, place_id):
     apiobj.add_placex(place_id=223, class_='place', type='house',
                       housenumber='1',
                       rank_address=30,
@@ -83,22 +89,29 @@ def test_reverse_rank_30_layers(apiobj, y, layer, place_id):
                       rank_address=0,
                       rank_search=30,
                       centroid=(1.3, 0.70005))
+    apiobj.add_placex(place_id=229, class_='place', type='house',
+                      name={'addr:housename': 'Old Cottage'},
+                      rank_address=30,
+                      rank_search=30,
+                      centroid=(1.3, 5))
 
-    assert apiobj.api.reverse((1.3, y), layers=layer).place_id == place_id
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((1.3, y), layers=layer).place_id == place_id
 
 
-def test_reverse_poi_layer_with_no_pois(apiobj):
+def test_reverse_poi_layer_with_no_pois(apiobj, frontend):
     apiobj.add_placex(place_id=223, class_='place', type='house',
                       housenumber='1',
                       rank_address=30,
                       rank_search=30,
                       centroid=(1.3, 0.70001))
 
-    assert apiobj.api.reverse((1.3, 0.70001), max_rank=29,
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((1.3, 0.70001), max_rank=29,
                               layers=napi.DataLayer.POI) is None
 
 
-def test_reverse_housenumber_on_street(apiobj):
+def test_reverse_housenumber_on_street(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -110,12 +123,13 @@ def test_reverse_housenumber_on_street(apiobj):
                       housenumber='23',
                       centroid=(10.0, 10.00001))
 
-    assert apiobj.api.reverse((10.0, 10.0), max_rank=30).place_id == 991
-    assert apiobj.api.reverse((10.0, 10.0), max_rank=27).place_id == 990
-    assert apiobj.api.reverse((10.0, 10.00001), max_rank=30).place_id == 991
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((10.0, 10.0), max_rank=30).place_id == 991
+    assert api.reverse((10.0, 10.0), max_rank=27).place_id == 990
+    assert api.reverse((10.0, 10.00001), max_rank=30).place_id == 991
 
 
-def test_reverse_housenumber_interpolation(apiobj):
+def test_reverse_housenumber_interpolation(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -132,10 +146,11 @@ def test_reverse_housenumber_interpolation(apiobj):
                        centroid=(10.0, 10.00001),
                        geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
 
-    assert apiobj.api.reverse((10.0, 10.0)).place_id == 992
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((10.0, 10.0)).place_id == 992
 
 
-def test_reverse_housenumber_point_interpolation(apiobj):
+def test_reverse_housenumber_point_interpolation(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -147,12 +162,13 @@ def test_reverse_housenumber_point_interpolation(apiobj):
                        centroid=(10.0, 10.00001),
                        geometry='POINT(10.0 10.00001)')
 
-    res = apiobj.api.reverse((10.0, 10.0))
+    api = frontend(apiobj, options=API_OPTIONS)
+    res = api.reverse((10.0, 10.0))
     assert res.place_id == 992
     assert res.housenumber == '42'
 
 
-def test_reverse_tiger_number(apiobj):
+def test_reverse_tiger_number(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -165,11 +181,12 @@ def test_reverse_tiger_number(apiobj):
                      centroid=(10.0, 10.00001),
                      geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
 
-    assert apiobj.api.reverse((10.0, 10.0)).place_id == 992
-    assert apiobj.api.reverse((10.0, 10.00001)).place_id == 992
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((10.0, 10.0)).place_id == 992
+    assert api.reverse((10.0, 10.00001)).place_id == 992
 
 
-def test_reverse_point_tiger(apiobj):
+def test_reverse_point_tiger(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -182,12 +199,13 @@ def test_reverse_point_tiger(apiobj):
                      centroid=(10.0, 10.00001),
                      geometry='POINT(10.0 10.00001)')
 
-    res = apiobj.api.reverse((10.0, 10.0))
+    api = frontend(apiobj, options=API_OPTIONS)
+    res = api.reverse((10.0, 10.0))
     assert res.place_id == 992
     assert res.housenumber == '1'
 
 
-def test_reverse_low_zoom_address(apiobj):
+def test_reverse_low_zoom_address(apiobj, frontend):
     apiobj.add_placex(place_id=1001, class_='place', type='house',
                       housenumber='1',
                       rank_address=30,
@@ -201,11 +219,12 @@ def test_reverse_low_zoom_address(apiobj):
                       geometry="""POLYGON((59.3 80.70001, 59.3001 80.70001,
                                         59.3001 80.70101, 59.3 80.70101, 59.3 80.70001))""")
 
-    assert apiobj.api.reverse((59.30005, 80.7005)).place_id == 1001
-    assert apiobj.api.reverse((59.30005, 80.7005), max_rank=18).place_id == 1002
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((59.30005, 80.7005)).place_id == 1001
+    assert api.reverse((59.30005, 80.7005), max_rank=18).place_id == 1002
 
 
-def test_reverse_place_node_in_area(apiobj):
+def test_reverse_place_node_in_area(apiobj, frontend):
     apiobj.add_placex(place_id=1002, class_='place', type='town',
                       name={'name': 'Town Area'},
                       rank_address=16,
@@ -220,7 +239,8 @@ def test_reverse_place_node_in_area(apiobj):
                       rank_search=18,
                       centroid=(59.30004, 80.70055))
 
-    assert apiobj.api.reverse((59.30004, 80.70055)).place_id == 1003
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((59.30004, 80.70055)).place_id == 1003
 
 
 @pytest.mark.parametrize('layer,place_id', [(napi.DataLayer.MANMADE, 225),
@@ -228,7 +248,7 @@ def test_reverse_place_node_in_area(apiobj):
                                             (napi.DataLayer.NATURAL, 227),
                                             (napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
                                             (napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225)])
-def test_reverse_larger_area_layers(apiobj, layer, place_id):
+def test_reverse_larger_area_layers(apiobj, frontend, layer, place_id):
     apiobj.add_placex(place_id=225, class_='man_made', type='dam',
                       name={'name': 'Dam'},
                       rank_address=0,
@@ -245,17 +265,19 @@ def test_reverse_larger_area_layers(apiobj, layer, place_id):
                       rank_search=16,
                       centroid=(1.3, 0.70005))
 
-    assert apiobj.api.reverse((1.3, 0.7), layers=layer).place_id == place_id
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((1.3, 0.7), layers=layer).place_id == place_id
 
 
-def test_reverse_country_lookup_no_objects(apiobj):
+def test_reverse_country_lookup_no_objects(apiobj, frontend):
     apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
 
-    assert apiobj.api.reverse((0.5, 0.5)) is None
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((0.5, 0.5)) is None
 
 
 @pytest.mark.parametrize('rank', [4, 30])
-def test_reverse_country_lookup_country_only(apiobj, rank):
+def test_reverse_country_lookup_country_only(apiobj, frontend, rank):
     apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
     apiobj.add_placex(place_id=225, class_='place', type='country',
                       name={'name': 'My Country'},
@@ -264,10 +286,11 @@ def test_reverse_country_lookup_country_only(apiobj, rank):
                       country_code='xx',
                       centroid=(0.7, 0.7))
 
-    assert apiobj.api.reverse((0.5, 0.5), max_rank=rank).place_id == 225
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((0.5, 0.5), max_rank=rank).place_id == 225
 
 
-def test_reverse_country_lookup_place_node_inside(apiobj):
+def test_reverse_country_lookup_place_node_inside(apiobj, frontend):
     apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
     apiobj.add_placex(place_id=225, class_='place', type='state',
                       osm_type='N',
@@ -277,11 +300,12 @@ def test_reverse_country_lookup_place_node_inside(apiobj):
                       country_code='xx',
                       centroid=(0.5, 0.505))
 
-    assert apiobj.api.reverse((0.5, 0.5)).place_id == 225
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((0.5, 0.5)).place_id == 225
 
 
 @pytest.mark.parametrize('gtype', list(napi.GeometryFormat))
-def test_reverse_geometry_output_placex(apiobj, gtype):
+def test_reverse_geometry_output_placex(apiobj, frontend, gtype):
     apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
     apiobj.add_placex(place_id=1001, class_='place', type='house',
                       housenumber='1',
@@ -296,34 +320,37 @@ def test_reverse_geometry_output_placex(apiobj, gtype):
                       country_code='xx',
                       centroid=(0.5, 0.5))
 
-    assert apiobj.api.reverse((59.3, 80.70001), geometry_output=gtype).place_id == 1001
-    assert apiobj.api.reverse((0.5, 0.5), geometry_output=gtype).place_id == 1003
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((59.3, 80.70001), geometry_output=gtype).place_id == 1001
+    assert api.reverse((0.5, 0.5), geometry_output=gtype).place_id == 1003
 
 
-def test_reverse_simplified_geometry(apiobj):
+def test_reverse_simplified_geometry(apiobj, frontend):
     apiobj.add_placex(place_id=1001, class_='place', type='house',
                       housenumber='1',
                       rank_address=30,
                       rank_search=30,
                       centroid=(59.3, 80.70001))
 
+    api = frontend(apiobj, options=API_OPTIONS)
     details = dict(geometry_output=napi.GeometryFormat.GEOJSON,
                    geometry_simplification=0.1)
-    assert apiobj.api.reverse((59.3, 80.70001), **details).place_id == 1001
+    assert api.reverse((59.3, 80.70001), **details).place_id == 1001
 
 
-def test_reverse_interpolation_geometry(apiobj):
+def test_reverse_interpolation_geometry(apiobj, frontend):
     apiobj.add_osmline(place_id=992,
                        parent_place_id=990,
                        startnumber=1, endnumber=3, step=1,
                        centroid=(10.0, 10.00001),
                        geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
 
-    assert apiobj.api.reverse((10.0, 10.0), geometry_output=napi.GeometryFormat.TEXT)\
+    api = frontend(apiobj, options=API_OPTIONS)
+    assert api.reverse((10.0, 10.0), geometry_output=napi.GeometryFormat.TEXT)\
                      .geometry['text'] == 'POINT(10 10.00001)'
 
 
-def test_reverse_tiger_geometry(apiobj):
+def test_reverse_tiger_geometry(apiobj, frontend):
     apiobj.add_placex(place_id=990, class_='highway', type='service',
                       rank_search=27, rank_address=27,
                       name = {'name': 'My Street'},
@@ -336,7 +363,8 @@ def test_reverse_tiger_geometry(apiobj):
                      centroid=(10.0, 10.00001),
                      geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
 
-    output = apiobj.api.reverse((10.0, 10.0),
+    api = frontend(apiobj, options=API_OPTIONS)
+    output = api.reverse((10.0, 10.0),
                                 geometry_output=napi.GeometryFormat.GEOJSON).geometry['geojson']
 
     assert json.loads(output) == {'coordinates': [10, 10.00001], 'type': 'Point'}
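
Every test in this file now follows the same two-step shape: set up rows on apiobj, then obtain the query object from the frontend fixture with the option set declared in API_OPTIONS. A hypothetical additional test in that style (the fixture itself lives in the suite's conftest and is not part of this excerpt):

    # Hypothetical further test following the new pattern: test data goes
    # into apiobj, queries go through the frontend fixture.
    def test_reverse_example_pattern(apiobj, frontend):
        apiobj.add_placex(place_id=500, class_='place', type='house',
                          housenumber='7',
                          centroid=(2.5, 1.5),
                          geometry='POINT(2.5 1.5)')

        api = frontend(apiobj, options=API_OPTIONS)

        assert api.reverse((2.5, 1.5)).place_id == 500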
index 036a235c9a0181d4b2a92fa853fb9215d9eff9d3..2acde2ecb49ee23f3f2c1722112f7dd1e43d0224 100644 (file)
@@ -14,8 +14,9 @@ import pytest
 from nominatim.version import NOMINATIM_VERSION, NominatimVersion
 import nominatim.api as napi
 
-def test_status_no_extra_info(apiobj):
-    result = apiobj.api.status()
+def test_status_no_extra_info(apiobj, frontend):
+    api = frontend(apiobj)
+    result = api.status()
 
     assert result.status == 0
     assert result.message == 'OK'
@@ -24,14 +25,15 @@ def test_status_no_extra_info(apiobj):
     assert result.data_updated is None
 
 
-def test_status_full(apiobj):
+def test_status_full(apiobj, frontend):
     import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0, tzinfo=dt.timezone.utc)
     apiobj.add_data('import_status',
                     [{'lastimportdate': import_date}])
     apiobj.add_data('properties',
                     [{'property': 'database_version', 'value': '99.5.4-2'}])
 
-    result = apiobj.api.status()
+    api = frontend(apiobj)
+    result = api.status()
 
     assert result.status == 0
     assert result.message == 'OK'
diff --git a/test/python/api/test_export.py b/test/python/api/test_export.py
new file mode 100644 (file)
index 0000000..0fd5274
--- /dev/null
@@ -0,0 +1,72 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for export CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture
+def run_export(tmp_path, capsys):
+    def _exec(args):
+        assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+                                            osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+                                            cli_args=['export', '--project-dir', str(tmp_path)]
+                                                     + args)
+        return capsys.readouterr().out.split('\r\n')
+
+    return _exec
+
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj):
+    apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+                     class_='highway', type='residential',  name='Street',
+                     country_code='pl', postcode='55674',
+                     rank_search=27, rank_address=26)
+    apiobj.add_address_placex(332, fromarea=False, isaddress=False,
+                              distance=0.0034,
+                              place_id=1000, osm_type='N', osm_id=3333,
+                              class_='place', type='suburb', name='Smallplace',
+                              country_code='pl', admin_level=13,
+                              rank_search=24, rank_address=23)
+    apiobj.add_address_placex(332, fromarea=True, isaddress=True,
+                              place_id=1001, osm_type='N', osm_id=3334,
+                              class_='place', type='city', name='Bigplace',
+                              country_code='pl',
+                              rank_search=17, rank_address=16)
+
+
+def test_export_default(run_export):
+    csv = run_export([])
+
+    assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_output_type(run_export):
+    csv = run_export(['--output-type', 'city'])
+
+    assert csv == ['street,suburb,city,county,state,country', ',,Bigplace,,,', '']
+
+
+def test_export_output_format(run_export):
+    csv = run_export(['--output-format', 'placeid;street;nothing;postcode'])
+
+    assert csv == ['placeid,street,nothing,postcode', '332,Street,,55674', '']
+
+
+def test_export_restrict_to_node_good(run_export):
+    csv = run_export(['--restrict-to-osm-node', '3334'])
+
+    assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_restrict_to_node_not_address(run_export):
+    csv = run_export(['--restrict-to-osm-node', '3333'])
+
+    assert csv == ['street,suburb,city,county,state,country', '']
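
The run_export fixture drives the real `nominatim export` subcommand end to end. Outside the test suite the equivalent direct call would be the following sketch, reusing only flags that appear above (the project path is a placeholder):

    # Sketch of a direct CLI invocation with flags exercised by these tests.
    import nominatim.cli

    nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
                            osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
                            cli_args=['export',
                                      '--project-dir', '/path/to/project',
                                      '--output-type', 'city',
                                      '--output-format', 'placeid;street;postcode'])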
index 45f538dea34fb777e90d39969b87fba2ab6c489e..e4862b0d807bb4569ff2161807987b76265c557f 100644 (file)
@@ -11,7 +11,11 @@ import pytest
 
 import nominatim.api.v1.helpers as helper
 
-@pytest.mark.parametrize('inp', ['', 'abc', '12 23', 'abc -78.90, 12.456 def'])
+@pytest.mark.parametrize('inp', ['',
+                                 'abc',
+                                 '12 23',
+                                 'abc -78.90, 12.456 def',
+                                 '40 N 60 W'])
 def test_extract_coords_no_coords(inp):
     query, x, y = helper.extract_coords_from_query(inp)
 
index 232740b417fa759fa6624c83fc22f564f9cb4c6f..2a279028b370cb4815684609b3b4dc4365ad5c3b 100644 (file)
@@ -23,6 +23,8 @@ def mkpoint(x, y):
 
 class FakeRow:
     def __init__(self, **kwargs):
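+        # always provide parent_place_id; result handling expects the attribute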
+        if 'parent_place_id' not in kwargs:
+            kwargs['parent_place_id'] = None
         for k, v in kwargs.items():
             setattr(self, k, v)
         self._mapping = kwargs
index a731e72034df09c0dadfc985d8057ff656bf6b97..5a7430f48db2b57c668c06ef27f88b80edb704ab 100644 (file)
@@ -7,56 +7,18 @@
 """
 Tests for the Python web frameworks adaptor, v1 API.
 """
-from collections import namedtuple
 import json
 import xml.etree.ElementTree as ET
 from pathlib import Path
 
 import pytest
 
-from nominatim.config import Configuration
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
 import nominatim.api.v1.server_glue as glue
 import nominatim.api as napi
 import nominatim.api.logging as loglib
 
-class FakeError(BaseException):
-
-    def __init__(self, msg, status):
-        self.msg = msg
-        self.status = status
-
-    def __str__(self):
-        return f'{self.status} -- {self.msg}'
-
-FakeResponse = namedtuple('FakeResponse', ['status', 'output', 'content_type'])
-
-class FakeAdaptor(glue.ASGIAdaptor):
-
-    def __init__(self, params=None, headers=None, config=None):
-        self.params = params or {}
-        self.headers = headers or {}
-        self._config = config or Configuration(None)
-
-
-    def get(self, name, default=None):
-        return self.params.get(name, default)
-
-
-    def get_header(self, name, default=None):
-        return self.headers.get(name, default)
-
-
-    def error(self, msg, status=400):
-        return FakeError(msg, status)
-
-
-    def create_response(self, status, output):
-        return FakeResponse(status, output, self.content_type)
-
-
-    def config(self):
-        return self._config
-
 
 # ASGIAdaptor.get_int/bool()
 
@@ -105,7 +67,7 @@ def test_adaptor_parse_format_use_configured():
     adaptor = FakeAdaptor(params={'format': 'json'})
 
     assert adaptor.parse_format(napi.StatusResult, 'text') == 'json'
-    assert adaptor.content_type == 'application/json'
+    assert adaptor.content_type == 'application/json; charset=utf-8'
 
 
 def test_adaptor_parse_format_invalid_value():
@@ -170,7 +132,7 @@ class TestAdaptorRaiseError:
 
 
     def test_json(self):
-        self.adaptor.content_type = 'application/json'
+        self.adaptor.content_type = 'application/json; charset=utf-8'
 
         err = self.run_raise_error('TEST', 501)
 
@@ -227,7 +189,7 @@ def test_build_response_with_status():
     assert isinstance(resp, FakeResponse)
     assert resp.status == 404
     assert resp.output == 'stuff\nmore stuff'
-    assert resp.content_type == 'application/json'
+    assert resp.content_type == 'application/json; charset=utf-8'
 
 
 def test_build_response_jsonp_with_json():
@@ -239,7 +201,7 @@ def test_build_response_jsonp_with_json():
     assert isinstance(resp, FakeResponse)
     assert resp.status == 200
     assert resp.output == 'test.func({})'
-    assert resp.content_type == 'application/javascript'
+    assert resp.content_type == 'application/javascript; charset=utf-8'
 
 
 def test_build_response_jsonp_without_json():
@@ -308,7 +270,7 @@ class TestStatusEndpoint:
 
         assert isinstance(resp, FakeResponse)
         assert resp.status == 200
-        assert resp.content_type == 'application/json'
+        assert resp.content_type == 'application/json; charset=utf-8'
 
 
     @pytest.mark.asyncio
@@ -546,9 +508,8 @@ class TestSearchEndPointSearch:
         a.params['q'] = 'something'
         a.params['city'] = 'ignored'
 
-        res = await glue.search_endpoint(napi.NominatimAPIAsync(Path('/invalid')), a)
-
-        assert len(json.loads(res.output)) == 1
+        with pytest.raises(FakeError, match='^400 -- .*cannot be used together'):
+            await glue.search_endpoint(napi.NominatimAPIAsync(Path('/invalid')), a)
 
 
     @pytest.mark.asyncio
diff --git a/test/python/api/test_warm.py b/test/python/api/test_warm.py
new file mode 100644 (file)
index 0000000..af48732
--- /dev/null
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for warm-up CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj, table_factory):
+    table_factory('word',
+                  definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+                  content=[(55, 'test', 'W', 'test', None),
+                           (2, 'test', 'w', 'test', None)])
+
+    apiobj.add_data('properties',
+                    [{'property': 'tokenizer', 'value': 'icu'},
+                     {'property': 'tokenizer_import_normalisation', 'value': ':: lower();'},
+                     {'property': 'tokenizer_import_transliteration', 'value': "'1' > '/1/'; 'ä' > 'ä '"},
+                    ])
+
+
+@pytest.mark.parametrize('args', [['--search-only'], ['--reverse-only']])
+def test_warm_all(tmp_path, args):
+    assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+                                        osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+                                        cli_args=['admin', '--project-dir', str(tmp_path),
+                                                  '--warm'] + args)
index 09bfd3534ad4ee96032e49db1d18d70eee9d5f32..7aea2c5917c8c716f679ec975df0bedd61a7f058 100644 (file)
@@ -46,26 +46,18 @@ class DummyTokenizer:
 
 
 @pytest.fixture
-def cli_call(src_dir):
+def cli_call():
     """ Call the nominatim main function with the correct paths set.
         Returns a function that can be called with the desired CLI arguments.
     """
     def _call_nominatim(*args):
         return nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
                                        osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
-                                       phpcgi_path='/usr/bin/php-cgi',
                                        cli_args=args)
 
     return _call_nominatim
 
 
-@pytest.fixture
-def mock_run_legacy(monkeypatch):
-    mock = MockParamCapture()
-    monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
-    return mock
-
-
 @pytest.fixture
 def mock_func_factory(monkeypatch):
     def get_mock(module, func):
index f1bb75a97582a790e656746be82443201d178e98..93e8610887a94dd501d1b4fd3a203ed417267851 100644 (file)
@@ -100,35 +100,6 @@ def test_cli_serve_uvicorn_based(cli_call, engine, mock_func_factory):
     assert func.last_kwargs['host'] == '127.0.0.1'
     assert func.last_kwargs['port'] == 8088
 
-def test_cli_export_command(cli_call, mock_run_legacy):
-    assert cli_call('export', '--output-all-postcodes') == 0
-
-    assert mock_run_legacy.called == 1
-    assert mock_run_legacy.last_args[0] == 'export.php'
-
-
-@pytest.mark.parametrize("param,value", [('output-type', 'country'),
-                                         ('output-format', 'street;city'),
-                                         ('language', 'xf'),
-                                         ('restrict-to-country', 'us'),
-                                         ('restrict-to-osm-node', '536'),
-                                         ('restrict-to-osm-way', '727'),
-                                         ('restrict-to-osm-relation', '197532')
-                                        ])
-def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch):
-    (tmp_path / 'admin').mkdir()
-    (tmp_path / 'admin' / 'export.php').write_text(f"""<?php
-        exit(strpos(implode(' ', $_SERVER['argv']), '--{param} {value}') >= 0 ? 0 : 10);
-        """)
-
-    monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path)
-
-    assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
-                                   osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
-                                   phpcgi_path='/usr/bin/php-cgi',
-                                   cli_args=['export', '--' + param, value]) == 0
-
-
 
 class TestCliWithDb:
 
index 696e2dd2a616ea64b2a5369842db6dfb40b32d8f..45104ea6850fd75ea596bc7818774a715a17063f 100644 (file)
@@ -19,17 +19,6 @@ import nominatim.tools.migration
 import nominatim.clicmd.admin
 
 
-@pytest.mark.parametrize("params", [('--warm', ),
-                                    ('--warm', '--reverse-only'),
-                                    ('--warm', '--search-only')])
-def test_admin_command_legacy(cli_call, mock_func_factory, params):
-    mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
-
-    assert cli_call('admin', *params) == 0
-
-    assert mock_run_legacy.called == 1
-
-
 def test_admin_command_check_database(cli_call, mock_func_factory):
     mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
 
@@ -44,6 +33,17 @@ def test_admin_migrate(cli_call, mock_func_factory):
     assert mock.called == 1
 
 
+def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+    assert cli_call('admin', '--clean-deleted', '1 month') == 0
+    assert mock.called == 1
+
+def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+    assert cli_call('admin', '--clean-deleted') == 1
+
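The --clean-deleted switch requires an age argument. A sketch using only values that appear in this commit (the interval validation itself is covered by TestAdminCleanDeleted further down):

    # Sketch: the age argument is mandatory; PostgreSQL interval strings
    # and ISO 8601 durations both pass (see TestAdminCleanDeleted below).
    assert cli_call('admin', '--clean-deleted', '1 month') == 0
    assert cli_call('admin', '--clean-deleted', 'P3D') == 0   # three days
    assert cli_call('admin', '--clean-deleted') == 1          # missing age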
 class TestCliAdminWithDb:
 
     @pytest.fixture(autouse=True)
index 05e3c4f09f7171d20209833a9832445a022bdb9d..ca160a359e5cb97b7f1de5d8ec96afea676f98e4 100644 (file)
@@ -67,7 +67,9 @@ class TestCliReverseCall:
         result = napi.ReverseResult(napi.SourceTable.PLACEX, ('place', 'thing'),
                                     napi.Point(1.0, -3.0),
                                     names={'name':'Name', 'name:fr': 'Nom'},
-                                    extratags={'extra':'Extra'})
+                                    extratags={'extra':'Extra'},
+                                    locale_name='Name',
+                                    display_name='Name')
 
         monkeypatch.setattr(napi.NominatimAPI, 'reverse',
                             lambda *args, **kwargs: result)
@@ -109,16 +111,6 @@ class TestCliReverseCall:
         assert out['type'] == 'FeatureCollection'
 
 
-    def test_reverse_language(self, cli_call, tmp_path, capsys):
-        result = cli_call('reverse', '--project-dir', str(tmp_path),
-                          '--lat', '34', '--lon', '34', '--lang', 'fr')
-
-        assert result == 0
-
-        out = json.loads(capsys.readouterr().out)
-        assert out['name'] == 'Nom'
-
-
 class TestCliLookupCall:
 
     @pytest.fixture(autouse=True)
@@ -126,7 +118,9 @@ class TestCliLookupCall:
         result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
                                     napi.Point(1.0, -3.0),
                                     names={'name':'Name', 'name:fr': 'Nom'},
-                                    extratags={'extra':'Extra'})
+                                    extratags={'extra':'Extra'},
+                                    locale_name='Name',
+                                    display_name='Name')
 
         monkeypatch.setattr(napi.NominatimAPI, 'lookup',
                             lambda *args, **kwargs: napi.SearchResults([result]))
@@ -150,9 +144,11 @@ class TestCliLookupCall:
                                              ])
 def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params):
     result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
-                                napi.Point(1.0, -3.0),
-                                names={'name':'Name', 'name:fr': 'Nom'},
-                                extratags={'extra':'Extra'})
+                               napi.Point(1.0, -3.0),
+                               names={'name':'Name', 'name:fr': 'Nom'},
+                               extratags={'extra':'Extra'},
+                               locale_name='Name',
+                               display_name='Name')
 
     monkeypatch.setattr(napi.NominatimAPI, endpoint,
                         lambda *args, **kwargs: napi.SearchResults([result]))
index f2c965ad9b1db0017864b5bbaec1677023b1d838..14498cbc0abd5cfeba585e09b2aa627a6616d57e 100644 (file)
@@ -89,6 +89,143 @@ def test_postcode_sweden_fail(sanitize, postcode):
     assert sanitize(country='se', postcode=postcode) == []
 
 
+@pytest.mark.parametrize("postcode", ('AD123', '123', 'AD 123', 'AD-123'))
+def test_postcode_andorra_pass(sanitize, postcode):
+    assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')]
+
+
+@pytest.mark.parametrize("postcode", ('AD1234', 'AD AD123', 'XX123'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_andorra_fail(sanitize, postcode):
+    assert sanitize(country='ad', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('AI-2640', '2640', 'AI 2640'))
+def test_postcode_anguilla_pass(sanitize, postcode):
+    assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')]
+
+
+@pytest.mark.parametrize("postcode", ('AI-2000', 'AI US-2640', 'AI AI-2640'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_anguilla_fail(sanitize, postcode):
+    assert sanitize(country='ai', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111'))
+def test_postcode_brunei_pass(sanitize, postcode):
+    assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')]
+
+
+@pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_brunei_fail(sanitize, postcode):
+    assert sanitize(country='bn', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA'))
+def test_postcode_isle_of_man_pass(sanitize, postcode):
+    assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')]
+
+
+@pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_isle_of_man_fail(sanitize, postcode):
+    assert sanitize(country='im', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA'))
+def test_postcode_jersey_pass(sanitize, postcode):
+    assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')]
+
+
+@pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_jersey_fail(sanitize, postcode):
+    assert sanitize(country='je', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('KY1-1234', '1-1234', 'KY 1-1234'))
+def test_postcode_cayman_islands_pass(sanitize, postcode):
+    assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')]
+
+
+@pytest.mark.parametrize("postcode", ('KY-1234', 'KZ1-1234', 'KY1 1234', 'KY1-123', 'KY KY1-1234'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_cayman_islands_fail(sanitize, postcode):
+    assert sanitize(country='ky', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LC11 222', '11 222', '11222', 'LC 11 222'))
+def test_postcode_saint_lucia_pass(sanitize, postcode):
+    assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')]
+
+
+@pytest.mark.parametrize("postcode", ('11 2222', 'LC LC11 222'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_lucia_fail(sanitize, postcode):
+    assert sanitize(country='lc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LV-1111', '1111', 'LV 1111', 'LV1111',))
+def test_postcode_latvia_pass(sanitize, postcode):
+    assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'LV LV-1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_latvia_fail(sanitize, postcode):
+    assert sanitize(country='lv', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('MD-1111', '1111', 'MD 1111', 'MD1111'))
+def test_postcode_moldova_pass(sanitize, postcode):
+    assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')]
+
+
+@pytest.mark.parametrize("postcode", ("MD MD-1111", "MD MD1111", "MD MD 1111"))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_moldova_fail(sanitize, postcode):
+    assert sanitize(country='md', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222'))
+def test_postcode_malta_pass(sanitize, postcode):
+    assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)]
+
+
+@pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111'))
+def test_postcode_malta_mtarfa_pass(sanitize, postcode):
+    assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')]
+
+
+@pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_malta_fail(sanitize, postcode):
+    assert sanitize(country='mt', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VC1111', '1111', 'VC-1111', 'VC 1111'))
+def test_postcode_saint_vincent_pass(sanitize, postcode):
+    assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')]
+
+
+@pytest.mark.parametrize("postcode", ('VC11', 'VC VC1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_vincent_fail(sanitize, postcode):
+    assert sanitize(country='vc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VG1111', '1111', 'VG 1111', 'VG-1111'))
+def test_postcode_virgin_islands_pass(sanitize, postcode):
+    assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'VG VG1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_virgin_islands_fail(sanitize, postcode):
+    assert sanitize(country='vg', postcode=postcode) == []
+
+
 @pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
 @pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
 def test_postcode_default_pattern_pass(sanitize, postcode):
@@ -99,4 +236,3 @@ def test_postcode_default_pattern_pass(sanitize, postcode):
 @pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
 def test_postcode_default_pattern_fail(sanitize, postcode):
     assert sanitize(country='an', postcode=postcode) == []
-
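
Each country block above follows the same scheme: an input is matched against a country-specific pattern and rewritten to one canonical form (for Latvia, 1111, LV 1111 and LV1111 all become LV-1111). A minimal standalone sketch of that normalise-and-format idea, using a hand-written regex instead of Nominatim's per-country configuration:

    # Minimal standalone sketch of the Latvian pass/fail behaviour above;
    # the real sanitizer takes its patterns from per-country configuration.
    import re

    _LV = re.compile(r'(?:LV[- ]?)?(\d{4})$')

    def normalise_lv(postcode):
        m = _LV.match(postcode.strip())
        return f'LV-{m.group(1)}' if m else None

    assert normalise_lv('LV-1111') == 'LV-1111'
    assert normalise_lv('LV 1111') == 'LV-1111'
    assert normalise_lv('LV1111') == 'LV-1111'
    assert normalise_lv('1111') == 'LV-1111'
    assert normalise_lv('11111') is None       # too many digits
    assert normalise_lv('LV LV-1111') is None  # doubled prefix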
diff --git a/test/python/tokenizer/sanitizers/test_tag_japanese.py b/test/python/tokenizer/sanitizers/test_tag_japanese.py
new file mode 100644 (file)
index 0000000..946f137
--- /dev/null
@@ -0,0 +1,80 @@
+from nominatim.data.place_info import PlaceInfo
+from nominatim.data.place_name import PlaceName
+from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
+from typing import Mapping, Optional, List
+import pytest
+
+class TestTagJapanese:
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
+
+    def run_sanitizer_on(self, type, **kwargs):
+        place = PlaceInfo({
+            'address': kwargs,
+            'country_code': 'jp'
+        })
+        sanitizer_args = {'step': 'tag-japanese'}
+        _, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
+        tmp_list = [(p.name,p.kind) for p in address]
+        return sorted(tmp_list)
+
+    def test_on_address(self):
+        res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
+        assert res == [('bar','ref'),('baz','ref_abc'),('foo','name')]
+
+    def test_housenumber(self):
+        res = self.run_sanitizer_on('address', housenumber='2')
+        assert res == [('2','housenumber')]
+
+    def test_blocknumber(self):
+        res = self.run_sanitizer_on('address', block_number='6')
+        assert res == [('6','housenumber')]
+
+    def test_neighbourhood(self):
+        res = self.run_sanitizer_on('address', neighbourhood='8')
+        assert res == [('8','place')]
+
+    def test_quarter(self):
+        res = self.run_sanitizer_on('address', quarter='kase')
+        assert res==[('kase','place')]
+
+    def test_housenumber_blocknumber(self):
+        res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
+        assert res == [('6-2','housenumber')]
+
+    def test_quarter_neighbourhood(self):
+        res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
+        assert res == [('kase8','place')]
+
+    def test_blocknumber_housenumber_quarter(self):
+        res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase')
+        assert res == [('6-2','housenumber'),('kase','place')]
+
+    def test_blocknumber_housenumber_quarter_neighbourhood(self):
+        res = self.run_sanitizer_on('address', block_number='6', housenumber='2', neighbourhood='8')
+        assert res == [('6-2','housenumber'),('8','place')]
+
+    def test_blocknumber_quarter_neighbourhood(self):
+        res = self.run_sanitizer_on('address', block_number='6', quarter='kase', neighbourhood='8')
+        assert res == [('6','housenumber'),('kase8','place')]
+
+    def test_blocknumber_quarter(self):
+        res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
+        assert res == [('6','housenumber'),('kase','place')]
+
+    def test_blocknumber_neighbourhood(self):
+        res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
+        assert res == [('6','housenumber'),('8','place')]
+
+    def test_housenumber_quarter_neighbourhood(self):
+        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase', neighbourhood='8')
+        assert res == [('2','housenumber'),('kase8','place')]
+
+    def test_housenumber_quarter(self):
+        res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
+        assert res == [('2','housenumber'),('kase','place')]
+
+    def test_housenumber_blocknumber_neighbourhood_quarter(self):
+        res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase', neighbourhood='8')
+        assert res == [('6-2','housenumber'),('kase8','place')]
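
The expected values encode two merging rules: block_number and housenumber combine into a single housenumber of the form block-house (6-2), and quarter and neighbourhood concatenate into one place value (kase8); a lone component passes through unchanged. A standalone sketch of just that merging logic (the real sanitizer operates on PlaceName objects):

    # Standalone sketch of the merging rules the expectations above encode.
    def merge_japanese_address(address):
        out = []
        house = address.get('housenumber')
        block = address.get('block_number')
        if house and block:
            out.append((f'{block}-{house}', 'housenumber'))
        elif house or block:
            out.append((house or block, 'housenumber'))
        quarter = address.get('quarter')
        neigh = address.get('neighbourhood')
        if quarter and neigh:
            out.append((quarter + neigh, 'place'))
        elif quarter or neigh:
            out.append((quarter or neigh, 'place'))
        return sorted(out)

    assert merge_japanese_address({'block_number': '6', 'housenumber': '2',
                                   'quarter': 'kase', 'neighbourhood': '8'}) \
           == [('6-2', 'housenumber'), ('kase8', 'place')]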
index 9c010b9d4b77e00bc784b305eb2c1cfceab3b2be..ae5944afa185f0a35327d5ebf1f5359862df5b22 100644 (file)
@@ -12,6 +12,7 @@ import pytest
 from nominatim.errors import UsageError
 from nominatim.tools import admin
 from nominatim.tokenizer import factory
+from nominatim.db.sql_preprocessor import SQLPreprocessor
 
 @pytest.fixture(autouse=True)
 def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
@@ -70,3 +71,78 @@ def test_analyse_indexing_with_osm_id(project_env, temp_db_cursor):
                               VALUES(9988, 'N', 10000)""")
 
     admin.analyse_indexing(project_env, osm_id='N10000')
+
+
+class TestAdminCleanDeleted:
+
+    @pytest.fixture(autouse=True)
+    def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
+        """ Set up place_force_delete function and related tables
+        """
+        self.project_env = project_env
+        self.temp_db_cursor = temp_db_cursor
+        table_factory('import_polygon_delete',
+                      """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL""",
+                      ((100, 'N', 'boundary', 'administrative'),
+                      (145, 'N', 'boundary', 'administrative'),
+                      (175, 'R', 'landcover', 'grass')))
+        temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
+                              VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
+                               (2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 month', 1),
+                               (3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
+        # set up tables and triggers for utils function
+        table_factory('place_to_be_deleted',
+                      """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL,
+                      deferred BOOLEAN""")
+        table_factory('country_name', 'partition INT')
+        table_factory('import_polygon_error', """osm_id BIGINT,
+                      osm_type CHAR(1),
+                      class TEXT NOT NULL,
+                      type TEXT NOT NULL""")
+        temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
+                               RETURNS TRIGGER AS $$
+                               BEGIN RETURN NULL; END;
+                               $$ LANGUAGE plpgsql;""")
+        temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
+                               FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
+        orig_sql = def_config.lib_dir.sql
+        def_config.lib_dir.sql = src_dir / 'lib-sql'
+        sqlproc = SQLPreprocessor(temp_db_conn, def_config)
+        sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
+        def_config.lib_dir.sql = orig_sql
+
+
+    def test_admin_clean_deleted_no_records(self):
+        admin.clean_deleted_relations(self.project_env, age='1 year')
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 1)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
+
+
+    @pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
+    def test_admin_clean_deleted_bad_age(self, test_age):
+        with pytest.raises(UsageError):
+            admin.clean_deleted_relations(self.project_env, age = test_age)
+
+
+    def test_admin_clean_deleted_partial(self):
+        admin.clean_deleted_relations(self.project_env, age = '2 months')
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 100)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
+
+    @pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
+    def test_admin_clean_deleted(self, test_age):
+        admin.clean_deleted_relations(self.project_env, age = test_age)
+        assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (145, 'N', 'boundary', 'administrative', 100),
+                                                                                                                   (175, 'R', 'landcover', 'grass', 100)}
+        assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0
index f73aec308404000e8fbab2f059b72ac5415dd73b..b4439c122dd4dee9d9630bc72e9f309a285cde26 100644 (file)
@@ -16,118 +16,6 @@ from nominatim.config import Configuration
 import nominatim.tools.exec_utils as exec_utils
 import nominatim.paths
 
-class TestRunLegacyScript:
-
-    @pytest.fixture(autouse=True)
-    def setup_nominatim_env(self, tmp_path, monkeypatch):
-        tmp_phplib_dir = tmp_path / 'phplib'
-        tmp_phplib_dir.mkdir()
-        (tmp_phplib_dir / 'admin').mkdir()
-
-        monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_phplib_dir)
-
-        self.phplib_dir = tmp_phplib_dir
-        self.config = Configuration(tmp_path)
-        self.config.set_libdirs(module='.', osm2pgsql='default_osm2pgsql',
-                                php=tmp_phplib_dir)
-
-
-    def mk_script(self, code):
-        codefile = self.phplib_dir / 'admin' / 't.php'
-        codefile.write_text('<?php\n' + code + '\n')
-
-        return 't.php'
-
-
-    @pytest.mark.parametrize("return_code", (0, 1, 15, 255))
-    def test_run_legacy_return_exit_code(self, return_code):
-        fname = self.mk_script('exit({});'.format(return_code))
-        assert return_code == \
-                 exec_utils.run_legacy_script(fname, config=self.config)
-
-
-    def test_run_legacy_return_throw_on_fail(self):
-        fname = self.mk_script('exit(11);')
-        with pytest.raises(subprocess.CalledProcessError):
-            exec_utils.run_legacy_script(fname, config=self.config,
-                                         throw_on_fail=True)
-
-
-    def test_run_legacy_return_dont_throw_on_success(self):
-        fname = self.mk_script('exit(0);')
-        assert exec_utils.run_legacy_script(fname, config=self.config,
-                                            throw_on_fail=True) == 0
-
-    def test_run_legacy_use_given_module_path(self):
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == '' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_do_not_overwrite_module_path(self, monkeypatch):
-        monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'other')
-        fname = self.mk_script(
-            "exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'other' ? 0 : 1);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_default_osm2pgsql_binary(self, monkeypatch):
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'default_osm2pgsql' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-    def test_run_legacy_override_osm2pgsql_binary(self, monkeypatch):
-        monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', 'somethingelse')
-
-        fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'somethingelse' ? 0 : 23);")
-
-        assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-class TestRunApiScript:
-
-    @staticmethod
-    @pytest.fixture(autouse=True)
-    def setup_project_dir(tmp_path):
-        webdir = tmp_path / 'website'
-        webdir.mkdir()
-        (webdir / 'test.php').write_text("<?php\necho 'OK\n';")
-
-
-    @staticmethod
-    def test_run_api(tmp_path):
-        assert exec_utils.run_api_script('test', tmp_path) == 0
-
-    @staticmethod
-    def test_run_api_execution_error(tmp_path):
-        assert exec_utils.run_api_script('badname', tmp_path) != 0
-
-    @staticmethod
-    def test_run_api_with_extra_env(tmp_path):
-        extra_env = dict(SCRIPT_FILENAME=str(tmp_path / 'website' / 'test.php'))
-        assert exec_utils.run_api_script('badname', tmp_path, extra_env=extra_env) == 0
-
-    @staticmethod
-    def test_custom_phpcgi(tmp_path, capfd):
-        assert exec_utils.run_api_script('test', tmp_path, phpcgi_bin='env',
-                                         params={'q' : 'Berlin'}) == 0
-        captured = capfd.readouterr()
-
-        assert '?q=Berlin' in captured.out
-
-    @staticmethod
-    def test_fail_on_error_output(tmp_path):
-        # Starting PHP 8 the PHP CLI no longer has STDERR defined as constant
-        php = """
-        <?php
-        if(!defined('STDERR')) define('STDERR', fopen('php://stderr', 'wb'));
-        fwrite(STDERR, 'WARNING'.PHP_EOL);
-        """
-        (tmp_path / 'website' / 'bad.php').write_text(php)
-
-        assert exec_utils.run_api_script('bad', tmp_path) == 1
 
 ### run_osm2pgsql
 
index e52bdee7645c6ea8f889406e9fb2b100f71e7827..720e80c890161296e520841004bffb58db564f61 100755 (executable)
@@ -20,11 +20,10 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
 
 # Now you can install all packages needed for Nominatim:
 
-    sudo apt install -y php-cgi
     sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
                         libboost-filesystem-dev libexpat1-dev zlib1g-dev \
                         libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
-                        postgresql-12-postgis-3 \
+                        nlohmann-json3-dev postgresql-12-postgis-3 \
                         postgresql-contrib-12 postgresql-12-postgis-3-scripts \
                         php-cli php-pgsql php-intl libicu-dev python3-dotenv \
                         python3-psycopg2 python3-psutil python3-jinja2 python3-pip \
@@ -79,8 +78,8 @@ fi                                 #DOCS:
 # ---------------------
 #
 # Tune the postgresql configuration, which is located in 
-# `/etc/postgresql/12/main/postgresql.conf`. See section *Postgres Tuning* in
-# [the installation page](../admin/Installation.md#postgresql-tuning)
+# `/etc/postgresql/12/main/postgresql.conf`. See section *Tuning the PostgreSQL database*
+# in [the installation page](../admin/Installation.md#tuning-the-postgresql-database)
 # for the parameters to change.
 #
 # Restart the postgresql service after updating this config file.
index fdb38203cc1959069ade4498e6f53103c15c5d10..174b8a771ab8ef95d277850c8699c7476ee9f48a 100755 (executable)
@@ -20,16 +20,15 @@ export DEBIAN_FRONTEND=noninteractive #DOCS:
 
 # Now you can install all packages needed for Nominatim:
 
-    sudo apt install -y php-cgi
     sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
                         libboost-filesystem-dev libexpat1-dev zlib1g-dev \
                         libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
-                        postgresql-server-dev-14 postgresql-14-postgis-3 \
+                        nlohmann-json3-dev postgresql-14-postgis-3 \
                         postgresql-contrib-14 postgresql-14-postgis-3-scripts \
                         php-cli php-pgsql php-intl libicu-dev python3-dotenv \
                         python3-psycopg2 python3-psutil python3-jinja2 \
                         python3-icu python3-datrie python3-sqlalchemy \
-                        python3-asyncpg git
+                        python3-asyncpg python3-yaml git
 
 #
 # System Configuration
@@ -74,8 +73,8 @@ fi                                 #DOCS:
 # ---------------------
 #
 # Tune the postgresql configuration, which is located in 
-# `/etc/postgresql/14/main/postgresql.conf`. See section *Postgres Tuning* in
-# [the installation page](../admin/Installation.md#postgresql-tuning)
+# `/etc/postgresql/14/main/postgresql.conf`. See section *Tuning the PostgreSQL database*
+# in [the installation page](../admin/Installation.md#tuning-the-postgresql-database)
 # for the parameters to change.
 #
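 # Typical starting points look like the following (illustrative values
 # only; the linked page is authoritative and also covers settings that
 # should only be used while the initial import runs):
 #
 #     shared_buffers = 2GB
 #     maintenance_work_mem = 10GB
 #     work_mem = 50MB
 #     effective_cache_size = 24GB
 #     synchronous_commit = off
 #     checkpoint_timeout = 10min
 #     checkpoint_completion_target = 0.9
 #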
 # Restart the postgresql service after updating this config file.