shell: bash
- name: Install ${{ matrix.flavour }} prerequisites
run: |
- sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson
+ sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev liblua${LUA_VERSION}-dev lua${LUA_VERSION} lua-dkjson nlohmann-json3-dev libspatialite7 libsqlite3-mod-spatialite
if [ "$FLAVOUR" == "oldstuff" ]; then
- pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4 datrie asyncpg
+ pip3 install MarkupSafe==2.0.1 python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu==2.9 osmium PyYAML==5.1 sqlalchemy==1.4.31 datrie asyncpg aiosqlite
else
sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml
- pip3 install sqlalchemy psycopg
+ pip3 install sqlalchemy psycopg aiosqlite
fi
shell: bash
env:
runs-on: ubuntu-latest
steps:
- - uses: actions/checkout@v3
+ - uses: actions/checkout@v4
with:
submodules: true
if: matrix.flavour != 'oldstuff'
- name: Install newer pytest-asyncio
- run: pip3 install -U pytest-asyncio
+ run: pip3 install -U pytest-asyncio==0.21.1
if: matrix.flavour == 'ubuntu-20'
- name: Install test prerequisites (from pip for Ubuntu 18)
if: matrix.flavour == 'oldstuff'
- name: Install Python webservers
- run: pip3 install falcon starlette
+ run: pip3 install falcon starlette asgi_lifespan
- name: Install latest pylint
- run: pip3 install -U pylint asgi_lifespan
+ run: pip3 install -U pylint
+ if: matrix.flavour != 'oldstuff'
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
+ if: matrix.flavour != 'oldstuff'
- name: Python linting
run: python3 -m pylint nominatim
working-directory: Nominatim
+ if: matrix.flavour != 'oldstuff'
- name: PHP unit tests
run: phpunit ./
- name: Clean up database (reverse-only import)
run: nominatim refresh --postcodes --word-tokens
working-directory: /home/nominatim/nominatim-project
+
+ install-no-superuser:
+ runs-on: ubuntu-latest
+ needs: create-archive
+
+ strategy:
+ matrix:
+ name: [Ubuntu-22]
+ include:
+ - name: Ubuntu-22
+ image: "ubuntu:22.04"
+ ubuntu: 22
+ install_mode: install-apache
+
+ container:
+ image: ${{ matrix.image }}
+ env:
+ LANG: en_US.UTF-8
+
+ defaults:
+ run:
+ shell: sudo -Hu nominatim bash --noprofile --norc -eo pipefail {0}
+
+ steps:
+ - name: Prepare container (Ubuntu)
+ run: |
+ export APT_LISTCHANGES_FRONTEND=none
+ export DEBIAN_FRONTEND=noninteractive
+ apt-get update -qq
+ apt-get install -y git sudo wget
+ ln -snf /usr/share/zoneinfo/$CONTAINER_TIMEZONE /etc/localtime && echo $CONTAINER_TIMEZONE > /etc/timezone
+ shell: bash
+
+ - name: Setup import user
+ run: |
+ useradd -m nominatim
+        echo 'nominatim ALL=(ALL:ALL) NOPASSWD: ALL' > /etc/sudoers.d/nominatim
+ echo "/home/nominatim/Nominatim/vagrant/Install-on-${OS}.sh no $INSTALL_MODE" > /home/nominatim/vagrant.sh
+ shell: bash
+ env:
+ OS: ${{ matrix.name }}
+ INSTALL_MODE: ${{ matrix.install_mode }}
+
+ - uses: actions/download-artifact@v3
+ with:
+ name: full-source
+ path: /home/nominatim
+
+ - name: Install Nominatim
+ run: |
+ export USERNAME=nominatim
+ export USERHOME=/home/nominatim
+ export NOSYSTEMD=yes
+ export HAVE_SELINUX=no
+ tar xf nominatim-src.tar.bz2
+ . vagrant.sh
+ working-directory: /home/nominatim
+
+ - name: Prepare import environment
+ run: |
+ mv Nominatim/test/testdb/apidb-test-data.pbf test.pbf
+ mv Nominatim/settings/flex-base.lua flex-base.lua
+ mv Nominatim/settings/import-extratags.lua import-extratags.lua
+ mv Nominatim/settings/taginfo.lua taginfo.lua
+ rm -rf Nominatim
+ mkdir data-env-reverse
+ working-directory: /home/nominatim
+
+ - name: Prepare Database
+ run: |
+ nominatim import --prepare-database
+ working-directory: /home/nominatim/nominatim-project
+
+ - name: Create import user
+ run: |
+ sudo -u postgres createuser -S osm-import
+ sudo -u postgres psql -c "ALTER USER \"osm-import\" WITH PASSWORD 'osm-import';"
+ working-directory: /home/nominatim/nominatim-project
+
+ - name: Grant import user rights
+ run: |
+ sudo -u postgres psql -c "GRANT INSERT, UPDATE ON ALL TABLES IN SCHEMA public TO \"osm-import\";"
+ working-directory: /home/nominatim/nominatim-project
+
+ - name: Run import
+ run: |
+ NOMINATIM_DATABASE_DSN="pgsql:host=127.0.0.1;dbname=nominatim;user=osm-import;password=osm-import" nominatim import --continue import-from-file --osm-file ../test.pbf
+ working-directory: /home/nominatim/nominatim-project
+
+ - name: Check full import
+ run: nominatim admin --check-database
+ working-directory: /home/nominatim/nominatim-project
\ No newline at end of file
project(nominatim)
set(NOMINATIM_VERSION_MAJOR 4)
-set(NOMINATIM_VERSION_MINOR 2)
+set(NOMINATIM_VERSION_MINOR 3)
set(NOMINATIM_VERSION_PATCH 0)
set(NOMINATIM_VERSION "${NOMINATIM_VERSION_MAJOR}.${NOMINATIM_VERSION_MINOR}.${NOMINATIM_VERSION_PATCH}")
else()
message (STATUS "Using PHP binary " ${PHP_BIN})
endif()
- if (NOT PHPCGI_BIN)
- find_program (PHPCGI_BIN php-cgi)
- endif()
- # sanity check if PHP binary exists
- if (NOT EXISTS ${PHPCGI_BIN})
- message(WARNING "php-cgi binary not found. nominatim tool will not provide query functions.")
- set (PHPCGI_BIN "")
- else()
- message (STATUS "Using php-cgi binary " ${PHPCGI_BIN})
- endif()
endif()
#-----------------------------------------------------------------------------
+4.3.0
+ * fix failing importance recalculation command
+ * fix merging of linked names into unnamed boundaries
+ * fix a number of corner cases with interpolation splitting resulting in
+ invalid geometries
+ * fix failure in website generation when password contains curly brackets
+ * fix broken use of ST_Project in PostGIS 3.4
+ * new NOMINATIM_SEARCH_WITHIN_COUNTRIES setting to restrict reverse lookups
+ to known countries (thanks @alfmarcua)
+ * allow negative OSM IDs (thanks @alfmarcua)
+ * disallow import of Tiger data in a frozen DB
+ * avoid UPDATE to change settings to be compatible with r/o DBs (thanks @t-tomek)
+ * update bundled osm2pgsql to 1.9.2
+ * reorganise osm2pgsql flex style and make it the default
+ * exclude names ending in :wikipedia from indexing
+ * no longer accept comma as a list separator in name tags
+ * process forward dependencies on update to catch updates in geometries
+ of ways and relations
+ * fix handling of isolated silent letters during transliteration
+ * no longer assign postcodes to large linear features like rivers
+ * introduce nominatim.paths module for finding data and libraries
+ * documentation layout changed to material theme
+ * new documentation section for library
+ * various smaller fixes to existing documentation
+ (thanks @woodpeck, @bloom256, @biswajit-k)
+ * updates to vagrant install scripts, drop support for Ubuntu 18
+ (thanks @n-timofeev)
+ * removed obsolete configuration variables from env.defaults
+ * add script for generating a taginfo description (thanks @biswajit-k)
+ * modernize Python code around BDD test and add testing of Python frontend
+ * lots of new BDD tests for API output
+
+4.2.3
+
+ * fix deletion handling for 'nominatim add-data'
+ * adapt place_force_delete() to new deletion handling
+ * flex style: avoid dropping of postcode areas
+ * fix update errors on address interpolation handling
+
4.2.2
* extend flex-style library to fully support all default styles
| Version | End of support for security updates |
| ------- | ----------------------------------- |
+| 4.3.x | 2025-09-07 |
| 4.2.x | 2024-11-24 |
| 4.1.x | 2024-08-05 |
-| 4.0.x | 2023-11-02 |
-| 3.7.x | 2023-04-05 |
## Reporting a Vulnerability
## List of Previous Incidents
-* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
+* 2023-11-20 - [SQL injection vulnerability](https://nominatim.org/2023/11/20/release-432.html)
* 2023-02-21 - [cross-site scripting vulnerability](https://nominatim.org/2023/02/21/release-421.html)
+* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html)
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@NOMINATIM_LIBDIR@/module',
- osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql',
- phpcgi_path='@PHPCGI_BIN@'))
+ osm2pgsql_path='@NOMINATIM_LIBDIR@/osm2pgsql'))
version.GIT_COMMIT_HASH = '@GIT_HASH@'
exit(cli.nominatim(module_dir='@CMAKE_BINARY_DIR@/module',
- osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql',
- phpcgi_path='@PHPCGI_BIN@'))
+ osm2pgsql_path='@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql'))
develop
api
customize
+ library
index.md
extra.css
styles.css
ADD_CUSTOM_TARGET(doc
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-20.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-20.md
COMMAND ${CMAKE_CURRENT_SOURCE_DIR}/bash2md.sh ${PROJECT_SOURCE_DIR}/vagrant/Install-on-Ubuntu-22.sh ${CMAKE_CURRENT_BINARY_DIR}/appendix/Install-on-Ubuntu-22.md
- COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+ COMMAND mkdocs build -d ${CMAKE_CURRENT_BINARY_DIR}/../site-html -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
)
ADD_CUSTOM_TARGET(serve-doc
- COMMAND PYTHONPATH=${PROJECT_SOURCE_DIR} mkdocs serve
- WORKING_DIRECTORY ${PROJECT_BINARY_DIR}
+ COMMAND mkdocs serve -f ${CMAKE_CURRENT_BINARY_DIR}/../mkdocs.yml
+ WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
)
```bash
update
- ├── europe
- │ ├── andorra
- │ │ └── sequence.state
- │ └── monaco
- │ └── sequence.state
- └── tmp
- └── europe
- ├── andorra-latest.osm.pbf
- └── monaco-latest.osm.pbf
-
+ ├── europe
+ │ ├── andorra
+ │ │ └── sequence.state
+ │ └── monaco
+ │ └── sequence.state
+ └── tmp
+ └── europe
+ ├── andorra-latest.osm.pbf
+ └── monaco-latest.osm.pbf
```
This will get diffs from the replication server, import diffs and index
the database. The default replication server in the
-script([Geofabrik](https://download.geofabrik.de)) provides daily updates.
+script ([Geofabrik](https://download.geofabrik.de)) provides daily updates.
## Using an external PostgreSQL database
-# Deploying Nominatim
+# Deploying Nominatim using the PHP frontend
The Nominatim API is implemented as a PHP application. The `website/` directory
in the project directory contains the configured website. You can serve this
This section gives a quick overview on how to configure Apache and Nginx to
serve Nominatim. It is not meant as a full system administration guide on how
to run a web service. Please refer to the documentation of
-[Apache](http://httpd.apache.org/docs/current/) and
+[Apache](https://httpd.apache.org/docs/current/) and
[Nginx](https://nginx.org/en/docs/)
for background information on configuring the services.
!!! Note
- Throughout this page, we assume that your Nominatim project directory is
- located in `/srv/nominatim-project` and that you have installed Nominatim
+ Throughout this page, we assume your Nominatim project directory is
+ located in `/srv/nominatim-project` and you have installed Nominatim
using the default installation prefix `/usr/local`. If you have put it
somewhere else, you need to adjust the commands and configuration
accordingly.
--- /dev/null
+# Deploying the Nominatim Python frontend
+
+Nominatim can be run as a Python-based
+[ASGI web application](https://asgi.readthedocs.io/en/latest/). You have the
+choice between [Falcon](https://falcon.readthedocs.io/en/stable/)
+and [Starlette](https://www.starlette.io/) as the ASGI framework.
+
+This section gives a quick overview on how to configure Nginx to serve
+Nominatim. Please refer to the documentation of
+[Nginx](https://nginx.org/en/docs/) for background information on how
+to configure it.
+
+!!! Note
+ Throughout this page, we assume your Nominatim project directory is
+ located in `/srv/nominatim-project` and you have installed Nominatim
+ using the default installation prefix `/usr/local`. If you have put it
+ somewhere else, you need to adjust the commands and configuration
+ accordingly.
+
+ We further assume that your web server runs as user `www-data`. Older
+ versions of CentOS may still use the user name `apache`. You also need
+ to adapt the instructions in this case.
+
+### Installing the required packages
+
+The recommended way to deploy a Python ASGI application is to run
+the ASGI runner [uvicorn](https://uvicorn.org/)
+together with the [gunicorn](https://gunicorn.org/) HTTP server. We use
+Falcon here as the web framework.
+
+Create a virtual environment for the Python packages and install the necessary
+dependencies:
+
+``` sh
+sudo apt install virtualenv
+virtualenv /srv/nominatim-venv
+/srv/nominatim-venv/bin/pip install SQLAlchemy PyICU psycopg[binary] \
+ psycopg2-binary python-dotenv PyYAML falcon uvicorn gunicorn
+```
+
+### Setting up Nominatim as a systemd job
+
+Next you need to set up the service that runs the Nominatim frontend. This is
+most easily done with a systemd job.
+
+First you need to tell systemd to create a socket file to be used by
+gunicorn. Create the following file `/etc/systemd/system/nominatim.socket`:
+
+``` systemd
+[Unit]
+Description=Gunicorn socket for Nominatim
+
+[Socket]
+ListenStream=/run/nominatim.sock
+SocketUser=www-data
+
+[Install]
+WantedBy=multi-user.target
+```
+
+Now you can add the systemd service for Nominatim itself.
+Create the following file `/etc/systemd/system/nominatim.service`:
+
+``` systemd
+[Unit]
+Description=Nominatim running as a gunicorn application
+After=network.target
+Requires=nominatim.socket
+
+[Service]
+Type=simple
+Environment="PYTHONPATH=/usr/local/lib/nominatim/lib-python/"
+User=www-data
+Group=www-data
+WorkingDirectory=/srv/nominatim-project
+ExecStart=/srv/nominatim-venv/bin/gunicorn -b unix:/run/nominatim.sock -w 4 -k uvicorn.workers.UvicornWorker nominatim.server.falcon.server:run_wsgi
+ExecReload=/bin/kill -s HUP $MAINPID
+StandardOutput=append:/var/log/gunicorn-nominatim.log
+StandardError=inherit
+PrivateTmp=true
+TimeoutStopSec=5
+KillMode=mixed
+
+[Install]
+WantedBy=multi-user.target
+```
+
+This sets up gunicorn with 4 workers (`-w 4` in ExecStart). Each worker runs
+its own Python process using
+[`NOMINATIM_API_POOL_SIZE`](../customize/Settings.md#nominatim_api_pool_size)
+connections to the database to serve requests in parallel.
+
+Make the new services known to systemd and start them:
+
+``` sh
+sudo systemctl daemon-reload
+sudo systemctl enable nominatim.socket
+sudo systemctl start nominatim.socket
+sudo systemctl enable nominatim.service
+sudo systemctl start nominatim.service
+```
+
+This sets up the services so that Nominatim is automatically started
+on reboot.
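+
+To verify that the socket and the service came up, you can check their
+status (standard systemd tooling, shown here as a quick sanity check):
+
+``` sh
+sudo systemctl status nominatim.socket nominatim.service
+```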
+
+### Configuring nginx
+
+To make the service available to the world, you need to proxy it through
+nginx. Add the following definition to the default configuration:
+
+``` nginx
+upstream nominatim_service {
+ server unix:/run/nominatim.sock fail_timeout=0;
+}
+
+server {
+ listen 80;
+ listen [::]:80;
+
+ root /var/www/html;
+ index /search;
+
+ location / {
+ proxy_set_header Host $http_host;
+ proxy_set_header X-Forwarded-For $proxy_add_x_forwarded_for;
+ proxy_set_header X-Forwarded-Proto $scheme;
+ proxy_redirect off;
+ proxy_pass http://nominatim_service;
+ }
+}
+```
+
+Reload nginx with
+
+```
+sudo systemctl reload nginx
+```
+
+and you should be able to see the status of your server under
+`http://localhost/status`.
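+
+For example, a quick check from the command line:
+
+``` sh
+curl http://localhost/status
+```
+
+should print `OK` when the service is healthy.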
nominatim admin --check-database
```
-Now you can try out your installation by running:
+Now you can try out your installation by executing a simple query on the
+command line:
+
+``` sh
+nominatim search --query Berlin
+```
+
+or, when you have a reverse-only installation:
+
+``` sh
+nominatim reverse --lat 51 --lon 45
+```
+
+If you want to run Nominatim as a service, you need to make a choice between
+running the traditional PHP frontend or the new experimental Python frontend.
+Make sure you have installed the right packages as per
+[Installation](Installation.md#software).
+
+#### Testing the PHP frontend
+
+You can run a small test server with the PHP frontend like this:
```sh
nominatim serve
```
-This runs a small test server normally used for development. You can use it
-to verify that your installation is working. Go to
-`http://localhost:8088/status.php` and you should see the message `OK`.
-You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`.
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations, a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the PHP frontend](Deployment-PHP.md).
+
+#### Testing the Python frontend
+
+To run the test server against the Python frontend, you must choose a
+web framework to use, either starlette or falcon. Make sure the appropriate
+packages are installed. Then run
+
+``` sh
+nominatim serve --engine falcon
+```
+
+or
+
+``` sh
+nominatim serve --engine starlette
+```
+
+Go to `http://localhost:8088/status.php` and you should see the message `OK`.
+You can also run a search query, e.g. `http://localhost:8088/search.php?q=Berlin`
+or, for reverse-only installations, a reverse query,
+e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
-Note that search query is not supported for reverse-only imports. You can run a
-reverse query, e.g. `http://localhost:8088/reverse.php?lat=27.1750090510034&lon=78.04209025`.
+Do not use this test server in production.
+To run Nominatim via webservers like Apache or nginx, please continue reading
+[Deploy the Python frontend](Deployment-Python.md).
-To run Nominatim via webservers like Apache or nginx, please read the
-[Deployment chapter](Deployment.md).
-## Adding search through category phrases
+## Enabling search by category phrases
-If you want to be able to search for places by their type through
+To be able to search for places by their type using
[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
* [ICU](http://site.icu-project.org/)
+ * [nlohmann/json](https://json.nlohmann.me/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
- * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4+ with greenlet support)
+ * [SQLAlchemy](https://www.sqlalchemy.org/) (1.4.31+ with greenlet support)
* [asyncpg](https://magicstack.github.io/asyncpg) (0.8+)
* [PyICU](https://pypi.org/project/PyICU/)
* [PyYaml](https://pyyaml.org/) (5.1+)
* [datrie](https://github.com/pytries/datrie)
+
+When running the PHP frontend:
+
* [PHP](https://php.net) (7.3+)
* PHP-pgsql
* PHP-intl (bundled with PHP)
- * PHP-cgi (for running queries from the command line)
For running continuous updates:
Fast disks are essential. Using NVME disks is recommended.
Even on a well configured machine the import of a full planet takes
-around 2 days. On traditional spinning disks, 7-8 days are more realistic.
+around 2 days. When using traditional SSDs, 4-5 days are more realistic.
## Tuning the PostgreSQL database
and even reduce `autovacuum_work_mem` further. This will reduce the amount
of memory that autovacuum takes away from the import process.
-For the initial import, you should also set:
-
- fsync = off
- full_page_writes = off
-
-Don't forget to re-enable them after the initial import or you risk database
-corruption.
-
-
## Downloading and building Nominatim
### Downloading the latest release
## Removing large deleted objects
+Command: `nominatim admin --clean-deleted <PostgreSQL Time Interval>`
+
Nominatim refuses to delete very large areas because often these deletions are
accidental and are reverted within hours. Instead the deletions are logged in
the `import_polygon_delete` table and left to the administrator to clean up.
-There is currently no command to do that. You can use the following SQL
-query to force a deletion on all objects that have been deleted more than
-a certain timespan ago (here: 1 month):
+To run this command you will need to pass a PostgreSQL time interval. For example, to
+delete any objects that have been deleted more than a month ago, you would run:
+`nominatim admin --clean-deleted '1 month'`
-```sql
-SELECT place_force_delete(p.place_id) FROM import_polygon_delete d, placex p
-WHERE p.osm_type = d.osm_type and p.osm_id = d.osm_id
- and age(p.indexed_date) > '1 month'::interval
-```
If you are migrating from a version <3.6, then you still have to follow
the manual migration steps up to 3.6.
-## 4.1.0 -> master
+## 4.2.0 -> 4.3.0
### New indexes for reverse lookup
version update or create the index manually **before** starting the update
using the following SQL:
-```
+```sql
CREATE INDEX IF NOT EXISTS idx_placex_geometry_reverse_lookupPlaceNode
ON placex USING gist (ST_Buffer(geometry, reverse_place_diameter(rank_search)))
WHERE rank_address between 4 and 25 AND type != 'postcode'
Show all details about a single place saved in the database.
+This API endpoint is meant for visual inspection of the data in the database,
+mainly together with [Nominatim-UI](https://github.com/osm-search/nominatim-ui/).
+The parameters of the endpoint and the output may change occasionally between
+versions of Nominatim. Do not rely on the output in scripts or applications.
+
!!! warning
- The details page exists for debugging only. You may not use it in scripts
- or to automatically query details about a result.
+ The details endpoint at https://nominatim.openstreetmap.org
+    may not be used in scripts or bots at all.
See [Nominatim Usage Policy](https://operations.osmfoundation.org/policies/nominatim/).
-## Parameters
The details API supports the following two request formats:
changes when data gets reimported. Therefore it cannot be used as
a permanent id and shouldn't be used in bug reports.
+!!! danger "Deprecation warning"
+ The API can also be used with the URL
+ `https://nominatim.openstreetmap.org/details.php`. This is now deprecated
+ and will be removed in future versions.
-Additional optional parameters are explained below.
+
+## Parameters
+
+This section lists additional optional parameters.
### Output format
-* `json_callback=<string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
+When set, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
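+
+For example, a request such as the following (the OSM way id is illustrative)
+returns the JSON document wrapped as `handleDetails(<json>);`:
+
+```
+https://nominatim.openstreetmap.org/details?osmtype=W&osmid=90394480&json_callback=handleDetails
+```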
-* `pretty=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| pretty | 0 or 1 | 0 |
-Add indentation to make it more human-readable. (Default: 0)
+`[PHP-only]` Add indentation to the output to make it more human-readable.
### Output details
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
-Include a breakdown of the address into elements. (Default: 0)
+When set to 1, include a breakdown of the address into elements.
-* `keywords=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| keywords | 0 or 1 | 0 |
-Include a list of name keywords and address keywords (word ids). (Default: 0)
+When set to 1, include a list of name keywords and address keywords
+in the result.
-* `linkedplaces=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| linkedplaces | 0 or 1 | 1 |
-Include a details of places that are linked with this one. Places get linked
+Include details of places that are linked with this one. Places get linked
together when they are different forms of the same physical object. Nominatim
links two kinds of objects together: place nodes get linked with the
corresponding administrative boundaries. Waterway relations get linked together with their
members.
-(Default: 1)
-* `hierarchy=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| hierarchy | 0 or 1 | 0 |
+
+Include details of places lower in the address hierarchy.
+
+`[Python-only]` Only properly parented places are returned. These are address
+or POI-like places that reuse the address of their parent street or place.
-Include details of places lower in the address hierarchy. (Default: 0)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| group_hierarchy | 0 or 1 | 0 |
-* `group_hierarchy=[0|1]`
+When set to 1, the output of the address hierarchy will be
+grouped by type.
-For JSON output will group the places by type. (Default: 0)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_geojson | 0 or 1 | 0 |
-* `polygon_geojson=[0|1]`
-Include geometry of result. (Default: 0)
+Include geometry of result.
### Language of results
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
-Preferred language order for showing result, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
## Examples
The lookup API allows querying the address and other details of one or
multiple OSM objects like node, way or relation.
-## Parameters
+## Endpoint
The lookup API has the following format:
prefixed with its type, one of node(N), way(W) or relation(R). Up to 50 ids
can be queried at the same time.
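+
+A minimal example request (the OSM ids are illustrative) looks like this:
+
+```
+https://nominatim.openstreetmap.org/lookup?osm_ids=R146656,W104393803,N240109189
+```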
-Additional optional parameters are explained below.
+!!! danger "Deprecation warning"
+ The API can also be used with the URL
+ `https://nominatim.openstreetmap.org/lookup.php`. This is now deprecated
+ and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional optional parameters.
### Output format
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
+
+See [Place Output Formats](Output.md) for details on each format.
+
-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
-* `json_callback=<string>`
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
-Wrap JSON output in a callback function (JSONP) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
+
### Output details
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
-Include a breakdown of the address into elements. (Default: 0)
+!!! tip
+ If you are interested in a stable classification of address categories
+ (suburb, city, state, etc), have a look at the `geocodejson` format.
+ All other formats return classifications according to OSM tagging.
+ There is a much larger set of categories and they are not always consistent,
+ which makes them very hard to work with.
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.
### Language of results
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
+
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
+
+!!! tip
+ First-time users of Nominatim tend to be confused that they get different
+ results when using Nominatim in the browser versus in a command-line tool
+ like wget or curl. The command-line tools
+ usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+ send the currently chosen browser language.
-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
### Polygon output
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml | 0 or 1 | 0 |
+| polygon_svg | 0 or 1 | 0 |
+| polygon_text | 0 or 1 | 0 |
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.
-* `polygon_threshold=0.0`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_threshold | floating-point number | 0.0 |
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+
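+A hypothetical request returning a simplified GeoJSON geometry (the OSM way
+id is illustrative):
+
+```
+https://nominatim.openstreetmap.org/lookup?osm_ids=W90394480&polygon_geojson=1&polygon_threshold=0.001
+```
+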
### Other
-* `email=<valid email address>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| email | valid email address | _unset_ |
If you are making large numbers of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+
-* `debug=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
## Examples
### Nominatim API
-Nominatim indexes named (or numbered) features within the OpenStreetMap (OSM) dataset and a subset of other unnamed features (pubs, hotels, churches, etc).
+!!! Attention
+ The current version of Nominatim implements two different search frontends:
+ the old PHP frontend and the new Python frontend. They have a very similar
+ API but differ in some implementation details. These are marked in the
+ documentation as `[Python-only]` or `[PHP-only]`.
-Its API has the following endpoints for querying the data:
+ `https://nominatim.openstreetmap.org` implements the **Python frontend**.
+    Users should therefore refer to the **`[Python-only]`** comments.
+
+This section describes the API V1 of the Nominatim web service. The
+service offers the following endpoints:
* __[/search](Search.md)__ - search OSM objects by name or type
* __[/reverse](Reverse.md)__ - search OSM object by their location
back in Nominatim in case the deletion was accidental
* __/polygons__ - list of broken polygons detected by Nominatim
* __[/details](Details.md)__ - show internal details for an object (for debugging only)
+
+
+
# Reverse Geocoding
-Reverse geocoding generates an address from a latitude and longitude.
+Reverse geocoding generates an address from a coordinate given as
+latitude and longitude.
## How it works
have a similar enough address to the coordinate you were requesting. For
example, in dense city areas it may belong to a completely different street.
-
-## Parameters
+## Endpoint
The main format of the reverse API is
projection. The API returns exactly one result or an error when the coordinate
is in an area with no OSM data coverage.
-Additional parameters are accepted as listed below.
-!!! warning "Deprecation warning"
+!!! danger "Deprecation warning"
The reverse API used to allow address lookup for a single OSM object by
- its OSM id. This use is now deprecated. Use the [Address Lookup API](Lookup.md)
- instead.
+    its OSM id `[PHP-only]`. This use is considered deprecated.
+ Use the [Address Lookup API](Lookup.md) instead.
+
+!!! danger "Deprecation warning"
+ The API can also be used with the URL
+ `https://nominatim.openstreetmap.org/reverse.php`. This is now deprecated
+ and will be removed in future versions.
+
+
+## Parameters
+
+This section lists additional parameters to further influence the output.
### Output format
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `xml` |
+
+See [Place Output Formats](Output.md) for details on each format.
+
-See [Place Output Formats](Output.md) for details on each format. (Default: xml)
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
-* `json_callback=<string>`
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
Only has an effect for JSON output formats.
+
### Output details
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 1 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
-Include a breakdown of the address into elements. (Default: 1)
+!!! tip
+ If you are interested in a stable classification of address categories
+ (suburb, city, state, etc), have a look at the `geocodejson` format.
+ All other formats return classifications according to OSM tagging.
+ There is a much larger set of categories and they are not always consistent,
+ which makes them very hard to work with.
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.
### Language of results
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
-Preferred language order for showing search results, overrides the value
-specified in the "Accept-Language" HTTP header.
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
-### Result limitation
+!!! tip
+ First-time users of Nominatim tend to be confused that they get different
+ results when using Nominatim in the browser versus in a command-line tool
+ like wget or curl. The command-line tools
+ usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+ send the currently chosen browser language.
-* `zoom=[0-18]`
-Level of detail required for the address. Default: 18. This is a number that
+### Result restriction
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| zoom | 0-18 | 18 |
+
+Level of detail required for the address. This is a number that
corresponds roughly to the zoom level used in XYZ tile sources in frameworks
like Leaflet.js, Openlayers etc.
In terms of address details the zoom levels are as follows:
12 | town / borough
13 | village / suburb
14 | neighbourhood
- 15 | locality
+ 15 | any settlement
16 | major streets
17 | major and minor streets
18 | building
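+
+For example, a settlement-level reverse lookup (the coordinates are
+illustrative):
+
+```
+https://nominatim.openstreetmap.org/reverse?lat=52.5487&lon=-1.8164&zoom=14
+```
+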
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
+
+The layer filter allows selecting places by theme.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic points
+of interest like restaurants, shops, hotels but also less obvious features
+like recycling bins, guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
+
+
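+For instance, to find the nearest point of interest to a coordinate instead
+of the address (the coordinates are illustrative):
+
+```
+https://nominatim.openstreetmap.org/reverse?lat=52.5487&lon=-1.8164&layer=poi
+```
+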
### Polygon output
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml | 0 or 1 | 0 |
+| polygon_svg | 0 or 1 | 0 |
+| polygon_text | 0 or 1 | 0 |
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.
-* `polygon_threshold=0.0`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_threshold | floating-point number | 0.0 |
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
+
### Other
-* `email=<valid email address>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| email | valid email address | _unset_ |
-If you are making a large number of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+If you are making large numbers of requests, please include an appropriate email
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
-* `debug=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
## Examples
which are translated into specific OpenStreetMap (OSM) tags (e.g. Pub => `amenity=pub`).
This can be used to narrow down the kind of objects to be returned.
-!!! warning
+!!! note
Special phrases are not suitable to query all objects of a certain type in an
area. Nominatim will always just return a collection of the best matches. To
download OSM data by object type, use the [Overpass API](https://overpass-api.de/).
-## Parameters
+## Endpoint
The search API has the following format:
https://nominatim.openstreetmap.org/search?<params>
```
-The search term may be specified with two different sets of parameters:
+!!! danger "Deprecation warning"
+ The API can also be used with the URL
+ `https://nominatim.openstreetmap.org/search.php`. This is now deprecated
+ and will be removed in future versions.
+
+The query term can be given in two different forms: free-form or structured.
+
+### Free-form query
+
+| Parameter | Value |
+|-----------| ----- |
+| q | Free-form query string to search for |
-* `q=<query>`
+In this form, the query can be unstructured.
+Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
+[pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
+[birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
+Commas are optional, but improve performance by reducing the complexity of the search.
- Free-form query string to search for.
- Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
- [pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
- [birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
- Commas are optional, but improve performance by reducing the complexity of the search.
+The free-form query may also contain special phrases to describe the type of
+place to be returned or a coordinate to search close to a position.
-* `amenity=<name and/or type of POI>`
-* `street=<housenumber> <streetname>`
-* `city=<city>`
-* `county=<county>`
-* `state=<state>`
-* `country=<country>`
-* `postalcode=<postalcode>`
+### Structured query
- Alternative query string format split into several parameters for structured requests.
- Structured requests are faster but are less robust against alternative
- OSM tagging schemas. **Do not combine with** `q=<query>` **parameter**.
+| Parameter | Value |
+|----------- | ----- |
+| amenity | name and/or type of POI |
+| street | housenumber and streetname |
+| city | city |
+| county | county |
+| state | state |
+| country | country |
+| postalcode | postal code |
-Both query forms accept the additional parameters listed below.
+The structured form of the search query allows looking up an address
+that is already split into its components. Each parameter represents a field
+of the address. All parameters are optional. You should only use the ones
+that are relevant for the address you want to geocode.
+
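+A structured query for the address used in this document's examples might
+look like this:
+
+```
+https://nominatim.openstreetmap.org/search?street=135+pilkington+avenue&city=birmingham&country=united+kingdom
+```
+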
+!!! Attention
+ Cannot be combined with the `q=<query>` parameter. Newer versions of
+ the API will return an error if you do so. Older versions simply return
+ unexpected results.
+
+## Parameters
+
+The following parameters can be used to further restrict the search and
+change the output. They are usable for both forms of the search query.
### Output format
-* `format=[xml|json|jsonv2|geojson|geocodejson]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format | one of: `xml`, `json`, `jsonv2`, `geojson`, `geocodejson` | `jsonv2` |
-See [Place Output Formats](Output.md) for details on each format. (Default: jsonv2)
+See [Place Output Formats](Output.md) for details on each format.
!!! note
The Nominatim service at https://nominatim.openstreetmap.org
has a different default behaviour for historical reasons. When the
`format` parameter is omitted, the request will be forwarded to the Web UI.
-* `json_callback=<string>`
-Wrap JSON output in a callback function ([JSONP](https://en.wikipedia.org/wiki/JSONP)) i.e. `<string>(<json>)`.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| json_callback | function name | _unset_ |
+
+When given, the JSON output will be wrapped in a callback function with
+the given name. See [JSONP](https://en.wikipedia.org/wiki/JSONP) for more
+information.
+
Only has an effect for JSON output formats.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| limit | number | 10 |
+
+Limit the maximum number of returned results. Cannot be more than 40.
+Nominatim may decide to return fewer results than requested if additional
+results do not sufficiently match the query.
+
+
### Output details
-* `addressdetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| addressdetails | 0 or 1 | 0 |
+
+When set to 1, include a breakdown of the address into elements.
+The exact content of the address breakdown depends on the output format.
-Include a breakdown of the address into elements. (Default: 0)
+!!! tip
+ If you are interested in a stable classification of address categories
+ (suburb, city, state, etc), have a look at the `geocodejson` format.
+ All other formats return classifications according to OSM tagging.
+ There is a much larger set of categories and they are not always consistent,
+ which makes them very hard to work with.
-* `extratags=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| extratags | 0 or 1 | 0 |
-Include additional information in the result if available,
-e.g. wikipedia link, opening hours. (Default: 0)
+When set to 1, the response includes any additional information in the result
+that is available in the database, e.g. wikipedia link, opening hours.
-* `namedetails=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| namedetails | 0 or 1 | 0 |
-Include a list of alternative names in the results. These may include
-language variants, references, operator and brand. (Default: 0)
+When set to 1, include a full list of names for the result. These may include
+language variants, older names, references and brand.
### Language of results
-* `accept-language=<browser language string>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| accept-language | browser language string | content of "Accept-Language" HTTP header |
-Preferred language order for showing search results, overrides the value
-specified in the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
-Either use a standard RFC2616 accept-language string or a simple
-comma-separated list of language codes.
+Preferred language order for showing search results. This may either be
+a simple comma-separated list of language codes or have the same format
+as the ["Accept-Language" HTTP header](https://developer.mozilla.org/en-US/docs/Web/HTTP/Headers/Accept-Language).
-### Result limitation
+!!! tip
+ First-time users of Nominatim tend to be confused that they get different
+ results when using Nominatim in the browser versus in a command-line tool
+ like wget or curl. The command-line tools
+ usually don't send any Accept-Language header, prompting Nominatim
+    to show results in the local language. Browsers, on the contrary, always
+ send the currently chosen browser language.
-* `countrycodes=<countrycode>[,<countrycode>][,<countrycode>]...`
+### Result restriction
-Limit search results to one or more countries. `<countrycode>` must be the
-[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code,
-e.g. `gb` for the United Kingdom, `de` for Germany.
+There are two ways to influence the results. *Filters* exclude certain
+kinds of results completely. *Boost parameters* only change the order of the
+results and thus give a preference to some results over others.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| countrycodes | comma-separated list of country codes | _unset_ |
+
+Filter that limits the search results to one or more countries.
+The country code must be the
+[ISO 3166-1alpha2](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2) code
+of the country, e.g. `gb` for the United Kingdom, `de` for Germany.
Each place in Nominatim is assigned to one country code based
on OSM country boundaries. In rare cases a place may not be in any country
-at all, for example, in international waters.
+at all, for example, when it is in international waters. These places are
+also excluded when the filter is set.
+
+!!! Note
+ This parameter should not be confused with the 'country' parameter of
+ the structured query. The 'country' parameter contains a search term
+ and will be handled with some fuzziness. The `countrycodes` parameter
+    is a hard filter and as such should be preferred. Having both parameters
+ in the same query will work. If the parameters contradict each other,
+ the search will come up empty.
+
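+For example, to limit a search for "berlin" to results in Germany:
+
+```
+https://nominatim.openstreetmap.org/search?q=berlin&countrycodes=de
+```
+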
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| layer | comma-separated list of: `address`, `poi`, `railway`, `natural`, `manmade` | _unset_ (no restriction) |
-* `exclude_place_ids=<place_id,[place_id],[place_id]`
+The layer filter allows selecting places by theme.
+
+The `address` layer contains all places that make up an address:
+address points with house numbers, streets, inhabited places (suburbs, villages,
+cities, states etc.) and administrative boundaries.
+
+The `poi` layer selects all points of interest. This includes classic POIs like
+restaurants, shops, hotels but also less obvious features like recycling bins,
+guideposts or benches.
+
+The `railway` layer includes railway infrastructure like tracks.
+Note that in Nominatim's standard configuration, only very few railway
+features are imported into the database.
+
+The `natural` layer collects features like rivers, lakes and mountains while
+the `manmade` layer functions as a catch-all for features not covered by the
+other layers.
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| featureType | one of: `country`, `state`, `city`, `settlement` | _unset_ |
+
+The featureType parameter allows a more fine-grained selection of places
+from the address layer. Results can be restricted to places that make up
+the 'state', 'country' or 'city' part of an address. A featureType of
+settlement selects any human inhabited feature from 'state' down to
+'neighbourhood'.
+
+When featureType is set, results are automatically restricted
+to the address layer (see above).
+
+!!! tip
+ Instead of using the featureType filters `country`, `state` or `city`,
+ you can also use a structured query without the finer-grained parameters
+ amenity or street.
+
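+For example, to search only for places that are cities:
+
+```
+https://nominatim.openstreetmap.org/search?q=berlin&featureType=city
+```
+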
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| exclude_place_ids | comma-separated list of place ids | _unset_ |
If you do not want certain OSM objects to appear in the search
result, give a comma separated list of the `place_id`s you want to skip.
previous query only returned a few results, then including those here would
cause the search to return other, less accurate, matches (if possible).
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| viewbox | `<x1>,<y1>,<x2>,<y2>` | _unset_ |
-* `limit=<integer>`
-
-Limit the number of returned results. (Default: 10, Maximum: 50)
+Boost parameter which focuses the search on the given area.
+Any two corner points of the box are accepted as long as they make a proper
+box. `x` is longitude, `y` is latitude.
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| bounded | 0 or 1 | 0 |
-* `viewbox=<x1>,<y1>,<x2>,<y2>`
+When set to 1, the 'viewbox' parameter (see above) turns into
+a filter parameter, excluding any results outside the viewbox.
-The preferred area to find search results. Any two corner points of the box
-are accepted as long as they span a real box. `x` is longitude,
-`y` is latitude.
-
-
-* `bounded=[0|1]`
-
-When a viewbox is given, restrict the result to items contained within that
-viewbox (see above). When `viewbox` and `bounded=1` are given, an amenity
-only search is allowed. Give the special keyword for the amenity in square
+When `bounded=1` is given and the viewbox is small enough, then an amenity-only
+search is allowed. Give the special keyword for the amenity in square
brackets, e.g. `[pub]` and a selection of objects of this type is returned.
-There is no guarantee that the result is complete. (Default: 0)
+There is no guarantee that the result returns all objects in the area.
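+
+An illustrative amenity-only search for pubs inside a small viewbox:
+
+```
+https://nominatim.openstreetmap.org/search?q=[pub]&viewbox=-1.82,52.55,-1.81,52.54&bounded=1
+```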
### Polygon output
-* `polygon_geojson=1`
-* `polygon_kml=1`
-* `polygon_svg=1`
-* `polygon_text=1`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_geojson | 0 or 1 | 0 |
+| polygon_kml | 0 or 1 | 0 |
+| polygon_svg | 0 or 1 | 0 |
+| polygon_text | 0 or 1 | 0 |
-Output geometry of results as a GeoJSON, KML, SVG or WKT. Only one of these
-options can be used at a time. (Default: 0)
+Add the full geometry of the place to the result output. Output formats
+in GeoJSON, KML, SVG or WKT are supported. Only one of these
+options can be used at a time.
-* `polygon_threshold=0.0`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| polygon_threshold | floating-point number | 0.0 |
-Return a simplified version of the output geometry. The parameter is the
+When one of the polygon_* outputs is chosen, return a simplified version
+of the output geometry. The parameter describes the
tolerance in degrees with which the geometry may differ from the original
-geometry. Topology is preserved in the result. (Default: 0.0)
+geometry. Topology is preserved in the geometry.
### Other
-* `email=<valid email address>`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| email | valid email address | _unset_ |
If you are making large numbers of requests, please include an appropriate email
-address to identify your requests. See Nominatim's [Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
+address to identify your requests. See Nominatim's
+[Usage Policy](https://operations.osmfoundation.org/policies/nominatim/) for more details.
-* `dedupe=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| dedupe | 0 or 1 | 1 |
Sometimes you have several objects in OSM identifying the same place or
object in reality. The simplest case is a street being split into many
different OSM ways due to different characteristics. Nominatim will
-attempt to detect such duplicates and only return one match unless
-this parameter is set to 0. (Default: 1)
+attempt to detect such duplicates and only return one match. Setting
+this parameter to 0 disables this deduplication mechanism and
+ensures that all results are returned.
-* `debug=[0|1]`
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| debug | 0 or 1 | 0 |
Output assorted developer debug information. Data on internals of Nominatim's
-"Search Loop" logic, and SQL queries. The output is (rough) HTML format.
-This overrides the specified machine readable format. (Default: 0)
-
+"search loop" logic, and SQL queries. The output is HTML format.
+This overrides the specified machine readable format.
## Examples
-##### XML with kml polygon
+##### XML with KML polygon
-* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_geojson=1&addressdetails=1)
+* [https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1](https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue,+birmingham&format=xml&polygon_kml=1&addressdetails=1)
```xml
- <searchresults timestamp="Sat, 07 Nov 09 14:42:10 +0000" querystring="135 pilkington, avenue birmingham" polygon="true">
- <place
- place_id="1620612" osm_type="node" osm_id="452010817"
- boundingbox="52.548641204834,52.5488433837891,-1.81612110137939,-1.81592094898224"
- lat="52.5487429714954" lon="-1.81602098644987"
- display_name="135, Pilkington Avenue, Wylde Green, City of Birmingham, West Midlands (county), B72, United Kingdom"
- class="place" type="house">
- <geokml>
- <Polygon>
- <outerBoundaryIs>
- <LinearRing>
- <coordinates>-1.816513,52.548756599999997 -1.816434,52.548747300000002 -1.816429,52.5487629 -1.8163717,52.548756099999999 -1.8163464,52.548834599999999 -1.8164599,52.548848100000001 -1.8164685,52.5488213 -1.8164913,52.548824000000003 -1.816513,52.548756599999997</coordinates>
- </LinearRing>
- </outerBoundaryIs>
- </Polygon>
- </geokml>
- <house_number>135</house_number>
- <road>Pilkington Avenue</road>
- <village>Wylde Green</village>
- <town>Sutton Coldfield</town>
- <city>City of Birmingham</city>
- <county>West Midlands (county)</county>
- <postcode>B72</postcode>
- <country>United Kingdom</country>
- <country_code>gb</country_code>
- </place>
- </searchresults>
+<?xml version="1.0" encoding="UTF-8" ?>
+<searchresults timestamp="Tue, 08 Aug 2023 15:45:41 +00:00"
+ attribution="Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright"
+ querystring="135 pilkington avenue, birmingham"
+ more_url="https://nominatim.openstreetmap.org/search?q=135+pilkington+avenue%2C+birmingham&polygon_kml=1&addressdetails=1&limit=20&exclude_place_ids=125279639&format=xml"
+ exclude_place_ids="125279639">
+ <place place_id="125279639"
+ osm_type="way"
+ osm_id="90394480"
+ lat="52.5487921"
+ lon="-1.8164308"
+ boundingbox="52.5487473,52.5488481,-1.8165130,-1.8163464"
+ place_rank="30"
+ address_rank="30"
+ display_name="135, Pilkington Avenue, Maney, Sutton Coldfield, Wylde Green, Birmingham, West Midlands Combined Authority, England, B72 1LH, United Kingdom"
+ class="building"
+ type="residential"
+ importance="9.999999994736442e-08">
+ <geokml>
+ <Polygon>
+ <outerBoundaryIs>
+ <LinearRing>
+ <coordinates>-1.816513,52.5487566 -1.816434,52.5487473 -1.816429,52.5487629 -1.8163717,52.5487561 -1.8163464,52.5488346 -1.8164599,52.5488481 -1.8164685,52.5488213 -1.8164913,52.548824 -1.816513,52.5487566</coordinates>
+ </LinearRing>
+ </outerBoundaryIs>
+ </Polygon>
+ </geokml>
+ <house_number>135</house_number>
+ <road>Pilkington Avenue</road>
+ <hamlet>Maney</hamlet>
+ <town>Sutton Coldfield</town>
+ <village>Wylde Green</village>
+ <city>Birmingham</city>
+ <ISO3166-2-lvl8>GB-BIR</ISO3166-2-lvl8>
+ <state_district>West Midlands Combined Authority</state_district>
+ <state>England</state>
+ <ISO3166-2-lvl4>GB-ENG</ISO3166-2-lvl4>
+ <postcode>B72 1LH</postcode>
+ <country>United Kingdom</country>
+ <country_code>gb</country_code>
+ </place>
+</searchresults>
```
##### JSON with SVG polygon
-[https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search/Unter%20den%20Linden%201%20Berlin?format=json&addressdetails=1&limit=1&polygon_svg=1)
+[https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1](https://nominatim.openstreetmap.org/search?q=Unter%20den%20Linden%201%20Berlin&format=json&addressdetails=1&limit=1&polygon_svg=1)
```json
- {
- "address": {
- "city": "Berlin",
- "city_district": "Mitte",
- "construction": "Unter den Linden",
- "continent": "European Union",
- "country": "Deutschland",
- "country_code": "de",
- "house_number": "1",
- "neighbourhood": "Scheunenviertel",
- "postcode": "10117",
- "public_building": "Kommandantenhaus",
- "state": "Berlin",
- "suburb": "Mitte"
- },
- "boundingbox": [
- "52.5170783996582",
- "52.5173187255859",
- "13.3975105285645",
- "13.3981599807739"
- ],
- "class": "amenity",
- "display_name": "Kommandantenhaus, 1, Unter den Linden, Scheunenviertel, Mitte, Berlin, 10117, Deutschland, European Union",
- "importance": 0.73606775332943,
- "lat": "52.51719785",
- "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
- "lon": "13.3978352028938",
- "osm_id": "15976890",
- "osm_type": "way",
- "place_id": "30848715",
- "svg": "M 13.397511 -52.517283599999999 L 13.397829400000001 -52.517299800000004 13.398131599999999 -52.517315099999998 13.398159400000001 -52.517112099999999 13.3975388 -52.517080700000001 Z",
- "type": "public_building"
- }
+[
+ {
+ "address": {
+ "ISO3166-2-lvl4": "DE-BE",
+ "borough": "Mitte",
+ "city": "Berlin",
+ "country": "Deutschland",
+ "country_code": "de",
+ "historic": "Kommandantenhaus",
+ "house_number": "1",
+ "neighbourhood": "Friedrichswerder",
+ "postcode": "10117",
+ "road": "Unter den Linden",
+ "suburb": "Mitte"
+ },
+ "boundingbox": [
+ "52.5170798",
+ "52.5173311",
+ "13.3975116",
+ "13.3981577"
+ ],
+ "class": "historic",
+ "display_name": "Kommandantenhaus, 1, Unter den Linden, Friedrichswerder, Mitte, Berlin, 10117, Deutschland",
+ "importance": 0.8135042058306902,
+ "lat": "52.51720765",
+ "licence": "Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright",
+ "lon": "13.397834399325466",
+ "osm_id": 15976890,
+ "osm_type": "way",
+ "place_id": 108681845,
+ "svg": "M 13.3975116 -52.5172905 L 13.397549 -52.5170798 13.397715 -52.5170906 13.3977122 -52.5171064 13.3977392 -52.5171086 13.3977417 -52.5170924 13.3979655 -52.5171069 13.3979623 -52.5171233 13.3979893 -52.5171248 13.3979922 -52.5171093 13.3981577 -52.5171203 13.398121 -52.5173311 13.3978115 -52.5173103 Z",
+ "type": "house"
+ }
+]
```
##### JSON with address details
-[https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1](https://nominatim.openstreetmap.org/?addressdetails=1&q=bakery+in+berlin+wedding&format=json&limit=1)
+[https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1](https://nominatim.openstreetmap.org/search?addressdetails=1&q=bakery+in+berlin+wedding&format=jsonv2&limit=1)
```json
- {
- "address": {
- "bakery": "B\u00e4cker Kamps",
- "city_district": "Mitte",
- "continent": "European Union",
- "country": "Deutschland",
- "country_code": "de",
- "footway": "Bahnsteig U6",
- "neighbourhood": "Sprengelkiez",
- "postcode": "13353",
- "state": "Berlin",
- "suburb": "Wedding"
- },
- "boundingbox": [
- "52.5460929870605",
- "52.5460968017578",
- "13.3591794967651",
- "13.3591804504395"
- ],
- "class": "shop",
- "display_name": "B\u00e4cker Kamps, Bahnsteig U6, Sprengelkiez, Wedding, Mitte, Berlin, 13353, Deutschland, European Union",
- "icon": "https://nominatim.openstreetmap.org/images/mapicons/shopping_bakery.p.20.png",
- "importance": 0.201,
- "lat": "52.5460941",
- "licence": "Data \u00a9 OpenStreetMap contributors, ODbL 1.0. https://www.openstreetmap.org/copyright",
- "lon": "13.35918",
- "osm_id": "317179427",
- "osm_type": "node",
- "place_id": "1453068",
- "type": "bakery"
- }
+[
+ {
+ "address": {
+ "ISO3166-2-lvl4": "DE-BE",
+ "borough": "Mitte",
+ "city": "Berlin",
+ "country": "Deutschland",
+ "country_code": "de",
+ "neighbourhood": "Sprengelkiez",
+ "postcode": "13347",
+ "road": "Lindower Straße",
+ "shop": "Ditsch",
+ "suburb": "Wedding"
+ },
+ "addresstype": "shop",
+ "boundingbox": [
+ "52.5427201",
+ "52.5427654",
+ "13.3668619",
+ "13.3669442"
+ ],
+ "category": "shop",
+ "display_name": "Ditsch, Lindower Straße, Sprengelkiez, Wedding, Mitte, Berlin, 13347, Deutschland",
+ "importance": 9.99999999995449e-06,
+ "lat": "52.54274275",
+ "licence": "Data © OpenStreetMap contributors, ODbL 1.0. http://osm.org/copyright",
+ "lon": "13.36690305710228",
+ "name": "Ditsch",
+ "osm_id": 437595031,
+ "osm_type": "way",
+ "place_id": 204751033,
+ "place_rank": 30,
+ "type": "bakery"
+ }
+]
```
##### GeoJSON
# Status
-Useful for checking if the service and database is running. The JSON output also shows
+Report on the state of the service and database. Useful for checking if the
+service is up and running. The JSON output also reports
when the database was last updated.
+## Endpoint
+
+The status API has the following format:
+
+```
+https://nominatim.openstreetmap.org/status
+```
+
+!!! danger "Deprecation warning"
+ The API can also be used with the URL
+ `https://nominatim.openstreetmap.org/status.php`. This is now deprecated
+ and will be removed in future versions.
+
+
## Parameters
-* `format=[text|json]` (defaults to 'text')
+The status endpoint takes a single optional parameter:
+
+| Parameter | Value | Default |
+|-----------| ----- | ------- |
+| format | one of: `text`, `json` | 'text' |
+
+Selects the output format. See below.
## Output
#### Text format
-```
- https://nominatim.openstreetmap.org/status.php
-```
-
-will return HTTP status code 200 and print `OK`.
+When everything is okay, an HTTP status code 200 is returned together with a simple message: `OK`.
-On error it will return HTTP status code 500 and print a message, e.g.
+On error it will return HTTP status code 500 and print a detailed error message, e.g.
`ERROR: Database connection failed`.
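+
+A quick check might look like this (a minimal sketch using Python's standard
+library):
+
+``` python
+from urllib.request import urlopen
+
+# Prints 'OK' when the service is up.
+with urlopen('https://nominatim.openstreetmap.org/status') as resp:
+    print(resp.read().decode())
+```
+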
#### JSON format
-```
- https://nominatim.openstreetmap.org/status.php?format=json
-```
+Always returns an HTTP code 200 when the status call could be executed.
-will return HTTP code 200 and a structure
+On success, a JSON dictionary with the following structure is returned:
```json
{
the API. The `database_version` field contains the version of the data format
in the database.
-On error will also return HTTP status code 200 and a structure with error
-code and message, e.g.
+On error, a shorter JSON dictionary is returned containing only the error
+message and status, e.g.
```json
{
"message": "Database connection failed"
}
```
-
-Possible status codes are
-
-| | message | notes |
-| --- | ------------------------------ | ----------------------------------------------------------------- |
-| 700 | "No database" | connection failed |
-| 701 | "Module failed" | database could not load nominatim.so |
-| 702 | "Module call failed" | nominatim.so loaded but calling a function failed |
-| 703 | "Query failed" | test query against a database table failed |
-| 704 | "No value" | test query worked but returned no results |
-| 705 | "Import date is not available" | No import dates were returned (enabling replication can fix this) |
| -------------- | --------------------------------------------------- |
| **Description:** | Tokenizer used for normalizing and parsing queries and names |
| **Format:** | string |
-| **Default:** | legacy |
+| **Default:** | icu |
| **After Changes:** | cannot be changed after import |
Sets the tokenizer type to use for the import. For more information on
objects when the area becomes too large.
-#### NOMINATIM_UPDATE_FORWARD_DEPENDENCIES
-
-| Summary | |
-| -------------- | --------------------------------------------------- |
-| **Description:** | Forward geometry changes to dependet objects |
-| **Format:** | bool |
-| **Default:** | no |
-| **Comment:** | EXPERT ONLY. Must not be enabled after import. |
-
-The geometry of OSM ways and relations may change when a node that is part
-of the object is moved around. These changes are not propagated per default.
-The geometry of ways/relations is only updated the next time that the object
-itself is touched. When enabling this option, then dependent objects will
-be marked for update when one of its member objects changes.
-
-Enabling this option may slow down updates significantly.
-
-!!! warning
- If you want to enable this option, it must be set already on import.
- Do not enable this option on an existing database that was imported with
- NOMINATIM_UPDATE_FORWARD_DEPENDENCIES=no.
- Updates will become unusably slow.
-
#### NOMINATIM_LANGUAGES
| Summary | |
| **Format:** | boolean |
| **Default:** | no |
| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:** | PHP frontend only |
+
This feature is currently undocumented and potentially broken.
| **Format:** | integer |
| **Default:** | 500 |
| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:** | PHP frontend only |
This setting defines the threshold over which a name is no longer considered
as rare. When searching for places with rare names, only the name is used
Setting this parameter to 0 disables polygon output completely.
+
+#### NOMINATIM_SEARCH_WITHIN_COUNTRIES
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Disable search for elements that are not in the country grid |
+| **Format:** | boolean |
+| **Default:** | no |
+| **After Changes:** | run `nominatim refresh --website` |
+| **Comment:** | PHP frontend only |
+
+Enable to restrict search to elements within countries.
+
+When enabled and a point cannot be found within the static grid of countries,
+no region geometry is returned even if one is found;
+"Unable to geocode" is returned instead.
+
+
+#### NOMINATIM_SERVE_LEGACY_URLS
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Enable serving via URLs with a .php suffix |
+| **Format:** | boolean |
+| **Default:** | yes |
+| **Comment:** | Python frontend only |
+
+When enabled, endpoints are reachable as `/<name>` as well as `/<name>.php`.
+This can be useful when you want to be backwards-compatible with previous
+versions of Nominatim.
+
+
+#### NOMINATIM_API_POOL_SIZE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Number of parallel database connections per worker |
+| **Format:** | number |
+| **Default:** | 10 |
+| **Comment:** | Python frontend only |
+
+Sets the maximum number of database connections available for a single instance
+of Nominatim. When configuring the maximum number of connections that your
+PostgreSQL database can handle, you need at least
+`NOMINATIM_API_POOL_SIZE` * `<number of configured workers>` connections.
+For configuring the number of workers, refer to the section about
+[Deploying the Python frontend](../admin/Deployment-Python.md).
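+
+As a worked example (the worker count is an assumption for illustration):
+with the default pool size of 10 and 4 configured workers, PostgreSQL must
+accept at least 10 * 4 = 40 connections for the frontend alone.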
+
+#### NOMINATIM_QUERY_TIMEOUT
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Timeout for SQL queries to the database |
+| **Format:** | number (seconds) |
+| **Default:** | 10 |
+| **Comment:** | Python frontend only |
+
+When this timeout is set, all SQL queries that run longer than the
+specified number of seconds will be cancelled and the user receives a
+timeout exception. Users of the API see a 503 HTTP error.
+
+The timeout does not apply when using the
+[low-level DB access](../library/Low-Level-DB-Access.md)
+of the library. A timeout can be set manually there, if required.
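+
+A minimal sketch of setting such a manual timeout on a low-level connection,
+using the `set_query_timeout()` function of `SearchConnection` (the setup
+follows the examples in the library guide):
+
+``` python
+import asyncio
+from pathlib import Path
+
+from nominatim.api import NominatimAPIAsync
+
+async def query_with_timeout():
+    api = NominatimAPIAsync(Path('.'))
+
+    async with api.begin() as conn:
+        # Cancel any query on this connection that runs longer than 5 seconds.
+        conn.set_query_timeout(5)
+        # Queries issued via conn.scalar()/conn.execute() now honour the timeout.
+        ...
+
+asyncio.run(query_with_timeout())
+```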
+
+
+#### NOMINATIM_REQUEST_TIMEOUT
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Timeout for search queries |
+| **Format:** | number (seconds) |
+| **Default:** | 60 |
+| **Comment:** | Python frontend only |
+
+When this timeout is set, a search query will finish sending queries
+to the database after the timeout has passed and immediately return the
+results gathered so far.
+
+Note that under high load you may observe that users receive different results
+than usual without seeing an error. This may cause some confusion.
+
### Logging Settings
#### NOMINATIM_LOG_DB
type contains the name of the endpoint used.
Can be used as the same time as NOMINATIM_LOG_DB.
+
+#### NOMINATIM_DEBUG_SQL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Enable printing of raw SQL by SQLAlchemy |
+| **Format:** | boolean |
+| **Default:** | no |
+| **Comment:** | **For developers only.** |
+
+This setting enables
+[SQL debugging](https://docs.sqlalchemy.org/en/20/core/engines.html#dbengine-logging)
+by SQLAlchemy. This can be helpful when debugging issues with the internal
+query handling. It should only be used together with the CLI query functions.
+Enabling it for server mode may have unintended consequences. Use the `debug`
+parameter instead, which prints information on how the search is executed
+including SQL statements.
##### split-name-list
::: nominatim.tokenizer.sanitizers.split_name_list
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
##### strip-brace-terms
::: nominatim.tokenizer.sanitizers.strip_brace_terms
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
##### tag-analyzer-by-language
::: nominatim.tokenizer.sanitizers.tag_analyzer_by_language
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
##### clean-housenumbers
::: nominatim.tokenizer.sanitizers.clean_housenumbers
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
##### clean-postcodes
::: nominatim.tokenizer.sanitizers.clean_postcodes
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
##### clean-tiger-tags
::: nominatim.tokenizer.sanitizers.clean_tiger_tags
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
#### delete-tags
::: nominatim.tokenizer.sanitizers.delete_tags
- selection:
+ options:
members: False
- rendering:
heading_level: 6
+ docstring_section_style: spacy
+
+#### tag-japanese
+
+::: nominatim.tokenizer.sanitizers.tag_japanese
+ options:
+ members: False
+ heading_level: 6
+ docstring_section_style: spacy
#### Token Analysis
The documentation is built with mkdocs:
* [mkdocs](https://www.mkdocs.org/) >= 1.1.2
-* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.16
-* [mkdocstrings-python-legacy](https://mkdocstrings.github.io/python-legacy/)
+* [mkdocstrings](https://mkdocstrings.github.io/) >= 0.18
+* [mkdocstrings-python](https://mkdocstrings.github.io/python/)
### Installing prerequisites on Ubuntu/Debian
### Sanitizer configuration
::: nominatim.tokenizer.sanitizers.config.SanitizerConfig
- rendering:
- show_source: no
+ options:
heading_level: 6
### The main filter function of the sanitizer
The filter function receives a single object of type `ProcessInfo`
which has three members:
- * `place`: read-only information about the place being processed.
+ * `place: PlaceInfo`: read-only information about the place being processed.
See PlaceInfo below.
- * `names`: The current list of names for the place. Each name is a
- PlaceName object.
- * `address`: The current list of address names for the place. Each name
- is a PlaceName object.
+ * `names: List[PlaceName]`: The current list of names for the place.
+ * `address: List[PlaceName]`: The current list of address names for the place.
While the `place` member is provided for information only, the `names` and
`address` lists are meant to be manipulated by the sanitizer. It may add and
#### PlaceInfo - information about the place
::: nominatim.data.place_info.PlaceInfo
- rendering:
- show_source: no
+ options:
heading_level: 6
#### PlaceName - extended naming information
::: nominatim.data.place_name.PlaceName
- rendering:
- show_source: no
+ options:
heading_level: 6
## Custom token analysis module
::: nominatim.tokenizer.token_analysis.base.AnalysisModule
- rendering:
- show_source: no
+ options:
heading_level: 6
::: nominatim.tokenizer.token_analysis.base.Analyzer
- rendering:
- show_source: no
+ options:
heading_level: 6
### Example: Creating acronym variants for long names
and implement the abstract functions defined there.
::: nominatim.tokenizer.base.AbstractTokenizer
- rendering:
- heading_level: 4
+ options:
+ heading_level: 6
### Python Analyzer Class
::: nominatim.tokenizer.base.AbstractAnalyzer
- rendering:
- heading_level: 4
+ options:
+ heading_level: 6
### PL/pgSQL Functions
display: none!important
}
+.wy-nav-content {
+ max-width: 900px!important
+}
+
table {
margin-bottom: 12pt
}
.doc-object h6 {
margin-bottom: 0.8em;
- font-size: 120%;
+ font-size: 130%;
}
.doc-object {
margin-bottom: 1.3em;
}
+
+.doc-children .doc-contents {
+ margin-left: 3em;
+}
+
+.md-footer__inner {
+ display: none;
+}
-Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
+Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and
+address and to generate synthetic addresses of OSM points (reverse geocoding).
+It also has limited capability to search for features by their type
+(pubs, hotels, churches, etc.).
-This guide comes in four parts:
+This guide comes in five parts:
* __[API reference](api/Overview.md)__ for users of Nominatim
* __[Administration Guide](admin/Installation.md)__ for those who want
to install their own Nominatim server
* __[Customization Guide](customize/Overview.md)__ for those who want to
adapt their own installation to their special requirements
+ * __[Library Guide](library/Getting-Started.md)__ for Python developers who
+ want to use Nominatim as a library in their project
* __[Developer's Guide](develop/overview.md)__ for developers of the software
--- /dev/null
+# Configuration
+
+When using Nominatim through the library, it can be configured in exactly
+the same way as when running as a service. This means that you should have
+created a [project directory](../admin/Import.md#creating-the-project-directory)
+which contains all files belonging to the Nominatim instance. It can also contain
+an `.env` file with configuration options. Setting configuration parameters
+via environment variables works as well.
+
+Configuration options are resolved in the following order:
+
+* from the OS environment (or the dictionary given in `environ`, see
+  [NominatimAPI.__init__](NominatimAPI.md#nominatim.api.core.NominatimAPI.__init__))
+* from the .env file in the project directory of the installation
+* from the default installation in the configuration directory
+
+For more information on configuration via dotenv and a list of possible
+configuration parameters, see the [Configuration page](../customize/Settings.md).
+
+
+## `Configuration` class
+
+::: nominatim.config.Configuration
+ options:
+ members:
+ - get_bool
+ - get_int
+ - get_str_list
+ - get_path
+ heading_level: 6
+ show_signature_annotations: True
--- /dev/null
+# Getting Started
+
+The Nominatim search frontend can be used directly as a Python library in
+scripts and applications. When you have imported your own Nominatim database,
+it is no longer necessary to run a full web service for it and access
+the database through HTTP requests. There are
+also fewer constraints on the kinds of data that can be accessed. The library
+gives access to more detailed information about the objects saved
+in the database.
+
+!!! danger
+ The library interface is currently in an experimental stage. There might
+ be some smaller adjustments to the public interface until the next version.
+
+    The library also lacks a proper installation routine, so some manipulation
+ of the PYTHONPATH is required. At the moment, use is only recommended for
+ developers with some experience in Python.
+
+## Installation
+
+To use the Nominatim library, you need access to a local Nominatim database.
+Follow the [installation](../admin/Installation.md) and
+[import](../admin/Import.md) instructions to set up your database.
+
+It is not yet possible to install it in the usual way via pip or inside a
+virtualenv. To get access to the library you need to set an appropriate
+`PYTHONPATH`. With the default installation, the python library can be found
+under `/usr/local/share/nominatim/lib-python`. If you have installed
+Nominatim under a different prefix, adapt the `/usr/local/` part accordingly.
+You can also point the `PYTHONPATH` to the Nominatim source code.
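+
+As a minimal sketch, the same effect as setting `PYTHONPATH` can also be
+achieved from inside a script (the path assumes the default installation
+prefix from above):
+
+``` python
+import sys
+
+# Make the Nominatim library importable; adapt the prefix if necessary.
+sys.path.insert(0, '/usr/local/share/nominatim/lib-python')
+
+import nominatim.api as napi
+```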
+
+### A simple search example
+
+To query the Nominatim database you first need to set up a connection. This
+is done by creating a Nominatim API object. This object exposes all the
+search functions of Nominatim that are also known from its web API.
+
+This code snippet implements a simple search for the town of 'Brugge':
+
+!!! example
+ === "NominatimAPIAsync"
+ ``` python
+ from pathlib import Path
+ import asyncio
+
+ import nominatim.api as napi
+
+ async def search(query):
+ api = napi.NominatimAPIAsync(Path('.'))
+
+ return await api.search(query)
+
+ results = asyncio.run(search('Brugge'))
+ if not results:
+ print('Cannot find Brugge')
+ else:
+ print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+ ```
+
+ === "NominatimAPI"
+ ``` python
+ from pathlib import Path
+
+ import nominatim.api as napi
+
+ api = napi.NominatimAPI(Path('.'))
+
+ results = api.search('Brugge')
+
+ if not results:
+ print('Cannot find Brugge')
+ else:
+ print(f'Found a place at {results[0].centroid.x},{results[0].centroid.y}')
+ ```
+
+The Nominatim library is designed around
+[asyncio](https://docs.python.org/3/library/asyncio.html). `NominatimAPIAsync`
+provides you with an interface of coroutines.
+If you have many requests to make, coroutines can speed up your applications
+significantly.
+
+For smaller scripts there is also a synchronous wrapper around the API. By
+using `NominatimAPI`, you get exactly the same interface using classic functions.
+
+The examples in this chapter will always showcase both
+implementations. The documentation itself will usually refer only to the
+'Nominatim API class' when both flavours are meant. If a functionality is
+available only for the synchronous or asynchronous version, this will be
+explicitly mentioned.
+
+### Defining which database to use
+
+The [Configuration](../admin/Import.md#configuration-setup-in-env)
+section explains how Nominatim is configured using the
+[dotenv](https://github.com/theskumar/python-dotenv) library.
+The same configuration mechanism is used with the
+Nominatim API library. You should therefore make sure you are familiar with
+that section.
+
+The constructor of the 'Nominatim API class' takes one mandatory parameter:
+the path to the [project directory](../admin/Import.md#creating-the-project-directory).
+You should have set up this directory as part of the Nominatim import.
+Any configuration found in the `.env` file in this directory will
+automatically be used.
+
+You may also configure Nominatim by setting environment variables.
+Normally, Nominatim will check the operating system environment. This can be
+overridden by giving the constructor a dictionary of configuration parameters.
+
+Let us look up 'Brugge' in the special database named 'belgium' instead of the
+standard 'nominatim' database:
+
+!!! example
+ === "NominatimAPIAsync"
+ ``` python
+ from pathlib import Path
+ import asyncio
+
+ import nominatim.api as napi
+
+ config_params = {
+ 'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+ }
+
+ async def search(query):
+ api = napi.NominatimAPIAsync(Path('.'), environ=config_params)
+
+ return await api.search(query)
+
+ results = asyncio.run(search('Brugge'))
+ ```
+
+ === "NominatimAPI"
+ ``` python
+ from pathlib import Path
+
+ import nominatim.api as napi
+
+ config_params = {
+ 'NOMINATIM_DATABASE_DSN': 'pgsql:dbname=belgium'
+ }
+
+ api = napi.NominatimAPI(Path('.'), environ=config_params)
+
+ results = api.search('Brugge')
+ ```
+
+### Presenting results to humans
+
+All search functions return the raw results from the database. There is no
+full human-readable label. To create such a label, you need two things:
+
+* the address details of the place
+* a localization of the result to the language you wish to use for display
+
+Again searching for 'Brugge', this time with a nicely formatted result:
+
+!!! example
+ === "NominatimAPIAsync"
+ ``` python
+ from pathlib import Path
+ import asyncio
+
+ import nominatim.api as napi
+
+ async def search(query):
+ api = napi.NominatimAPIAsync(Path('.'))
+
+ return await api.search(query, address_details=True)
+
+ results = asyncio.run(search('Brugge'))
+
+ locale = napi.Locales(['fr', 'en'])
+ for i, result in enumerate(results):
+ address_parts = result.address_rows.localize(locale)
+ print(f"{i + 1}. {', '.join(address_parts)}")
+ ```
+
+ === "NominatimAPI"
+ ``` python
+ from pathlib import Path
+
+ import nominatim.api as napi
+
+ api = napi.NominatimAPI(Path('.'))
+
+ results = api.search('Brugge', address_details=True)
+
+ locale = napi.Locales(['fr', 'en'])
+ for i, result in enumerate(results):
+ address_parts = result.address_rows.localize(locale)
+ print(f"{i + 1}. {', '.join(address_parts)}")
+ ```
+
+To request information about the address of a result, add the optional
+parameter 'address_details' to your search:
+
+``` python
+>>> results = api.search('Brugge', address_details=True)
+```
+
+An additional field `address_rows` will be set in results that are returned.
+It contains a list of all places that make up the address of the place. For
+simplicity, this includes name and house number of the place itself. With
+the names in this list it is possible to create a human-readable description
+of the result. To do that, you first need to decide in which language the
+results should be presented. As with the names in the result itself, the
+places in `address_rows` contain all possible name translations for each row.
+
+The library has a helper class `Locales` which helps extract the name of a
+place in the preferred language. It takes a single parameter with a list
+of language codes in the order of preference. So
+
+``` python
+locale = napi.Locales(['fr', 'en'])
+```
+
+creates a helper object that returns the name preferably in French. If that is
+not possible, it tries English and eventually falls back to the default `name`
+or `ref`.
+
+The `Locales` object can be applied to a name dictionary to return the best-matching
+name out of it:
+
+``` python
+>>> print(locale.display_name(results[0].names))
+Bruges
+```
+
+The `address_rows` field has a helper function `localize()` which applies
+the locale to all its members and saves the result in the `local_name`
+field. It also returns
+all the localized names as a convenient simple list. This list can be used
+to create a human-readable output:
+
+``` python
+>>> address_parts = results[0].address_rows.localize(locale)
+>>> print(', '.join(address_parts))
+Bruges, Flandre-Occidentale, Flandre, Belgique
+```
+
+This is a fairly simple way to create a human-readable description. The
+place information in `address_rows` contains further details about each
+place, for example, which OSM `admin_level` was used, what category the place
+belongs to or what rank Nominatim has assigned. Use this to adapt the output
+to local address formats.
+
+For more information on address rows, see
+[detailed address description](Result-Handling.md#detailed-address-description).
--- /dev/null
+# Input Parameter Types
+
+This page describes in more detail some of the input parameter types used
+in the query functions of the API object.
+
+## Place identification
+
+The [details](NominatimAPI.md#nominatim.api.core.NominatimAPI.details) and
+[lookup](NominatimAPI.md#nominatim.api.core.NominatimAPI.lookup) functions
+require references to places in the database. Below the possible
+types for place identification are listed. All types are dataclasses.
+
+### PlaceID
+
+::: nominatim.api.PlaceID
+ options:
+ heading_level: 6
+
+### OsmID
+
+::: nominatim.api.OsmID
+ options:
+ heading_level: 6
+
+## Geometry types
+
+::: nominatim.api.GeometryFormat
+ options:
+ heading_level: 6
+ members_order: source
+
+## Geometry input
+
+### Point
+
+::: nominatim.api.Point
+ options:
+ heading_level: 6
+ show_signature_annotations: True
+
+### Bbox
+
+::: nominatim.api.Bbox
+ options:
+ heading_level: 6
+ show_signature_annotations: True
+ members_order: source
+ group_by_category: False
+
+## Layers
+
+Layers allow restricting the search result to thematic groups. This is
+orthogonal to restriction by address ranks, which groups places by their
+geographic extent.
+
+
+::: nominatim.api.DataLayer
+ options:
+ heading_level: 6
+ members_order: source
+
+
--- /dev/null
+# Low-level connections
+
+The `NominatimAPIAsync` class allows direct access to the underlying
+database connection to explore the raw data. Nominatim uses
+[SQLAlchemy](https://docs.sqlalchemy.org/) for building queries. Please
+refer to the documentation of the library to understand how to write SQL.
+
+To get access to a search connection, use the `begin()` function of your
+API object. This returns a `SearchConnection` object described below
+wrapped in a context manager. Its
+`t` property has definitions for all Nominatim search tables. For an
+overview of available tables, refer to the
+[Database Layout](../develop/Database-Layout.md) page in the development
+chapter. Note that only tables that are needed for search are accessible
+as SQLAlchemy tables.
+
+!!! warning
+ The database layout is not part of the API definition and may change
+ without notice. If you play with the low-level access functions, you
+ need to be prepared for such changes.
+
+Here is a simple example, which prints how many places are available in
+the placex table:
+
+``` python
+import asyncio
+from pathlib import Path
+import sqlalchemy as sa
+from nominatim.api import NominatimAPIAsync
+
+async def print_table_size():
+ api = NominatimAPIAsync(Path('.'))
+
+ async with api.begin() as conn:
+ cnt = await conn.scalar(sa.select(sa.func.count()).select_from(conn.t.placex))
+ print(f'placex table has {cnt} rows.')
+
+asyncio.run(print_table_size())
+```
+
+!!! warning
+    Low-level connections may only be used to read data from the database.
+    Do not use them to add or modify data, or you might break Nominatim's
+    normal functions.
+
+## SearchConnection class
+
+::: nominatim.api.SearchConnection
+ options:
+ members:
+ - scalar
+ - execute
+ - get_class_table
+ - get_db_property
+ - get_property
+ heading_level: 6
--- /dev/null
+# The Nominatim API classes
+
+The API classes are the core objects of the search library. Always instantiate
+one of these classes first. The API classes are **not threadsafe**. You need
+to instantiate a separate instance for each thread.
+
+### NominatimAPI
+
+::: nominatim.api.NominatimAPI
+ options:
+ members:
+ - __init__
+ - config
+ - close
+ - status
+ - details
+ - lookup
+ - reverse
+ - search
+ - search_address
+ - search_category
+ heading_level: 6
+ group_by_category: False
+
+
+### NominatimAPIAsync
+
+::: nominatim.api.NominatimAPIAsync
+ options:
+ members:
+ - __init__
+ - setup_database
+ - close
+ - begin
+ heading_level: 6
+ group_by_category: False
--- /dev/null
+# Result handling
+
+The search functions of the Nominatim API always return a result object
+with the raw information about the place that is available in the
+database. This section discusses data types used in the results and utility
+functions that allow further processing of the results.
+
+## Result fields
+
+### Sources
+
+Nominatim takes the result data from multiple sources. The `source_table` field
+in the result describes from which source the result was retrieved.
+
+::: nominatim.api.SourceTable
+ options:
+ heading_level: 6
+ members_order: source
+
+### Detailed address description
+
+When the `address_details` parameter is set, the functions return not
+only information about the result place but also about the places that
+make up its address. This information is almost always required when you
+want to present the user with a human-readable description of the result.
+See also [Localization](#localization) below.
+
+The address details are available in the `address_rows` field as an ordered
+list of `AddressLine` objects with the country information last. The list also
+contains the result place itself and some artificial entries, for example,
+for the house number or the country code. This makes processing and creating
+a full address easier.
+
+::: nominatim.api.AddressLine
+ options:
+ heading_level: 6
+ members_order: source
+
+### Detailed search terms
+
+The `details` function can return detailed information about which search terms
+may be used to find a place, when the `keywords` parameter is set. Search
+terms are split into terms for the name of the place and search terms for
+its address.
+
+::: nominatim.api.WordInfo
+ options:
+ heading_level: 6
+
+## Localization
+
+Results are always returned with the full list of available names.
+
+### Locale
+
+::: nominatim.api.Locales
+ options:
+ heading_level: 6
-site_name: Nominatim Documentation
-theme: readthedocs
+site_name: Nominatim Manual
+theme:
+ name: material
+ features:
+ - navigation.tabs
+copyright: Copyright © Nominatim developer community
docs_dir: ${CMAKE_CURRENT_BINARY_DIR}
site_url: https://nominatim.org
repo_url: https://github.com/openstreetmap/Nominatim
- 'Basic Installation': 'admin/Installation.md'
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- - 'Deploy' : 'admin/Deployment.md'
+ - 'Deploy (PHP frontend)' : 'admin/Deployment-PHP.md'
+ - 'Deploy (Python frontend)' : 'admin/Deployment-Python.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Maintenance' : 'admin/Maintenance.md'
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
- 'External data: Postcodes': 'customize/Postcodes.md'
+ - 'Library Guide':
+ - 'Getting Started': 'library/Getting-Started.md'
+ - 'Nominatim API class': 'library/NominatimAPI.md'
+ - 'Configuration': 'library/Configuration.md'
+ - 'Input Parameter Types': 'library/Input-Parameter-Types.md'
+ - 'Result Handling': 'library/Result-Handling.md'
+ - 'Low-level DB Access': 'library/Low-Level-DB-Access.md'
- 'Developers Guide':
- 'Architecture Overview' : 'develop/overview.md'
- 'Database Layout' : 'develop/Database-Layout.md'
- codehilite
- admonition
- pymdownx.superfences
+ - pymdownx.tabbed:
+ alternate_style: true
- def_list
- toc:
permalink:
- search
- mkdocstrings:
handlers:
- python-legacy:
- rendering:
- show_source: false
- show_signature_annotations: false
+ python:
+ paths: ["${PROJECT_SOURCE_DIR}"]
+ options:
+ show_source: False
+ show_bases: False
($this->bIncludePolygonAsSVG ? 1 : 0);
if ($iWantedTypes > CONST_PolygonOutput_MaximumTypes) {
if (CONST_PolygonOutput_MaximumTypes) {
- userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polgyon output option');
+ userError('Select only '.CONST_PolygonOutput_MaximumTypes.' polygon output option');
} else {
userError('Polygon output is disabled');
}
protected function lookupLargeArea($sPointSQL, $iMaxRank)
{
+ $sCountryCode = $this->getCountryCode($sPointSQL);
+ if (CONST_Search_WithinCountries and $sCountryCode == null) {
+ return null;
+ }
+
if ($iMaxRank > 4) {
$aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank);
if ($aPlace) {
// If no polygon which contains the searchpoint is found,
// searches in the country_osm_grid table for a polygon.
- return $this->lookupInCountry($sPointSQL, $iMaxRank);
+ return $this->lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode);
}
- protected function lookupInCountry($sPointSQL, $iMaxRank)
+ protected function getCountryCode($sPointSQL)
{
- Debug::newFunction('lookupInCountry');
+ Debug::newFunction('getCountryCode');
// searches for polygon in table country_osm_grid which contains the searchpoint
// and searches for the nearest place node to the searchpoint in this polygon
$sSQL = 'SELECT country_code FROM country_osm_grid';
null,
'Could not determine country polygon containing the point.'
);
- Debug::printVar('Country code', $sCountryCode);
+ return $sCountryCode;
+ }
+ protected function lookupInCountry($sPointSQL, $iMaxRank, $sCountryCode)
+ {
+ Debug::newFunction('lookupInCountry');
if ($sCountryCode) {
if ($iMaxRank > 4) {
// look for place nodes with the given country code
+++ /dev/null
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
- @define('CONST_LibDir', dirname(dirname(__FILE__)));
- // Script to extract structured city and street data
- // from a running nominatim instance as CSV data
-
-
- require_once(CONST_LibDir.'/init-cmd.php');
- require_once(CONST_LibDir.'/ParameterParser.php');
- ini_set('memory_limit', '800M');
-
- $aCMDOptions = array(
- 'Export addresses as CSV file from a Nominatim database',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
- array('output-type', '', 0, 1, 1, 1, 'str', 'Type of places to output (see below)'),
- array('output-format', '', 0, 1, 1, 1, 'str', 'Column mapping (see below)'),
- array('output-all-postcodes', '', 0, 1, 0, 0, 'bool', 'List all postcodes for address instead of just the most likely one'),
- array('language', '', 0, 1, 1, 1, 'str', 'Preferred language for output (local name, if omitted)'),
- array('restrict-to-country', '', 0, 1, 1, 1, 'str', 'Export only objects within country (country code)'),
- array('restrict-to-osm-node', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM node'),
- array('restrict-to-osm-way', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM way'),
- array('restrict-to-osm-relation', '', 0, 1, 1, 1, 'int', 'Export only objects that are children of this OSM relation'),
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- "\nAddress ranks: continent, country, state, county, city, suburb, street, path",
- 'Additional output types: postcode, placeid (placeid for each object)',
- "\noutput-format must be a semicolon-separated list of address ranks. Multiple ranks",
- 'can be merged into one column by simply using a comma-separated list.',
- "\nDefault output-type: street",
- 'Default output format: street;suburb;city;county;state;country'
- );
- getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
- loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
- $aRankmap = array(
- 'continent' => 1,
- 'country' => 4,
- 'state' => 8,
- 'county' => 12,
- 'city' => 16,
- 'suburb' => 20,
- 'street' => 26,
- 'path' => 27
- );
-
- $oDB = new Nominatim\DB();
- $oDB->connect();
-
- if (isset($aCMDResult['output-type'])) {
- if (!isset($aRankmap[$aCMDResult['output-type']])) {
- fail('unknown output-type: '.$aCMDResult['output-type']);
- }
- $iOutputRank = $aRankmap[$aCMDResult['output-type']];
- } else {
- $iOutputRank = $aRankmap['street'];
- }
-
-
- // Preferred language
- $oParams = new Nominatim\ParameterParser();
- if (!isset($aCMDResult['language'])) {
- $aCMDResult['language'] = 'xx';
- }
- $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']);
- $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-
- // output formatting: build up a lookup table that maps address ranks to columns
- $aColumnMapping = array();
- $iNumCol = 0;
- if (!isset($aCMDResult['output-format'])) {
- $aCMDResult['output-format'] = 'street;suburb;city;county;state;country';
- }
- foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) {
- $bHasData = false;
- foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) {
- if ($sRank == 'postcode' || $sRank == 'placeid') {
- $aColumnMapping[$sRank] = $iNumCol;
- $bHasData = true;
- } elseif (isset($aRankmap[$sRank])) {
- $iRank = $aRankmap[$sRank];
- if ($iRank <= $iOutputRank) {
- $aColumnMapping[(string)$iRank] = $iNumCol;
- $bHasData = true;
- }
- }
- }
- if ($bHasData) {
- $iNumCol++;
- }
- }
-
- // build the query for objects
- $sPlacexSQL = 'select min(place_id) as place_id, ';
- $sPlacexSQL .= 'array_agg(place_id) as place_ids, ';
- $sPlacexSQL .= 'country_code as cc, ';
- $sPlacexSQL .= 'postcode, ';
- // get the address places excluding postcodes
- $sPlacexSQL .= 'array(select address_place_id from place_addressline a';
- $sPlacexSQL .= ' where a.place_id = placex.place_id and isaddress';
- $sPlacexSQL .= ' and address_place_id != placex.place_id';
- $sPlacexSQL .= ' and not cached_rank_address in (5,11)';
- $sPlacexSQL .= ' and cached_rank_address > 2 order by cached_rank_address)';
- $sPlacexSQL .= ' as address';
- $sPlacexSQL .= ' from placex where name is not null and linked_place_id is null';
-
- $sPlacexSQL .= ' and rank_address = '.$iOutputRank;
-
- if (isset($aCMDResult['restrict-to-country'])) {
- $sPlacexSQL .= ' and country_code = '.$oDB->getDBQuoted($aCMDResult['restrict-to-country']);
- }
-
- // restriction to parent place id
- $sParentId = false;
- $sOsmType = false;
-
- if (isset($aCMDResult['restrict-to-osm-node'])) {
- $sOsmType = 'N';
- $sOsmId = $aCMDResult['restrict-to-osm-node'];
- }
- if (isset($aCMDResult['restrict-to-osm-way'])) {
- $sOsmType = 'W';
- $sOsmId = $aCMDResult['restrict-to-osm-way'];
- }
- if (isset($aCMDResult['restrict-to-osm-relation'])) {
- $sOsmType = 'R';
- $sOsmId = $aCMDResult['restrict-to-osm-relation'];
- }
- if ($sOsmType) {
- $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id';
- $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId));
- if (!$sParentId) {
- fail('Could not find place '.$sOsmType.' '.$sOsmId);
- }
- }
- if ($sParentId) {
- $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)';
- }
-
- $sPlacexSQL .= " group by name->'name', address, postcode, country_code, placex.place_id";
-
- // Iterate over placeids
- // to get further hierarchical information
- //var_dump($sPlacexSQL);
- $oResults = $oDB->getQueryStatement($sPlacexSQL);
- $fOutstream = fopen('php://output', 'w');
- while ($aRow = $oResults->fetch()) {
- $iPlaceID = $aRow['place_id'];
- $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)";
- $sSQL .= ' WHERE isaddress';
- $sSQL .= ' order by rank_address desc,isaddress desc';
- $aAddressLines = $oDB->getAll($sSQL, array('place_id' => $iPlaceID));
-
- $aOutput = array_fill(0, $iNumCol, '');
- // output address parts
- foreach ($aAddressLines as $aAddress) {
- if (isset($aColumnMapping[$aAddress['rank_address']])) {
- $aOutput[$aColumnMapping[$aAddress['rank_address']]] = $aAddress['localname'];
- }
- }
- // output postcode
- if (isset($aColumnMapping['postcode'])) {
- if ($aCMDResult['output-all-postcodes']) {
- $sSQL = 'select array_agg(px.postcode) from placex px join place_addressline pa ';
- $sSQL .= 'on px.place_id = pa.address_place_id ';
- $sSQL .= 'where pa.cached_rank_address in (5,11) ';
- $sSQL .= 'and pa.place_id in (select place_id from place_addressline where address_place_id in (:first_place_id)) ';
- $sSQL .= 'group by postcode order by count(*) desc limit 1';
- $sRes = $oDB->getOne($sSQL, array('first_place_id' => substr($aRow['place_ids'], 1, -1)));
-
- $aOutput[$aColumnMapping['postcode']] = substr($sRes, 1, -1);
- } else {
- $aOutput[$aColumnMapping['postcode']] = $aRow['postcode'];
- }
- }
- if (isset($aColumnMapping['placeid'])) {
- $aOutput[$aColumnMapping['placeid']] = substr($aRow['place_ids'], 1, -1);
- }
- fputcsv($fOutstream, $aOutput);
- }
- fclose($fOutstream);
+++ /dev/null
-<?php
-/**
- * SPDX-License-Identifier: GPL-2.0-only
- *
- * This file is part of Nominatim. (https://nominatim.org)
- *
- * Copyright (C) 2022 by the Nominatim developer community.
- * For a full list of authors see the git log.
- */
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/log.php');
-require_once(CONST_LibDir.'/PlaceLookup.php');
-require_once(CONST_LibDir.'/ReverseGeocode.php');
-
-ini_set('memory_limit', '800M');
-
-$aCMDOptions = array(
- 'Tools to warm nominatim db',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
- array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Warm reverse only'),
- array('search-only', '', 0, 1, 0, 0, 'bool', 'Warm search only'),
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- );
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-
-@define('CONST_Database_DSN', getSetting('DATABASE_DSN'));
-@define('CONST_Default_Language', getSetting('DEFAULT_LANGUAGE', false));
-@define('CONST_Log_DB', getSettingBool('LOG_DB'));
-@define('CONST_Log_File', getSetting('LOG_FILE', false));
-@define('CONST_NoAccessControl', getSettingBool('CORS_NOACCESSCONTROL'));
-@define('CONST_Places_Max_ID_count', getSetting('LOOKUP_MAX_COUNT'));
-@define('CONST_PolygonOutput_MaximumTypes', getSetting('POLYGON_OUTPUT_MAX_TYPES'));
-@define('CONST_Search_BatchMode', getSettingBool('SEARCH_BATCH_MODE'));
-@define('CONST_Search_NameOnlySearchFrequencyThreshold', getSetting('SEARCH_NAME_ONLY_THRESHOLD'));
-@define('CONST_Use_US_Tiger_Data', getSettingBool('USE_US_TIGER_DATA'));
-@define('CONST_MapIcon_URL', getSetting('MAPICON_URL', false));
-@define('CONST_TokenizerDir', CONST_InstallDir.'/tokenizer');
-
-require_once(CONST_LibDir.'/Geocode.php');
-
-$oDB = new Nominatim\DB();
-$oDB->connect();
-
-$bVerbose = $aResult['verbose'];
-
-function print_results($aResults, $bVerbose)
-{
- if ($bVerbose) {
- if ($aResults && count($aResults)) {
- echo $aResults[0]['langaddress']."\n";
- } else {
- echo "<not found>\n";
- }
- } else {
- echo '.';
- }
-}
-
-if (!$aResult['search-only']) {
- $oReverseGeocode = new Nominatim\ReverseGeocode($oDB);
- $oReverseGeocode->setZoom(20);
- $oPlaceLookup = new Nominatim\PlaceLookup($oDB);
- $oPlaceLookup->setIncludeAddressDetails(true);
- $oPlaceLookup->setLanguagePreference(array('en'));
-
- echo 'Warm reverse: ';
- if ($bVerbose) {
- echo "\n";
- }
- for ($i = 0; $i < 1000; $i++) {
- $fLat = rand(-9000, 9000) / 100;
- $fLon = rand(-18000, 18000) / 100;
- if ($bVerbose) {
- echo "$fLat, $fLon = ";
- }
-
- $oLookup = $oReverseGeocode->lookup($fLat, $fLon);
- $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null;
- print_results($aSearchResults, $bVerbose);
- }
- echo "\n";
-}
-
-if (!$aResult['reverse-only']) {
- $oGeocode = new Nominatim\Geocode($oDB);
-
- echo 'Warm search: ';
- if ($bVerbose) {
- echo "\n";
- }
-
- $oTokenizer = new \Nominatim\Tokenizer($oDB);
-
- $aWords = $oTokenizer->mostFrequentWords(1000);
-
- $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000';
- foreach ($aWords as $sWord) {
- if ($bVerbose) {
- echo "$sWord = ";
- }
-
- $oGeocode->setLanguagePreference(array('en'));
- $oGeocode->setQuery($sWord);
- $aSearchResults = $oGeocode->lookup();
- print_results($aSearchResults, $bVerbose);
- }
- echo "\n";
-}
$sPlaceId = $oParams->getString('place_id');
$sOsmType = $oParams->getSet('osmtype', array('N', 'W', 'R'));
-$iOsmId = $oParams->getInt('osmid', -1);
+$iOsmId = $oParams->getInt('osmid', 0);
$sClass = $oParams->getString('class');
$bIncludeKeywords = $oParams->getBool('keywords', false);
$sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder));
-if ($sOsmType && $iOsmId > 0) {
+if ($sOsmType && $iOsmId !== 0) {
$sSQL = 'SELECT place_id FROM placex WHERE osm_type = :type AND osm_id = :id';
$aSQLParams = array(':type' => $sOsmType, ':id' => $iOsmId);
// osm_type and osm_id are not unique enough
-- --- Return the record for the base entry.
+ current_rank_address := 1000;
FOR location IN
SELECT placex.place_id, osm_type, osm_id, name,
coalesce(extratags->'linked_place', extratags->'place') as place_type,
-- If the place had a postcode assigned, take this one only
-- into consideration when it is an area and the place does not have
-- a postcode itself.
- IF location.fromarea AND location.isaddress
+ IF location.fromarea AND location_isaddress
AND (place.address is null or not place.address ? 'postcode')
THEN
place.postcode := null; -- remove the less exact postcode
-- Remove the place from the list of places to be deleted
DELETE FROM place_to_be_deleted pdel
WHERE pdel.osm_type = NEW.osm_type and pdel.osm_id = NEW.osm_id
- and pdel.class = NEW.class;
+ and pdel.class = NEW.class and pdel.type = NEW.type;
-- Have we already done this place?
SELECT * INTO existing
RETURN NULL;
END;
$$ LANGUAGE plpgsql;
-
-CREATE OR REPLACE FUNCTION flush_deleted_places()
- RETURNS INTEGER
- AS $$
-BEGIN
- -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
- INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
- SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
-
- -- delete from place table
- ALTER TABLE place DISABLE TRIGGER place_before_delete;
- DELETE FROM place USING place_to_be_deleted
- WHERE place.osm_type = place_to_be_deleted.osm_type
- and place.osm_id = place_to_be_deleted.osm_id
- and place.class = place_to_be_deleted.class
- and place.type = place_to_be_deleted.type
- and not deferred;
- ALTER TABLE place ENABLE TRIGGER place_before_delete;
-
- -- Mark for delete in the placex table
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
- UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
- WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
- and placex.osm_id = place_to_be_deleted.osm_id
- and placex.class = place_to_be_deleted.class
- and placex.type = place_to_be_deleted.type
- and not deferred;
-
- -- Mark for delete in interpolations
- UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
- WHERE place_to_be_deleted.osm_type = 'W'
- and place_to_be_deleted.class = 'place'
- and place_to_be_deleted.type = 'houses'
- and location_property_osmline.osm_id = place_to_be_deleted.osm_id
- and not deferred;
-
- -- Clear todo list.
- TRUNCATE TABLE place_to_be_deleted;
-
- RETURN NULL;
-END;
-$$ LANGUAGE plpgsql;
-
END IF;
RETURN ST_Envelope(ST_Collect(
- ST_Project(geom, radius, 0.785398)::geometry,
- ST_Project(geom, radius, 3.9269908)::geometry));
+ ST_Project(geom::geography, radius, 0.785398)::geometry,
+ ST_Project(geom::geography, radius, 3.9269908)::geometry));
END;
$$
LANGUAGE plpgsql IMMUTABLE;
END;
$$
LANGUAGE plpgsql;
+
+CREATE OR REPLACE FUNCTION flush_deleted_places()
+ RETURNS INTEGER
+ AS $$
+BEGIN
+ -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through
+ INSERT INTO import_polygon_delete (osm_type, osm_id, class, type)
+ SELECT osm_type, osm_id, class, type FROM place_to_be_deleted WHERE deferred;
+
+ -- delete from place table
+ ALTER TABLE place DISABLE TRIGGER place_before_delete;
+ DELETE FROM place USING place_to_be_deleted
+ WHERE place.osm_type = place_to_be_deleted.osm_type
+ and place.osm_id = place_to_be_deleted.osm_id
+ and place.class = place_to_be_deleted.class
+ and place.type = place_to_be_deleted.type
+ and not deferred;
+ ALTER TABLE place ENABLE TRIGGER place_before_delete;
+
+ -- Mark for delete in the placex table
+ UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+ WHERE placex.osm_type = 'N' and place_to_be_deleted.osm_type = 'N'
+ and placex.osm_id = place_to_be_deleted.osm_id
+ and placex.class = place_to_be_deleted.class
+ and placex.type = place_to_be_deleted.type
+ and not deferred;
+ UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+ WHERE placex.osm_type = 'W' and place_to_be_deleted.osm_type = 'W'
+ and placex.osm_id = place_to_be_deleted.osm_id
+ and placex.class = place_to_be_deleted.class
+ and placex.type = place_to_be_deleted.type
+ and not deferred;
+ UPDATE placex SET indexed_status = 100 FROM place_to_be_deleted
+ WHERE placex.osm_type = 'R' and place_to_be_deleted.osm_type = 'R'
+ and placex.osm_id = place_to_be_deleted.osm_id
+ and placex.class = place_to_be_deleted.class
+ and placex.type = place_to_be_deleted.type
+ and not deferred;
+
+ -- Mark for delete in interpolations
+ UPDATE location_property_osmline SET indexed_status = 100 FROM place_to_be_deleted
+ WHERE place_to_be_deleted.osm_type = 'W'
+ and place_to_be_deleted.class = 'place'
+ and place_to_be_deleted.type = 'houses'
+ and location_property_osmline.osm_id = place_to_be_deleted.osm_id
+ and not deferred;
+
+ -- Clear todo list.
+ TRUNCATE TABLE place_to_be_deleted;
+
+ RETURN NULL;
+END;
+$$ LANGUAGE plpgsql;
from nominatim.cli import get_set_parser
def get_parser():
- parser = get_set_parser(phpcgi_path='@PHPCGI_BIN@')
+ parser = get_set_parser()
return parser.parser
from .core import (NominatimAPI as NominatimAPI,
NominatimAPIAsync as NominatimAPIAsync)
+from .connection import (SearchConnection as SearchConnection)
from .status import (StatusResult as StatusResult)
from .types import (PlaceID as PlaceID,
OsmID as OsmID,
"""
Extended SQLAlchemy connection class that also includes access to the schema.
"""
-from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set
+from typing import cast, Any, Mapping, Sequence, Union, Dict, Optional, Set, \
+ Awaitable, Callable, TypeVar
+import asyncio
import sqlalchemy as sa
from sqlalchemy.ext.asyncio import AsyncConnection
from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.logging import log
+T = TypeVar('T')
+
class SearchConnection:
""" An extended SQLAlchemy connection class, that also contains
then table definitions. The underlying asynchronous SQLAlchemy
self.t = tables # pylint: disable=invalid-name
self._property_cache = properties
self._classtables: Optional[Set[str]] = None
+ self.query_timeout: Optional[int] = None
+
+
+ def set_query_timeout(self, timeout: Optional[int]) -> None:
+ """ Set the timeout after which a query over this connection
+ is cancelled.
+ """
+ self.query_timeout = timeout
async def scalar(self, sql: sa.sql.base.Executable,
""" Execute a 'scalar()' query on the connection.
"""
log().sql(self.connection, sql, params)
- return await self.connection.scalar(sql, params)
+ return await asyncio.wait_for(self.connection.scalar(sql, params), self.query_timeout)
async def execute(self, sql: 'sa.Executable',
""" Execute a 'execute()' query on the connection.
"""
log().sql(self.connection, sql, params)
- return await self.connection.execute(sql, params)
+ return await asyncio.wait_for(self.connection.execute(sql, params), self.query_timeout)
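Note: asyncio.wait_for() treats a timeout of None as "wait forever", so an
unset query_timeout keeps the previous behaviour. A minimal standalone sketch
of the cancellation semantics (all names here are illustrative, not part of
Nominatim):

import asyncio

async def slow_query() -> None:
    await asyncio.sleep(60)              # stands in for a long database call

async def main() -> None:
    try:
        # cancels slow_query() and raises TimeoutError after one second
        await asyncio.wait_for(slow_query(), timeout=1)
    except asyncio.TimeoutError:
        print('query cancelled after timeout')

asyncio.run(main())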
async def get_property(self, name: str, cached: bool = True) -> str:
Raises a ValueError if the property does not exist.
"""
- if name.startswith('DB:'):
- raise ValueError(f"Illegal property value '{name}'.")
+ lookup_name = f'DBPROP:{name}'
- if cached and name in self._property_cache:
- return cast(str, self._property_cache[name])
+ if cached and lookup_name in self._property_cache:
+ return cast(str, self._property_cache[lookup_name])
sql = sa.select(self.t.properties.c.value)\
.where(self.t.properties.c.property == name)
if value is None:
raise ValueError(f"Property '{name}' not found in database.")
- self._property_cache[name] = cast(str, value)
+ self._property_cache[lookup_name] = cast(str, value)
return cast(str, value)
return self._property_cache['DB:server_version']
+ async def get_cached_value(self, group: str, name: str,
+ factory: Callable[[], Awaitable[T]]) -> T:
+ """ Access the cache for this Nominatim instance.
+ Each cache value needs to belong to a group and have a name.
+ This function is for internal API use only.
+
+ `factory` is an async callback function that produces
+ the value if it is not already cached.
+
+ Returns the cached value or the result of factory (also caching
+ the result).
+ """
+ full_name = f'{group}:{name}'
+
+ if full_name in self._property_cache:
+ return cast(T, self._property_cache[full_name])
+
+ value = await factory()
+ self._property_cache[full_name] = value
+
+ return value
+
+
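A sketch of how internal code might use the new per-instance cache; the group
and factory here are made up for illustration, but the pattern matches the
real use in results.py further down (_get_country_names):

from nominatim.api import SearchConnection

async def cached_country_name(conn: SearchConnection, cc: str) -> str:
    async def _factory() -> str:
        # placeholder for an expensive database lookup
        return cc.upper()

    # the first call runs _factory(); later calls return the cached value
    return await conn.get_cached_value('EXAMPLE', cc, _factory)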
async def get_class_table(self, cls: str, typ: str) -> Optional[SaFromClause]:
""" Lookup up if there is a classtype table for the given category
and return a SQLAlchemy table for it, if it exists.
"""
from typing import Mapping, Optional, Any, AsyncIterator, Dict, Sequence, List, Tuple
import asyncio
+import sys
import contextlib
from pathlib import Path
from nominatim.api.results import DetailedResult, ReverseResult, SearchResults
-class NominatimAPIAsync:
- """ API loader asynchornous version.
+class NominatimAPIAsync: #pylint: disable=too-many-instance-attributes
+ """ The main frontend to the Nominatim database implements the
+ functions for lookup, forward and reverse geocoding using
+ asynchronous functions.
+
+ This class shares most of the functions with its synchronous
+ version. There are some additional functions or parameters,
+ which are documented below.
"""
def __init__(self, project_dir: Path,
- environ: Optional[Mapping[str, str]] = None) -> None:
+ environ: Optional[Mapping[str, str]] = None,
+ loop: Optional[asyncio.AbstractEventLoop] = None) -> None:
+ """ Initiate a new frontend object with synchronous API functions.
+
+ Parameters:
+ project_dir: Path to the
+ [project directory](../admin/Import.md#creating-the-project-directory)
+ of the local Nominatim installation.
+ environ: Mapping of [configuration parameters](../customize/Settings.md).
+ When set, replaces any configuration via environment variables.
+ Settings in this mapping also have precedence over any
+ parameters found in the `.env` file of the project directory.
+ loop: The asyncio event loop that will be used when calling
+ functions. Only needed when a custom event loop is used
+ and the Python version is 3.9 or earlier.
+ """
self.config = Configuration(project_dir, environ)
+ self.query_timeout = self.config.get_int('QUERY_TIMEOUT') \
+ if self.config.QUERY_TIMEOUT else None
+ self.reverse_restrict_to_country_area = self.config.get_bool('SEARCH_WITHIN_COUNTRIES')
self.server_version = 0
- self._engine_lock = asyncio.Lock()
+ if sys.version_info >= (3, 10):
+ self._engine_lock = asyncio.Lock()
+ else:
+ self._engine_lock = asyncio.Lock(loop=loop) # pylint: disable=unexpected-keyword-arg
self._engine: Optional[sa_asyncio.AsyncEngine] = None
self._tables: Optional[SearchTables] = None
self._property_cache: Dict[str, Any] = {'DB:server_version': 0}
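The new query_timeout is driven by configuration. A usage sketch, assuming the
setting is exposed with the usual NOMINATIM_ prefix (path and value are
placeholders):

from pathlib import Path
from nominatim.api import NominatimAPIAsync

api = NominatimAPIAsync(Path('/srv/nominatim-project'),
                        environ={'NOMINATIM_QUERY_TIMEOUT': '10'})  # seconds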
async def setup_database(self) -> None:
- """ Set up the engine and connection parameters.
+ """ Set up the SQL engine and connections.
This function will be implicitly called when the database is
accessed for the first time. You may also call it explicitly to
if self._engine:
return
- dsn = self.config.get_database_params()
+ extra_args: Dict[str, Any] = {'future': True,
+ 'echo': self.config.get_bool('DEBUG_SQL')}
- query = {k: v for k, v in dsn.items()
- if k not in ('user', 'password', 'dbname', 'host', 'port')}
- if PGCORE_LIB == 'asyncpg':
- query['prepared_statement_cache_size'] = '0'
+ is_sqlite = self.config.DATABASE_DSN.startswith('sqlite:')
- dburl = sa.engine.URL.create(
- f'postgresql+{PGCORE_LIB}',
- database=dsn.get('dbname'),
- username=dsn.get('user'), password=dsn.get('password'),
- host=dsn.get('host'), port=int(dsn['port']) if 'port' in dsn else None,
- query=query)
- engine = sa_asyncio.create_async_engine(dburl, future=True,
- echo=self.config.get_bool('DEBUG_SQL'))
+ if is_sqlite:
+ params = dict((p.split('=', 1)
+ for p in self.config.DATABASE_DSN[7:].split(';')))
+ dburl = sa.engine.URL.create('sqlite+aiosqlite',
+ database=params.get('dbname'))
+
+ else:
+ dsn = self.config.get_database_params()
+ query = {k: v for k, v in dsn.items()
+ if k not in ('user', 'password', 'dbname', 'host', 'port')}
+
+ dburl = sa.engine.URL.create(
+ f'postgresql+{PGCORE_LIB}',
+ database=dsn.get('dbname'),
+ username=dsn.get('user'),
+ password=dsn.get('password'),
+ host=dsn.get('host'),
+ port=int(dsn['port']) if 'port' in dsn else None,
+ query=query)
+ extra_args['max_overflow'] = 0
+ extra_args['pool_size'] = self.config.get_int('API_POOL_SIZE')
+
+ engine = sa_asyncio.create_async_engine(dburl, **extra_args)
try:
async with engine.begin() as conn:
except (PGCORE_ERROR, sa.exc.OperationalError):
server_version = 0
- if server_version >= 110000:
+ if server_version >= 110000 and not is_sqlite:
@sa.event.listens_for(engine.sync_engine, "connect")
def _on_connect(dbapi_con: Any, _: Any) -> None:
cursor = dbapi_con.cursor()
# Make sure that all connections get the new settings
await self.close()
+ if is_sqlite:
+ @sa.event.listens_for(engine.sync_engine, "connect")
+ def _on_sqlite_connect(dbapi_con: Any, _: Any) -> None:
+ dbapi_con.run_async(lambda conn: conn.enable_load_extension(True))
+ cursor = dbapi_con.cursor()
+ cursor.execute("SELECT load_extension('mod_spatialite')")
+ cursor.execute('SELECT SetDecimalPrecision(7)')
+ dbapi_con.run_async(lambda conn: conn.enable_load_extension(False))
+
self._property_cache['DB:server_version'] = server_version
self._tables = SearchTables(sa.MetaData(), engine.name) # pylint: disable=no-member
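The SQLite branch above expects a DSN of the form 'sqlite:dbname=<file>'.
A small sketch of the parsing it performs (the path is a placeholder):

dsn = 'sqlite:dbname=/srv/nominatim/search.sqlite'
params = dict(p.split('=', 1) for p in dsn[7:].split(';'))  # strip 'sqlite:'
assert params['dbname'] == '/srv/nominatim/search.sqlite'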
"""
try:
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
status = await get_status(conn)
except (PGCORE_ERROR, sa.exc.OperationalError):
return StatusResult(700, 'Database connection failed')
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return await get_detailed_place(conn, place, details)
"""
details = ntyp.LookupDetails.from_kwargs(params)
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
return SearchResults(filter(None,
details = ntyp.ReverseDetails.from_kwargs(params)
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
if details.keywords:
await make_query_analyzer(conn)
- geocoder = ReverseGeocoder(conn, details)
+ geocoder = ReverseGeocoder(conn, details,
+ self.reverse_restrict_to_country_area)
return await geocoder.lookup(coord)
raise UsageError('Nothing to search for.')
async with self.begin() as conn:
- geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params))
+ conn.set_query_timeout(self.query_timeout)
+ geocoder = ForwardGeocoder(conn, ntyp.SearchDetails.from_kwargs(params),
+ self.config.get_int('REQUEST_TIMEOUT') \
+ if self.config.REQUEST_TIMEOUT else None)
phrases = [Phrase(PhraseType.NONE, p.strip()) for p in query.split(',')]
return await geocoder.lookup(phrases)
""" Find an address using structured search.
"""
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
details = ntyp.SearchDetails.from_kwargs(params)
phrases: List[Phrase] = []
if amenity:
details.layers |= ntyp.DataLayer.POI
- geocoder = ForwardGeocoder(conn, details)
+ geocoder = ForwardGeocoder(conn, details,
+ self.config.get_int('REQUEST_TIMEOUT') \
+ if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup(phrases)
details = ntyp.SearchDetails.from_kwargs(params)
async with self.begin() as conn:
+ conn.set_query_timeout(self.query_timeout)
if near_query:
phrases = [Phrase(PhraseType.NONE, p) for p in near_query.split(',')]
else:
if details.keywords:
await make_query_analyzer(conn)
- geocoder = ForwardGeocoder(conn, details)
+ geocoder = ForwardGeocoder(conn, details,
+ self.config.get_int('REQUEST_TIMEOUT') \
+ if self.config.REQUEST_TIMEOUT else None)
return await geocoder.lookup_pois(categories, phrases)
class NominatimAPI:
- """ API loader, synchronous version.
+ """ This class provides a thin synchronous wrapper around the asynchronous
+ Nominatim functions. It creates its own event loop and runs each
+ synchronous function call to completion using that loop.
"""
def __init__(self, project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> None:
+ """ Initiate a new frontend object with synchronous API functions.
+
+ Parameters:
+ project_dir: Path to the
+ [project directory](../admin/Import.md#creating-the-project-directory)
+ of the local Nominatim installation.
+ environ: Mapping of [configuration parameters](../customize/Settings.md).
+ When set, replaces any configuration via environment variables.
+ Settings in this mapping also have precedence over any
+ parameters found in the `.env` file of the project directory.
+ """
self._loop = asyncio.new_event_loop()
- self._async_api = NominatimAPIAsync(project_dir, environ)
+ self._async_api = NominatimAPIAsync(project_dir, environ, loop=self._loop)
def close(self) -> None:
- """ Close all active connections to the database. The NominatimAPIAsync
- object remains usable after closing. If a new API functions is
- called, new connections are created.
+ """ Close all active connections to the database.
+
+ This function also closes the asynchronous worker loop, making
+ the NominatimAPI object unusable.
"""
self._loop.run_until_complete(self._async_api.close())
self._loop.close()
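A lifecycle sketch for the synchronous frontend reflecting the new close()
semantics (the project path is a placeholder):

from pathlib import Path
import nominatim.api as napi

api = napi.NominatimAPI(Path('/srv/nominatim-project'))
try:
    print(api.status().message)
finally:
    api.close()   # closes connections and the internal event loop for good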
@property
def config(self) -> Configuration:
- """ Return the configuration used by the API.
+ """ Provide read-only access to the [configuration](#Configuration)
+ used by the API.
"""
return self._async_api.config
def status(self) -> StatusResult:
- """ Return the status of the database.
+ """ Return the status of the database as a dataclass object
+ with the fields described below.
+
+ Returns:
+ status(int): A status code as described on the status page.
+ message(str): Either 'OK' or a human-readable message of the
+ problem encountered.
+ software_version(tuple): A tuple with the version of the
+ Nominatim library consisting of (major, minor, patch, db-patch)
+ version.
+ database_version(tuple): A tuple with the version of the library
+ which was used for the import or last migration.
+ Also consists of (major, minor, patch, db-patch).
+ data_updated(datetime): Timestamp with the age of the data.
"""
return self._loop.run_until_complete(self._async_api.status())
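A sketch of reading the documented status fields (reusing `api` from the
sketch above; printed values are illustrative):

status = api.status()
print(status.status, status.message)        # e.g. 0 OK
print(status.software_version)              # (major, minor, patch, db-patch)
print(status.database_version, status.data_updated)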
def details(self, place: ntyp.PlaceRef, **params: Any) -> Optional[DetailedResult]:
""" Get detailed information about a place in the database.
+
+ The result is a dataclass object with the fields described below
+ or `None` if the place could not be found in the database.
+
+ Parameters:
+ place: Description of the place to look up. See
+ [Place identification](Input-Parameter-Types.md#place-identification)
+ for the various ways to reference a place.
+
+ Other parameters:
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ parent_place_id (Optional[int]): Internal ID of the parent of this
+ place. Only meaningful for POI-like objects (places with a
+ rank_address of 30).
+ linked_place_id (Optional[int]): Internal ID of the place this object
+ links to. When this ID is set, there is no guarantee that
+ the rest of the result information is complete.
+ admin_level (int): Value of the `admin_level` OSM tag. Only meaningful
+ for administrative boundary objects.
+ indexed_date (datetime): Timestamp when the place was last updated.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.details(place, **params))
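A usage sketch for details(), reusing the `napi` import and `api` object from
the sketches above (the OSM id is a placeholder):

place = api.details(napi.OsmID('N', 123456), address_details=True)
if place is not None:
    print(place.category, place.rank_address, place.centroid)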
""" Get simple information about a list of places.
Returns a list of place information for all IDs that were found.
+ Each result is a dataclass with the fields detailed below.
+
+ Parameters:
+ places: List of descriptions of the place to look up. See
+ [Place identification](Input-Parameter-Types.md#place-identification)
+ for the various ways to reference a place.
+
+ Other parameters:
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ bbox (Bbox): Bounding box of the full geometry of the place.
+ If the place is a single point, then the size of the bounding
+ box is guessed according to the type of place.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(self._async_api.lookup(places, **params))
def reverse(self, coord: ntyp.AnyPoint, **params: Any) -> Optional[ReverseResult]:
""" Find a place by its coordinates. Also known as reverse geocoding.
- Returns the closest result that can be found or None if
- no place matches the given criteria.
+ Returns the closest result that can be found or `None` if
+ no place matches the given criteria. The result is a dataclass
+ with the fields as detailed below.
+
+ Parameters:
+ coord: Coordinate to lookup the place for as a Point
+ or a tuple (x, y). Must be in WGS84 projection.
+
+ Other parameters:
+ max_rank (int): Highest address rank to return. Can be used to
+ restrict search to streets or settlements.
+ layers (enum): Defines the kind of data to take into account.
+ See description of layers below. (Default: addresses and POIs)
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ bbox (Bbox): Bounding box of the full geometry of the place.
+ If the place is a single point, then the size of the bounding
+ box is guessed according to the type of place.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
+ distance (Optional[float]): Distance in degrees from the input point.
"""
return self._loop.run_until_complete(self._async_api.reverse(coord, **params))
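A usage sketch for reverse(); the coordinate tuple is (x, y) = (lon, lat)
and purely illustrative:

result = api.reverse((8.6821, 50.1109), max_rank=18)
if result is not None:
    print(result.category, result.distance)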
def search(self, query: str, **params: Any) -> SearchResults:
""" Find a place by free-text search. Also known as forward geocoding.
+
+ Parameters:
+ query: Free-form text query searching for a place.
+
+ Other parameters:
+ max_results (int): Maximum number of results to return. The
+ actual number of results may be less. (Default: 10)
+ min_rank (int): Lowest permissible rank for the result.
+ For addressable places this is the minimum
+ [address rank](../customize/Ranking.md#address-rank). For all
+ other places the [search rank](../customize/Ranking.md#search-rank)
+ is used.
+ max_rank (int): Highest permissible rank for the result. See min_rank above.
+ layers (enum): Defines the kind of data to take into account.
+ See [layers section](Input-Parameter-Types.md#layers) for details.
+ (Default: addresses and POIs)
+ countries (list[str]): Restrict search to countries with the given
+ ISO 3166-1 alpha-2 country code. An empty list (the default)
+ disables this filter.
+ excluded (list[int]): A list of internal IDs of places to exclude
+ from the search.
+ viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+ bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+ as a filter and return only results within the bounding box.
+ near (Optional[Point]): Focus search around the given point and
+ return results ordered by distance to the given point.
+ near_radius (Optional[float]): Restrict results to results within
+ the given distance in degrees of `near` point. Ignored, when
+ `near` is not set.
+ categories (list[tuple]): Restrict search to places of the given
+ categories. The category is the main OSM tag assigned to each
+ place. An empty list (the default) disables this filter.
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ bbox (Bbox): Bounding box of the full geometry of the place.
+ If the place is a single point, then the size of the bounding
+ box is guessed according to the type of place.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search(query, **params))
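A usage sketch for search() (query and filters are illustrative):

results = api.search('Brandenburg Gate, Berlin', max_results=3, countries=['de'])
for hit in results:
    print(hit.place_id, hit.category, hit.rank_search)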
postalcode: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an address using structured search.
+
+ Parameters:
+ amenity: Name of a POI.
+ street: Street and optionally housenumber of the address. If the address
+ does not have a street, then the place the housenumber refers to.
+ city: Postal city of the address.
+ county: County equivalent of the address. Does not exist in all
+ jurisdictions.
+ state: State or province of the address.
+ country: Country with its full name or its ISO 3166-1 alpha-2 country code.
+ Do not use together with the country_code filter.
+ postalcode: Post code or ZIP for the place.
+
+ Other parameters:
+ max_results (int): Maximum number of results to return. The
+ actual number of results may be less. (Default: 10)
+ min_rank (int): Lowest permissible rank for the result.
+ For addressable places this is the minimum
+ [address rank](../customize/Ranking.md#address-rank). For all
+ other places the [search rank](../customize/Ranking.md#search-rank)
+ is used.
+ max_rank (int): Highest permissible rank for the result. See min_rank above.
+ layers (enum): Defines the kind of data to take into account.
+ See [layers section](Input-Parameter-Types.md#layers) for details.
+ (Default: addresses and POIs)
+ countries (list[str]): Restrict search to countries with the given
+ ISO 3166-1 alpha-2 country code. An empty list (the default)
+ disables this filter. Do not use when the country parameter
+ is used.
+ excluded (list[int]): A list of internal IDs of places to exclude
+ from the search.
+ viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+ bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+ as a filter and return only results within the bounding box.
+ near (Optional[Point]): Focus search around the given point and
+ return results ordered by distance to the given point.
+ near_radius (Optional[float]): Restrict results to results within
+ the given distance in degrees of `near` point. Ignored, when
+ `near` is not set.
+ categories (list[tuple]): Restrict search to places of the given
+ categories. The category is the main OSM tag assigned to each
+ place. An empty list (the default) disables this filter.
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ bbox (Bbox): Bounding box of the full geometry of the place.
+ If the place is a single point, then the size of the bounding
+ box is guessed according to the type of place.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_address(amenity, street, city, county,
near_query: Optional[str] = None,
**params: Any) -> SearchResults:
""" Find an object of a certain category near another place.
+
The near place may either be given as an unstructured search
query in itself or as a geographic area through the
viewbox or near parameters.
+
+ Parameters:
+ categories: Restrict search to places of the given
+ categories. The category is the main OSM tag assigned to each
+ place.
+ near_query: Optional free-text query to define the area to
+ restrict search to.
+
+ Other parameters:
+ max_results (int): Maximum number of results to return. The
+ actual number of results may be less. (Default: 10)
+ min_rank (int): Lowest permissible rank for the result.
+ For addressable places this is the minimum
+ [address rank](../customize/Ranking.md#address-rank). For all
+ other places the [search rank](../customize/Ranking.md#search-rank)
+ is used.
+ max_rank (int): Highest permissible rank for the result. See min_rank above.
+ layers (enum): Defines the kind of data to take into account.
+ See [layers section](Input-Parameter-Types.md#layers) for details.
+ (Default: addresses and POIs)
+ countries (list[str]): Restrict search to countries with the given
+ ISO 3166-1 alpha-2 country code. An empty list (the default)
+ disables this filter.
+ excluded (list[int]): A list of internal IDs of places to exclude
+ from the search.
+ viewbox (Optional[Bbox]): Bounding box of an area to focus search on.
+ bounded_viewbox (bool): Consider the bounding box given in `viewbox`
+ as a filter and return only results within the bounding box.
+ near (Optional[Point]): Focus search around the given point and
+ return results ordered by distance to the given point.
+ near_radius (Optional[float]): Restrict results to results within
+ the given distance in degrees of `near` point. Ignored, when
+ `near` is not set.
+ geometry_output (enum): Add the full geometry of the place to the result.
+ Multiple formats may be selected. Note that geometries can become
+ quite large. (Default: none)
+ geometry_simplification (float): Simplification factor to use on
+ the geometries before returning them. The factor expresses
+ the tolerance in degrees from which the geometry may differ.
+ Topology is preserved. (Default: 0.0)
+ address_details (bool): Add detailed information about the places
+ that make up the address of the requested object. (Default: False)
+ linked_places (bool): Add detailed information about the places
+ that link to the result. (Default: False)
+ parented_places (bool): Add detailed information about all places
+ for which the requested object is a parent, i.e. all places for
+ which the object provides the address details.
+ Only POI places can have parents. (Default: False)
+ keywords (bool): Add detailed information about the search terms
+ used for this place.
+
+ Returns:
+ source_table (enum): Data source of the place. See below for possible values.
+ category (tuple): A tuple of two strings with the primary OSM tag
+ and value.
+ centroid (Point): Point position of the place.
+ place_id (Optional[int]): Internal ID of the place. This ID may differ
+ for the same place between different installations.
+ osm_object (Optional[tuple]): OSM type and ID of the place, if available.
+ names (Optional[dict]): Dictionary of names of the place. Keys are
+ usually the corresponding OSM tag keys.
+ address (Optional[dict]): Dictionary of address parts directly
+ attributed to the place. Keys are usually the corresponding
+ OSM tag keys with the `addr:` prefix removed.
+ extratags (Optional[dict]): Dictionary of additional attributes for
+ the place. Usually OSM tag keys and values.
+ housenumber (Optional[str]): House number of the place, normalised
+ for lookup. To get the house number in its original spelling,
+ use `address['housenumber']`.
+ postcode (Optional[str]): Computed postcode for the place. To get
+ directly attributed postcodes, use `address['postcode']` instead.
+ wikipedia (Optional[str]): Reference to a wikipedia site for the place.
+ The string has the format <language code>:<wikipedia title>.
+ rank_address (int): [Address rank](../customize/Ranking.md#address-rank).
+ rank_search (int): [Search rank](../customize/Ranking.md#search-rank).
+ importance (Optional[float]): Relative importance of the place. This is a measure
+ of how likely the place is to be searched for.
+ country_code (Optional[str]): Country the feature is in as
+ ISO 3166-1 alpha-2 country code.
+ address_rows (Optional[AddressLines]): List of places that make up the
+ computed address. `None` when `address_details` parameter was False.
+ linked_rows (Optional[AddressLines]): List of places that link to the object.
+ `None` when `linked_places` parameter was False.
+ parented_rows (Optional[AddressLines]): List of direct children of the place.
+ `None` when `parented_places` parameter was False.
+ name_keywords (Optional[WordInfos]): List of search words for the name of
+ the place. `None` when `keywords` parameter is set to False.
+ address_keywords (Optional[WordInfos]): List of search words for the address of
+ the place. `None` when `keywords` parameter is set to False.
+ bbox (Bbox): Bounding box of the full geometry of the place.
+ If the place is a single point, then the size of the bounding
+ box is guessed according to the type of place.
+ geometry (dict): Dictionary containing the full geometry of the place
+ in the formats requested in the `geometry_output` parameter.
"""
return self._loop.run_until_complete(
self._async_api.search_category(categories, near_query, **params))
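A usage sketch for search_category() (the category and near query are
illustrative):

results = api.search_category([('amenity', 'pharmacy')],
                              near_query='Hamburg',
                              max_results=5)
for hit in results:
    print(hit.names, hit.centroid)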
if sa.__version__.startswith('1'):
try:
+ sqlstr = re.sub(r'__\[POSTCOMPILE_[^]]*\]', '%s', sqlstr)
return sqlstr % tuple((repr(params.get(name, None))
for name in compiled.positiontup)) # type: ignore
except TypeError:
# Fixes an odd issue with Python 3.7 where percentages are not
# quoted correctly.
sqlstr = re.sub(r'%(?!\()', '%%', sqlstr)
+ sqlstr = re.sub(r'__\[POSTCOMPILE_([^]]*)\]', r'%(\1)s', sqlstr)
return sqlstr % params
-
class HTMLLogger(BaseLogger):
""" Logger that formats messages in HTML.
"""
self.buffer = io.StringIO()
+ def _timestamp(self) -> None:
+ self._write(f'[{dt.datetime.now()}]\n')
+
+
def get_buffer(self) -> str:
return self.buffer.getvalue()
def section(self, heading: str) -> None:
+ self._timestamp()
self._write(f"\n# {heading}\n\n")
def result_dump(self, heading: str, results: Iterator[Tuple[Any, Any]]) -> None:
+ self._timestamp()
self._write(f'{heading}:\n')
total = 0
for rank, res in results:
def sql(self, conn: AsyncConnection, statement: 'sa.Executable',
params: Union[Mapping[str, Any], Sequence[Mapping[str, Any]], None]) -> None:
+ self._timestamp()
sqlstr = '\n| '.join(textwrap.wrap(self.format_sql(conn, statement, params), width=78))
self._write(f"| {sqlstr}\n\n")
t.c.importance, t.c.wikipedia, t.c.indexed_date,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
t.c.linked_place_id,
+ t.c.geometry.ST_Expand(0).label('bbox'),
t.c.centroid)
if isinstance(place, ntyp.PlaceID):
sql = sql.where(t.c.osm_id == place.osm_id).limit(1)
if place.osm_class and place.osm_class.isdigit():
sql = sql.order_by(sa.func.greatest(0,
- sa.func.least(int(place.osm_class) - t.c.endnumber),
- t.c.startnumber - int(place.osm_class)))
+ int(place.osm_class) - t.c.endnumber,
+ t.c.startnumber - int(place.osm_class)))
else:
return None
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
- return sql.add_columns(sa.literal_column(f"""
- ST_AsGeoJSON(CASE WHEN ST_NPoints({column.name}) > 5000
- THEN ST_SimplifyPreserveTopology({column.name}, 0.0001)
- ELSE {column.name} END)
- """).label('geometry_geojson'))
+ return sql.add_columns(sa.func.ST_AsGeoJSON(
+ sa.case((sa.func.ST_NPoints(column) > 5000,
+ sa.func.ST_SimplifyPreserveTopology(column, 0.0001)),
+ else_=column), 7).label('geometry_geojson'))
else:
def _add_geometry(sql: SaSelect, column: SaColumn) -> SaSelect:
return sql.add_columns(sa.func.ST_GeometryType(column).label('geometry_type'))
# add missing details
assert result is not None
- result.parent_place_id = row.parent_place_id
- result.linked_place_id = getattr(row, 'linked_place_id', None)
- result.admin_level = getattr(row, 'admin_level', 15)
+ if 'type' in result.geometry:
+ result.geometry['type'] = GEOMETRY_TYPE_MAP.get(result.geometry['type'],
+ result.geometry['type'])
indexed_date = getattr(row, 'indexed_date', None)
if indexed_date is not None:
result.indexed_date = indexed_date.replace(tzinfo=dt.timezone.utc)
out = []
if details.geometry_simplification > 0.0:
- col = col.ST_SimplifyPreserveTopology(details.geometry_simplification)
+ col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & ntyp.GeometryFormat.GEOJSON:
- out.append(col.ST_AsGeoJSON().label('geometry_geojson'))
+ out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & ntyp.GeometryFormat.TEXT:
- out.append(col.ST_AsText().label('geometry_text'))
+ out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & ntyp.GeometryFormat.KML:
- out.append(col.ST_AsKML().label('geometry_kml'))
+ out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & ntyp.GeometryFormat.SVG:
- out.append(col.ST_AsSVG().label('geometry_svg'))
+ out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
# add missing details
assert result is not None
- result.bbox = getattr(row, 'bbox', None)
+ if hasattr(row, 'bbox'):
+ result.bbox = ntyp.Bbox.from_wkb(row.bbox)
await nres.add_result_details(conn, [result], details)
return result
+
+
+GEOMETRY_TYPE_MAP = {
+ 'POINT': 'ST_Point',
+ 'MULTIPOINT': 'ST_MultiPoint',
+ 'LINESTRING': 'ST_LineString',
+ 'MULTILINESTRING': 'ST_MultiLineString',
+ 'POLYGON': 'ST_Polygon',
+ 'MULTIPOLYGON': 'ST_MultiPolygon',
+ 'GEOMETRYCOLLECTION': 'ST_GeometryCollection'
+}
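This map apparently normalises geometry type names as returned by the
SQLite/SpatiaLite backend to the PostGIS-style spelling used before. A tiny
sketch:

geom_type = 'POLYGON'                            # SpatiaLite-style name
assert GEOMETRY_TYPE_MAP.get(geom_type, geom_type) == 'ST_Polygon'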
internal use only. That's why they are implemented as free-standing functions
instead of member functions.
"""
-from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, Any
+from typing import Optional, Tuple, Dict, Sequence, TypeVar, Type, List, cast
import enum
import dataclasses
import datetime as dt
import sqlalchemy as sa
from nominatim.typing import SaSelect, SaRow
+from nominatim.db.sqlalchemy_types import Geometry
from nominatim.api.types import Point, Bbox, LookupDetails
from nominatim.api.connection import SearchConnection
from nominatim.api.logging import log
class SourceTable(enum.Enum):
- """ Enumeration of kinds of results.
+ """ The `SourceTable` type lists the possible sources a result can have.
"""
PLACEX = 1
+ """ The placex table is the main source for result usually containing
+ OSM data.
+ """
OSMLINE = 2
+ """ The osmline table contains address interpolations from OSM data.
+ Interpolation addresses are always approximate. The OSM id in the
+ result refers to the OSM way with the interpolation line object.
+ """
TIGER = 3
+ """ TIGER address data contains US addresses imported on the side,
+ see [Installing TIGER data](../customize/Tiger.md).
+ TIGER addresses are also interpolations. The addresses always refer
+ to a street from OSM data. The OSM id in the result refers to
+ that street.
+ """
POSTCODE = 4
+ """ The postcode table contains artificial centroids for postcodes,
+ computed from the postcodes available with address points. Results
+ are always approximate.
+ """
COUNTRY = 5
+ """ The country table provides a fallback, when country data is missing
+ in the OSM data.
+ """
@dataclasses.dataclass
class AddressLine:
- """ Detailed information about a related place.
+ """ The `AddressLine` may contain the following fields about a related place
+ and its function as an address object. Most fields are optional.
+ Their presence depends on the kind and function of the address part.
"""
- place_id: Optional[int]
- osm_object: Optional[Tuple[str, int]]
category: Tuple[str, str]
+ """ Main category of the place, described by a key-value pair.
+ """
names: Dict[str, str]
- extratags: Optional[Dict[str, str]]
-
- admin_level: Optional[int]
+ """ All available names for the place including references, alternative
+ names and translations.
+ """
fromarea: bool
+ """ If true, then the exact area of the place is known. Without area
+ information, Nominatim has to make an educated guess whether an address
+ belongs to one place or another.
+ """
isaddress: bool
+ """ If true, this place should be considered for the final address display.
+ Nominatim will sometimes include more than one candidate for
+ the address in the list when it cannot reliably determine where the
+ place belongs. It will consider the names of all candidates when searching,
+ but when displaying the result, only the most likely candidate should
+ be shown.
+ """
rank_address: int
+ """ [Address rank](../customize/Ranking.md#address-rank) of the place.
+ """
distance: float
+ """ Distance in degrees between the result place and this address part.
+ """
+ place_id: Optional[int] = None
+ """ Internal ID of the place.
+ """
+ osm_object: Optional[Tuple[str, int]] = None
+ """ OSM type and ID of the place, if such an object exists.
+ """
+ extratags: Optional[Dict[str, str]] = None
+ """ Any extra information available about the place. This is a dictionary
+ that usually contains OSM tag key-value pairs.
+ """
+
+ admin_level: Optional[int] = None
+ """ The administrative level of a boundary as tagged in the input data.
+ This field is only meaningful for places of the category
+ (boundary, administrative).
+ """
local_name: Optional[str] = None
+ """ Place holder for localization of this address part. See
+ [Localization](#localization) below.
+ """
class AddressLines(List[AddressLine]):
def localize(self, locales: Locales) -> List[str]:
""" Set the local name of address parts according to the chosen
- locale. Return the list of local names without duplications.
+ locale. Return the list of local names without duplicates.
Only address parts that are marked as isaddress are localized
and returned.
@dataclasses.dataclass
class WordInfo:
- """ Detailed information about a search term.
+ """ Each entry in the list of search terms contains the
+ following detailed information.
"""
word_id: int
+ """ Internal identifier for the word.
+ """
word_token: str
+ """ Normalised and transliterated form of the word.
+ This form is used for searching.
+ """
word: Optional[str] = None
+ """ Untransliterated form, if available.
+ """
WordInfos = Sequence[WordInfo]
place_id : Optional[int] = None
osm_object: Optional[Tuple[str, int]] = None
+ parent_place_id: Optional[int] = None
+ linked_place_id: Optional[int] = None
+ admin_level: int = 15
locale_name: Optional[str] = None
display_name: Optional[str] = None
""" A search result with more internal information from the database
added.
"""
- parent_place_id: Optional[int] = None
- linked_place_id: Optional[int] = None
- admin_level: int = 15
indexed_date: Optional[dt.datetime] = None
May be empty when no result was found.
"""
- def localize(self, locales: Locales) -> None:
- """ Apply the given locales to all results.
- """
- for result in self:
- result.localize(locales)
-
def _filter_geometries(row: SaRow) -> Dict[str, str]:
return {k[9:]: v for k, v in row._mapping.items() # pylint: disable=W0212
place_id=row.place_id,
osm_object=(row.osm_type, row.osm_id),
category=(row.class_, row.type),
+ parent_place_id=row.parent_place_id,
+ linked_place_id=getattr(row, 'linked_place_id', None),
+ admin_level=getattr(row, 'admin_level', 15),
names=_mingle_name_tags(row.name),
address=row.address,
extratags=row.extratags,
res = class_type(source_table=SourceTable.OSMLINE,
place_id=row.place_id,
+ parent_place_id=row.parent_place_id,
osm_object=('W', row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
address=row.address,
def create_from_tiger_row(row: Optional[SaRow],
- class_type: Type[BaseResultT]) -> Optional[BaseResultT]:
+ class_type: Type[BaseResultT],
+ osm_type: Optional[str] = None,
+ osm_id: Optional[int] = None) -> Optional[BaseResultT]:
""" Construct a new result and add the data from the result row
from the Tiger data interpolation table. 'class_type' defines
the type of result to return. Returns None if the row is None.
res = class_type(source_table=SourceTable.TIGER,
place_id=row.place_id,
- osm_object=(row.osm_type, row.osm_id),
+ parent_place_id=row.parent_place_id,
+ osm_object=(osm_type or row.osm_type, osm_id or row.osm_id),
category=('place', 'houses' if hnr is None else 'house'),
postcode=row.postcode,
country_code='us',
return class_type(source_table=SourceTable.POSTCODE,
place_id=row.place_id,
+ parent_place_id=row.parent_place_id,
category=('place', 'postcode'),
names={'ref': row.postcode},
rank_search=row.rank_search,
centroid=Point.from_wkb(row.centroid),
names=row.name,
rank_address=4, rank_search=4,
- country_code=row.country_code)
+ country_code=row.country_code,
+ geometry=_filter_geometries(row))
async def add_result_details(conn: SearchConnection, results: List[BaseResultT],
log().comment('Query keywords')
for result in results:
await complete_keywords(conn, result)
+ for result in results:
+ result.localize(details.locales)
-def _result_row_to_address_row(row: SaRow) -> AddressLine:
+def _result_row_to_address_row(row: SaRow, isaddress: Optional[bool] = None) -> AddressLine:
""" Create a new AddressLine from the results of a datbase query.
"""
- extratags: Dict[str, str] = getattr(row, 'extratags', {})
- if hasattr(row, 'place_type') and row.place_type:
- extratags['place'] = row.place_type
+ extratags: Dict[str, str] = getattr(row, 'extratags', {}) or {}
+ if 'linked_place' in extratags:
+ extratags['place'] = extratags['linked_place']
names = _mingle_name_tags(row.name) or {}
if getattr(row, 'housenumber', None) is not None:
names['housenumber'] = row.housenumber
+ if isaddress is None:
+ isaddress = getattr(row, 'isaddress', True)
+
return AddressLine(place_id=row.place_id,
osm_object=None if row.osm_type is None else (row.osm_type, row.osm_id),
category=(getattr(row, 'class'), row.type),
extratags=extratags,
admin_level=row.admin_level,
fromarea=row.fromarea,
- isaddress=getattr(row, 'isaddress', True),
+ isaddress=isaddress,
rank_address=row.rank_address,
distance=row.distance)
+def _get_address_lookup_id(result: BaseResultT) -> int:
+ assert result.place_id
+ if result.source_table != SourceTable.PLACEX or result.rank_search > 27:
+ return result.parent_place_id or result.place_id
+
+ return result.linked_place_id or result.place_id
+
+
+async def _finalize_entry(conn: SearchConnection, result: BaseResultT) -> None:
+ assert result.address_rows is not None
+ postcode = result.postcode
+ if not postcode and result.address:
+ postcode = result.address.get('postcode')
+ if postcode and ',' not in postcode and ';' not in postcode:
+ result.address_rows.append(AddressLine(
+ category=('place', 'postcode'),
+ names={'ref': postcode},
+ fromarea=False, isaddress=True, rank_address=5,
+ distance=0.0))
+ if result.country_code:
+ async def _get_country_names() -> Optional[Dict[str, str]]:
+ t = conn.t.country_name
+ sql = sa.select(t.c.name, t.c.derived_name)\
+ .where(t.c.country_code == result.country_code)
+ for cres in await conn.execute(sql):
+ names = cast(Dict[str, str], cres[0])
+ if cres[1]:
+ names.update(cast(Dict[str, str], cres[1]))
+ return names
+ return None
+
+ country_names = await conn.get_cached_value('COUNTRY_NAME',
+ result.country_code,
+ _get_country_names)
+ if country_names:
+ result.address_rows.append(AddressLine(
+ category=('place', 'country'),
+ names=country_names,
+ fromarea=False, isaddress=True, rank_address=4,
+ distance=0.0))
+ result.address_rows.append(AddressLine(
+ category=('place', 'country_code'),
+ names={'ref': result.country_code}, extratags={},
+ fromarea=True, isaddress=False, rank_address=4,
+ distance=0.0))
+
+
+def _setup_address_details(result: BaseResultT) -> None:
+ """ Retrieve information about places that make up the address of the result.
+ """
+ result.address_rows = AddressLines()
+ if result.names:
+ result.address_rows.append(AddressLine(
+ place_id=result.place_id,
+ osm_object=result.osm_object,
+ category=result.category,
+ names=result.names,
+ extratags=result.extratags or {},
+ admin_level=result.admin_level,
+ fromarea=True, isaddress=True,
+ rank_address=result.rank_address or 100, distance=0.0))
+ if result.source_table == SourceTable.PLACEX and result.address:
+ housenumber = result.address.get('housenumber')\
+ or result.address.get('streetnumber')\
+ or result.address.get('conscriptionnumber')
+ elif result.housenumber:
+ housenumber = result.housenumber
+ else:
+ housenumber = None
+ if housenumber:
+ result.address_rows.append(AddressLine(
+ category=('place', 'house_number'),
+ names={'ref': housenumber},
+ fromarea=True, isaddress=True, rank_address=28, distance=0))
+ if result.address and '_unlisted_place' in result.address:
+ result.address_rows.append(AddressLine(
+ category=('place', 'locality'),
+ names={'name': result.address['_unlisted_place']},
+ fromarea=False, isaddress=True, rank_address=25, distance=0))
+
+
async def complete_address_details(conn: SearchConnection, results: List[BaseResultT]) -> None:
""" Retrieve information about places that make up the address of the result.
"""
- def get_hnr(result: BaseResult) -> Tuple[int, int]:
- housenumber = -1
- if result.source_table in (SourceTable.TIGER, SourceTable.OSMLINE):
- if result.housenumber is not None:
- housenumber = int(result.housenumber)
- elif result.extratags is not None and 'startnumber' in result.extratags:
- # details requests do not come with a specific house number
- housenumber = int(result.extratags['startnumber'])
- assert result.place_id
- return result.place_id, housenumber
+ for result in results:
+ _setup_address_details(result)
+
+ ### Look up entries from the place_addressline table
- data: List[Tuple[Any, ...]] = [get_hnr(r) for r in results if r.place_id]
+ lookup_ids = [{'pid': r.place_id,
+ 'lid': _get_address_lookup_id(r),
+ 'names': list(r.address.values()) if r.address else [],
+ 'c': ('SRID=4326;' + r.centroid.to_wkt()) if r.centroid else '' }
+ for r in results if r.place_id]
- if not data:
+ if not lookup_ids:
return
- values = sa.values(sa.column('place_id', type_=sa.Integer),
- sa.column('housenumber', type_=sa.Integer),
- name='places',
- literal_binds=True).data(data)
-
- sfn = sa.func.get_addressdata(values.c.place_id, values.c.housenumber)\
- .table_valued( # type: ignore[no-untyped-call]
- sa.column('place_id', type_=sa.Integer),
- 'osm_type',
- sa.column('osm_id', type_=sa.BigInteger),
- sa.column('name', type_=conn.t.types.Composite),
- 'class', 'type', 'place_type',
- sa.column('admin_level', type_=sa.Integer),
- sa.column('fromarea', type_=sa.Boolean),
- sa.column('isaddress', type_=sa.Boolean),
- sa.column('rank_address', type_=sa.SmallInteger),
- sa.column('distance', type_=sa.Float),
- joins_implicitly=True)
-
- sql = sa.select(values.c.place_id.label('result_place_id'), sfn)\
- .order_by(values.c.place_id,
- sa.column('rank_address').desc(),
- sa.column('isaddress').desc())
+ ltab = sa.func.JsonArrayEach(sa.type_coerce(lookup_ids, sa.JSON))\
+ .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
+
+ t = conn.t.placex
+ taddr = conn.t.addressline
+
+ sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
+ t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+ t.c.class_, t.c.type, t.c.extratags,
+ t.c.admin_level, taddr.c.fromarea,
+ sa.case((t.c.rank_address == 11, 5),
+ else_=t.c.rank_address).label('rank_address'),
+ taddr.c.distance, t.c.country_code, t.c.postcode)\
+ .join(taddr, sa.or_(taddr.c.place_id == ltab.c.value['pid'].as_integer(),
+ taddr.c.place_id == ltab.c.value['lid'].as_integer()))\
+ .join(t, taddr.c.address_place_id == t.c.place_id)\
+ .order_by('src_place_id')\
+ .order_by(sa.column('rank_address').desc())\
+ .order_by((taddr.c.place_id == ltab.c.value['pid'].as_integer()).desc())\
+ .order_by(sa.case((sa.func.CrosscheckNames(t.c.name, ltab.c.value['names']), 2),
+ (taddr.c.isaddress, 0),
+ (sa.and_(taddr.c.fromarea,
+ t.c.geometry.ST_Contains(
+ sa.func.ST_GeomFromEWKT(
+ ltab.c.value['c'].as_string()))), 1),
+ else_=-1).desc())\
+ .order_by(taddr.c.fromarea.desc())\
+ .order_by(taddr.c.distance.desc())\
+ .order_by(t.c.rank_search.desc())
+
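JsonArrayEach is Nominatim's own SQL wrapper; purely as an illustration, the equivalent table-valued pattern on plain PostgreSQL uses json_array_elements (the payload below is invented):

    import sqlalchemy as sa

    # one row per element of the JSON array, exposed as column 'value'
    ltab = sa.func.json_array_elements(
               sa.type_coerce([{'pid': 1, 'lid': 2}], sa.JSON)
           ).table_valued(sa.column('value', sa.JSON))
    stmt = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'))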
current_result = None
+ current_rank_address = -1
for row in await conn.execute(sql):
- if current_result is None or row.result_place_id != current_result.place_id:
- for result in results:
- if result.place_id == row.result_place_id:
- current_result = result
- break
+ if current_result is None or row.src_place_id != current_result.place_id:
+ current_result = next((r for r in results if r.place_id == row.src_place_id), None)
+ assert current_result is not None
+ current_rank_address = -1
+
+ location_isaddress = row.rank_address != current_rank_address
+
+ if current_result.country_code is None and row.country_code:
+ current_result.country_code = row.country_code
+
+ if row.type in ('postcode', 'postal_code') and location_isaddress:
+ if not row.fromarea or \
+ (current_result.address and 'postcode' in current_result.address):
+ location_isaddress = False
else:
- assert False
- current_result.address_rows = AddressLines()
- current_result.address_rows.append(_result_row_to_address_row(row))
+ current_result.postcode = None
+
+ assert current_result.address_rows is not None
+ current_result.address_rows.append(_result_row_to_address_row(row, location_isaddress))
+ current_rank_address = row.rank_address
+
+ for result in results:
+ await _finalize_entry(conn, result)
+
+
+ ### Finally add the record for the parent entry where necessary.
+
+ parent_lookup_ids = list(filter(lambda e: e['pid'] != e['lid'], lookup_ids))
+ if parent_lookup_ids:
+ ltab = sa.func.JsonArrayEach(sa.type_coerce(parent_lookup_ids, sa.JSON))\
+ .table_valued(sa.column('value', type_=sa.JSON)) # type: ignore[no-untyped-call]
+ sql = sa.select(ltab.c.value['pid'].as_integer().label('src_place_id'),
+ t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
+ t.c.class_, t.c.type, t.c.extratags,
+ t.c.admin_level,
+ t.c.rank_address)\
+ .where(t.c.place_id == ltab.c.value['lid'].as_integer())
+
+ for row in await conn.execute(sql):
+ current_result = next((r for r in results if r.place_id == row.src_place_id), None)
+ assert current_result is not None
+ assert current_result.address_rows is not None
+
+ current_result.address_rows.append(AddressLine(
+ place_id=row.place_id,
+ osm_object=(row.osm_type, row.osm_id),
+ category=(row.class_, row.type),
+ names=row.name, extratags=row.extratags or {},
+ admin_level=row.admin_level,
+ fromarea=True, isaddress=True,
+ rank_address=row.rank_address, distance=0.0))
+
+ ### Now sort everything
+ for result in results:
+ assert result.address_rows is not None
+ result.address_rows.sort(key=lambda a: (-a.rank_address, a.isaddress))
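The sort key deserves a second look: rows are ordered by descending rank_address, and within one rank False sorts before True, so non-address lines come first. A toy check with invented (rank_address, isaddress) tuples:

    rows = [(26, True), (30, False), (30, True)]
    rows.sort(key=lambda a: (-a[0], a[1]))
    assert rows == [(30, False), (30, True), (26, True)]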
-# pylint: disable=consider-using-f-string
def _placex_select_address_row(conn: SearchConnection,
centroid: Point) -> SaSelect:
t = conn.t.placex
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_.label('class'), t.c.type,
t.c.admin_level, t.c.housenumber,
- sa.literal_column("""ST_GeometryType(geometry) in
- ('ST_Polygon','ST_MultiPolygon')""").label('fromarea'),
+ t.c.geometry.is_area().label('fromarea'),
t.c.rank_address,
- sa.literal_column(
- """ST_DistanceSpheroid(geometry, 'SRID=4326;POINT(%f %f)'::geometry,
- 'SPHEROID["WGS 84",6378137,298.257223563, AUTHORITY["EPSG","7030"]]')
- """ % centroid).label('distance'))
+ t.c.geometry.distance_spheroid(
+ sa.bindparam('centroid', value=centroid, type_=Geometry)).label('distance'))
async def complete_linked_places(conn: SearchConnection, result: BaseResult) -> None:
sel = sa.select(t.c.word_id, t.c.word_token, t.c.word)
for name_tokens, address_tokens in await conn.execute(sql):
- for row in await conn.execute(sel.where(t.c.word_id == sa.any_(name_tokens))):
+ for row in await conn.execute(sel.where(t.c.word_id.in_(name_tokens))):
result.name_keywords.append(WordInfo(*row))
- for row in await conn.execute(sel.where(t.c.word_id == sa.any_(address_tokens))):
+ for row in await conn.execute(sel.where(t.c.word_id.in_(address_tokens))):
result.address_keywords.append(WordInfo(*row))
"""
Implementation of reverse geocoding.
"""
-from typing import Optional, List, Callable, Type, Tuple, Dict, Any
+from typing import Optional, List, Callable, Type, Tuple, Dict, Any, cast, Union
+import functools
import sqlalchemy as sa
-from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow, SaBind
+from nominatim.typing import SaColumn, SaSelect, SaFromClause, SaLabel, SaRow,\
+ SaBind, SaLambdaSelect
from nominatim.api.connection import SearchConnection
import nominatim.api.results as nres
from nominatim.api.logging import log
WKT_PARAM: SaBind = sa.bindparam('wkt', type_=Geometry)
MAX_RANK_PARAM: SaBind = sa.bindparam('max_rank')
+def no_index(expr: SaColumn) -> SaColumn:
+ """ Wrap the given expression so that the query planner will
+ refrain from using the expression for an index lookup.
+ """
+ return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
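A minimal sketch of what no_index() produces: coalesce(NULL, expr) is semantically the identity on expr, but the planner will not match the wrapped form against an index on the bare column.

    import sqlalchemy as sa

    rank = sa.column('rank_address', sa.Integer)
    print(sa.func.coalesce(sa.null(), rank))  # coalesce(NULL, rank_address)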
def _select_from_placex(t: SaFromClause, use_wkt: bool = True) -> SaSelect:
""" Create a select statement with the columns relevant for reverse
results.
t.c.importance, t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
centroid,
+ t.c.linked_place_id, t.c.admin_level,
distance.label('distance'),
t.c.geometry.ST_Expand(0).label('bbox'))
else_=0).label('position')
-def _is_address_point(table: SaFromClause) -> SaColumn:
- return sa.and_(table.c.rank_address == 30,
- sa.or_(table.c.housenumber != None,
- table.c.name.has_key('housename')))
-
-
def _get_closest(*rows: Optional[SaRow]) -> Optional[SaRow]:
return min(rows, key=lambda row: 1000 if row is None else row.distance)
coordinate.
"""
- def __init__(self, conn: SearchConnection, params: ReverseDetails) -> None:
+ def __init__(self, conn: SearchConnection, params: ReverseDetails,
+ restrict_to_country_areas: bool = False) -> None:
self.conn = conn
self.params = params
+ self.restrict_to_country_areas = restrict_to_country_areas
self.bind_params: Dict[str, Any] = {'max_rank': params.max_rank}
return self.layer_enabled(DataLayer.RAILWAY, DataLayer.MANMADE, DataLayer.NATURAL)
- def _add_geometry_columns(self, sql: SaSelect, col: SaColumn) -> SaSelect:
- if not self.has_geometries():
- return sql
-
+ def _add_geometry_columns(self, sql: SaLambdaSelect, col: SaColumn) -> SaSelect:
out = []
if self.params.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, self.params.geometry_simplification)
if self.params.geometry_output & GeometryFormat.GEOJSON:
- out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+ out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if self.params.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if self.params.geometry_output & GeometryFormat.KML:
- out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+ out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if self.params.geometry_output & GeometryFormat.SVG:
- out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+ out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
"""
t = self.conn.t.placex
- sql = _select_from_placex(t)\
- .where(t.c.geometry.ST_DWithin(WKT_PARAM, distance))\
- .where(t.c.indexed_status == 0)\
- .where(t.c.linked_place_id == None)\
+ # PostgreSQL must not get the distance as a bound parameter because
+ # there is a danger that it won't be able to properly estimate index use
+ # when used with prepared statements.
+ diststr = sa.text(f"{distance}")
+
+ sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
+ .where(t.c.geometry.ST_DWithin(WKT_PARAM, diststr))
+ .where(t.c.indexed_status == 0)
+ .where(t.c.linked_place_id == None)
.where(sa.or_(sa.not_(t.c.geometry.is_area()),
- t.c.centroid.ST_Distance(WKT_PARAM) < distance))\
- .order_by('distance')\
- .limit(1)
+ t.c.centroid.ST_Distance(WKT_PARAM) < diststr))
+ .order_by('distance')
+ .limit(1))
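For readers new to SQLAlchemy lambda statements, a self-contained sketch (assuming SQLAlchemy >= 1.4; the table is invented): the lambda is analysed only once so the compiled SQL can be cached, closed-over values become bind parameters, and further criteria are appended with +=.

    import sqlalchemy as sa

    meta = sa.MetaData()
    placex = sa.Table('placex', meta,
                      sa.Column('place_id', sa.Integer),
                      sa.Column('rank_search', sa.Integer))

    stmt = sa.lambda_stmt(lambda: sa.select(placex).limit(1))
    max_rank = 27  # becomes a bound parameter, not part of the cache key
    stmt += lambda s: s.where(placex.c.rank_search <= max_rank)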
- sql = self._add_geometry_columns(sql, t.c.geometry)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, t.c.geometry)
- restrict: List[SaColumn] = []
+ restrict: List[Union[SaColumn, Callable[[], SaColumn]]] = []
if self.layer_enabled(DataLayer.ADDRESS):
- restrict.append(sa.and_(t.c.rank_address >= 26,
- t.c.rank_address <= min(29, self.max_rank)))
+ max_rank = min(29, self.max_rank)
+ restrict.append(lambda: no_index(t.c.rank_address).between(26, max_rank))
if self.max_rank == 30:
- restrict.append(_is_address_point(t))
+ restrict.append(lambda: sa.func.IsAddressPoint(t))
if self.layer_enabled(DataLayer.POI) and self.max_rank == 30:
- restrict.append(sa.and_(t.c.rank_search == 30,
- t.c.class_.not_in(('place', 'building')),
- sa.not_(t.c.geometry.is_line_like())))
+ restrict.append(lambda: sa.and_(no_index(t.c.rank_search) == 30,
+ t.c.class_.not_in(('place', 'building')),
+ sa.not_(t.c.geometry.is_line_like())))
if self.has_feature_layers():
- restrict.append(sa.and_(t.c.rank_search.between(26, MAX_RANK_PARAM),
- t.c.rank_address == 0,
+ restrict.append(sa.and_(no_index(t.c.rank_search).between(26, MAX_RANK_PARAM),
+ no_index(t.c.rank_address) == 0,
self._filter_by_layer(t)))
if not restrict:
async def _find_housenumber_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.placex
- sql = _select_from_placex(t)\
- .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))\
- .where(t.c.parent_place_id == parent_place_id)\
- .where(_is_address_point(t))\
- .where(t.c.indexed_status == 0)\
- .where(t.c.linked_place_id == None)\
- .order_by('distance')\
- .limit(1)
+ sql: SaLambdaSelect = sa.lambda_stmt(lambda: _select_from_placex(t)
+ .where(t.c.geometry.ST_DWithin(WKT_PARAM, 0.001))
+ .where(t.c.parent_place_id == parent_place_id)
+ .where(sa.func.IsAddressPoint(t))
+ .where(t.c.indexed_status == 0)
+ .where(t.c.linked_place_id == None)
+ .order_by('distance')
+ .limit(1))
- sql = self._add_geometry_columns(sql, t.c.geometry)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, t.c.geometry)
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
distance: float) -> Optional[SaRow]:
t = self.conn.t.osmline
- sql = sa.select(t,
- t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
- _locate_interpolation(t))\
- .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))\
- .where(t.c.startnumber != None)\
- .order_by('distance')\
- .limit(1)
+ sql: Any = sa.lambda_stmt(lambda:
+ sa.select(t,
+ t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
+ _locate_interpolation(t))
+ .where(t.c.linegeo.ST_DWithin(WKT_PARAM, distance))
+ .where(t.c.startnumber != None)
+ .order_by('distance')
+ .limit(1))
if parent_place_id is not None:
- sql = sql.where(t.c.parent_place_id == parent_place_id)
+ sql += lambda s: s.where(t.c.parent_place_id == parent_place_id)
- inner = sql.subquery('ipol')
+ def _wrap_query(base_sql: SaLambdaSelect) -> SaSelect:
+ inner = base_sql.subquery('ipol')
- sql = sa.select(inner.c.place_id, inner.c.osm_id,
- inner.c.parent_place_id, inner.c.address,
- _interpolated_housenumber(inner),
- _interpolated_position(inner),
- inner.c.postcode, inner.c.country_code,
- inner.c.distance)
+ return sa.select(inner.c.place_id, inner.c.osm_id,
+ inner.c.parent_place_id, inner.c.address,
+ _interpolated_housenumber(inner),
+ _interpolated_position(inner),
+ inner.c.postcode, inner.c.country_code,
+ inner.c.distance)
+
+ sql += _wrap_query
if self.has_geometries():
sub = sql.subquery('geom')
return (await self.conn.execute(sql, self.bind_params)).one_or_none()
- async def _find_tiger_number_for_street(self, parent_place_id: int,
- parent_type: str,
- parent_id: int) -> Optional[SaRow]:
+ async def _find_tiger_number_for_street(self, parent_place_id: int) -> Optional[SaRow]:
t = self.conn.t.tiger
- inner = sa.select(t,
- t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
- _locate_interpolation(t))\
- .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
- .where(t.c.parent_place_id == parent_place_id)\
- .order_by('distance')\
- .limit(1)\
- .subquery('tiger')
-
- sql = sa.select(inner.c.place_id,
- inner.c.parent_place_id,
- sa.literal(parent_type).label('osm_type'),
- sa.literal(parent_id).label('osm_id'),
- _interpolated_housenumber(inner),
- _interpolated_position(inner),
- inner.c.postcode,
- inner.c.distance)
+ def _base_query() -> SaSelect:
+ inner = sa.select(t,
+ t.c.linegeo.ST_Distance(WKT_PARAM).label('distance'),
+ _locate_interpolation(t))\
+ .where(t.c.linegeo.ST_DWithin(WKT_PARAM, 0.001))\
+ .where(t.c.parent_place_id == parent_place_id)\
+ .order_by('distance')\
+ .limit(1)\
+ .subquery('tiger')
+
+ return sa.select(inner.c.place_id,
+ inner.c.parent_place_id,
+ _interpolated_housenumber(inner),
+ _interpolated_position(inner),
+ inner.c.postcode,
+ inner.c.distance)
+
+ sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
if self.has_geometries():
sub = sql.subquery('geom')
distance = addr_row.distance
elif row.country_code == 'us' and parent_place_id is not None:
log().comment('Find TIGER housenumber for street')
- addr_row = await self._find_tiger_number_for_street(parent_place_id,
- row.osm_type,
- row.osm_id)
+ addr_row = await self._find_tiger_number_for_street(parent_place_id)
log().var_dump('Result (street Tiger housenumber)', addr_row)
if addr_row is not None:
+ row_func = cast(RowFunc,
+ functools.partial(nres.create_from_tiger_row,
+ osm_type=row.osm_type,
+ osm_id=row.osm_id))
row = addr_row
- row_func = nres.create_from_tiger_row
else:
distance = row.distance
log().comment('Reverse lookup by larger address area features')
t = self.conn.t.placex
- # The inner SQL brings results in the right order, so that
- # later only a minimum of results needs to be checked with ST_Contains.
- inner = sa.select(t, sa.literal(0.0).label('distance'))\
- .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
- .where(t.c.rank_address.between(5, 25))\
- .where(t.c.geometry.is_area())\
- .where(t.c.geometry.intersects(WKT_PARAM))\
- .where(t.c.name != None)\
- .where(t.c.indexed_status == 0)\
- .where(t.c.linked_place_id == None)\
- .where(t.c.type != 'postcode')\
- .order_by(sa.desc(t.c.rank_search))\
- .limit(50)\
- .subquery('area')
+ def _base_query() -> SaSelect:
+ # The inner SQL brings results in the right order, so that
+ # later only a minimum of results needs to be checked with ST_Contains.
+ inner = sa.select(t, sa.literal(0.0).label('distance'))\
+ .where(t.c.rank_search.between(5, MAX_RANK_PARAM))\
+ .where(t.c.geometry.intersects(WKT_PARAM))\
+ .where(sa.func.PlacexGeometryReverseLookuppolygon())\
+ .order_by(sa.desc(t.c.rank_search))\
+ .limit(50)\
+ .subquery('area')
- sql = _select_from_placex(inner, False)\
- .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
- .order_by(sa.desc(inner.c.rank_search))\
- .limit(1)
+ return _select_from_placex(inner, False)\
+ .where(inner.c.geometry.ST_Contains(WKT_PARAM))\
+ .order_by(sa.desc(inner.c.rank_search))\
+ .limit(1)
- sql = self._add_geometry_columns(sql, inner.c.geometry)
+ sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (area)', address_row)
if address_row is not None and address_row.rank_search < self.max_rank:
log().comment('Search for better matching place nodes inside the area')
- inner = sa.select(t,
+
+ address_rank = address_row.rank_search
+ address_id = address_row.place_id
+
+ def _place_inside_area_query() -> SaSelect:
+ inner = \
+ sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
- .where(t.c.osm_type == 'N')\
- .where(t.c.rank_search > address_row.rank_search)\
+ .where(t.c.rank_search > address_rank)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
- .where(t.c.rank_address.between(5, 25))\
- .where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
- .where(t.c.linked_place_id == None)\
- .where(t.c.type != 'postcode')\
- .where(t.c.geometry
- .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
- .intersects(WKT_PARAM))\
+ .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
.subquery('places')
- touter = self.conn.t.placex.alias('outer')
- sql = _select_from_placex(inner, False)\
- .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
- .where(touter.c.place_id == address_row.place_id)\
- .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
- .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
- .limit(1)
+ touter = t.alias('outer')
+ return _select_from_placex(inner, False)\
+ .join(touter, touter.c.geometry.ST_Contains(inner.c.geometry))\
+ .where(touter.c.place_id == address_id)\
+ .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
+ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
+ .limit(1)
- sql = self._add_geometry_columns(sql, inner.c.geometry)
+ sql = sa.lambda_stmt(_place_inside_area_query)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, sa.literal_column('places.geometry'))
place_address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (place node)', place_address_row)
.where(t.c.indexed_status == 0)\
.where(t.c.linked_place_id == None)\
.where(self._filter_by_layer(t))\
- .where(t.c.geometry
- .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
- .intersects(WKT_PARAM))\
+ .where(t.c.geometry.intersects(sa.func.ST_Expand(WKT_PARAM, 0.007)))\
.order_by(sa.desc(t.c.rank_search))\
+ .order_by('distance')\
.limit(50)\
.subquery()
.order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
.limit(1)
- sql = self._add_geometry_columns(sql, inner.c.geometry)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, inner.c.geometry)
row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (non-address feature)', row)
return _get_closest(address_row, other_row)
- async def lookup_country(self) -> Optional[SaRow]:
+ async def lookup_country_codes(self) -> List[str]:
""" Lookup the country for the current search.
"""
log().section('Reverse lookup by country code')
sql = sa.select(t.c.country_code).distinct()\
.where(t.c.geometry.ST_Contains(WKT_PARAM))
- ccodes = tuple((r[0] for r in await self.conn.execute(sql, self.bind_params)))
+ ccodes = [cast(str, r[0]) for r in await self.conn.execute(sql, self.bind_params)]
log().var_dump('Country codes', ccodes)
+ return ccodes
+
+
+ async def lookup_country(self, ccodes: List[str]) -> Optional[SaRow]:
+ """ Lookup the country for the current search.
+ """
+ if not ccodes:
+ ccodes = await self.lookup_country_codes()
if not ccodes:
return None
if self.max_rank > 4:
log().comment('Search for place nodes in country')
- inner = sa.select(t,
+ def _base_query() -> SaSelect:
+ inner = \
+ sa.select(t,
t.c.geometry.ST_Distance(WKT_PARAM).label('distance'))\
- .where(t.c.osm_type == 'N')\
.where(t.c.rank_search > 4)\
.where(t.c.rank_search <= MAX_RANK_PARAM)\
- .where(t.c.rank_address.between(5, 25))\
- .where(t.c.name != None)\
.where(t.c.indexed_status == 0)\
- .where(t.c.linked_place_id == None)\
- .where(t.c.type != 'postcode')\
.where(t.c.country_code.in_(ccodes))\
- .where(t.c.geometry
- .ST_Buffer(sa.func.reverse_place_diameter(t.c.rank_search))
- .intersects(WKT_PARAM))\
+ .where(sa.func.IntersectsReverseDistance(t, WKT_PARAM))\
.order_by(sa.desc(t.c.rank_search))\
.limit(50)\
- .subquery()
+ .subquery('area')
- sql = _select_from_placex(inner, False)\
- .where(inner.c.distance < sa.func.reverse_place_diameter(inner.c.rank_search))\
- .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
- .limit(1)
+ return _select_from_placex(inner, False)\
+ .where(sa.func.IsBelowReverseDistance(inner.c.distance, inner.c.rank_search))\
+ .order_by(sa.desc(inner.c.rank_search), inner.c.distance)\
+ .limit(1)
- sql = self._add_geometry_columns(sql, inner.c.geometry)
+ sql: SaLambdaSelect = sa.lambda_stmt(_base_query)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, sa.literal_column('area.geometry'))
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
log().var_dump('Result (addressable place node)', address_row)
if address_row is None:
# Still nothing, then return a country with the appropriate country code.
- sql = _select_from_placex(t)\
+ sql = sa.lambda_stmt(lambda: _select_from_placex(t)\
.where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)\
.where(t.c.rank_search == 4)\
.where(t.c.linked_place_id == None)\
.order_by('distance')\
- .limit(1)
+ .limit(1))
- sql = self._add_geometry_columns(sql, t.c.geometry)
+ if self.has_geometries():
+ sql = self._add_geometry_columns(sql, t.c.geometry)
address_row = (await self.conn.execute(sql, self.bind_params)).one_or_none()
row, tmp_row_func = await self.lookup_street_poi()
if row is not None:
row_func = tmp_row_func
- if row is None and self.max_rank > 4:
- row = await self.lookup_area()
- if row is None and self.layer_enabled(DataLayer.ADDRESS):
- row = await self.lookup_country()
+
+ if row is None:
+ if self.restrict_to_country_areas:
+ ccodes = await self.lookup_country_codes()
+ if not ccodes:
+ return None
+ else:
+ ccodes = []
+
+ if self.max_rank > 4:
+ row = await self.lookup_area()
+ if row is None and self.layer_enabled(DataLayer.ADDRESS):
+ row = await self.lookup_country(ccodes)
result = row_func(row, nres.ReverseResult)
if result is not None:
"""
Conversion from token assignment to an abstract DB search.
"""
-from typing import Optional, List, Tuple, Iterator
+from typing import Optional, List, Tuple, Iterator, Dict
import heapq
from nominatim.api.types import SearchDetails, DataLayer
from nominatim.api.search.token_assignment import TokenAssignment
import nominatim.api.search.db_search_fields as dbf
import nominatim.api.search.db_searches as dbs
-from nominatim.api.logging import log
def wrap_near_search(categories: List[Tuple[str, str]],
if sdata is None:
return
- categories = self.get_search_categories(assignment)
+ near_items = self.get_near_items(assignment)
+ if near_items is not None and not near_items:
+ return # impossible combination of near items and category parameter
if assignment.name is None:
- if categories and not sdata.postcodes:
- sdata.qualifiers = categories
- categories = None
+ if near_items and not sdata.postcodes:
+ sdata.qualifiers = near_items
+ near_items = None
builder = self.build_poi_search(sdata)
elif assignment.housenumber:
hnr_tokens = self.query.get_tokens(assignment.housenumber,
builder = self.build_housenumber_search(sdata, hnr_tokens, assignment.address)
else:
builder = self.build_special_search(sdata, assignment.address,
- bool(categories))
+ bool(near_items))
else:
builder = self.build_name_search(sdata, assignment.name, assignment.address,
- bool(categories))
+ bool(near_items))
- if categories:
- penalty = min(categories.penalties)
- categories.penalties = [p - penalty for p in categories.penalties]
+ if near_items:
+ penalty = min(near_items.penalties)
+ near_items.penalties = [p - penalty for p in near_items.penalties]
for search in builder:
- yield dbs.NearSearch(penalty, categories, search)
+ search_penalty = search.penalty
+ search.penalty = 0.0
+ yield dbs.NearSearch(penalty + assignment.penalty + search_penalty,
+ near_items, search)
else:
- yield from builder
+ for search in builder:
+ search.penalty += assignment.penalty
+ yield search
def build_poi_search(self, sdata: dbf.SearchData) -> Iterator[dbs.AbstractSearch]:
""" Build a simple address search for special entries where the
housenumber is the main name token.
"""
- partial_tokens: List[int] = []
- for trange in address:
- partial_tokens.extend(t.token for t in self.query.get_partials_list(trange))
+ sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any')]
+ expected_count = sum(t.count for t in hnrs)
+
+ partials = [t for trange in address
+ for t in self.query.get_partials_list(trange)]
+
+ if expected_count < 8000:
+ sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
+ [t.token for t in partials], 'restrict'))
+ elif len(partials) != 1 or partials[0].count < 10000:
+ sdata.lookups.append(dbf.FieldLookup('nameaddress_vector',
+ [t.token for t in partials], 'lookup_all'))
+ else:
+ sdata.lookups.append(
+ dbf.FieldLookup('nameaddress_vector',
+ [t.token for t
+ in self.query.get_tokens(address[0], TokenType.WORD)],
+ 'lookup_any'))
- sdata.lookups = [dbf.FieldLookup('name_vector', [t.token for t in hnrs], 'lookup_any'),
- dbf.FieldLookup('nameaddress_vector', partial_tokens, 'lookup_all')
- ]
- yield dbs.PlaceSearch(0.05, sdata, sum(t.count for t in hnrs))
+ sdata.housenumbers = dbf.WeightedStrings([], [])
+ yield dbs.PlaceSearch(0.05, sdata, expected_count)
def build_name_search(self, sdata: dbf.SearchData,
be searched for. This takes into account how frequent the terms
are and tries to find a lookup that optimizes index use.
"""
- penalty = 0.0 # extra penalty currently unused
-
+ penalty = 0.0 # extra penalty
name_partials = self.query.get_partials_list(name)
- exp_name_count = min(t.count for t in name_partials)
- addr_partials = []
- for trange in address:
- addr_partials.extend(self.query.get_partials_list(trange))
+ name_tokens = [t.token for t in name_partials]
+
+ addr_partials = [t for r in address for t in self.query.get_partials_list(r)]
addr_tokens = [t.token for t in addr_partials]
+
partials_indexed = all(t.is_indexed for t in name_partials) \
and all(t.is_indexed for t in addr_partials)
+ exp_count = min(t.count for t in name_partials) / (2**(len(name_partials) - 1))
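A worked example of the estimate (counts invented): every additional partial is assumed to roughly halve the matches, so the smallest per-token count is divided by 2^(n-1).

    counts = [50000, 8000, 12000]               # matches per partial token
    exp_count = min(counts) / (2 ** (len(counts) - 1))
    assert exp_count == 2000.0                  # 8000 / 4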
- if (len(name_partials) > 3 or exp_name_count < 1000) and partials_indexed:
- # Lookup by name partials, use address partials to restrict results.
- lookup = [dbf.FieldLookup('name_vector',
- [t.token for t in name_partials], 'lookup_all')]
- if addr_tokens:
- lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
- yield penalty, exp_name_count, lookup
- return
-
- exp_addr_count = min(t.count for t in addr_partials) if addr_partials else exp_name_count
- if exp_addr_count < 1000 and partials_indexed:
- # Lookup by address partials and restrict results through name terms.
- yield penalty, exp_addr_count,\
- [dbf.FieldLookup('name_vector', [t.token for t in name_partials], 'restrict'),
- dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
+ if (len(name_partials) > 3 or exp_count < 8000) and partials_indexed:
+ yield penalty, exp_count, dbf.lookup_by_names(name_tokens, addr_tokens)
return
# Partial terms are too frequent. Try looking up by rare full names first.
name_fulls = self.query.get_tokens(name, TokenType.WORD)
- rare_names = list(filter(lambda t: t.count < 1000, name_fulls))
- # At this point drop unindexed partials from the address.
- # This might yield wrong results, nothing we can do about that.
- if not partials_indexed:
- addr_tokens = [t.token for t in addr_partials if t.is_indexed]
- log().var_dump('before', penalty)
- penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
- log().var_dump('after', penalty)
- if rare_names:
+ if name_fulls:
+ fulls_count = sum(t.count for t in name_fulls)
+ # At this point drop unindexed partials from the address.
+ # This might yield wrong results, nothing we can do about that.
+ if not partials_indexed:
+ addr_tokens = [t.token for t in addr_partials if t.is_indexed]
+ penalty += 1.2 * sum(t.penalty for t in addr_partials if not t.is_indexed)
# Any of the full names applies with all of the partials from the address
- lookup = [dbf.FieldLookup('name_vector', [t.token for t in rare_names], 'lookup_any')]
- if addr_tokens:
- lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
- yield penalty, sum(t.count for t in rare_names), lookup
+ yield penalty, fulls_count / (2**len(addr_partials)),\
+ dbf.lookup_by_any_name([t.token for t in name_fulls], addr_tokens,
+ 'restrict' if fulls_count < 10000 else 'lookup_all')
# To catch remaining results, lookup by name and address
- if all(t.is_indexed for t in name_partials):
- lookup = [dbf.FieldLookup('name_vector',
- [t.token for t in name_partials], 'lookup_all')]
- else:
- # we don't have the partials, try with the non-rare names
- non_rare_names = [t.token for t in name_fulls if t.count >= 1000]
- if not non_rare_names:
- return
- lookup = [dbf.FieldLookup('name_vector', non_rare_names, 'lookup_any')]
- if addr_tokens:
- lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
- yield penalty + 0.1 * max(0, 5 - len(name_partials) - len(addr_tokens)),\
- min(exp_name_count, exp_addr_count), lookup
+ # We only do this if there is a reasonable number of results expected.
+ exp_count = exp_count / (2**len(addr_partials)) if addr_partials else exp_count
+ if exp_count < 10000 and all(t.is_indexed for t in name_partials):
+ lookup = [dbf.FieldLookup('name_vector', name_tokens, 'lookup_all')]
+ if addr_tokens:
+ lookup.append(dbf.FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all'))
+ penalty += 0.35 * max(0, 5 - len(name_partials) - len(addr_tokens))
+ yield penalty, exp_count, lookup
def get_name_ranking(self, trange: TokenRange) -> dbf.FieldRanking:
self.query.get_tokens(assignment.postcode,
TokenType.POSTCODE))
if assignment.qualifier:
- sdata.set_qualifiers(self.query.get_tokens(assignment.qualifier,
- TokenType.QUALIFIER))
+ tokens = self.query.get_tokens(assignment.qualifier, TokenType.QUALIFIER)
+ if self.details.categories:
+ tokens = [t for t in tokens if t.get_category() in self.details.categories]
+ if not tokens:
+ return None
+ sdata.set_qualifiers(tokens)
+ elif self.details.categories:
+ sdata.qualifiers = dbf.WeightedCategories(self.details.categories,
+ [0.0] * len(self.details.categories))
if assignment.address:
sdata.set_ranking([self.get_addr_ranking(r) for r in assignment.address])
return sdata
- def get_search_categories(self,
- assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
- """ Collect tokens for category search or use the categories
+ def get_near_items(self, assignment: TokenAssignment) -> Optional[dbf.WeightedCategories]:
+ """ Collect tokens for near items search or use the categories
requested per parameter.
Returns None if no category search is requested.
"""
- if assignment.category:
- tokens = [t for t in self.query.get_tokens(assignment.category,
- TokenType.CATEGORY)
- if not self.details.categories
- or t.get_category() in self.details.categories]
- return dbf.WeightedCategories([t.get_category() for t in tokens],
- [t.penalty for t in tokens])
-
- if self.details.categories:
- return dbf.WeightedCategories(self.details.categories,
- [0.0] * len(self.details.categories))
+ if assignment.near_item:
+ tokens: Dict[Tuple[str, str], float] = {}
+ for t in self.query.get_tokens(assignment.near_item, TokenType.NEAR_ITEM):
+ cat = t.get_category()
+ # The category of a near search will be that of near_item.
+ # Thus, if the search is restricted by a category parameter,
+ # the two sets must intersect.
+ if (not self.details.categories or cat in self.details.categories)\
+ and t.penalty < tokens.get(cat, 1000.0):
+ tokens[cat] = t.penalty
+ return dbf.WeightedCategories(list(tokens.keys()), list(tokens.values()))
return None
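The de-duplication above can be shown in isolation (categories and penalties invented): per category only the smallest penalty survives.

    tokens: Dict[Tuple[str, str], float] = {}
    for cat, penalty in [(('amenity', 'cafe'), 0.3),
                         (('amenity', 'cafe'), 0.1),
                         (('shop', 'bakery'), 0.2)]:
        if penalty < tokens.get(cat, 1000.0):
            tokens[cat] = penalty
    assert tokens == {('amenity', 'cafe'): 0.1, ('shop', 'bakery'): 0.2}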
"""
Data structures for more complex fields in abstract search descriptions.
"""
-from typing import List, Tuple, Iterator, cast
+from typing import List, Tuple, Iterator, cast, Dict
import dataclasses
import sqlalchemy as sa
""" Set the qulaifier field from the given tokens.
"""
if tokens:
- min_penalty = min(t.penalty for t in tokens)
+ categories: Dict[Tuple[str, str], float] = {}
+ min_penalty = 1000.0
+ for t in tokens:
+ if t.penalty < min_penalty:
+ min_penalty = t.penalty
+ cat = t.get_category()
+ if t.penalty < categories.get(cat, 1000.0):
+ categories[cat] = t.penalty
self.penalty += min_penalty
- self.qualifiers = WeightedCategories([t.get_category() for t in tokens],
- [t.penalty - min_penalty for t in tokens])
+ self.qualifiers = WeightedCategories(list(categories.keys()),
+ list(categories.values()))
def set_ranking(self, rankings: List[FieldRanking]) -> None:
self.rankings.append(ranking)
else:
self.penalty += ranking.default
+
+
+def lookup_by_names(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
+ """ Create a lookup list where name tokens are looked up via index
+ and potential address tokens are used to restrict the search further.
+ """
+ lookup = [FieldLookup('name_vector', name_tokens, 'lookup_all')]
+ if addr_tokens:
+ lookup.append(FieldLookup('nameaddress_vector', addr_tokens, 'restrict'))
+
+ return lookup
+
+
+def lookup_by_any_name(name_tokens: List[int], addr_tokens: List[int],
+ lookup_type: str) -> List[FieldLookup]:
+ """ Create a lookup list where name tokens are looked up via index
+ and only one of the name tokens must be present.
+ Potential address tokens are used to restrict the search further.
+ """
+ lookup = [FieldLookup('name_vector', name_tokens, 'lookup_any')]
+ if addr_tokens:
+ lookup.append(FieldLookup('nameaddress_vector', addr_tokens, lookup_type))
+
+ return lookup
+
+
+def lookup_by_addr(name_tokens: List[int], addr_tokens: List[int]) -> List[FieldLookup]:
+ """ Create a lookup list where address tokens are looked up via index
+ and the name tokens are only used to restrict the search further.
+ """
+ return [FieldLookup('name_vector', name_tokens, 'restrict'),
+ FieldLookup('nameaddress_vector', addr_tokens, 'lookup_all')]
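A usage sketch for the three helpers (token ids made up): the difference is which index carries the lookup and which side merely restricts.

    by_name = lookup_by_names([101, 102], [201])       # all name tokens must match
    by_any = lookup_by_any_name([110, 111], [201],     # any one full name suffices
                                'restrict')
    by_addr = lookup_by_addr([101], [201, 202])        # address index carries the lookup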
"""
Implementation of the actual database accesses for forward search.
"""
-from typing import List, Tuple, AsyncIterator, Dict, Any
+from typing import List, Tuple, AsyncIterator, Dict, Any, Callable
import abc
import sqlalchemy as sa
from sqlalchemy.dialects.postgresql import ARRAY, array_agg
from nominatim.typing import SaFromClause, SaScalarSelect, SaColumn, \
- SaExpression, SaSelect, SaRow, SaBind
+ SaExpression, SaSelect, SaLambdaSelect, SaRow, SaBind
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails, DataLayer, GeometryFormat, Bbox
import nominatim.api.results as nres
#pylint: disable=singleton-comparison,not-callable
#pylint: disable=too-many-branches,too-many-arguments,too-many-locals,too-many-statements
+def no_index(expr: SaColumn) -> SaColumn:
+ """ Wrap the given expression so that the query planner will
+ refrain from using the expression for an index lookup.
+ """
+ return sa.func.coalesce(sa.null(), expr) # pylint: disable=not-callable
+
+
def _details_to_bind_params(details: SearchDetails) -> Dict[str, Any]:
""" Create a dictionary from search parameters that can be used
as bind parameter for SQL execute.
VIEWBOX2_PARAM: SaBind = sa.bindparam('viewbox2', type_=Geometry)
NEAR_PARAM: SaBind = sa.bindparam('near', type_=Geometry)
NEAR_RADIUS_PARAM: SaBind = sa.bindparam('near_radius')
-EXCLUDED_PARAM: SaBind = sa.bindparam('excluded')
COUNTRIES_PARAM: SaBind = sa.bindparam('countries')
+def _within_near(t: SaFromClause) -> Callable[[], SaExpression]:
+ return lambda: t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)
+
+def _exclude_places(t: SaFromClause) -> Callable[[], SaExpression]:
+ return lambda: t.c.place_id.not_in(sa.bindparam('excluded'))
+
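A hedged usage sketch (ids invented; t and conn as in the surrounding code): the helper defers building the expression, and the 'excluded' value is only supplied at execution time, keeping the cached statement reusable.

    sql = sa.lambda_stmt(lambda: sa.select(t).where(_exclude_places(t)()))
    rows = await conn.execute(sql, {'excluded': [42, 43]})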
def _select_placex(t: SaFromClause) -> SaSelect:
return sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
t.c.class_, t.c.type,
t.c.address, t.c.extratags,
t.c.housenumber, t.c.postcode, t.c.country_code,
- t.c.importance, t.c.wikipedia,
+ t.c.wikipedia,
t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
+ t.c.linked_place_id, t.c.admin_level,
t.c.centroid,
t.c.geometry.ST_Expand(0).label('bbox'))
-def _add_geometry_columns(sql: SaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
- if not details.geometry_output:
- return sql
-
+def _add_geometry_columns(sql: SaLambdaSelect, col: SaColumn, details: SearchDetails) -> SaSelect:
out = []
if details.geometry_simplification > 0.0:
col = sa.func.ST_SimplifyPreserveTopology(col, details.geometry_simplification)
if details.geometry_output & GeometryFormat.GEOJSON:
- out.append(sa.func.ST_AsGeoJSON(col).label('geometry_geojson'))
+ out.append(sa.func.ST_AsGeoJSON(col, 7).label('geometry_geojson'))
if details.geometry_output & GeometryFormat.TEXT:
out.append(sa.func.ST_AsText(col).label('geometry_text'))
if details.geometry_output & GeometryFormat.KML:
- out.append(sa.func.ST_AsKML(col).label('geometry_kml'))
+ out.append(sa.func.ST_AsKML(col, 7).label('geometry_kml'))
if details.geometry_output & GeometryFormat.SVG:
- out.append(sa.func.ST_AsSVG(col).label('geometry_svg'))
+ out.append(sa.func.ST_AsSVG(col, 0, 7).label('geometry_svg'))
return sql.add_columns(*out)
for n in numerals)))
if details.excluded:
- sql = sql.where(table.c.place_id.not_in(EXCLUDED_PARAM))
+ sql = sql.where(_exclude_places(table))
return sql.scalar_subquery()
def _filter_by_layer(table: SaFromClause, layers: DataLayer) -> SaColumn:
orexpr: List[SaExpression] = []
if layers & DataLayer.ADDRESS and layers & DataLayer.POI:
- orexpr.append(table.c.rank_address.between(1, 30))
+ orexpr.append(no_index(table.c.rank_address).between(1, 30))
elif layers & DataLayer.ADDRESS:
- orexpr.append(table.c.rank_address.between(1, 29))
- orexpr.append(sa.and_(table.c.rank_address == 30,
+ orexpr.append(no_index(table.c.rank_address).between(1, 29))
+ orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
sa.or_(table.c.housenumber != None,
- table.c.address.has_key('housename'))))
+ table.c.address.has_key('addr:housename'))))
elif layers & DataLayer.POI:
- orexpr.append(sa.and_(table.c.rank_address == 30,
+ orexpr.append(sa.and_(no_index(table.c.rank_address) == 30,
table.c.class_.not_in(('place', 'building'))))
if layers & DataLayer.MANMADE:
if not layers & DataLayer.NATURAL:
exclude.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.not_in(tuple(exclude)),
- table.c.rank_address == 0))
+ no_index(table.c.rank_address) == 0))
else:
include = []
if layers & DataLayer.RAILWAY:
if layers & DataLayer.NATURAL:
include.extend(('natural', 'water', 'waterway'))
orexpr.append(sa.and_(table.c.class_.in_(tuple(include)),
- table.c.rank_address == 0))
+ no_index(table.c.rank_address) == 0))
if len(orexpr) == 1:
return orexpr[0]
place_ids: List[int],
details: SearchDetails) -> AsyncIterator[nres.SearchResult]:
t = conn.t.placex
- sql = _select_placex(t).where(t.c.place_id.in_(place_ids))
+ sql = _select_placex(t).add_columns(t.c.importance)\
+ .where(t.c.place_id.in_(place_ids))
- sql = _add_geometry_columns(sql, t.c.geometry, details)
+ if details.geometry_output:
+ sql = _add_geometry_columns(sql, t.c.geometry, details)
for row in await conn.execute(sql):
result = nres.create_from_placex_row(row, nres.SearchResult)
base.sort(key=lambda r: (r.accuracy, r.rank_search))
max_accuracy = base[0].accuracy + 0.5
+ if base[0].rank_address == 0:
+ min_rank = 0
+ max_rank = 0
+ elif base[0].rank_address < 26:
+ min_rank = 1
+ max_rank = min(25, base[0].rank_address + 4)
+ else:
+ min_rank = 26
+ max_rank = 30
base = nres.SearchResults(r for r in base if r.source_table == nres.SourceTable.PLACEX
and r.accuracy <= max_accuracy
- and r.bbox and r.bbox.area < 20)
+ and r.bbox and r.bbox.area < 20
+ and r.rank_address >= min_rank
+ and r.rank_address <= max_rank)
if base:
baseids = [b.place_id for b in base[:5] if b.place_id]
"""
table = await conn.get_class_table(*category)
- t = conn.t.placex.alias('p')
tgeom = conn.t.placex.alias('pgeom')
- sql = _select_placex(t).where(tgeom.c.place_id.in_(ids))\
- .where(t.c.class_ == category[0])\
- .where(t.c.type == category[1])
-
if table is None:
# No classtype table available, do a simplified lookup in placex.
- sql = sql.join(tgeom, t.c.geometry.ST_DWithin(tgeom.c.centroid, 0.01))\
- .order_by(tgeom.c.centroid.ST_Distance(t.c.centroid))
+ table = conn.t.placex.alias('inner')
+ sql = sa.select(table.c.place_id,
+ sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+ .label('dist'))\
+ .join(tgeom, table.c.geometry.intersects(tgeom.c.centroid.ST_Expand(0.01)))\
+ .where(table.c.class_ == category[0])\
+ .where(table.c.type == category[1])
else:
# Use classtype table. We can afford to use a larger
# radius for the lookup.
- sql = sql.join(table, t.c.place_id == table.c.place_id)\
- .join(tgeom,
- sa.case((sa.and_(tgeom.c.rank_address < 9,
- tgeom.c.geometry.is_area()),
- tgeom.c.geometry.ST_Contains(table.c.centroid)),
- else_ = tgeom.c.centroid.ST_DWithin(table.c.centroid, 0.05)))\
- .order_by(tgeom.c.centroid.ST_Distance(table.c.centroid))
-
- sql = sql.where(t.c.rank_address.between(MIN_RANK_PARAM, MAX_RANK_PARAM))
+ sql = sa.select(table.c.place_id,
+ sa.func.min(tgeom.c.centroid.ST_Distance(table.c.centroid))
+ .label('dist'))\
+ .join(tgeom,
+ table.c.centroid.ST_CoveredBy(
+ sa.case((sa.and_(tgeom.c.rank_address > 9,
+ tgeom.c.geometry.is_area()),
+ tgeom.c.geometry),
+ else_ = tgeom.c.centroid.ST_Expand(0.05))))
+
+ inner = sql.where(tgeom.c.place_id.in_(ids))\
+ .group_by(table.c.place_id).subquery()
+
+ t = conn.t.placex
+ sql = _select_placex(t).add_columns((-inner.c.dist).label('importance'))\
+ .join(inner, inner.c.place_id == t.c.place_id)\
+ .order_by(inner.c.dist)
+
+ sql = sql.where(no_index(t.c.rank_address).between(MIN_RANK_PARAM, MAX_RANK_PARAM))
if details.countries:
sql = sql.where(t.c.country_code.in_(COUNTRIES_PARAM))
if details.excluded:
- sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+ sql = sql.where(_exclude_places(t))
if details.layers is not None:
sql = sql.where(_filter_by_layer(t, details.layers))
"""
def __init__(self, sdata: SearchData) -> None:
super().__init__(sdata.penalty)
- self.categories = sdata.qualifiers
+ self.qualifiers = sdata.qualifiers
self.countries = sdata.countries
if details.near and details.near_radius is not None and details.near_radius < 0.2:
# simply search in placex table
- sql = _select_placex(t) \
- .where(t.c.linked_place_id == None) \
- .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
- .order_by(t.c.centroid.ST_Distance(NEAR_PARAM))
+ def _base_query() -> SaSelect:
+ return _select_placex(t) \
+ .add_columns((-t.c.centroid.ST_Distance(NEAR_PARAM))
+ .label('importance'))\
+ .where(t.c.linked_place_id == None) \
+ .where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM)) \
+ .order_by(t.c.centroid.ST_Distance(NEAR_PARAM)) \
+ .limit(LIMIT_PARAM)
+
+ classtype = self.qualifiers.values
+ if len(classtype) == 1:
+ cclass, ctype = classtype[0]
+ sql: SaLambdaSelect = sa.lambda_stmt(lambda: _base_query()
+ .where(t.c.class_ == cclass)
+ .where(t.c.type == ctype))
+ else:
+ sql = _base_query().where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
+ for cls, typ in classtype)))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
- classtype = self.categories.values
- if len(classtype) == 1:
- sql = sql.where(t.c.class_ == classtype[0][0]) \
- .where(t.c.type == classtype[0][1])
- else:
- sql = sql.where(sa.or_(*(sa.and_(t.c.class_ == cls, t.c.type == typ)
- for cls, typ in classtype)))
-
- sql = sql.limit(LIMIT_PARAM)
rows.extend(await conn.execute(sql, bind_params))
else:
# use the class type tables
- for category in self.categories.values:
+ for category in self.qualifiers.values:
table = await conn.get_class_table(*category)
if table is not None:
sql = _select_placex(t)\
+ .add_columns(t.c.importance)\
.join(table, t.c.place_id == table.c.place_id)\
.where(t.c.class_ == category[0])\
.where(t.c.type == category[1])
for row in rows:
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
- result.accuracy = self.penalty + self.categories.get_penalty((row.class_, row.type))
+ result.accuracy = self.penalty + self.qualifiers.get_penalty((row.class_, row.type))
result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
"""
t = conn.t.placex
+ ccodes = self.countries.values
sql = _select_placex(t)\
- .where(t.c.country_code.in_(self.countries.values))\
+ .add_columns(t.c.importance)\
+ .where(t.c.country_code.in_(ccodes))\
.where(t.c.rank_address == 4)
- sql = _add_geometry_columns(sql, t.c.geometry, details)
+ if details.geometry_output:
+ sql = _add_geometry_columns(sql, t.c.geometry, details)
if details.excluded:
- sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+ sql = sql.where(_exclude_places(t))
if details.viewbox is not None and details.bounded_viewbox:
- sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
+ sql = sql.where(lambda: t.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
- sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ sql = sql.where(_within_near(t))
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_placex_row(row, nres.SearchResult)
assert result
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
+ result.bbox = Bbox.from_wkb(row.bbox)
results.append(result)
return results or await self.lookup_in_country_table(conn, details)
sql = sa.select(tgrid.c.country_code,
tgrid.c.geometry.ST_Centroid().ST_Collect().ST_Centroid()
- .label('centroid'))\
+ .label('centroid'),
+ tgrid.c.geometry.ST_Collect().ST_Expand(0).label('bbox'))\
.where(tgrid.c.country_code.in_(self.countries.values))\
.group_by(tgrid.c.country_code)
if details.viewbox is not None and details.bounded_viewbox:
sql = sql.where(tgrid.c.geometry.intersects(VIEWBOX_PARAM))
if details.near is not None and details.near_radius is not None:
- sql = sql.where(tgrid.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ sql = sql.where(_within_near(tgrid))
sub = sql.subquery('grid')
+ sa.func.coalesce(t.c.derived_name,
sa.cast('', type_=conn.t.types.Composite))
).label('name'),
- sub.c.centroid)\
+ sub.c.centroid, sub.c.bbox)\
.join(sub, t.c.country_code == sub.c.country_code)
+ if details.geometry_output:
+ sql = _add_geometry_columns(sql, sub.c.centroid, details)
+
results = nres.SearchResults()
for row in await conn.execute(sql, _details_to_bind_params(details)):
result = nres.create_from_country_row(row, nres.SearchResult)
assert result
+ result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = self.penalty + self.countries.get_penalty(row.country_code, 5.0)
results.append(result)
""" Find results for the search in the database.
"""
t = conn.t.postcode
+ pcs = self.postcodes.values
sql = sa.select(t.c.place_id, t.c.parent_place_id,
t.c.rank_search, t.c.rank_address,
t.c.postcode, t.c.country_code,
t.c.geometry.label('centroid'))\
- .where(t.c.postcode.in_(self.postcodes.values))
+ .where(t.c.postcode.in_(pcs))
- sql = _add_geometry_columns(sql, t.c.geometry, details)
+ if details.geometry_output:
+ sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = sa.literal(self.penalty)
sql = sql.where(t.c.geometry.intersects(VIEWBOX_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
- (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
- else_=2.0)
+ (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+ else_=1.0)
if details.near is not None:
if details.near_radius is not None:
- sql = sql.where(t.c.geometry.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ sql = sql.where(_within_near(t))
sql = sql.order_by(t.c.geometry.ST_Distance(NEAR_PARAM))
if self.countries:
sql = sql.where(t.c.country_code.in_(self.countries.values))
if details.excluded:
- sql = sql.where(t.c.place_id.not_in(EXCLUDED_PARAM))
+ sql = sql.where(_exclude_places(t))
if self.lookups:
assert len(self.lookups) == 1
details: SearchDetails) -> nres.SearchResults:
""" Find results for the search in the database.
"""
- t = conn.t.placex.alias('p')
- tsearch = conn.t.search_name.alias('s')
+ t = conn.t.placex
+ tsearch = conn.t.search_name
- sql = sa.select(t.c.place_id, t.c.osm_type, t.c.osm_id, t.c.name,
- t.c.class_, t.c.type,
- t.c.address, t.c.extratags,
- t.c.housenumber, t.c.postcode, t.c.country_code,
- t.c.wikipedia,
- t.c.parent_place_id, t.c.rank_address, t.c.rank_search,
- t.c.centroid,
- t.c.geometry.ST_Expand(0).label('bbox'))\
- .where(t.c.place_id == tsearch.c.place_id)
+ sql: SaLambdaSelect = sa.lambda_stmt(lambda:
+ _select_placex(t).where(t.c.place_id == tsearch.c.place_id))
- sql = _add_geometry_columns(sql, t.c.geometry, details)
+ if details.geometry_output:
+ sql = _add_geometry_columns(sql, t.c.geometry, details)
penalty: SaExpression = sa.literal(self.penalty)
for ranking in self.rankings:
# if a postcode is given, don't search for state or country level objects
sql = sql.where(tsearch.c.address_rank > 9)
tpc = conn.t.postcode
+ pcs = self.postcodes.values
if self.expected_count > 1000:
# Many results expected. Restrict by postcode.
sql = sql.where(sa.select(tpc.c.postcode)
- .where(tpc.c.postcode.in_(self.postcodes.values))
+ .where(tpc.c.postcode.in_(pcs))
.where(tsearch.c.centroid.ST_DWithin(tpc.c.geometry, 0.12))
.exists())
# Less results, only have a preference for close postcodes
pc_near = sa.select(sa.func.min(tpc.c.geometry.ST_Distance(tsearch.c.centroid)))\
- .where(tpc.c.postcode.in_(self.postcodes.values))\
+ .where(tpc.c.postcode.in_(pcs))\
.scalar_subquery()
- penalty += sa.case((t.c.postcode.in_(self.postcodes.values), 0.0),
+ penalty += sa.case((t.c.postcode.in_(pcs), 0.0),
else_=sa.func.coalesce(pc_near, 2.0))
if details.viewbox is not None:
if details.bounded_viewbox:
- sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+ if details.viewbox.area < 0.2:
+ sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX_PARAM))
+ elif self.expected_count >= 10000:
+ if details.viewbox.area < 0.5:
+ sql = sql.where(tsearch.c.centroid.intersects(VIEWBOX2_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_Intersects_no_index(VIEWBOX2_PARAM))
else:
penalty += sa.case((t.c.geometry.intersects(VIEWBOX_PARAM), 0.0),
- (t.c.geometry.intersects(VIEWBOX2_PARAM), 1.0),
- else_=2.0)
+ (t.c.geometry.intersects(VIEWBOX2_PARAM), 0.5),
+ else_=1.0)
if details.near is not None:
if details.near_radius is not None:
- sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
- sql = sql.add_columns(-tsearch.c.centroid.ST_Distance(NEAR_PARAM)
+ if details.near_radius < 0.1:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin(NEAR_PARAM, NEAR_RADIUS_PARAM))
+ else:
+ sql = sql.where(tsearch.c.centroid.ST_DWithin_no_index(NEAR_PARAM,
+ NEAR_RADIUS_PARAM))
+ sql = sql.add_columns((-tsearch.c.centroid.ST_Distance(NEAR_PARAM))
.label('importance'))
sql = sql.order_by(sa.desc(sa.text('importance')))
else:
- sql = sql.order_by(penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
- else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
+ if self.expected_count < 10000\
+ or (details.viewbox is not None and details.viewbox.area < 0.5):
+ sql = sql.order_by(
+ penalty - sa.case((tsearch.c.importance > 0, tsearch.c.importance),
+ else_=0.75001-(sa.cast(tsearch.c.search_rank, sa.Float())/40)))
sql = sql.add_columns(t.c.importance)
- sql = sql.add_columns(penalty.label('accuracy'))\
- .order_by(sa.text('accuracy'))
+ sql = sql.add_columns(penalty.label('accuracy'))
+
+ if self.expected_count < 10000:
+ sql = sql.order_by(sa.text('accuracy'))
if self.housenumbers:
hnr_regexp = f"\\m({'|'.join(self.housenumbers.values)})\\M"
sql = sql.where(tsearch.c.address_rank.between(16, 30))\
.where(sa.or_(tsearch.c.address_rank < 30,
- t.c.housenumber.op('~*')(hnr_regexp)))
+ t.c.housenumber.op('~*')(hnr_regexp)))
# Cross check for housenumbers, need to do that on a rather large
        # set. In the worst case there are 40,000 main streets in OSM.
.where(thnr.c.indexed_status == 0)
if details.excluded:
- place_sql = place_sql.where(thnr.c.place_id.not_in(EXCLUDED_PARAM))
+ place_sql = place_sql.where(thnr.c.place_id.not_in(sa.bindparam('excluded')))
if self.qualifiers:
place_sql = place_sql.where(self.qualifiers.sql_restrict(thnr))
- numerals = [int(n) for n in self.housenumbers.values if n.isdigit()]
- interpol_sql: SaExpression
- tiger_sql: SaExpression
+ numerals = [int(n) for n in self.housenumbers.values
+ if n.isdigit() and len(n) < 8]
+ interpol_sql: SaColumn
+ tiger_sql: SaColumn
if numerals and \
(not self.qualifiers or ('place', 'house') in self.qualifiers.values):
# Housenumbers from interpolations
numerals, details)
), else_=None)
else:
- interpol_sql = sa.literal_column('NULL')
- tiger_sql = sa.literal_column('NULL')
+ interpol_sql = sa.null()
+ tiger_sql = sa.null()
unsort = sa.select(inner, place_sql.scalar_subquery().label('placex_hnr'),
interpol_sql.label('interpol_hnr'),
if self.qualifiers:
sql = sql.where(self.qualifiers.sql_restrict(t))
if details.excluded:
- sql = sql.where(tsearch.c.place_id.not_in(EXCLUDED_PARAM))
+ sql = sql.where(_exclude_places(tsearch))
if details.min_rank > 0:
sql = sql.where(sa.or_(tsearch.c.address_rank >= MIN_RANK_PARAM,
tsearch.c.search_rank >= MIN_RANK_PARAM))
assert result
result.bbox = Bbox.from_wkb(row.bbox)
result.accuracy = row.accuracy
- if not details.excluded or not result.place_id in details.excluded:
- results.append(result)
-
if self.housenumbers and row.rank_address < 30:
if row.placex_hnr:
subs = _get_placex_housenumbers(conn, row.placex_hnr, details)
sub.accuracy += 0.6
results.append(sub)
- result.accuracy += 1.0 # penalty for missing housenumber
+                # Only add the street as a result if it meets all other
+                # filter conditions.
+ if (not details.excluded or result.place_id not in details.excluded)\
+ and (not self.qualifiers or result.category in self.qualifiers.values)\
+ and result.rank_address >= details.min_rank:
+ result.accuracy += 1.0 # penalty for missing housenumber
+ results.append(result)
+ else:
+ results.append(result)
return results
"""
Public interface to the search code.
"""
-from typing import List, Any, Optional, Iterator, Tuple
+from typing import List, Any, Optional, Iterator, Tuple, Dict
import itertools
+import re
+import datetime as dt
+import difflib
from nominatim.api.connection import SearchConnection
from nominatim.api.types import SearchDetails
-from nominatim.api.results import SearchResults, add_result_details
+from nominatim.api.results import SearchResult, SearchResults, add_result_details
from nominatim.api.search.token_assignment import yield_token_assignments
from nominatim.api.search.db_search_builder import SearchBuilder, build_poi_search, wrap_near_search
from nominatim.api.search.db_searches import AbstractSearch
""" Main class responsible for place search.
"""
- def __init__(self, conn: SearchConnection, params: SearchDetails) -> None:
+ def __init__(self, conn: SearchConnection,
+ params: SearchDetails, timeout: Optional[int]) -> None:
self.conn = conn
self.params = params
+ self.timeout = dt.timedelta(seconds=timeout or 1000000)
self.query_analyzer: Optional[AbstractQueryAnalyzer] = None
num_searches = 0
for assignment in yield_token_assignments(query):
searches.extend(search_builder.build(assignment))
- log().table_dump('Searches for assignment',
- _dump_searches(searches, query, num_searches))
+ if num_searches < len(searches):
+ log().table_dump('Searches for assignment',
+ _dump_searches(searches, query, num_searches))
num_searches = len(searches)
searches.sort(key=lambda s: s.penalty)
is found.
"""
log().section('Execute database searches')
- results = SearchResults()
+ results: Dict[Any, SearchResult] = {}
+
+ end_time = dt.datetime.now() + self.timeout
- num_results = 0
- min_ranking = 1000.0
+ min_ranking = searches[0].penalty + 2.0
prev_penalty = 0.0
for i, search in enumerate(searches):
if search.penalty > prev_penalty and (search.penalty > min_ranking or i > 20):
break
log().table_dump(f"{i + 1}. Search", _dump_searches([search], query))
- for result in await search.lookup(self.conn, self.params):
- results.append(result)
- min_ranking = min(min_ranking, result.ranking + 0.5, search.penalty + 0.3)
- log().result_dump('Results', ((r.accuracy, r) for r in results[num_results:]))
- num_results = len(results)
+ lookup_results = await search.lookup(self.conn, self.params)
+ for result in lookup_results:
+ rhash = (result.source_table, result.place_id,
+ result.housenumber, result.country_code)
+ prevresult = results.get(rhash)
+ if prevresult:
+ prevresult.accuracy = min(prevresult.accuracy, result.accuracy)
+ else:
+ results[rhash] = result
+ min_ranking = min(min_ranking, result.accuracy * 1.2)
+ log().result_dump('Results', ((r.accuracy, r) for r in lookup_results))
prev_penalty = search.penalty
+ if dt.datetime.now() >= end_time:
+ break
+ return SearchResults(results.values())
+
+
+ def sort_and_cut_results(self, results: SearchResults) -> SearchResults:
+ """ Remove badly matching results, sort by ranking and
+ limit to the configured number of results.
+ """
if results:
min_ranking = min(r.ranking for r in results)
results = SearchResults(r for r in results if r.ranking < min_ranking + 0.5)
+ results.sort(key=lambda r: r.ranking)
if results:
- min_rank = min(r.rank_search for r in results)
-
+ min_rank = results[0].rank_search
results = SearchResults(r for r in results
if r.ranking + 0.05 * (r.rank_search - min_rank)
< min_ranking + 0.5)
- results.sort(key=lambda r: r.accuracy - r.calculated_importance())
results = SearchResults(results[:self.limit])
return results
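
To make the cut-off above concrete, a small numeric sketch (all values invented):

    # Minimal numeric sketch of the two filters above (values invented).
    min_ranking = 0.3                        # ranking of the best result
    # First filter: keep results within 0.5 of the best ranking.
    assert 0.6 < min_ranking + 0.5           # kept
    assert not 0.9 < min_ranking + 0.5       # dropped
    # Second filter: additionally penalise higher search ranks. A result
    # with ranking 0.6 whose rank_search lies 4 above the best falls out:
    assert not 0.6 + 0.05 * 4 < min_ranking + 0.5
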
+ def rerank_by_query(self, query: QueryStruct, results: SearchResults) -> None:
+        """ Adjust the accuracy of the localized results according to how
+            well they match the original query.
+ """
+ assert self.query_analyzer is not None
+ qwords = [word for phrase in query.source
+ for word in re.split('[, ]+', phrase.text) if word]
+ if not qwords:
+ return
+
+ for result in results:
+ # Negative importance indicates ordering by distance, which is
+ # more important than word matching.
+ if not result.display_name\
+ or (result.importance is not None and result.importance < 0):
+ continue
+ distance = 0.0
+ norm = self.query_analyzer.normalize_text(result.display_name)
+ words = set((w for w in norm.split(' ') if w))
+ if not words:
+ continue
+ for qword in qwords:
+ wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio() for w in words)
+ if wdist < 0.5:
+ distance += len(qword)
+ else:
+ distance += (1.0 - wdist) * len(qword)
+            # Compensate for the fact that country names do not yet get a
+            # match penalty from the tokenizer.
+            # Temporary hack that needs to be removed!
+ if result.rank_address == 4:
+ distance *= 2
+ result.accuracy += distance * 0.4 / sum(len(w) for w in qwords)
+
+
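
A standalone sketch of the word-distance computation above, using only difflib (query words and display name invented):

    # Standalone sketch of the penalty computation above (inputs invented).
    import difflib

    qwords = ['munchen', 'hbf']                 # normalized query words
    words = {'munchen', 'hauptbahnhof'}         # words of a display name

    distance = 0.0
    for qword in qwords:
        wdist = max(difflib.SequenceMatcher(a=qword, b=w).quick_ratio()
                    for w in words)
        # Unmatched query words count with their full length,
        # partial matches proportionally to how badly they match.
        distance += len(qword) if wdist < 0.5 else (1.0 - wdist) * len(qword)

    penalty = distance * 0.4 / sum(len(w) for w in qwords)  # added to accuracy
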
async def lookup_pois(self, categories: List[Tuple[str, str]],
phrases: List[Phrase]) -> SearchResults:
""" Look up places by category. If phrase is given, a place search
if query:
searches = [wrap_near_search(categories, s) for s in searches[:50]]
results = await self.execute_searches(query, searches)
+ await add_result_details(self.conn, results, self.params)
+ log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
+ results = self.sort_and_cut_results(results)
else:
results = SearchResults()
else:
search = build_poi_search(categories, self.params.countries)
results = await search.lookup(self.conn, self.params)
+ await add_result_details(self.conn, results, self.params)
- await add_result_details(self.conn, results, self.params)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
# Execute SQL until an appropriate result is found.
results = await self.execute_searches(query, searches[:50])
await add_result_details(self.conn, results, self.params)
+ log().result_dump('Preliminary Results', ((r.accuracy, r) for r in results))
+ self.rerank_by_query(query, results)
+ log().result_dump('Results after reranking', ((r.accuracy, r) for r in results))
+ results = self.sort_and_cut_results(results)
log().result_dump('Final Results', ((r.accuracy, r) for r in results))
return results
# pylint: disable=invalid-name,too-many-locals
def _dump_searches(searches: List[AbstractSearch], query: QueryStruct,
start: int = 0) -> Iterator[Optional[List[Any]]]:
- yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries', 'Qualifier', 'Rankings']
+ yield ['Penalty', 'Lookups', 'Housenr', 'Postcode', 'Countries',
+           'Qualifier', 'Category', 'Rankings']
def tk(tl: List[int]) -> str:
tstr = [f"{query.find_lookup_word_by_id(t)}({t})" for t in tl]
for search in searches[start:]:
fields = ('lookups', 'rankings', 'countries', 'housenumbers',
- 'postcodes', 'qualifier')
- iters = itertools.zip_longest([f"{search.penalty:.3g}"],
- *(getattr(search, attr, []) for attr in fields),
- fillvalue= '')
- for penalty, lookup, rank, cc, hnr, pc, qual in iters:
+ 'postcodes', 'qualifiers')
+ if hasattr(search, 'search'):
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search.search, attr, []) for attr in fields),
+ getattr(search, 'categories', []),
+ fillvalue='')
+ else:
+ iters = itertools.zip_longest([f"{search.penalty:.3g}"],
+ *(getattr(search, attr, []) for attr in fields),
+ [],
+ fillvalue='')
+ for penalty, lookup, rank, cc, hnr, pc, qual, cat in iters:
yield [penalty, fmt_lookup(lookup), fmt_cstr(hnr),
- fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_ranking(rank)]
+ fmt_cstr(pc), fmt_cstr(cc), fmt_cstr(qual), fmt_cstr(cat), fmt_ranking(rank)]
yield None
seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm)
distance = 0
for tag, afrom, ato, bfrom, bto in seq.get_opcodes():
- if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)):
+ if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)):
distance += 1
elif tag == 'replace':
distance += max((ato-afrom), (bto-bfrom))
penalty = 0.0
if row.type == 'w':
penalty = 0.3
+ elif row.type == 'W':
+ if len(row.word_token) == 1 and row.word_token == row.word:
+ penalty = 0.2 if row.word.isdigit() else 0.3
elif row.type == 'H':
penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
if all(not c.isdigit() for c in row.word_token):
penalty += 0.2 * (len(row.word_token) - 1)
+ elif row.type == 'C':
+ if len(row.word_token) == 1:
+ penalty = 0.3
if row.info is None:
lookup_word = row.word
async def setup(self) -> None:
""" Set up static data structures needed for the analysis.
"""
- rules = await self.conn.get_property('tokenizer_import_normalisation')
- self.normalizer = Transliterator.createFromRules("normalization", rules)
- rules = await self.conn.get_property('tokenizer_import_transliteration')
- self.transliterator = Transliterator.createFromRules("transliteration", rules)
+ async def _make_normalizer() -> Any:
+ rules = await self.conn.get_property('tokenizer_import_normalisation')
+ return Transliterator.createFromRules("normalization", rules)
+
+ self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer',
+ _make_normalizer)
+
+ async def _make_transliterator() -> Any:
+ rules = await self.conn.get_property('tokenizer_import_transliteration')
+ return Transliterator.createFromRules("transliteration", rules)
+
+ self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator',
+ _make_transliterator)
if 'word' not in self.conn.t.meta.tables:
sa.Table('word', self.conn.t.meta,
if row.type == 'S':
if row.info['op'] in ('in', 'near'):
if trange.start == 0:
- query.add_token(trange, qmod.TokenType.CATEGORY, token)
+ query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
- query.add_token(trange, qmod.TokenType.CATEGORY, token)
+ query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
else:
query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
for row in await self.lookup_in_db(lookup_words):
for trange in words[row.word_token.strip()]:
token, ttype = self.make_token(row)
- if ttype == qmod.TokenType.CATEGORY:
+ if ttype == qmod.TokenType.NEAR_ITEM:
if trange.start == 0:
- query.add_token(trange, qmod.TokenType.CATEGORY, token)
+ query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype == qmod.TokenType.QUALIFIER:
query.add_token(trange, qmod.TokenType.QUALIFIER, token)
if trange.start == 0 or trange.end == query.num_token_slots():
token = copy(token)
token.penalty += 0.1 * (query.num_token_slots())
- query.add_token(trange, qmod.TokenType.CATEGORY, token)
+ query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
query.add_token(trange, ttype, token)
return query
+ def normalize_text(self, text: str) -> str:
+ """ Bring the given text into a normalized form.
+
+            This only removes letter case, so some differences from the
+            full normalization of the phrase remain.
+ """
+ return text.lower()
+
+
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
ttype = qmod.TokenType.POSTCODE
lookup_word = row.word_token[1:]
else:
- ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
+ ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
else qmod.TokenType.QUALIFIER
lookup_word = row.word
elif row.word_token.startswith(' '):
"""
Datastructures for a tokenized query.
"""
-from typing import List, Tuple, Optional, NamedTuple, Iterator
+from typing import List, Tuple, Optional, Iterator
from abc import ABC, abstractmethod
import dataclasses
import enum
""" Country name or reference. """
QUALIFIER = enum.auto()
""" Special term used together with name (e.g. _Hotel_ Bellevue). """
- CATEGORY = enum.auto()
+ NEAR_ITEM = enum.auto()
""" Special term used as searchable object(e.g. supermarket in ...). """
COUNTRY = enum.auto()
""" Contains the country name or code. """
- def compatible_with(self, ttype: TokenType) -> bool:
+ def compatible_with(self, ttype: TokenType,
+ is_full_phrase: bool) -> bool:
""" Check if the given token type can be used with the phrase type.
"""
if self == PhraseType.NONE:
- return True
+ return not is_full_phrase or ttype != TokenType.QUALIFIER
if self == PhraseType.AMENITY:
- return ttype in (TokenType.WORD, TokenType.PARTIAL,
- TokenType.QUALIFIER, TokenType.CATEGORY)
+ return ttype in (TokenType.WORD, TokenType.PARTIAL)\
+ or (is_full_phrase and ttype == TokenType.NEAR_ITEM)\
+ or (not is_full_phrase and ttype == TokenType.QUALIFIER)
if self == PhraseType.STREET:
return ttype in (TokenType.WORD, TokenType.PARTIAL, TokenType.HOUSENUMBER)
if self == PhraseType.POSTCODE:
category objects.
"""
-
-class TokenRange(NamedTuple):
+@dataclasses.dataclass
+class TokenRange:
""" Indexes of query nodes over which a token spans.
"""
start: int
end: int
+ def __lt__(self, other: 'TokenRange') -> bool:
+ return self.end <= other.start
+
+
+ def __le__(self, other: 'TokenRange') -> bool:
+ return NotImplemented
+
+
+ def __gt__(self, other: 'TokenRange') -> bool:
+ return self.start >= other.end
+
+
+ def __ge__(self, other: 'TokenRange') -> bool:
+ return NotImplemented
+
+
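
The operators above define a partial order on ranges: a range compares as smaller only when it ends at or before the start of the other. A quick sketch of the implied semantics:

    # TokenRange(0, 2) ends where TokenRange(2, 4) begins: strictly before.
    assert TokenRange(0, 2) < TokenRange(2, 4)
    assert TokenRange(2, 4) > TokenRange(0, 2)
    # Overlapping ranges are neither smaller nor greater.
    assert not TokenRange(0, 3) < TokenRange(2, 4)
    assert not TokenRange(0, 3) > TokenRange(2, 4)
    # <= and >= return NotImplemented on purpose; using them raises TypeError.
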
def replace_start(self, new_start: int) -> 'TokenRange':
""" Return a new token range with the new start.
"""
be added to, then the token is silently dropped.
"""
snode = self.nodes[trange.start]
- if snode.ptype.compatible_with(ttype):
+ full_phrase = snode.btype in (BreakType.START, BreakType.PHRASE)\
+ and self.nodes[trange.end].btype in (BreakType.PHRASE, BreakType.END)
+ if snode.ptype.compatible_with(ttype, full_phrase):
tlist = snode.get_tokens(trange.end, ttype)
if tlist is None:
snode.starting.append(TokenList(trange.end, ttype, [token]))
"""
+ @abstractmethod
+ def normalize_text(self, text: str) -> str:
+        """ Bring the given text into a normalized form. That is the
+            standardized form that the search will work with. All
+            information removed at this stage is inevitably lost.
+ """
+
+
+
async def make_query_analyzer(conn: SearchConnection) -> AbstractQueryAnalyzer:
""" Create a query analyzer for the tokenizer used by the database.
"""
housenumber: Optional[qmod.TokenRange] = None
postcode: Optional[qmod.TokenRange] = None
country: Optional[qmod.TokenRange] = None
- category: Optional[qmod.TokenRange] = None
+ near_item: Optional[qmod.TokenRange] = None
qualifier: Optional[qmod.TokenRange] = None
out.postcode = token.trange
elif token.ttype == qmod.TokenType.COUNTRY:
out.country = token.trange
- elif token.ttype == qmod.TokenType.CATEGORY:
- out.category = token.trange
+ elif token.ttype == qmod.TokenType.NEAR_ITEM:
+ out.near_item = token.trange
elif token.ttype == qmod.TokenType.QUALIFIER:
out.qualifier = token.trange
return out
"""
        # Country and near-item must be the final term for left-to-right
return len(self.seq) > 1 and \
- self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.CATEGORY)
+ self.seq[-1].ttype in (qmod.TokenType.COUNTRY, qmod.TokenType.NEAR_ITEM)
def appendable(self, ttype: qmod.TokenType) -> Optional[int]:
if ttype == qmod.TokenType.COUNTRY:
return None if self.direction == -1 else 1
- if ttype == qmod.TokenType.CATEGORY:
+ if ttype == qmod.TokenType.NEAR_ITEM:
return self.direction
if ttype == qmod.TokenType.QUALIFIER:
if self.direction == 1:
if (len(self.seq) == 1
- and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.CATEGORY)) \
+ and self.seq[0].ttype in (qmod.TokenType.PARTIAL, qmod.TokenType.NEAR_ITEM)) \
or (len(self.seq) == 2
- and self.seq[0].ttype == qmod.TokenType.CATEGORY
+ and self.seq[0].ttype == qmod.TokenType.NEAR_ITEM
and self.seq[1].ttype == qmod.TokenType.PARTIAL):
return 1
return None
if self.direction == -1:
return -1
- tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.CATEGORY else self.seq
+ tempseq = self.seq[1:] if self.seq[0].ttype == qmod.TokenType.NEAR_ITEM else self.seq
if len(tempseq) == 0:
return 1
if len(tempseq) == 1 and self.seq[0].ttype == qmod.TokenType.HOUSENUMBER:
priors = sum(1 for t in self.seq[hnrpos+1:] if t.ttype == qmod.TokenType.PARTIAL)
if not self._adapt_penalty_from_priors(priors, 1):
return False
+ if any(t.ttype == qmod.TokenType.NEAR_ITEM for t in self.seq):
+ self.penalty += 1.0
return True
+ def _get_assignments_postcode(self, base: TokenAssignment,
+ query_len: int) -> Iterator[TokenAssignment]:
+        """ Yield possible assignments of postcode searches with an
+ address component.
+ """
+ assert base.postcode is not None
+
+ if (base.postcode.start == 0 and self.direction != -1)\
+ or (base.postcode.end == query_len and self.direction != 1):
+ log().comment('postcode search')
+ # <address>,<postcode> should give preference to address search
+ if base.postcode.start == 0:
+ penalty = self.penalty
+                self.direction = -1  # name searches are only possible backwards
+ else:
+ penalty = self.penalty + 0.1
+                self.direction = 1  # name searches are only possible forwards
+ yield dataclasses.replace(base, penalty=penalty)
+
+
+ def _get_assignments_address_forward(self, base: TokenAssignment,
+ query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
+ """ Yield possible assignments of address searches with
+ left-to-right reading.
+ """
+ first = base.address[0]
+
+ log().comment('first word = name')
+ yield dataclasses.replace(base, penalty=self.penalty,
+ name=first, address=base.address[1:])
+
+        # Do not try to split the first term into name and address when
+        #  * another name term comes after the first one and before the
+        #    housenumber,
+        #  * a qualifier comes after the name, or
+        #  * the containing phrase is strictly typed.
+ if (base.housenumber and first.end < base.housenumber.start)\
+ or (base.qualifier and base.qualifier > first)\
+ or (query.nodes[first.start].ptype != qmod.PhraseType.NONE):
+ return
+
+ penalty = self.penalty
+
+ # Penalty for:
+ # * <name>, <street>, <housenumber> , ...
+ # * queries that are comma-separated
+ if (base.housenumber and base.housenumber > first) or len(query.source) > 1:
+ penalty += 0.25
+
+ for i in range(first.start + 1, first.end):
+ name, addr = first.split(i)
+ log().comment(f'split first word = name ({i - first.start})')
+ yield dataclasses.replace(base, name=name, address=[addr] + base.address[1:],
+ penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
+
+
+ def _get_assignments_address_backward(self, base: TokenAssignment,
+ query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
+ """ Yield possible assignments of address searches with
+ right-to-left reading.
+ """
+ last = base.address[-1]
+
+ if self.direction == -1 or len(base.address) > 1:
+ log().comment('last word = name')
+ yield dataclasses.replace(base, penalty=self.penalty,
+ name=last, address=base.address[:-1])
+
+        # Do not try to split the last term into address and name when
+        #  * another name term comes before the last one and after the
+        #    housenumber,
+        #  * a qualifier comes before the name, or
+        #  * the containing phrase is strictly typed.
+ if (base.housenumber and last.start > base.housenumber.end)\
+ or (base.qualifier and base.qualifier < last)\
+ or (query.nodes[last.start].ptype != qmod.PhraseType.NONE):
+ return
+
+ penalty = self.penalty
+ if base.housenumber and base.housenumber < last:
+ penalty += 0.4
+ if len(query.source) > 1:
+ penalty += 0.25
+
+ for i in range(last.start + 1, last.end):
+ addr, name = last.split(i)
+ log().comment(f'split last word = name ({i - last.start})')
+ yield dataclasses.replace(base, name=name, address=base.address[:-1] + [addr],
+ penalty=penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype])
+
+
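
Both readings rely on `TokenRange.split()`, which is assumed here to divide a range at an interior query node and return the left and right parts:

    # Assumed behaviour of split(), as used by both readings above.
    term = TokenRange(0, 3)
    left, right = term.split(1)    # TokenRange(0, 1), TokenRange(1, 3)
    left, right = term.split(2)    # TokenRange(0, 2), TokenRange(2, 3)
    # The forward reading uses the left part as name, the backward
    # reading the right part.
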
def get_assignments(self, query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Yield possible assignments for the current sequence.
"""
base = TokenAssignment.from_ranges(self.seq)
+ num_addr_tokens = sum(t.end - t.start for t in base.address)
+ if num_addr_tokens > 50:
+ return
+
# Postcode search (postcode-only search is covered in next case)
if base.postcode is not None and base.address:
- if (base.postcode.start == 0 and self.direction != -1)\
- or (base.postcode.end == query.num_token_slots() and self.direction != 1):
- log().comment('postcode search')
- # <address>,<postcode> should give preference to address search
- if base.postcode.start == 0:
- penalty = self.penalty
- else:
- penalty = self.penalty + 0.1
- yield dataclasses.replace(base, penalty=penalty)
+ yield from self._get_assignments_postcode(base, query.num_token_slots())
# Postcode or country-only search
if not base.address:
- if not base.housenumber and (base.postcode or base.country or base.category):
+ if not base.housenumber and (base.postcode or base.country or base.near_item):
log().comment('postcode/country search')
yield dataclasses.replace(base, penalty=self.penalty)
else:
# <postcode>,<address> should give preference to postcode search
if base.postcode and base.postcode.start == 0:
self.penalty += 0.1
- # Use entire first word as name
+
+            # Left-to-right reading of the address
if self.direction != -1:
- log().comment('first word = name')
- yield dataclasses.replace(base, name=base.address[0],
- penalty=self.penalty,
- address=base.address[1:])
-
- # Use entire last word as name
- if self.direction == -1 or (self.direction == 0 and len(base.address) > 1):
- log().comment('last word = name')
- yield dataclasses.replace(base, name=base.address[-1],
- penalty=self.penalty,
- address=base.address[:-1])
+ yield from self._get_assignments_address_forward(base, query)
+
+            # Right-to-left reading of the address
+ if self.direction != 1:
+ yield from self._get_assignments_address_backward(base, query)
# variant for special housenumber searches
if base.housenumber:
yield dataclasses.replace(base, penalty=self.penalty)
- # Use beginning of first word as name
- if self.direction != -1:
- first = base.address[0]
- if (not base.housenumber or first.end >= base.housenumber.start)\
- and (not base.qualifier or first.start >= base.qualifier.end):
- for i in range(first.start + 1, first.end):
- name, addr = first.split(i)
- penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
- log().comment(f'split first word = name ({i - first.start})')
- yield dataclasses.replace(base, name=name, penalty=penalty,
- address=[addr] + base.address[1:])
-
- # Use end of last word as name
- if self.direction != 1:
- last = base.address[-1]
- if (not base.housenumber or last.start <= base.housenumber.end)\
- and (not base.qualifier or last.end <= base.qualifier.start):
- for i in range(last.start + 1, last.end):
- addr, name = last.split(i)
- penalty = self.penalty + PENALTY_TOKENCHANGE[query.nodes[i].btype]
- log().comment(f'split last word = name ({i - last.start})')
- yield dataclasses.replace(base, name=name, penalty=penalty,
- address=base.address[:-1] + [addr])
-
-
def yield_token_assignments(query: qmod.QueryStruct) -> Iterator[TokenAssignment]:
""" Return possible word type assignments to word positions.
sql = sa.select(conn.t.import_status.c.lastimportdate).limit(1)
status.data_updated = await conn.scalar(sql)
+ if status.data_updated is not None:
+ status.data_updated = status.data_updated.replace(tzinfo=dt.timezone.utc)
+
# Database version
try:
verstr = await conn.get_property('database_version')
from binascii import unhexlify
from nominatim.errors import UsageError
+from nominatim.api.localization import Locales
# pylint: disable=no-member,too-many-boolean-expressions,too-many-instance-attributes
@dataclasses.dataclass
class PlaceID:
- """ Reference an object by Nominatim's internal ID.
+ """ Reference a place by Nominatim's internal ID.
+
+        A PlaceID may reference a place from the main table placex, from
+        the interpolation tables or the postcode tables. Place IDs are not
+        stable between installations. You should therefore use this type
+        only with place IDs obtained from the same database.
"""
place_id: int
+ """
+ The internal ID of the place to reference.
+ """
@dataclasses.dataclass
class OsmID:
- """ Reference by the OSM ID and potentially the basic category.
+ """ Reference a place by its OSM ID and potentially the basic category.
+
+ The OSM ID may refer to places in the main table placex and OSM
+ interpolation lines.
"""
osm_type: str
+    """ OSM type of the object. Must be one of `N` (node), `W` (way) or
+        `R` (relation).
+ """
osm_id: int
+ """ The OSM ID of the object.
+ """
osm_class: Optional[str] = None
+    """ The same OSM object may appear multiple times in the database under
+        different categories. The optional class parameter makes it possible
+        to distinguish the different categories; it corresponds to the key
+        part of the category. If there are multiple objects in the database
+        and `osm_class` is left out, then one of the objects is returned
+        at random.
+ """
def __post_init__(self) -> None:
if self.osm_type not in ('N', 'W', 'R'):
WKB_BBOX_HEADER_BE = b'\x00\x20\x00\x00\x03\x00\x00\x10\xe6\x00\x00\x00\x01\x00\x00\x00\x05'
class Bbox:
- """ A bounding box in WSG84 projection.
+ """ A bounding box in WGS84 projection.
The coordinates are available as an array in the 'coord'
property in the order (minx, miny, maxx, maxy).
"""
def __init__(self, minx: float, miny: float, maxx: float, maxy: float) -> None:
+ """ Create a new bounding box with the given coordinates in WGS84
+ projection.
+ """
self.coords = (minx, miny, maxx, maxy)
@staticmethod
def from_wkb(wkb: Union[None, str, bytes]) -> 'Optional[Bbox]':
""" Create a Bbox from a bounding box polygon as returned by
- the database. Return s None if the input value is None.
+ the database. Returns `None` if the input value is None.
"""
if wkb is None:
return None
except ValueError as exc:
raise UsageError('Bounding box parameter needs to be numbers.') from exc
- if x1 < -180.0 or x1 > 180.0 or y1 < -90.0 or y1 > 90.0 \
- or x2 < -180.0 or x2 > 180.0 or y2 < -90.0 or y2 > 90.0:
- raise UsageError('Bounding box coordinates invalid.')
+ x1 = min(180, max(-180, x1))
+ x2 = min(180, max(-180, x2))
+ y1 = min(90, max(-90, y1))
+ y2 = min(90, max(-90, y2))
if x1 == x2 or y1 == y2:
raise UsageError('Bounding box with invalid parameters.')
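
With this change, out-of-range coordinates are clamped to the valid WGS84 range instead of rejected; only degenerate boxes still raise. A sketch of the expected behaviour (assuming the parsing happens in a classmethod such as `Bbox.from_param`):

    # A viewbox reaching past the antimeridian/pole is clamped, not rejected.
    box = Bbox.from_param('170,85,190,95')   # hypothetical out-of-range input
    # box.coords == (170.0, 85.0, 180.0, 90.0) after clamping
    # A box that collapses to a line or point still raises UsageError:
    # Bbox.from_param('10,10,10,20')
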
class GeometryFormat(enum.Flag):
- """ Geometry output formats supported by Nominatim.
+ """ All search functions support returning the full geometry of a place in
+ various formats. The internal geometry is converted by PostGIS to
+ the desired format and then returned as a string. It is possible to
+ request multiple formats at the same time.
"""
NONE = 0
+    """ No geometry requested. Alias for an empty flag.
+ """
GEOJSON = enum.auto()
+ """
+ [GeoJSON](https://geojson.org/) format
+ """
KML = enum.auto()
+ """
+ [KML](https://en.wikipedia.org/wiki/Keyhole_Markup_Language) format
+ """
SVG = enum.auto()
+ """
+ [SVG](http://www.w3.org/TR/SVG/paths.html) format
+ """
TEXT = enum.auto()
+ """
+ [WKT](https://en.wikipedia.org/wiki/Well-known_text_representation_of_geometry) format
+ """
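
As an `enum.Flag`, the formats can be requested together with bitwise OR and tested with `&`; a short sketch:

    fmt = GeometryFormat.GEOJSON | GeometryFormat.KML
    assert fmt & GeometryFormat.GEOJSON
    assert not fmt & GeometryFormat.SVG
    assert GeometryFormat.NONE == GeometryFormat(0)   # the empty flag
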
class DataLayer(enum.Flag):
- """ Layer types that can be selected for reverse and forward search.
+ """ The `DataLayer` flag type defines the layers that can be selected
+ for reverse and forward search.
"""
- POI = enum.auto()
ADDRESS = enum.auto()
+    """ The address layer contains all places relevant for addresses:
+ fully qualified addresses with a house number (or a house name equivalent,
+ for some addresses) and places that can be part of an address like
+ roads, cities, states.
+ """
+ POI = enum.auto()
+    """ Layer for points of interest like shops and restaurants, but also
+        recycling bins or postboxes.
+ """
RAILWAY = enum.auto()
- MANMADE = enum.auto()
+ """ Layer with railway features including tracks and other infrastructure.
+ Note that in Nominatim's standard configuration, only very few railway
+ features are imported into the database. Thus a custom configuration
+ is required to make full use of this layer.
+ """
NATURAL = enum.auto()
+ """ Layer with natural features like rivers, lakes and mountains.
+ """
+ MANMADE = enum.auto()
+ """ Layer with other human-made features and boundaries. This layer is
+ the catch-all and includes all features not covered by the other
+        layers. A typical example for this layer is national park boundaries.
+ """
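
`DataLayer` values combine the same way; the CLI below, for example, defaults to the address and POI layers:

    layers = DataLayer.ADDRESS | DataLayer.POI
    assert layers & DataLayer.ADDRESS
    assert not layers & DataLayer.RAILWAY
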
def format_country(cc: Any) -> List[str]:
@dataclasses.dataclass
class LookupDetails:
- """ Collection of parameters that define the amount of details
+ """ Collection of parameters that define which kind of details are
returned with a lookup or details result.
"""
geometry_output: GeometryFormat = GeometryFormat.NONE
0.0 means the original geometry is kept. The higher the value, the
more the geometry gets simplified.
"""
+ locales: Locales = Locales()
+    """ Preferred languages for localization of results.
+ """
@classmethod
def from_kwargs(cls: Type[TParam], kwargs: Dict[str, Any]) -> TParam:
"""
excluded: List[int] = dataclasses.field(default_factory=list,
metadata={'transform': format_excluded})
- """ List of OSM objects to exclude from the results. Currenlty only
+ """ List of OSM objects to exclude from the results. Currently only
works when the internal place ID is given.
An empty list (the default) will disable this filter.
"""
or (self.bounded_viewbox
and self.viewbox is not None and self.near is not None
and self.viewbox.contains(self.near))
- or self.layers is not None and not self.layers)
+ or (self.layers is not None and not self.layers)
+ or (self.max_rank <= 4 and
+ self.layers is not None and not self.layers & DataLayer.ADDRESS))
def layer_enabled(self, layer: DataLayer) -> bool:
"""
Output formatters for API version v1.
"""
-from typing import Mapping, Any
+from typing import List, Dict, Mapping, Any
import collections
+import datetime as dt
import nominatim.api as napi
from nominatim.api.result_formatting import FormatDispatcher
from nominatim.api.v1 import format_json, format_xml
from nominatim.utils.json_writer import JsonWriter
+class RawDataList(List[Dict[str, Any]]):
+    """ Data type for formatting raw data lists 'as is' in JSON.
+ """
+
dispatch = FormatDispatcher()
@dispatch.format_func(napi.StatusResult, 'text')
if result.address_rows is not None:
_add_address_rows(out, 'address', result.address_rows, locales)
- if result.linked_rows is not None:
+ if result.linked_rows:
_add_address_rows(out, 'linked_places', result.linked_rows, locales)
if result.name_keywords is not None or result.address_keywords is not None:
options: Mapping[str, Any]) -> str:
return format_json.format_base_json(results, options, False,
class_label='category')
+
+@dispatch.format_func(RawDataList, 'json')
+def _format_raw_data_json(results: RawDataList, _: Mapping[str, Any]) -> str:
+ out = JsonWriter()
+ out.start_array()
+ for res in results:
+ out.start_object()
+ for k, v in res.items():
+ if isinstance(v, dt.datetime):
+                out.keyval(k, v.isoformat(sep=' ', timespec='seconds'))
+ else:
+ out.keyval(k, v)
+ out.end_object().next()
+
+ out.end_array()
+
+ return out()
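
A sketch of what this formatter emits for a single row (input invented; the exact spacing depends on JsonWriter):

    import datetime as dt

    rows = RawDataList([{'osm_id': 1234,
                         'updated': dt.datetime(2023, 5, 1, 12, 0)}])
    print(_format_raw_data_json(rows, {}))
    # roughly: [{"osm_id":1234,"updated":"2023-05-01 12:00:00"}]
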
_write_osm_id(out, result.osm_object)
- out.keyval('lat', result.centroid.lat)\
- .keyval('lon', result.centroid.lon)\
+ out.keyval('lat', f"{result.centroid.lat}")\
+ .keyval('lon', f"{result.centroid.lon}")\
.keyval(class_label, result.category[0])\
.keyval('type', result.category[1])\
.keyval('place_rank', result.rank_search)\
return REVERSE_MAX_RANKS[max(0, min(18, zoom))]
-FEATURE_TYPE_TO_RANK: Dict[Optional[str], Any] = {
+FEATURE_TYPE_TO_RANK: Dict[Optional[str], Tuple[int, int]] = {
'country': (4, 4),
'state': (8, 8),
'city': (14, 16),
assert result.names and 'ref' in result.names
if any(_is_postcode_relation_for(r, result.names['ref']) for r in results):
continue
- classification = (result.osm_object[0] if result.osm_object else None,
- result.category,
- result.display_name,
- result.rank_address)
- if result.osm_object not in osm_ids_done \
- and classification not in classification_done:
+ if result.source_table == SourceTable.PLACEX:
+ classification = (result.osm_object[0] if result.osm_object else None,
+ result.category,
+ result.display_name,
+ result.rank_address)
+ if result.osm_object not in osm_ids_done \
+ and classification not in classification_done:
+ deduped.append(result)
+ osm_ids_done.add(result.osm_object)
+ classification_done.add(classification)
+ else:
deduped.append(result)
- osm_ids_done.add(result.osm_object)
- classification_done.add(classification)
if len(deduped) >= max_results:
break
return f"(?P<{axis}_deg>\\d+\\.\\d+)°?"
def _deg_min(axis: str) -> str:
- return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)?[′']*"
+ return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>[\\d.]+)[′']*"
def _deg_min_sec(axis: str) -> str:
- return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)?[\"″]*"
+ return f"(?P<{axis}_deg>\\d+)[°\\s]+(?P<{axis}_min>\\d+)[′'\\s]+(?P<{axis}_sec>[\\d.]+)[\"″]*"
COORD_REGEX = [re.compile(r'(?:(?P<pre>.*?)\s+)??' + r + r'(?:\s+(?P<post>.*))?') for r in (
r"(?P<ns>[NS])\s*" + _deg('lat') + r"[\s,]+" + r"(?P<ew>[EW])\s*" + _deg('lon'),
import math
from urllib.parse import urlencode
+import sqlalchemy as sa
+
from nominatim.errors import UsageError
from nominatim.config import Configuration
import nominatim.api as napi
import nominatim.api.logging as loglib
from nominatim.api.v1.format import dispatch as formatting
+from nominatim.api.v1.format import RawDataList
from nominatim.api.v1 import helpers
-CONTENT_TYPE = {
- 'text': 'text/plain; charset=utf-8',
- 'xml': 'text/xml; charset=utf-8',
- 'debug': 'text/html; charset=utf-8'
-}
+CONTENT_TEXT = 'text/plain; charset=utf-8'
+CONTENT_XML = 'text/xml; charset=utf-8'
+CONTENT_HTML = 'text/html; charset=utf-8'
+CONTENT_JSON = 'application/json; charset=utf-8'
+
+CONTENT_TYPE = {'text': CONTENT_TEXT, 'xml': CONTENT_XML, 'debug': CONTENT_HTML}
class ASGIAdaptor(abc.ABC):
""" Adapter class for the different ASGI frameworks.
Wraps functionality over concrete requests and responses.
"""
- content_type: str = 'text/plain; charset=utf-8'
+ content_type: str = CONTENT_TEXT
@abc.abstractmethod
def get(self, name: str, default: Optional[str] = None) -> Optional[str]:
@abc.abstractmethod
- def create_response(self, status: int, output: str) -> Any:
+ def create_response(self, status: int, output: str, num_results: int) -> Any:
""" Create a response from the given parameters. The result will
be returned by the endpoint functions. The adaptor may also
return None when the response is created internally with some
body of the response to 'output'.
"""
+ @abc.abstractmethod
+ def base_uri(self) -> str:
+ """ Return the URI of the original request.
+ """
+
@abc.abstractmethod
def config(self) -> Configuration:
"""
- def build_response(self, output: str, status: int = 200) -> Any:
+ def build_response(self, output: str, status: int = 200, num_results: int = 0) -> Any:
""" Create a response from the given output. Wraps a JSONP function
around the response, if necessary.
"""
- if self.content_type == 'application/json' and status == 200:
+ if self.content_type == CONTENT_JSON and status == 200:
jsonp = self.get('json_callback')
if jsonp is not None:
if any(not part.isidentifier() for part in jsonp.split('.')):
self.raise_error('Invalid json_callback value')
output = f"{jsonp}({output})"
- self.content_type = 'application/javascript'
+ self.content_type = 'application/javascript; charset=utf-8'
- return self.create_response(status, output)
+ return self.create_response(status, output, num_results)
def raise_error(self, msg: str, status: int = 400) -> NoReturn:
message. The message will be formatted according to the
output format chosen by the request.
"""
- if self.content_type == 'text/xml; charset=utf-8':
+ if self.content_type == CONTENT_XML:
msg = f"""<?xml version="1.0" encoding="UTF-8" ?>
<error>
<code>{status}</code>
<message>{msg}</message>
</error>
"""
- elif self.content_type == 'application/json':
+ elif self.content_type == CONTENT_JSON:
msg = f"""{{"error":{{"code":{status},"message":"{msg}"}}}}"""
- elif self.content_type == 'text/html; charset=utf-8':
+ elif self.content_type == CONTENT_HTML:
loglib.log().section('Execution error')
loglib.log().var_dump('Status', status)
loglib.log().var_dump('Message', msg)
"""
if self.get_bool('debug', False):
loglib.set_log_output('html')
- self.content_type = 'text/html; charset=utf-8'
+ self.content_type = CONTENT_HTML
return True
return False
self.raise_error("Parameter 'format' must be one of: " +
', '.join(formatting.list_formats(result_type)))
- self.content_type = CONTENT_TYPE.get(fmt, 'application/json')
+ self.content_type = CONTENT_TYPE.get(fmt, CONTENT_JSON)
return fmt
numgeoms += 1
if numgeoms > self.config().get_int('POLYGON_OUTPUT_MAX_TYPES'):
- self.raise_error('Too many polgyon output options selected.')
+ self.raise_error('Too many polygon output options selected.')
return {'address_details': True,
'geometry_simplification': self.get_float('polygon_threshold', 0.0),
result = await api.details(place,
address_details=params.get_bool('addressdetails', False),
- linked_places=params.get_bool('linkedplaces', False),
+ linked_places=params.get_bool('linkedplaces', True),
parented_places=params.get_bool('hierarchy', False),
keywords=params.get_bool('keywords', False),
geometry_output = napi.GeometryFormat.GEOJSON
if params.get_bool('polygon_geojson', False)
- else napi.GeometryFormat.NONE
+ else napi.GeometryFormat.NONE,
+ locales=locales
)
if debug:
if result is None:
params.raise_error('No place with that OSM ID found.', status=404)
- result.localize(locales)
-
output = formatting.format_result(result, fmt,
{'locales': locales,
'group_hierarchy': params.get_bool('group_hierarchy', False),
'icon_base_url': params.config().MAPICON_URL})
- return params.build_response(output)
+ return params.build_response(output, num_results=1)
async def reverse_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
details = params.parse_geometry_details(fmt)
details['max_rank'] = helpers.zoom_to_rank(params.get_int('zoom', 18))
details['layers'] = params.get_layers()
+ details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
result = await api.reverse(coord, **details)
if debug:
- return params.build_response(loglib.get_and_disable())
+ return params.build_response(loglib.get_and_disable(), num_results=1 if result else 0)
if fmt == 'xml':
queryparts = {'lat': str(coord.lat), 'lon': str(coord.lon), 'format': 'xml'}
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
- if result:
- result.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
output = formatting.format_result(napi.ReverseResults([result] if result else []),
fmt, fmt_options)
- return params.build_response(output)
+ return params.build_response(output, num_results=1 if result else 0)
async def lookup_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
fmt = params.parse_format(napi.SearchResults, 'xml')
debug = params.setup_debugging()
details = params.parse_geometry_details(fmt)
+ details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
places = []
for oid in (params.get('osm_ids') or '').split(','):
oid = oid.strip()
if len(oid) > 1 and oid[0] in 'RNWrnw' and oid[1:].isdigit():
- places.append(napi.OsmID(oid[0], int(oid[1:])))
+ places.append(napi.OsmID(oid[0].upper(), int(oid[1:])))
if len(places) > params.config().get_int('LOOKUP_MAX_COUNT'):
params.raise_error('Too many object IDs.')
results = napi.SearchResults()
if debug:
- return params.build_response(loglib.get_and_disable())
+ return params.build_response(loglib.get_and_disable(), num_results=len(results))
fmt_options = {'extratags': params.get_bool('extratags', False),
'namedetails': params.get_bool('namedetails', False),
'addressdetails': params.get_bool('addressdetails', True)}
- results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
output = formatting.format_result(results, fmt, fmt_options)
- return params.build_response(output)
+ return params.build_response(output, num_results=len(results))
async def _unstructured_search(query: str, api: napi.NominatimAPIAsync,
helpers.feature_type_to_rank(params.get('featureType', ''))
if params.get('featureType', None) is not None:
details['layers'] = napi.DataLayer.ADDRESS
+ else:
+ details['layers'] = params.get_layers()
+ details['locales'] = napi.Locales.from_accept_languages(params.get_accepted_languages())
+
+ # unstructured query parameters
query = params.get('q', None)
+ # structured query parameters
queryparts = {}
+ for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
+ details[key] = params.get(key, None)
+ if details[key]:
+ queryparts[key] = details[key]
+
try:
if query is not None:
+ if queryparts:
+                params.raise_error("Structured query parameters "
+ "(amenity, street, city, county, state, postalcode, country)"
+ " cannot be used together with 'q' parameter.")
queryparts['q'] = query
results = await _unstructured_search(query, api, details)
else:
- for key in ('amenity', 'street', 'city', 'county', 'state', 'postalcode', 'country'):
- details[key] = params.get(key, None)
- if details[key]:
- queryparts[key] = details[key]
query = ', '.join(queryparts.values())
results = await api.search_address(**details)
except UsageError as err:
params.raise_error(str(err))
- results.localize(napi.Locales.from_accept_languages(params.get_accepted_languages()))
-
if details['dedupe'] and len(results) > 1:
results = helpers.deduplicate_results(results, max_results)
if debug:
- return params.build_response(loglib.get_and_disable())
+ return params.build_response(loglib.get_and_disable(), num_results=len(results))
if fmt == 'xml':
helpers.extend_query_parts(queryparts, details,
(str(r.place_id) for r in results if r.place_id))
queryparts['format'] = fmt
- moreurl = urlencode(queryparts)
+ moreurl = params.base_uri() + '/search?' + urlencode(queryparts)
else:
moreurl = ''
output = formatting.format_result(results, fmt, fmt_options)
- return params.build_response(output)
+ return params.build_response(output, num_results=len(results))
+
+
+async def deletable_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
+ """ Server glue for /deletable endpoint.
+ This is a special endpoint that shows polygons that have been
+ deleted or are broken in the OSM data but are kept in the
+ Nominatim database to minimize disruption.
+ """
+ fmt = params.parse_format(RawDataList, 'json')
+
+ async with api.begin() as conn:
+ sql = sa.text(""" SELECT p.place_id, country_code,
+ name->'name' as name, i.*
+ FROM placex p, import_polygon_delete i
+ WHERE p.osm_id = i.osm_id AND p.osm_type = i.osm_type
+ AND p.class = i.class AND p.type = i.type
+ """)
+ results = RawDataList(r._asdict() for r in await conn.execute(sql))
+
+ return params.build_response(formatting.format_result(results, fmt, {}))
+
+
+async def polygons_endpoint(api: napi.NominatimAPIAsync, params: ASGIAdaptor) -> Any:
+ """ Server glue for /polygons endpoint.
+ This is a special endpoint that shows polygons that have changed
+        their size but are kept in the Nominatim database with their
+ old area to minimize disruption.
+ """
+ fmt = params.parse_format(RawDataList, 'json')
+ sql_params: Dict[str, Any] = {
+ 'days': params.get_int('days', -1),
+ 'cls': params.get('class')
+ }
+ reduced = params.get_bool('reduced', False)
+
+ async with api.begin() as conn:
+ sql = sa.select(sa.text("""osm_type, osm_id, class, type,
+ name->'name' as name,
+ country_code, errormessage, updated"""))\
+ .select_from(sa.text('import_polygon_error'))
+ if sql_params['days'] > 0:
+ sql = sql.where(sa.text("updated > 'now'::timestamp - make_interval(days => :days)"))
+ if reduced:
+ sql = sql.where(sa.text("errormessage like 'Area reduced%'"))
+ if sql_params['cls'] is not None:
+ sql = sql.where(sa.text("class = :cls"))
+
+ sql = sql.order_by(sa.literal_column('updated').desc()).limit(1000)
+
+ results = RawDataList(r._asdict() for r in await conn.execute(sql, sql_params))
+
+ return params.build_response(formatting.format_result(results, fmt, {}))
EndpointFunc = Callable[[napi.NominatimAPIAsync, ASGIAdaptor], Any]
('details', details_endpoint),
('reverse', reverse_endpoint),
('lookup', lookup_endpoint),
- ('search', search_endpoint)
+ ('search', search_endpoint),
+ ('deletable', deletable_endpoint),
+ ('polygons', polygons_endpoint),
]
#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Command-line interface to the Nominatim functions for import, update,
database administration and querying.
"""
-from typing import Optional, Any, List, Union
+from typing import Optional, Any
import importlib
import logging
import os
from pathlib import Path
from nominatim.config import Configuration
-from nominatim.tools.exec_utils import run_legacy_script, run_php_server
+from nominatim.tools.exec_utils import run_php_server
from nominatim.errors import UsageError
from nominatim import clicmd
from nominatim import version
self.parser.print_help()
return 1
- args.phpcgi_path = Path(kwargs['phpcgi_path'])
args.project_dir = Path(args.project_dir).resolve()
if 'cli_args' not in kwargs:
#
# No need to document the functions each time.
# pylint: disable=C0111
-class QueryExport:
- """\
- Export addresses as CSV file from the database.
- """
-
- def add_args(self, parser: argparse.ArgumentParser) -> None:
- group = parser.add_argument_group('Output arguments')
- group.add_argument('--output-type', default='street',
- choices=('continent', 'country', 'state', 'county',
- 'city', 'suburb', 'street', 'path'),
- help='Type of places to output (default: street)')
- group.add_argument('--output-format',
- default='street;suburb;city;county;state;country',
- help=("Semicolon-separated list of address types "
- "(see --output-type). Multiple ranks can be "
- "merged into one column by simply using a "
- "comma-separated list."))
- group.add_argument('--output-all-postcodes', action='store_true',
- help=("List all postcodes for address instead of "
- "just the most likely one"))
- group.add_argument('--language',
- help=("Preferred language for output "
- "(use local name, if omitted)"))
- group = parser.add_argument_group('Filter arguments')
- group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
- help='Export only objects within country')
- group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
- help='Export only children of this OSM node')
- group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
- help='Export only children of this OSM way')
- group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
- help='Export only children of this OSM relation')
-
-
- def run(self, args: NominatimArgs) -> int:
- params: List[Union[int, str]] = [
- '--output-type', args.output_type,
- '--output-format', args.output_format]
- if args.output_all_postcodes:
- params.append('--output-all-postcodes')
- if args.language:
- params.extend(('--language', args.language))
- if args.restrict_to_country:
- params.extend(('--restrict-to-country', args.restrict_to_country))
- if args.restrict_to_osm_node:
- params.extend(('--restrict-to-osm-node', args.restrict_to_osm_node))
- if args.restrict_to_osm_way:
- params.extend(('--restrict-to-osm-way', args.restrict_to_osm_way))
- if args.restrict_to_osm_relation:
- params.extend(('--restrict-to-osm-relation', args.restrict_to_osm_relation))
-
- return run_legacy_script('export.php', *params, config=args.config)
-
-
class AdminServe:
"""\
Start a simple web server for serving the API.
parser.add_subcommand('admin', clicmd.AdminFuncs())
- parser.add_subcommand('export', QueryExport())
+ parser.add_subcommand('export', clicmd.QueryExport())
+ parser.add_subcommand('convert', clicmd.ConvertDB())
parser.add_subcommand('serve', AdminServe())
parser.add_subcommand('search', clicmd.APISearch())
#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Subcommand definitions for the command-line tool.
from nominatim.clicmd.admin import AdminFuncs as AdminFuncs
from nominatim.clicmd.freeze import SetupFreeze as SetupFreeze
from nominatim.clicmd.special_phrases import ImportSpecialPhrases as ImportSpecialPhrases
+from nominatim.clicmd.export import QueryExport as QueryExport
+from nominatim.clicmd.convert import ConvertDB as ConvertDB
"""
import logging
import argparse
+import random
-from nominatim.tools.exec_utils import run_legacy_script
+from nominatim.db.connection import connect
from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
help='Print performance analysis of the indexing process')
objs.add_argument('--collect-os-info', action="store_true",
help="Generate a report about the host system information")
+ objs.add_argument('--clean-deleted', action='store', metavar='AGE',
+ help='Clean up deleted relations')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
mgroup.add_argument('--place-id', type=int,
help='Analyse indexing of the given Nominatim object')
+
def run(self, args: NominatimArgs) -> int:
+ # pylint: disable=too-many-return-statements
if args.warm:
return self._warm(args)
collect_os_info.report_system_information(args.config)
return 0
+ if args.clean_deleted:
+ LOG.warning('Cleaning up deleted relations')
+ from ..tools import admin
+ admin.clean_deleted_relations(args.config, age=args.clean_deleted)
+ return 0
+
return 1
+
def _warm(self, args: NominatimArgs) -> int:
LOG.warning('Warming database caches')
- params = ['warm.php']
- if args.target == 'reverse':
- params.append('--reverse-only')
- if args.target == 'search':
- params.append('--search-only')
- return run_legacy_script(*params, config=args.config)
+
+ api = napi.NominatimAPI(args.project_dir)
+
+ try:
+ if args.target != 'search':
+ for _ in range(1000):
+ api.reverse((random.uniform(-90, 90), random.uniform(-180, 180)),
+ address_details=True)
+
+ if args.target != 'reverse':
+ from ..tokenizer import factory as tokenizer_factory
+
+ tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
+ with connect(args.config.get_libpq_dsn()) as conn:
+ if conn.table_exists('search_name'):
+ words = tokenizer.most_frequent_words(conn, 1000)
+ else:
+ words = []
+
+ for word in words:
+ api.search(word)
+ finally:
+ api.close()
+
+ return 0
"""
Subcommand definitions for API calls from the command line.
"""
-from typing import Mapping, Dict, Any
+from typing import Dict, Any
import argparse
import logging
import json
import sys
-from nominatim.tools.exec_utils import run_api_script
-from nominatim.errors import UsageError
from nominatim.clicmd.args import NominatimArgs
import nominatim.api as napi
import nominatim.api.v1 as api_output
"Parameter is difference tolerance in degrees."))
-def _run_api(endpoint: str, args: NominatimArgs, params: Mapping[str, object]) -> int:
- script_file = args.project_dir / 'website' / (endpoint + '.php')
-
- if not script_file.exists():
- LOG.error("Cannot find API script file.\n\n"
- "Make sure to run 'nominatim' from the project directory \n"
- "or use the option --project-dir.")
- raise UsageError("API script not found.")
-
- return run_api_script(endpoint, args.project_dir,
- phpcgi_bin=args.phpcgi_path, params=params)
-
class APISearch:
"""\
Execute a search query.
'countries': args.countrycodes,
'excluded': args.exclude_place_ids,
'viewbox': args.viewbox,
- 'bounded_viewbox': args.bounded
+ 'bounded_viewbox': args.bounded,
+ 'locales': args.get_locales(api.config.DEFAULT_LANGUAGE)
}
if args.query:
country=args.country,
**params)
- for result in results:
- result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
-
if args.dedupe and len(results) > 1:
results = deduplicate_results(results, args.limit)
layers=args.get_layers(napi.DataLayer.ADDRESS | napi.DataLayer.POI),
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
- geometry_simplification=args.polygon_threshold)
+ geometry_simplification=args.polygon_threshold,
+ locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
if args.format == 'debug':
print(loglib.get_and_disable())
return 0
if result:
- result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
output = api_output.format_result(
napi.ReverseResults([result]),
args.format,
results = api.lookup(places,
address_details=True, # needed for display name
geometry_output=args.get_geometry_output(),
- geometry_simplification=args.polygon_threshold or 0.0)
-
- for result in results:
- result.localize(args.get_locales(api.config.DEFAULT_LANGUAGE))
+ geometry_simplification=args.polygon_threshold or 0.0,
+ locales=args.get_locales(api.config.DEFAULT_LANGUAGE))
output = api_output.format_result(
results,
api = napi.NominatimAPI(args.project_dir)
+ locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
result = api.details(place,
address_details=args.addressdetails,
linked_places=args.linkedplaces,
keywords=args.keywords,
geometry_output=napi.GeometryFormat.GEOJSON
if args.polygon_geojson
- else napi.GeometryFormat.NONE)
+ else napi.GeometryFormat.NONE,
+ locales=locales)
if result:
- locales = args.get_locales(api.config.DEFAULT_LANGUAGE)
- result.localize(locales)
-
output = api_output.format_result(
result,
'json',
# Basic environment set by root program.
config: Configuration
project_dir: Path
- phpcgi_path: Path
# Global switches
version: bool
check_database: bool
migrate: bool
collect_os_info: bool
+ clean_deleted: str
analyse_indexing: bool
target: Optional[str]
osm_id: Optional[str]
offline: bool
ignore_errors: bool
index_noanalyse: bool
+ prepare_database: bool
# Arguments to 'index'
boundaries_only: bool
output_all_postcodes: bool
language: Optional[str]
restrict_to_country: Optional[str]
- restrict_to_osm_node: Optional[int]
- restrict_to_osm_way: Optional[int]
- restrict_to_osm_relation: Optional[int]
+
+ # Arguments to 'convert'
+ output: Path
# Arguments to 'refresh'
postcodes: bool
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'convert' subcommand.
+"""
+from typing import Set, Any, Union, Optional, Sequence
+import argparse
+import asyncio
+from pathlib import Path
+
+from nominatim.clicmd.args import NominatimArgs
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+
+class WithAction(argparse.Action):
+ """ Special action that saves a list of flags, given on the command-line
+ as `--with-foo` or `--without-foo`.
+ """
+ def __init__(self, option_strings: Sequence[str], dest: Any,
+ default: bool = True, **kwargs: Any) -> None:
+ if 'nargs' in kwargs:
+ raise ValueError("nargs not allowed.")
+ if option_strings is None:
+ raise ValueError("Positional parameter not allowed.")
+
+ self.dest_set = kwargs.pop('dest_set')
+ full_option_strings = []
+ for opt in option_strings:
+ if not opt.startswith('--'):
+ raise ValueError("short-form options not allowed")
+ if default:
+ self.dest_set.add(opt[2:])
+ full_option_strings.append(f"--with-{opt[2:]}")
+ full_option_strings.append(f"--without-{opt[2:]}")
+
+ super().__init__(full_option_strings, argparse.SUPPRESS, nargs=0, **kwargs)
+
+
+ def __call__(self, parser: argparse.ArgumentParser, namespace: argparse.Namespace,
+ values: Union[str, Sequence[Any], None],
+ option_string: Optional[str] = None) -> None:
+ assert option_string
+ if option_string.startswith('--with-'):
+ self.dest_set.add(option_string[7:])
+ if option_string.startswith('--without-'):
+ self.dest_set.discard(option_string[10:])
+
+
+class ConvertDB:
+ """ Convert an existing database into a different format. (EXPERIMENTAL)
+
+ Dump a read-only version of the database in a different format.
+ At the moment only a SQLite database suitable for reverse lookup
+ can be created.
+ """
+
+ def __init__(self) -> None:
+ self.options: Set[str] = set()
+
+ def add_args(self, parser: argparse.ArgumentParser) -> None:
+ parser.add_argument('--format', default='sqlite',
+ choices=('sqlite', ),
+ help='Format of the output database (must be sqlite currently)')
+ parser.add_argument('--output', '-o', required=True, type=Path,
+ help='File to write the database to.')
+        group = parser.add_argument_group('Switches to define database layout '
+                                          '(currently no effect)')
+ group.add_argument('--reverse', action=WithAction, dest_set=self.options, default=True,
+ help='Enable/disable support for reverse and lookup API'
+ ' (default: enabled)')
+ group.add_argument('--search', action=WithAction, dest_set=self.options, default=False,
+ help='Enable/disable support for search API (default: disabled)')
+ group.add_argument('--details', action=WithAction, dest_set=self.options, default=True,
+ help='Enable/disable support for details API (default: enabled)')
+
+
+ def run(self, args: NominatimArgs) -> int:
+ if args.output.exists():
+ raise UsageError(f"File '{args.output}' already exists. Refusing to overwrite.")
+
+ if args.format == 'sqlite':
+ from ..tools import convert_sqlite
+
+ asyncio.run(convert_sqlite.convert(args.project_dir, args.output, self.options))
+ return 0
+
+ return 1
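
A sketch of how the `--with-*`/`--without-*` switches resolve, assuming the `WithAction` class above is in scope; the argument set mirrors the definitions in `add_args`:

    import argparse

    options = set()
    parser = argparse.ArgumentParser()
    # reverse defaults to enabled, search to disabled, as above.
    parser.add_argument('--reverse', action=WithAction, dest_set=options, default=True)
    parser.add_argument('--search', action=WithAction, dest_set=options, default=False)

    parser.parse_args(['--without-reverse', '--with-search'])
    assert options == {'search'}
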
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Implementation of the 'export' subcommand.
+"""
+from typing import Optional, List, cast
+import logging
+import argparse
+import asyncio
+import csv
+import sys
+
+import sqlalchemy as sa
+
+from nominatim.clicmd.args import NominatimArgs
+import nominatim.api as napi
+from nominatim.api.results import create_from_placex_row, ReverseResult, add_result_details
+from nominatim.api.types import LookupDetails
+from nominatim.errors import UsageError
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+# Using non-top-level imports to avoid eventually unused imports.
+# pylint: disable=E0012,C0415
+# Needed for SQLAlchemy
+# pylint: disable=singleton-comparison
+
+LOG = logging.getLogger()
+
+RANK_RANGE_MAP = {
+ 'country': (4, 4),
+ 'state': (5, 9),
+ 'county': (10, 12),
+ 'city': (13, 16),
+ 'suburb': (17, 21),
+ 'street': (26, 26),
+ 'path': (27, 27)
+}
+
+RANK_TO_OUTPUT_MAP = {
+ 4: 'country',
+ 5: 'state', 6: 'state', 7: 'state', 8: 'state', 9: 'state',
+ 10: 'county', 11: 'county', 12: 'county',
+ 13: 'city', 14: 'city', 15: 'city', 16: 'city',
+ 17: 'suburb', 18: 'suburb', 19: 'suburb', 20: 'suburb', 21: 'suburb',
+ 26: 'street', 27: 'path'}
+
+class QueryExport:
+ """\
+ Export places as CSV file from the database.
+
+
+ """
+
+ def add_args(self, parser: argparse.ArgumentParser) -> None:
+ group = parser.add_argument_group('Output arguments')
+ group.add_argument('--output-type', default='street',
+ choices=('country', 'state', 'county',
+ 'city', 'suburb', 'street', 'path'),
+ help='Type of places to output (default: street)')
+ group.add_argument('--output-format',
+ default='street;suburb;city;county;state;country',
+ help=("Semicolon-separated list of address types "
+                                 "(see --output-type). Additionally accepts: "
+ "placeid,postcode"))
+ group.add_argument('--language',
+ help=("Preferred language for output "
+ "(use local name, if omitted)"))
+ group = parser.add_argument_group('Filter arguments')
+ group.add_argument('--restrict-to-country', metavar='COUNTRY_CODE',
+ help='Export only objects within country')
+ group.add_argument('--restrict-to-osm-node', metavar='ID', type=int,
+ dest='node',
+ help='Export only children of this OSM node')
+ group.add_argument('--restrict-to-osm-way', metavar='ID', type=int,
+ dest='way',
+ help='Export only children of this OSM way')
+ group.add_argument('--restrict-to-osm-relation', metavar='ID', type=int,
+ dest='relation',
+ help='Export only children of this OSM relation')
+
+
+ def run(self, args: NominatimArgs) -> int:
+ return asyncio.run(export(args))
+
+
+async def export(args: NominatimArgs) -> int:
+ """ The actual export as a asynchronous function.
+ """
+
+ api = napi.NominatimAPIAsync(args.project_dir)
+
+ try:
+ output_range = RANK_RANGE_MAP[args.output_type]
+
+ writer = init_csv_writer(args.output_format)
+
+ async with api.begin() as conn, api.begin() as detail_conn:
+ t = conn.t.placex
+
+ sql = sa.select(t.c.place_id, t.c.parent_place_id,
+ t.c.osm_type, t.c.osm_id, t.c.name,
+ t.c.class_, t.c.type, t.c.admin_level,
+ t.c.address, t.c.extratags,
+ t.c.housenumber, t.c.postcode, t.c.country_code,
+ t.c.importance, t.c.wikipedia, t.c.indexed_date,
+ t.c.rank_address, t.c.rank_search,
+ t.c.centroid)\
+ .where(t.c.linked_place_id == None)\
+ .where(t.c.rank_address.between(*output_range))
+
+ parent_place_id = await get_parent_id(conn, args.node, args.way, args.relation)
+ if parent_place_id:
+ taddr = conn.t.addressline
+
+ sql = sql.join(taddr, taddr.c.place_id == t.c.place_id)\
+ .where(taddr.c.address_place_id == parent_place_id)\
+ .where(taddr.c.isaddress)
+
+ if args.restrict_to_country:
+ sql = sql.where(t.c.country_code == args.restrict_to_country.lower())
+
+ results = []
+ for row in await conn.execute(sql):
+ result = create_from_placex_row(row, ReverseResult)
+ if result is not None:
+ results.append(result)
+
+ if len(results) == 1000:
+ await dump_results(detail_conn, results, writer, args.language)
+ results = []
+
+ if results:
+ await dump_results(detail_conn, results, writer, args.language)
+ finally:
+ await api.close()
+
+ return 0
+
+
+def init_csv_writer(output_format: str) -> 'csv.DictWriter[str]':
+ fields = output_format.split(';')
+ writer = csv.DictWriter(sys.stdout, fieldnames=fields, extrasaction='ignore')
+ writer.writeheader()
+
+ return writer
+
+
+async def dump_results(conn: napi.SearchConnection,
+ results: List[ReverseResult],
+ writer: 'csv.DictWriter[str]',
+ lang: Optional[str]) -> None:
+ locale = napi.Locales([lang] if lang else None)
+ await add_result_details(conn, results,
+ LookupDetails(address_details=True, locales=locale))
+
+
+ for result in results:
+ data = {'placeid': result.place_id,
+ 'postcode': result.postcode}
+
+ for line in (result.address_rows or []):
+ if line.isaddress and line.local_name:
+ if line.category[1] == 'postcode':
+ data['postcode'] = line.local_name
+ elif line.rank_address in RANK_TO_OUTPUT_MAP:
+ data[RANK_TO_OUTPUT_MAP[line.rank_address]] = line.local_name
+
+ writer.writerow(data)
+
+
+async def get_parent_id(conn: napi.SearchConnection, node_id: Optional[int],
+ way_id: Optional[int],
+ relation_id: Optional[int]) -> Optional[int]:
+ """ Get the place ID for the given OSM object.
+ """
+ if node_id is not None:
+ osm_type, osm_id = 'N', node_id
+ elif way_id is not None:
+ osm_type, osm_id = 'W', way_id
+ elif relation_id is not None:
+ osm_type, osm_id = 'R', relation_id
+ else:
+ return None
+
+ t = conn.t.placex
+ sql = sa.select(t.c.place_id).limit(1)\
+ .where(t.c.osm_type == osm_type)\
+ .where(t.c.osm_id == osm_id)\
+ .where(t.c.rank_address > 0)\
+ .order_by(t.c.rank_address)
+
+ for result in await conn.execute(sql):
+ return cast(int, result[0])
+
+ raise UsageError(f'Cannot find a place {osm_type}{osm_id}.')
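
For illustration, the writer produced by `init_csv_writer` above behaves like this (the row values are made up):

    writer = init_csv_writer('street;suburb;city;county;state;country')
    writer.writerow({'city': 'Berlin', 'country': 'Germany', 'placeid': 42})
    # Prints, after the header line:
    #   street,suburb,city,county,state,country
    #   ,,Berlin,,,Germany
    # 'placeid' is dropped here because it is not part of the requested format
    # and the writer was created with extrasaction='ignore'.
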
LOG.warning('Import secondary importance raster data from %s', args.project_dir)
if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
args.project_dir) > 0:
- LOG.fatal('FATAL: Cannot update sendary importance raster data')
+ LOG.fatal('FATAL: Cannot update secondary importance raster data')
return 1
if args.functions:
if args.wiki_data:
data_path = Path(args.config.WIKIPEDIA_DATA_PATH
or args.project_dir)
- LOG.warning('Import wikipdia article importance from %s', data_path)
+ LOG.warning('Import wikipedia article importance from %s', data_path)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
- LOG.fatal('FATAL: Wikipedia importance dump file not found')
+ LOG.fatal('FATAL: Wikipedia importance file not found in %s', data_path)
return 1
# Attention: importance MUST come after wiki data import.
def add_args(self, parser: argparse.ArgumentParser) -> None:
group_name = parser.add_argument_group('Required arguments')
- group1 = group_name.add_mutually_exclusive_group(required=True)
+ group1 = group_name.add_argument_group()
group1.add_argument('--osm-file', metavar='FILE', action='append',
help='OSM file to be imported'
- ' (repeat for importing multiple files)')
+ ' (repeat for importing multiple files)',
+ default=None)
group1.add_argument('--continue', dest='continue_at',
- choices=['load-data', 'indexing', 'db-postprocess'],
- help='Continue an import that was interrupted')
+ choices=['import-from-file', 'load-data', 'indexing', 'db-postprocess'],
+ help='Continue an import that was interrupted',
+ default=None)
group2 = parser.add_argument_group('Optional arguments')
group2.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
help='Continue import even when errors in SQL are present')
group3.add_argument('--index-noanalyse', action='store_true',
help='Do not perform analyse operations during index (expert only)')
+ group3.add_argument('--prepare-database', action='store_true',
+ help='Create the database but do not import any data')
- def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements
+ def run(self, args: NominatimArgs) -> int: # pylint: disable=too-many-statements, too-many-branches
from ..data import country_info
from ..tools import database_import, refresh, postcodes, freeze
from ..indexer.indexer import Indexer
country_info.setup_country_config(args.config)
- if args.continue_at is None:
+ if args.osm_file is None and args.continue_at is None and not args.prepare_database:
+ raise UsageError("No input files (use --osm-file).")
+
+ if args.osm_file is not None and args.continue_at not in ('import-from-file', None):
+ raise UsageError(f"Cannot use --continue {args.continue_at} and --osm-file together.")
+
+ if args.continue_at is not None and args.prepare_database:
+ raise UsageError(
+ "Cannot use --continue and --prepare-database together."
+ )
+
+ if args.prepare_database or args.continue_at is None:
+ LOG.warning('Creating database')
+ database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
+ rouser=args.config.DATABASE_WEBUSER)
+ if args.prepare_database:
+ return 0
+
+ if args.continue_at in (None, 'import-from-file'):
files = args.get_osm_file_list()
if not files:
raise UsageError("No input files (use --osm-file).")
- LOG.warning('Creating database')
- database_import.setup_database_skeleton(args.config.get_libpq_dsn(),
- rouser=args.config.DATABASE_WEBUSER)
-
- LOG.warning('Setting up country tables')
- country_info.setup_country_tables(args.config.get_libpq_dsn(),
- args.config.lib_dir.data,
- args.no_partitions)
-
- LOG.warning('Importing OSM data file')
- database_import.import_osm_data(files,
- args.osm2pgsql_options(0, 1),
- drop=args.no_updates,
- ignore_errors=args.ignore_errors)
-
- LOG.warning('Importing wikipedia importance data')
- data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
- if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
- data_path) > 0:
- LOG.error('Wikipedia importance dump file not found. '
- 'Calculating importance values of locations will not '
- 'use Wikipedia importance data.')
-
- LOG.warning('Importing secondary importance raster data')
- if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
- args.project_dir) != 0:
- LOG.error('Secondary importance file not imported. '
- 'Falling back to default ranking.')
-
- self._setup_tables(args.config, args.reverse_only)
-
- if args.continue_at is None or args.continue_at == 'load-data':
+ if args.continue_at in ('import-from-file', None):
+ # Check if the correct plugins are installed
+ database_import.check_existing_database_plugins(args.config.get_libpq_dsn())
+ LOG.warning('Setting up country tables')
+ country_info.setup_country_tables(args.config.get_libpq_dsn(),
+ args.config.lib_dir.data,
+ args.no_partitions)
+
+ LOG.warning('Importing OSM data file')
+ database_import.import_osm_data(files,
+ args.osm2pgsql_options(0, 1),
+ drop=args.no_updates,
+ ignore_errors=args.ignore_errors)
+
+ LOG.warning('Importing wikipedia importance data')
+ data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
+ if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
+ data_path) > 0:
+ LOG.error('Wikipedia importance dump file not found. '
+ 'Calculating importance values of locations will not '
+ 'use Wikipedia importance data.')
+
+ LOG.warning('Importing secondary importance raster data')
+ if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+ args.project_dir) != 0:
+ LOG.error('Secondary importance file not imported. '
+ 'Falling back to default ranking.')
+
+ self._setup_tables(args.config, args.reverse_only)
+
+ if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn)
LOG.warning("Setting up tokenizer")
tokenizer = self._get_tokenizer(args.continue_at, args.config)
- if args.continue_at is None or args.continue_at == 'load-data':
+ if args.continue_at in ('import-from-file', 'load-data', None):
LOG.warning('Calculate postcodes')
postcodes.update_postcodes(args.config.get_libpq_dsn(),
args.project_dir, tokenizer)
- if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
+ if args.continue_at in \
+ ('import-from-file', 'load-data', 'indexing', None):
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(), tokenizer, num_threads)
indexer.index_full(analyse=not args.index_noanalyse)
"""
from ..tokenizer import factory as tokenizer_factory
- if continue_at is None or continue_at == 'load-data':
+ if continue_at in ('import-from-file', 'load-data', None):
# (re)initialise the tokenizer data
return tokenizer_factory.create_tokenizer(config)
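
The repeated `continue_at in (...)` checks above form a simple stage ladder. A standalone sketch of the same gating logic, using the stage names from the diff:

    STAGES = ['import-from-file', 'load-data', 'indexing', 'db-postprocess']

    def stage_runs(continue_at, stage):
        # A step runs when the import starts fresh (continue_at is None)
        # or resumes at or before the step's own stage.
        start = 0 if continue_at is None else STAGES.index(continue_at)
        return STAGES.index(stage) >= start

    assert stage_runs(None, 'load-data')            # fresh import runs everything
    assert stage_runs('load-data', 'indexing')      # resuming runs later stages
    assert not stage_runs('indexing', 'load-data')  # ...but skips earlier ones
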
class Configuration:
- """ Load and manage the project configuration.
-
- Nominatim uses dotenv to configure the software. Configuration options
- are resolved in the following order:
-
- * from the OS environment (or the dictionary given in `environ`)
- * from the .env file in the project directory of the installation
- * from the default installation in the configuration directory
+ """ This class wraps access to the configuration settings
+ for the Nominatim instance in use.
All Nominatim configuration options are prefixed with 'NOMINATIM_' to
- avoid conflicts with other environment variables.
+ avoid conflicts with other environment variables. All settings can
+ be accessed as properties of the class under the same name as the
+ setting but with the `NOMINATIM_` prefix removed. In addition, there
+ are accessor functions that convert the setting values to types
+ other than string.
"""
def __init__(self, project_dir: Optional[Path],
def get_bool(self, name: str) -> bool:
""" Return the given configuration parameter as a boolean.
- Values of '1', 'yes' and 'true' are accepted as truthy values,
- everything else is interpreted as false.
+
+ Parameters:
+ name: Name of the configuration parameter with the NOMINATIM_
+ prefix removed.
+
+ Returns:
+ `True` for values of '1', 'yes' and 'true', `False` otherwise.
"""
return getattr(self, name).lower() in ('1', 'yes', 'true')
def get_int(self, name: str) -> int:
""" Return the given configuration parameter as an int.
+
+ Parameters:
+ name: Name of the configuration parameter with the NOMINATIM_
+ prefix removed.
+
+ Returns:
+ The configuration value converted to int.
+
+ Raises:
+ ValueError: when the value is not a number.
"""
try:
return int(getattr(self, name))
def get_str_list(self, name: str) -> Optional[List[str]]:
""" Return the given configuration parameter as a list of strings.
The values are assumed to be given as a comma-separated list and
- will be stripped before returning them. On empty values None
- is returned.
+ will be stripped before returning them.
+
+ Parameters:
+ name: Name of the configuration parameter with the NOMINATIM_
+ prefix removed.
+
+ Returns:
+ (List[str]): The comma-split parameter as a list. The
+ elements are stripped of leading and final spaces before
+ being returned.
+ (None): The configuration parameter was unset or empty.
"""
raw = getattr(self, name)
def get_path(self, name: str) -> Optional[Path]:
""" Return the given configuration parameter as a Path.
- If a relative path is configured, then the function converts this
- into an absolute path with the project directory as root path.
- If the configuration is unset, None is returned.
+
+ Parameters:
+ name: Name of the configuration parameter with the NOMINATIM_
+ prefix removed.
+
+ Returns:
+ (Path): A Path object of the parameter value.
+ If a relative path is configured, then the function converts this
+ into an absolute path with the project directory as root path.
+ (None): The configuration parameter was unset or empty.
"""
value = getattr(self, name)
if not value:
pc_pattern = config['pattern'].replace('d', '[0-9]').replace('l', '[A-Z]')
- self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?(.*)\\s*')
+ self.norm_pattern = re.compile(f'\\s*(?:{country_code.upper()}[ -]?)?({pc_pattern})\\s*')
self.pattern = re.compile(pc_pattern)
self.output = config.get('output', r'\g<0>')
self.current_params: Optional[Sequence[Any]] = None
self.ignore_sql_errors = ignore_sql_errors
- self.conn: Optional['psycopg2.connection'] = None
- self.cursor: Optional['psycopg2.cursor'] = None
+ self.conn: Optional['psycopg2._psycopg.connection'] = None
+ self.cursor: Optional['psycopg2._psycopg.cursor'] = None
self.connect(cursor_factory=cursor_factory)
def close(self) -> None:
"""
if self.conn is not None:
if self.cursor is not None:
- self.cursor.close() # type: ignore[no-untyped-call]
+ self.cursor.close()
self.cursor = None
self.conn.close()
""" Query execution that logs the SQL query when debugging is enabled.
"""
if LOG.isEnabledFor(logging.DEBUG):
- LOG.debug(self.mogrify(query, args).decode('utf-8')) # type: ignore[no-untyped-call]
+ LOG.debug(self.mogrify(query, args).decode('utf-8'))
super().execute(query, args)
return (int(version_parts[0]), int(version_parts[1]))
+
+ def extension_loaded(self, extension_name: str) -> bool:
+ """ Return True if the hstore extension is loaded in the database.
+ """
+ with self.cursor() as cur:
+ cur.execute('SELECT extname FROM pg_extension WHERE extname = %s', (extension_name, ))
+ return cur.rowcount > 0
+
+
class ConnectionContext(ContextManager[Connection]):
""" Context manager of the connection that also provides direct access
to the underlying connection.
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Custom functions and expressions for SQLAlchemy.
+"""
+from __future__ import annotations
+from typing import Any
+
+import sqlalchemy as sa
+from sqlalchemy.ext.compiler import compiles
+
+from nominatim.typing import SaColumn
+
+# pylint: disable=all
+
+class PlacexGeometryReverseLookuppolygon(sa.sql.functions.GenericFunction[Any]):
+ """ Check for conditions that allow partial index use on
+ 'idx_placex_geometry_reverse_lookupPolygon'.
+
+        The conditions need to be constant so that the query planner
+        picks them up correctly in prepared statements.
+ """
+ name = 'PlacexGeometryReverseLookuppolygon'
+ inherit_cache = True
+
+
+@compiles(PlacexGeometryReverseLookuppolygon) # type: ignore[no-untyped-call, misc]
+def _default_intersects(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return ("(ST_GeometryType(placex.geometry) in ('ST_Polygon', 'ST_MultiPolygon')"
+ " AND placex.rank_address between 4 and 25"
+ " AND placex.type != 'postcode'"
+ " AND placex.name is not null"
+ " AND placex.indexed_status = 0"
+ " AND placex.linked_place_id is null)")
+
+
+@compiles(PlacexGeometryReverseLookuppolygon, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_intersects(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return ("(ST_GeometryType(placex.geometry) in ('POLYGON', 'MULTIPOLYGON')"
+ " AND placex.rank_address between 4 and 25"
+ " AND placex.type != 'postcode'"
+ " AND placex.name is not null"
+ " AND placex.indexed_status = 0"
+ " AND placex.linked_place_id is null)")
+
+
+class IntersectsReverseDistance(sa.sql.functions.GenericFunction[Any]):
+ name = 'IntersectsReverseDistance'
+ inherit_cache = True
+
+ def __init__(self, table: sa.Table, geom: SaColumn) -> None:
+ super().__init__(table.c.geometry, # type: ignore[no-untyped-call]
+ table.c.rank_search, geom)
+ self.tablename = table.name
+
+
+@compiles(IntersectsReverseDistance) # type: ignore[no-untyped-call, misc]
+def default_reverse_place_diameter(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ table = element.tablename
+ return f"({table}.rank_address between 4 and 25"\
+ f" AND {table}.type != 'postcode'"\
+ f" AND {table}.name is not null"\
+ f" AND {table}.linked_place_id is null"\
+ f" AND {table}.osm_type = 'N'" + \
+ " AND ST_Buffer(%s, reverse_place_diameter(%s)) && %s)" % \
+ tuple(map(lambda c: compiler.process(c, **kw), element.clauses))
+
+
+@compiles(IntersectsReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_reverse_place_diameter(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ geom1, rank, geom2 = list(element.clauses)
+ table = element.tablename
+
+ return (f"({table}.rank_address between 4 and 25"\
+ f" AND {table}.type != 'postcode'"\
+ f" AND {table}.name is not null"\
+ f" AND {table}.linked_place_id is null"\
+ f" AND {table}.osm_type = 'N'"\
+ " AND MbrIntersects(%s, ST_Expand(%s, 14.0 * exp(-0.2 * %s) - 0.03))"\
+ f" AND {table}.place_id IN"\
+ " (SELECT place_id FROM placex_place_node_areas"\
+ " WHERE ROWID IN (SELECT ROWID FROM SpatialIndex"\
+ " WHERE f_table_name = 'placex_place_node_areas'"\
+ " AND search_frame = %s)))") % (
+ compiler.process(geom1, **kw),
+ compiler.process(geom2, **kw),
+ compiler.process(rank, **kw),
+ compiler.process(geom2, **kw))
+
+
+class IsBelowReverseDistance(sa.sql.functions.GenericFunction[Any]):
+ name = 'IsBelowReverseDistance'
+ inherit_cache = True
+
+
+@compiles(IsBelowReverseDistance) # type: ignore[no-untyped-call, misc]
+def default_is_below_reverse_distance(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ dist, rank = list(element.clauses)
+ return "%s < reverse_place_diameter(%s)" % (compiler.process(dist, **kw),
+ compiler.process(rank, **kw))
+
+
+@compiles(IsBelowReverseDistance, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_is_below_reverse_distance(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ dist, rank = list(element.clauses)
+ return "%s < 14.0 * exp(-0.2 * %s) - 0.03" % (compiler.process(dist, **kw),
+ compiler.process(rank, **kw))
+
+
+def select_index_placex_geometry_reverse_lookupplacenode(table: str) -> 'sa.TextClause':
+ """ Create an expression with the necessary conditions over a placex
+ table that the index 'idx_placex_geometry_reverse_lookupPlaceNode'
+ can be used.
+ """
+ return sa.text(f"{table}.rank_address between 4 and 25"
+ f" AND {table}.type != 'postcode'"
+ f" AND {table}.name is not null"
+ f" AND {table}.linked_place_id is null"
+ f" AND {table}.osm_type = 'N'")
+
+
+class IsAddressPoint(sa.sql.functions.GenericFunction[Any]):
+ name = 'IsAddressPoint'
+ inherit_cache = True
+
+ def __init__(self, table: sa.Table) -> None:
+ super().__init__(table.c.rank_address, # type: ignore[no-untyped-call]
+ table.c.housenumber, table.c.name)
+
+
+@compiles(IsAddressPoint) # type: ignore[no-untyped-call, misc]
+def default_is_address_point(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ rank, hnr, name = list(element.clauses)
+ return "(%s = 30 AND (%s IS NOT NULL OR %s ? 'addr:housename'))" % (
+ compiler.process(rank, **kw),
+ compiler.process(hnr, **kw),
+ compiler.process(name, **kw))
+
+
+@compiles(IsAddressPoint, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_is_address_point(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ rank, hnr, name = list(element.clauses)
+ return "(%s = 30 AND coalesce(%s, json_extract(%s, '$.addr:housename')) IS NOT NULL)" % (
+ compiler.process(rank, **kw),
+ compiler.process(hnr, **kw),
+ compiler.process(name, **kw))
+
+
+class CrosscheckNames(sa.sql.functions.GenericFunction[Any]):
+ """ Check if in the given list of names in parameters 1 any of the names
+ from the JSON array in parameter 2 are contained.
+ """
+ name = 'CrosscheckNames'
+ inherit_cache = True
+
+@compiles(CrosscheckNames) # type: ignore[no-untyped-call, misc]
+def compile_crosscheck_names(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ arg1, arg2 = list(element.clauses)
+ return "coalesce(avals(%s) && ARRAY(SELECT * FROM json_array_elements_text(%s)), false)" % (
+ compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(CrosscheckNames, 'sqlite') # type: ignore[no-untyped-call, misc]
+def compile_sqlite_crosscheck_names(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ arg1, arg2 = list(element.clauses)
+ return "EXISTS(SELECT *"\
+ " FROM json_each(%s) as name, json_each(%s) as match_name"\
+ " WHERE name.value = match_name.value)"\
+ % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+class JsonArrayEach(sa.sql.functions.GenericFunction[Any]):
+ """ Return elements of a json array as a set.
+ """
+ name = 'JsonArrayEach'
+ inherit_cache = True
+
+
+@compiles(JsonArrayEach) # type: ignore[no-untyped-call, misc]
+def default_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "json_array_elements(%s)" % compiler.process(element.clauses, **kw)
+
+
+@compiles(JsonArrayEach, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_json_array_each(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "json_each(%s)" % compiler.process(element.clauses, **kw)
+
+
+class Greatest(sa.sql.functions.GenericFunction[Any]):
+ """ Function to compute maximum of all its input parameters.
+ """
+ name = 'greatest'
+ inherit_cache = True
+
+
+@compiles(Greatest, 'sqlite') # type: ignore[no-untyped-call, misc]
+def sqlite_greatest(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "max(%s)" % compiler.process(element.clauses, **kw)
from typing import Any
import sqlalchemy as sa
-from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB
+from sqlalchemy.dialects.postgresql import HSTORE, ARRAY, JSONB, array
from sqlalchemy.dialects.sqlite import JSON as sqlite_json
+import nominatim.db.sqlalchemy_functions #pylint: disable=unused-import
from nominatim.db.sqlalchemy_types import Geometry
class PostgresTypes:
Composite = HSTORE
Json = JSONB
IntArray = ARRAY(sa.Integer()) #pylint: disable=invalid-name
+ to_array = array
class SqliteTypes:
Json = sqlite_json
IntArray = sqlite_json
+ @staticmethod
+ def to_array(arr: Any) -> Any:
+ """ Sqlite has no special conversion for arrays.
+ """
+ return arr
+
#pylint: disable=too-many-instance-attributes
class SearchTables:
""" Data class that holds the tables of the Nominatim database.
+
+ This schema strictly reflects the read-access view of the database.
+ Any data used for updates only will not be visible.
"""
def __init__(self, meta: sa.MetaData, engine_name: str) -> None:
sa.Column('value', sa.Text))
self.placex = sa.Table('placex', meta,
- sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
+ sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('linked_place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('indexed_date', sa.DateTime),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('rank_search', sa.SmallInteger),
- sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('osm_type', sa.String(1), nullable=False),
sa.Column('osm_id', sa.BigInteger, nullable=False),
sa.Column('centroid', Geometry))
self.addressline = sa.Table('place_addressline', meta,
- sa.Column('place_id', sa.BigInteger, index=True),
- sa.Column('address_place_id', sa.BigInteger, index=True),
+ sa.Column('place_id', sa.BigInteger),
+ sa.Column('address_place_id', sa.BigInteger),
sa.Column('distance', sa.Float),
- sa.Column('cached_rank_address', sa.SmallInteger),
sa.Column('fromarea', sa.Boolean),
sa.Column('isaddress', sa.Boolean))
self.postcode = sa.Table('location_postcode', meta,
- sa.Column('place_id', sa.BigInteger, unique=True),
+ sa.Column('place_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('rank_search', sa.SmallInteger),
sa.Column('rank_address', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('country_code', sa.String(2)),
- sa.Column('postcode', sa.Text, index=True),
+ sa.Column('postcode', sa.Text),
sa.Column('geometry', Geometry))
self.osmline = sa.Table('location_property_osmline', meta,
- sa.Column('place_id', sa.BigInteger, nullable=False, unique=True),
+ sa.Column('place_id', sa.BigInteger, nullable=False),
sa.Column('osm_id', sa.BigInteger),
sa.Column('parent_place_id', sa.BigInteger),
sa.Column('indexed_date', sa.DateTime),
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
- sa.Column('partition', sa.SmallInteger),
sa.Column('indexed_status', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('address', self.types.Composite),
sa.Column('country_code', sa.String(2)),
sa.Column('name', self.types.Composite),
sa.Column('derived_name', self.types.Composite),
- sa.Column('country_default_language_code', sa.Text),
sa.Column('partition', sa.Integer))
self.country_grid = sa.Table('country_osm_grid', meta,
# The following tables are not necessarily present.
self.search_name = sa.Table('search_name', meta,
- sa.Column('place_id', sa.BigInteger, index=True),
+ sa.Column('place_id', sa.BigInteger),
sa.Column('importance', sa.Float),
sa.Column('search_rank', sa.SmallInteger),
sa.Column('address_rank', sa.SmallInteger),
- sa.Column('name_vector', self.types.IntArray, index=True),
- sa.Column('nameaddress_vector', self.types.IntArray, index=True),
+ sa.Column('name_vector', self.types.IntArray),
+ sa.Column('nameaddress_vector', self.types.IntArray),
sa.Column('country_code', sa.String(2)),
sa.Column('centroid', Geometry))
sa.Column('startnumber', sa.Integer),
sa.Column('endnumber', sa.Integer),
sa.Column('step', sa.SmallInteger),
- sa.Column('partition', sa.SmallInteger),
sa.Column('linegeo', Geometry),
sa.Column('postcode', sa.Text))
"""
Custom types for SQLAlchemy.
"""
+from __future__ import annotations
from typing import Callable, Any, cast
import sys
import sqlalchemy as sa
+from sqlalchemy.ext.compiler import compiles
from sqlalchemy import types
from nominatim.typing import SaColumn, SaBind
#pylint: disable=all
+class Geometry_DistanceSpheroid(sa.sql.expression.FunctionElement[float]):
+ """ Function to compute the spherical distance in meters.
+ """
+ type = sa.Float()
+ name = 'Geometry_DistanceSpheroid'
+ inherit_cache = True
+
+
+@compiles(Geometry_DistanceSpheroid) # type: ignore[no-untyped-call, misc]
+def _default_distance_spheroid(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_DistanceSpheroid(%s,"\
+ " 'SPHEROID[\"WGS 84\",6378137,298.257223563, AUTHORITY[\"EPSG\",\"7030\"]]')"\
+ % compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_DistanceSpheroid, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _spatialite_distance_spheroid(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "COALESCE(Distance(%s, true), 0.0)" % compiler.process(element.clauses, **kw)
+
+
+class Geometry_IsLineLike(sa.sql.expression.FunctionElement[Any]):
+ """ Check if the geometry is a line or multiline.
+ """
+ name = 'Geometry_IsLineLike'
+ inherit_cache = True
+
+
+@compiles(Geometry_IsLineLike) # type: ignore[no-untyped-call, misc]
+def _default_is_line_like(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_GeometryType(%s) IN ('ST_LineString', 'ST_MultiLineString')" % \
+ compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_IsLineLike, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_is_line_like(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_GeometryType(%s) IN ('LINESTRING', 'MULTILINESTRING')" % \
+ compiler.process(element.clauses, **kw)
+
+
+class Geometry_IsAreaLike(sa.sql.expression.FunctionElement[Any]):
+ """ Check if the geometry is a polygon or multipolygon.
+ """
+    name = 'Geometry_IsAreaLike'
+ inherit_cache = True
+
+
+@compiles(Geometry_IsAreaLike) # type: ignore[no-untyped-call, misc]
+def _default_is_area_like(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_GeometryType(%s) IN ('ST_Polygon', 'ST_MultiPolygon')" % \
+ compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_IsAreaLike, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_is_area_like(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_GeometryType(%s) IN ('POLYGON', 'MULTIPOLYGON')" % \
+ compiler.process(element.clauses, **kw)
+
+
+class Geometry_IntersectsBbox(sa.sql.expression.FunctionElement[Any]):
+ """ Check if the bounding boxes of the given geometries intersect.
+ """
+ name = 'Geometry_IntersectsBbox'
+ inherit_cache = True
+
+
+@compiles(Geometry_IntersectsBbox) # type: ignore[no-untyped-call, misc]
+def _default_intersects(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ arg1, arg2 = list(element.clauses)
+ return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(Geometry_IntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_intersects(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "MbrIntersects(%s) = 1" % compiler.process(element.clauses, **kw)
+
+
+class Geometry_ColumnIntersectsBbox(sa.sql.expression.FunctionElement[Any]):
+ """ Check if the bounding box of the geometry intersects with the
+ given table column, using the spatial index for the column.
+
+ The index must exist or the query may return nothing.
+ """
+ name = 'Geometry_ColumnIntersectsBbox'
+ inherit_cache = True
+
+
+@compiles(Geometry_ColumnIntersectsBbox) # type: ignore[no-untyped-call, misc]
+def default_intersects_column(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ arg1, arg2 = list(element.clauses)
+ return "%s && %s" % (compiler.process(arg1, **kw), compiler.process(arg2, **kw))
+
+
+@compiles(Geometry_ColumnIntersectsBbox, 'sqlite') # type: ignore[no-untyped-call, misc]
+def spatialite_intersects_column(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ arg1, arg2 = list(element.clauses)
+ return "MbrIntersects(%s, %s) = 1 and "\
+ "%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
+ "WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
+ "AND search_frame = %s)" %(
+ compiler.process(arg1, **kw),
+ compiler.process(arg2, **kw),
+ arg1.table.name, arg1.table.name, arg1.name,
+ compiler.process(arg2, **kw))
+
+
+class Geometry_ColumnDWithin(sa.sql.expression.FunctionElement[Any]):
+ """ Check if the geometry is within the distance of the
+ given table column, using the spatial index for the column.
+
+ The index must exist or the query may return nothing.
+ """
+ name = 'Geometry_ColumnDWithin'
+ inherit_cache = True
+
+
+@compiles(Geometry_ColumnDWithin) # type: ignore[no-untyped-call, misc]
+def default_dwithin_column(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ return "ST_DWithin(%s)" % compiler.process(element.clauses, **kw)
+
+
+@compiles(Geometry_ColumnDWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
+def spatialite_dwithin_column(element: SaColumn,
+ compiler: 'sa.Compiled', **kw: Any) -> str:
+ geom1, geom2, dist = list(element.clauses)
+ return "ST_Distance(%s, %s) < %s and "\
+ "%s.ROWID IN (SELECT ROWID FROM SpatialIndex "\
+ "WHERE f_table_name = '%s' AND f_geometry_column = '%s' "\
+ "AND search_frame = ST_Expand(%s, %s))" %(
+ compiler.process(geom1, **kw),
+ compiler.process(geom2, **kw),
+ compiler.process(dist, **kw),
+ geom1.table.name, geom1.table.name, geom1.name,
+ compiler.process(geom2, **kw),
+ compiler.process(dist, **kw))
+
+
+
class Geometry(types.UserDefinedType): # type: ignore[type-arg]
""" Simplified type decorator for PostGIS geometry. This type
only supports geometries in 4326 projection.
def bind_processor(self, dialect: 'sa.Dialect') -> Callable[[Any], str]:
def process(value: Any) -> str:
if isinstance(value, str):
- return 'SRID=4326;' + value
+ return value
- return 'SRID=4326;' + cast(str, value.to_wkt())
+ return cast(str, value.to_wkt())
return process
return process
+ def column_expression(self, col: SaColumn) -> SaColumn:
+ return sa.func.ST_AsEWKB(col)
+
+
def bind_expression(self, bindvalue: SaBind) -> SaColumn:
- return sa.func.ST_GeomFromText(bindvalue, type_=self)
+ return sa.func.ST_GeomFromText(bindvalue, sa.text('4326'), type_=self)
class comparator_factory(types.UserDefinedType.Comparator): # type: ignore[type-arg]
def intersects(self, other: SaColumn) -> 'sa.Operators':
- return self.op('&&')(other)
+ if isinstance(self.expr, sa.Column):
+ return Geometry_ColumnIntersectsBbox(self.expr, other)
+
+ return Geometry_IntersectsBbox(self.expr, other)
+
def is_line_like(self) -> SaColumn:
- return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_LineString',
- 'ST_MultiLineString'))
+ return Geometry_IsLineLike(self)
+
def is_area(self) -> SaColumn:
- return sa.func.ST_GeometryType(self, type_=sa.String).in_(('ST_Polygon',
- 'ST_MultiPolygon'))
+ return Geometry_IsAreaLike(self)
def ST_DWithin(self, other: SaColumn, distance: SaColumn) -> SaColumn:
- return sa.func.ST_DWithin(self, other, distance, type_=sa.Float)
+ if isinstance(self.expr, sa.Column):
+ return Geometry_ColumnDWithin(self.expr, other, distance)
+
+ return sa.func.ST_DWithin(self.expr, other, distance)
+
+
+ def ST_DWithin_no_index(self, other: SaColumn, distance: SaColumn) -> SaColumn:
+ return sa.func.ST_DWithin(sa.func.coalesce(sa.null(), self),
+ other, distance)
+
+
+ def ST_Intersects_no_index(self, other: SaColumn) -> 'sa.Operators':
+ return Geometry_IntersectsBbox(sa.func.coalesce(sa.null(), self), other)
def ST_Distance(self, other: SaColumn) -> SaColumn:
def ST_Contains(self, other: SaColumn) -> SaColumn:
- return sa.func.ST_Contains(self, other, type_=sa.Float)
+ return sa.func.ST_Contains(self, other, type_=sa.Boolean)
+
+
+ def ST_CoveredBy(self, other: SaColumn) -> SaColumn:
+ return sa.func.ST_CoveredBy(self, other, type_=sa.Boolean)
def ST_ClosestPoint(self, other: SaColumn) -> SaColumn:
- return sa.func.ST_ClosestPoint(self, other, type_=Geometry)
+ return sa.func.coalesce(sa.func.ST_ClosestPoint(self, other, type_=Geometry),
+ other)
def ST_Buffer(self, other: SaColumn) -> SaColumn:
def ST_LineLocatePoint(self, other: SaColumn) -> SaColumn:
return sa.func.ST_LineLocatePoint(self, other, type_=sa.Float)
+
+
+ def distance_spheroid(self, other: SaColumn) -> SaColumn:
+ return Geometry_DistanceSpheroid(self, other)
+
+
+@compiles(Geometry, 'sqlite') # type: ignore[no-untyped-call]
+def get_col_spec(self, *args, **kwargs): # type: ignore[no-untyped-def]
+ return 'GEOMETRY'
+
+
+SQLITE_FUNCTION_ALIAS = (
+ ('ST_AsEWKB', sa.Text, 'AsEWKB'),
+ ('ST_GeomFromEWKT', Geometry, 'GeomFromEWKT'),
+ ('ST_AsGeoJSON', sa.Text, 'AsGeoJSON'),
+ ('ST_AsKML', sa.Text, 'AsKML'),
+ ('ST_AsSVG', sa.Text, 'AsSVG'),
+ ('ST_LineLocatePoint', sa.Float, 'ST_Line_Locate_Point'),
+ ('ST_LineInterpolatePoint', sa.Float, 'ST_Line_Interpolate_Point'),
+)
+
+def _add_function_alias(func: str, ftype: type, alias: str) -> None:
+ _FuncDef = type(func, (sa.sql.functions.GenericFunction, ), {
+ "type": ftype(),
+ "name": func,
+ "identifier": func,
+ "inherit_cache": True})
+
+ func_templ = f"{alias}(%s)"
+
+ def _sqlite_impl(element: Any, compiler: Any, **kw: Any) -> Any:
+ return func_templ % compiler.process(element.clauses, **kw)
+
+ compiles(_FuncDef, 'sqlite')(_sqlite_impl) # type: ignore[no-untyped-call]
+
+for alias in SQLITE_FUNCTION_ALIAS:
+ _add_function_alias(*alias)
+
+
+class ST_DWithin(sa.sql.functions.GenericFunction[Any]):
+ name = 'ST_DWithin'
+ inherit_cache = True
+
+
+@compiles(ST_DWithin, 'sqlite') # type: ignore[no-untyped-call, misc]
+def _sqlite_st_dwithin(element: SaColumn, compiler: 'sa.Compiled', **kw: Any) -> str:
+ geom1, geom2, dist = list(element.clauses)
+ return "(MbrIntersects(%s, ST_Expand(%s, %s)) = 1 AND ST_Distance(%s, %s) <= %s)" % (
+ compiler.process(geom1, **kw), compiler.process(geom2, **kw),
+ compiler.process(dist, **kw),
+ compiler.process(geom1, **kw), compiler.process(geom2, **kw),
+ compiler.process(dist, **kw))
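
Taken together, the comparator methods above let query code stay dialect-neutral. A hypothetical lookup sketch, assuming `t` is the `placex` table from `SearchTables`:

    import sqlalchemy as sa

    def places_near(t: sa.Table, lon: float, lat: float) -> 'sa.Select':
        # Mirrors the bind_expression above: geometries are passed as WKT
        # with an explicit SRID of 4326.
        pt = sa.func.ST_GeomFromText(f'POINT({lon} {lat})', sa.text('4326'))
        # ST_DWithin resolves to the index-backed Geometry_ColumnDWithin for
        # a real column and compiles to the SpatiaLite emulation on SQLite.
        return sa.select(t.c.place_id)\
                 .where(t.c.geometry.ST_DWithin(pt, 0.006))\
                 .order_by(t.c.centroid.ST_Distance(pt))
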
"""
if self.buffer.tell() > 0:
self.buffer.seek(0)
- cur.copy_from(self.buffer, table, columns=columns) # type: ignore[no-untyped-call]
+ cur.copy_from(self.buffer, table, columns=columns)
"""
Server implementation using the falcon webserver framework.
"""
-from typing import Optional, Mapping, cast, Any
+from typing import Optional, Mapping, cast, Any, List
from pathlib import Path
+import datetime as dt
from falcon.asgi import App, Request, Response
resp.content_type = exception.content_type
+async def timeout_error_handler(req: Request, resp: Response, #pylint: disable=unused-argument
+ exception: TimeoutError, #pylint: disable=unused-argument
+ _: Any) -> None:
+ """ Special error handler that passes message and content type as
+ per exception info.
+ """
+ resp.status = 503
+ resp.text = "Query took too long to process."
+ resp.content_type = 'text/plain; charset=utf-8'
+
+
class ParamWrapper(api_impl.ASGIAdaptor):
""" Adaptor class for server glue to Falcon framework.
"""
return HTTPNominatimError(msg, status, self.content_type)
- def create_response(self, status: int, output: str) -> None:
+ def create_response(self, status: int, output: str, num_results: int) -> None:
+ self.response.context.num_results = num_results
self.response.status = status
self.response.text = output
self.response.content_type = self.content_type
+ def base_uri(self) -> str:
+        return cast(str, self.request.forwarded_prefix)
+
def config(self) -> Configuration:
return self._config
""" Converter for server glue endpoint functions to Falcon request handlers.
"""
- def __init__(self, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+ def __init__(self, name: str, func: api_impl.EndpointFunc, api: NominatimAPIAsync) -> None:
+ self.name = name
self.func = func
self.api = api
await self.func(self.api, ParamWrapper(req, resp, self.api.config))
+class FileLoggingMiddleware:
+ """ Middleware to log selected requests into a file.
+ """
+
+ def __init__(self, file_name: str):
+ self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+
+ async def process_request(self, req: Request, _: Response) -> None:
+ """ Callback before the request starts timing.
+ """
+ req.context.start = dt.datetime.now(tz=dt.timezone.utc)
+
+
+ async def process_response(self, req: Request, resp: Response,
+ resource: Optional[EndpointWrapper],
+ req_succeeded: bool) -> None:
+ """ Callback after requests writes to the logfile. It only
+ writes logs for sucessful requests for search, reverse and lookup.
+ """
+ if not req_succeeded or resource is None or resp.status != 200\
+ or resource.name not in ('reverse', 'search', 'lookup', 'details'):
+ return
+
+ finish = dt.datetime.now(tz=dt.timezone.utc)
+ duration = (finish - req.context.start).total_seconds()
+ params = req.scope['query_string'].decode('utf8')
+ start = req.context.start.replace(tzinfo=None)\
+ .isoformat(sep=' ', timespec='milliseconds')
+
+ self.fd.write(f"[{start}] "
+ f"{duration:.4f} {getattr(resp.context, 'num_results', 0)} "
+ f'{resource.name} "{params}"\n')
+
+
+class APIShutdown:
+ """ Middleware that closes any open database connections.
+ """
+
+ def __init__(self, api: NominatimAPIAsync) -> None:
+ self.api = api
+
+ async def process_shutdown(self, *_: Any) -> None:
+ """Process the ASGI lifespan shutdown event.
+ """
+ await self.api.close()
+
+
def get_application(project_dir: Path,
environ: Optional[Mapping[str, str]] = None) -> App:
""" Create a Nominatim Falcon ASGI application.
"""
api = NominatimAPIAsync(project_dir, environ)
- app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'))
+ middleware: List[object] = [APIShutdown(api)]
+ log_file = api.config.LOG_FILE
+ if log_file:
+ middleware.append(FileLoggingMiddleware(log_file))
+
+ app = App(cors_enable=api.config.get_bool('CORS_NOACCESSCONTROL'),
+ middleware=middleware)
app.add_error_handler(HTTPNominatimError, nominatim_error_handler)
+ app.add_error_handler(TimeoutError, timeout_error_handler)
legacy_urls = api.config.get_bool('SERVE_LEGACY_URLS')
for name, func in api_impl.ROUTES:
- endpoint = EndpointWrapper(func, api)
+ endpoint = EndpointWrapper(name, func, api)
app.add_route(f"/{name}", endpoint)
if legacy_urls:
app.add_route(f"/{name}.php", endpoint)
"""
Server implementation using the starlette webserver framework.
"""
-from typing import Any, Optional, Mapping, Callable, cast, Coroutine
+from typing import Any, Optional, Mapping, Callable, cast, Coroutine, Dict, Awaitable
from pathlib import Path
+import datetime as dt
from starlette.applications import Starlette
from starlette.routing import Route
from starlette.exceptions import HTTPException
-from starlette.responses import Response
+from starlette.responses import Response, PlainTextResponse
from starlette.requests import Request
from starlette.middleware import Middleware
+from starlette.middleware.base import BaseHTTPMiddleware, RequestResponseEndpoint
from starlette.middleware.cors import CORSMiddleware
from nominatim.api import NominatimAPIAsync
headers={'content-type': self.content_type})
- def create_response(self, status: int, output: str) -> Response:
+ def create_response(self, status: int, output: str, num_results: int) -> Response:
+ self.request.state.num_results = num_results
return Response(output, status_code=status, media_type=self.content_type)
+ def base_uri(self) -> str:
+ scheme = self.request.url.scheme
+ host = self.request.url.hostname
+ port = self.request.url.port
+ root = self.request.scope['root_path']
+ if (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
+ port = None
+ if port is not None:
+ return f"{scheme}://{host}:{port}{root}"
+
+ return f"{scheme}://{host}{root}"
+
+
def config(self) -> Configuration:
return cast(Configuration, self.request.app.state.API.config)
return _callback
+class FileLoggingMiddleware(BaseHTTPMiddleware):
+ """ Middleware to log selected requests into a file.
+ """
+
+ def __init__(self, app: Starlette, file_name: str = ''):
+ super().__init__(app)
+ self.fd = open(file_name, 'a', buffering=1, encoding='utf8') # pylint: disable=R1732
+
+ async def dispatch(self, request: Request,
+ call_next: RequestResponseEndpoint) -> Response:
+ start = dt.datetime.now(tz=dt.timezone.utc)
+ response = await call_next(request)
+
+ if response.status_code != 200:
+ return response
+
+ finish = dt.datetime.now(tz=dt.timezone.utc)
+
+ for endpoint in ('reverse', 'search', 'lookup', 'details'):
+ if request.url.path.startswith('/' + endpoint):
+ qtype = endpoint
+ break
+ else:
+ return response
+
+ duration = (finish - start).total_seconds()
+ params = request.scope['query_string'].decode('utf8')
+
+ self.fd.write(f"[{start.replace(tzinfo=None).isoformat(sep=' ', timespec='milliseconds')}] "
+ f"{duration:.4f} {getattr(request.state, 'num_results', 0)} "
+ f'{qtype} "{params}"\n')
+
+ return response
+
+
+async def timeout_error(request: Request, #pylint: disable=unused-argument
+ _: Exception) -> Response:
+ """ Error handler for query timeouts.
+ """
+ return PlainTextResponse("Query took too long to process.", status_code=503)
+
+
def get_application(project_dir: Path,
environ: Optional[Mapping[str, str]] = None,
debug: bool = True) -> Starlette:
middleware = []
if config.get_bool('CORS_NOACCESSCONTROL'):
- middleware.append(Middleware(CORSMiddleware, allow_origins=['*']))
+ middleware.append(Middleware(CORSMiddleware,
+ allow_origins=['*'],
+ allow_methods=['GET', 'OPTIONS'],
+ max_age=86400))
+
+ log_file = config.LOG_FILE
+ if log_file:
+ middleware.append(Middleware(FileLoggingMiddleware, file_name=log_file))
+
+ exceptions: Dict[Any, Callable[[Request, Exception], Awaitable[Response]]] = {
+ TimeoutError: timeout_error
+ }
async def _shutdown() -> None:
await app.state.API.close()
app = Starlette(debug=debug, routes=routes, middleware=middleware,
+ exception_handlers=exceptions,
on_shutdown=[_shutdown])
app.state.API = NominatimAPIAsync(project_dir, environ)
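
The port handling in `base_uri` above drops default ports; a standalone sketch of that normalisation:

    def base_uri(scheme, host, port, root):
        # Default ports are omitted from the reconstructed URI.
        if (scheme == 'http' and port == 80) or (scheme == 'https' and port == 443):
            port = None
        if port is not None:
            return f"{scheme}://{host}:{port}{root}"
        return f"{scheme}://{host}{root}"

    assert base_uri('https', 'example.org', 443, '/nominatim') == 'https://example.org/nominatim'
    assert base_uri('http', 'localhost', 8088, '') == 'http://localhost:8088'
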
from pathlib import Path
from nominatim.config import Configuration
+from nominatim.db.connection import Connection
from nominatim.data.place_info import PlaceInfo
from nominatim.typing import Protocol
Returns:
The function returns the list of all tuples that could be
- found for the given words. Each list entry is a tuple of
- (original word, word token, word id).
+ found for the given words. Each list entry is a tuple of
+ (original word, word token, word id).
"""
Returns:
A JSON-serialisable structure that will be handed into
- the database via the `token_info` field.
+ the database via the `token_info` field.
"""
tables should be skipped. This option is only required for
migration purposes and can be safely ignored by custom
tokenizers.
-
- TODO: can we move the init_db parameter somewhere else?
"""
Returns:
If an issue was found, return an error message with the
- description of the issue as well as hints for the user on
- how to resolve the issue. If everything is okay, return `None`.
+ description of the issue as well as hints for the user on
+ how to resolve the issue. If everything is okay, return `None`.
"""
"""
+ @abstractmethod
+ def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+ """ Return a list of the most frequent full words in the database.
+
+ Arguments:
+ conn: Open connection to the database which may be used to
+ retrieve the words.
+ num: Maximum number of words to return.
+ """
+
+
class TokenizerModule(Protocol):
""" Interface that must be exported by modules that implement their
own tokenizer.
self.loader.make_token_analysis())
+ def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+ """ Return a list of the `num` most frequent full words
+ in the database.
+ """
+ with conn.cursor() as cur:
+ cur.execute("""SELECT word, sum((info->>'count')::int) as count
+ FROM word WHERE type = 'W'
+ GROUP BY word
+ ORDER BY count DESC LIMIT %s""", (num,))
+ return list(s[0].split('@')[0] for s in cur)
+
+
def _install_php(self, phpdir: Path, overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
return LegacyNameAnalyzer(self.dsn, normalizer)
+ def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+ """ Return a list of the `num` most frequent full words
+ in the database.
+ """
+ with conn.cursor() as cur:
+ cur.execute(""" SELECT word FROM word WHERE word is not null
+ ORDER BY search_name_count DESC LIMIT %s""", (num,))
+ return list(s[0] for s in cur)
+
+
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
Returns:
If the parameter value is a simple string, it is returned as a
- one-item list. If the parameter value does not exist, the given
- default is returned. If the parameter value is a list, it is
- checked to contain only strings before being returned.
+ one-item list. If the parameter value does not exist, the given
+ default is returned. If the parameter value is a list, it is
+ checked to contain only strings before being returned.
"""
values = self.data.get(param, None)
Returns:
A regular expression pattern which can be used to
- split a string. The regular expression makes sure that the
- resulting names are stripped and that repeated delimiters
- are ignored. It may still create empty fields on occasion. The
- code needs to filter those.
+ split a string. The regular expression makes sure that the
+ resulting names are stripped and that repeated delimiters
+ are ignored. It may still create empty fields on occasion. The
+ code needs to filter those.
"""
delimiter_set = set(self.data.get('delimiters', default))
if not delimiter_set:
Returns:
A filter function that takes a target string as the argument and
- returns True if it fully matches any of the regular expressions
- otherwise returns False.
+ returns True if it fully matches any of the regular expressions
+ otherwise returns False.
"""
filters = self.get_string_list(param) or default
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+This sanitizer maps OSM data to Japanese block addresses.
+It combines block number and house number into a single housenumber
+and maps quarter and neighbourhood to place.
+"""
+
+
+from typing import Callable, List, Optional
+
+from nominatim.tokenizer.sanitizers.base import ProcessInfo
+from nominatim.tokenizer.sanitizers.config import SanitizerConfig
+from nominatim.data.place_name import PlaceName
+
+def create(_: SanitizerConfig) -> Callable[[ProcessInfo], None]:
+ """Set up the sanitizer
+ """
+ return tag_japanese
+
+def recombine_housenumber(
+ new_address: List[PlaceName],
+ tmp_housenumber: Optional[str],
+ tmp_blocknumber: Optional[str]
+) -> List[PlaceName]:
+ """ Recombine the tag of housenumber by using housenumber and blocknumber
+ """
+ if tmp_blocknumber and tmp_housenumber:
+ new_address.append(
+ PlaceName(
+ kind='housenumber',
+ name=f'{tmp_blocknumber}-{tmp_housenumber}',
+ suffix=''
+ )
+ )
+ elif tmp_blocknumber:
+ new_address.append(
+ PlaceName(
+ kind='housenumber',
+ name=tmp_blocknumber,
+ suffix=''
+ )
+ )
+ elif tmp_housenumber:
+ new_address.append(
+ PlaceName(
+ kind='housenumber',
+ name=tmp_housenumber,
+ suffix=''
+ )
+ )
+ return new_address
+
+def recombine_place(
+ new_address: List[PlaceName],
+ tmp_neighbourhood: Optional[str],
+ tmp_quarter: Optional[str]
+) -> List[PlaceName]:
+ """ Recombine the tag of place by using neighbourhood and quarter
+ """
+ if tmp_neighbourhood and tmp_quarter:
+ new_address.append(
+ PlaceName(
+ kind='place',
+ name=f'{tmp_quarter}{tmp_neighbourhood}',
+ suffix=''
+ )
+ )
+ elif tmp_neighbourhood:
+ new_address.append(
+ PlaceName(
+ kind='place',
+ name=tmp_neighbourhood,
+ suffix=''
+ )
+ )
+ elif tmp_quarter:
+ new_address.append(
+ PlaceName(
+ kind='place',
+ name=tmp_quarter,
+ suffix=''
+ )
+ )
+ return new_address
+
+def tag_japanese(obj: ProcessInfo) -> None:
+    """ Recombine Japanese address parts into common tags.
+ """
+ if obj.place.country_code != 'jp':
+ return
+ tmp_housenumber = None
+ tmp_blocknumber = None
+ tmp_neighbourhood = None
+ tmp_quarter = None
+
+ new_address = []
+ for item in obj.address:
+ if item.kind == 'housenumber':
+ tmp_housenumber = item.name
+ elif item.kind == 'block_number':
+ tmp_blocknumber = item.name
+ elif item.kind == 'neighbourhood':
+ tmp_neighbourhood = item.name
+ elif item.kind == 'quarter':
+ tmp_quarter = item.name
+ else:
+ new_address.append(item)
+
+    new_address = recombine_housenumber(new_address, tmp_housenumber, tmp_blocknumber)
+    new_address = recombine_place(new_address, tmp_neighbourhood, tmp_quarter)
+
+ obj.address = [item for item in new_address if item.name is not None]
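A minimal sketch of the sanitizer's effect; `SimpleNamespace` stands in for
the real `ProcessInfo` here and is test scaffolding only:

    from types import SimpleNamespace
    from nominatim.data.place_name import PlaceName

    obj = SimpleNamespace(
        place=SimpleNamespace(country_code='jp'),
        address=[PlaceName(kind='block_number', name='6', suffix=''),
                 PlaceName(kind='housenumber', name='2', suffix=''),
                 PlaceName(kind='neighbourhood', name='2丁目', suffix='')])
    tag_japanese(obj)
    # obj.address now holds a housenumber '6-2' and a place '2丁目'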
Returns:
ID string with a canonical form of the name. The string may
- be empty, when the analyzer cannot analyze the name at all,
- for example because the character set in use does not match.
+ be empty, when the analyzer cannot analyze the name at all,
+ for example because the character set in use does not match.
"""
def compute_variants(self, canonical_id: str) -> List[str]:
Returns:
A list of possible spelling variants. All strings must have
- been transformed with the global normalizer and
- transliterator ICU rules. Otherwise they cannot be matched
- against the input by the query frontend.
- The list may be empty, when there are no useful
- spelling variants. This may happen when an analyzer only
- usually outputs additional variants to the canonical spelling
- and there are no such variants.
+ been transformed with the global normalizer and
+ transliterator ICU rules. Otherwise they cannot be matched
+ against the input by the query frontend.
+ The list may be empty, when there are no useful
+ spelling variants. This may happen when an analyzer only
+ usually outputs additional variants to the canonical spelling
+ and there are no such variants.
"""
Returns:
A data object with configuration data. This will be handed
- as is into the `create()` function and may be
- used freely by the analysis module as needed.
+ as is into the `create()` function and may be
+ used freely by the analysis module as needed.
"""
def create(self, normalizer: Any, transliterator: Any, config: Any) -> Analyzer:
Returns:
A new analyzer instance. This must be an object that implements
- the Analyzer protocol.
+ the Analyzer protocol.
"""
import logging
from psycopg2.extras import Json, register_hstore
+from psycopg2 import DataError
from nominatim.config import Configuration
from nominatim.db.connection import connect, Cursor
for msg in conn.notices:
print(msg)
+
+
+def clean_deleted_relations(config: Configuration, age: str) -> None:
+ """ Clean deleted relations older than a given age
+ """
+ with connect(config.get_libpq_dsn()) as conn:
+ with conn.cursor() as cur:
+ try:
+ cur.execute("""SELECT place_force_delete(p.place_id)
+ FROM import_polygon_delete d, placex p
+ WHERE p.osm_type = d.osm_type AND p.osm_id = d.osm_id
+ AND age(p.indexed_date) > %s::interval""",
+ (age, ))
+ except DataError as exc:
+ raise UsageError('Invalid PostgreSQL time interval format') from exc
+ conn.commit()
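A usage sketch (the interval string is an example; any valid PostgreSQL
interval is accepted):

    # force-delete relations that have been gone from OSM for over a month
    clean_deleted_relations(config, age='1 month')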
import subprocess
import sys
from pathlib import Path
-from typing import List, Optional, Tuple, Union, cast
+from typing import List, Optional, Tuple, Union
import psutil
from psycopg2.extensions import make_dsn, parse_dsn
from nominatim.config import Configuration
from nominatim.db.connection import connect
-from nominatim.typing import DictCursorResults
from nominatim.version import NOMINATIM_VERSION
postgresql_ver: str = convert_version(conn.server_version_tuple())
with conn.cursor() as cur:
- cur.execute(f"""
- SELECT datname FROM pg_catalog.pg_database
- WHERE datname='{parse_dsn(config.get_libpq_dsn())['dbname']}'""")
- nominatim_db_exists = cast(Optional[DictCursorResults], cur.fetchall())
- if nominatim_db_exists:
- with connect(config.get_libpq_dsn()) as conn:
- postgis_ver: str = convert_version(conn.postgis_version_tuple())
- else:
- postgis_ver = "Unable to connect to database"
+ num = cur.scalar("SELECT count(*) FROM pg_catalog.pg_database WHERE datname=%s",
+ (parse_dsn(config.get_libpq_dsn())['dbname'], ))
+ nominatim_db_exists = num == 1 if isinstance(num, int) else False
+
+ if nominatim_db_exists:
+ with connect(config.get_libpq_dsn()) as conn:
+ postgis_ver: str = convert_version(conn.postgis_version_tuple())
+ else:
+ postgis_ver = "Unable to connect to database"
postgresql_config: str = get_postgresql_config(int(float(postgresql_ver)))
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Exporting a Nominatim database to SQLite.
+"""
+from typing import Set
+import logging
+from pathlib import Path
+
+import sqlalchemy as sa
+
+from nominatim.typing import SaSelect
+from nominatim.db.sqlalchemy_types import Geometry
+import nominatim.api as napi
+
+LOG = logging.getLogger()
+
+async def convert(project_dir: Path, outfile: Path, options: Set[str]) -> None:
+ """ Export an existing database to sqlite. The resulting database
+ will be usable against the Python frontend of Nominatim.
+ """
+ api = napi.NominatimAPIAsync(project_dir)
+
+ try:
+ outapi = napi.NominatimAPIAsync(project_dir,
+ {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={outfile}"})
+
+ async with api.begin() as src, outapi.begin() as dest:
+ writer = SqliteWriter(src, dest, options)
+ await writer.write()
+ finally:
+ await api.close()
+
+
+class SqliteWriter:
+ """ Worker class which creates a new SQLite database.
+ """
+
+ def __init__(self, src: napi.SearchConnection,
+ dest: napi.SearchConnection, options: Set[str]) -> None:
+ self.src = src
+ self.dest = dest
+ self.options = options
+
+
+ async def write(self) -> None:
+ """ Create the database structure and copy the data from
+ the source database to the destination.
+ """
+ await self.dest.execute(sa.select(sa.func.InitSpatialMetaData(True, 'WGS84')))
+
+ await self.create_tables()
+ await self.copy_data()
+ await self.create_indexes()
+
+
+ async def create_tables(self) -> None:
+ """ Set up the database tables.
+ """
+ if 'search' not in self.options:
+ self.dest.t.meta.remove(self.dest.t.search_name)
+
+ await self.dest.connection.run_sync(self.dest.t.meta.create_all)
+
+ # Convert all Geometry columns to Spatialite geometries
+ for table in self.dest.t.meta.sorted_tables:
+ for col in table.c:
+ if isinstance(col.type, Geometry):
+ await self.dest.execute(sa.select(
+ sa.func.RecoverGeometryColumn(table.name, col.name, 4326,
+ col.type.subtype.upper(), 'XY')))
+
+
+ async def copy_data(self) -> None:
+ """ Copy data for all registered tables.
+ """
+ for table in self.dest.t.meta.sorted_tables:
+ LOG.warning("Copying '%s'", table.name)
+ async_result = await self.src.connection.stream(self.select_from(table.name))
+
+ async for partition in async_result.partitions(10000):
+ data = [{('class_' if k == 'class' else k): getattr(r, k) for k in r._fields}
+ for r in partition]
+ await self.dest.execute(table.insert(), data)
+
+
+ async def create_indexes(self) -> None:
+ """ Add indexes necessary for the frontend.
+ """
+ # reverse place node lookup needs an extra table to simulate a
+ # partial index with adaptive buffering.
+ await self.dest.execute(sa.text(
+ """ CREATE TABLE placex_place_node_areas AS
+ SELECT place_id, ST_Expand(geometry,
+ 14.0 * exp(-0.2 * rank_search) - 0.03) as geometry
+ FROM placex
+ WHERE rank_address between 5 and 25
+ and osm_type = 'N'
+ and linked_place_id is NULL """))
+ await self.dest.execute(sa.select(
+ sa.func.RecoverGeometryColumn('placex_place_node_areas', 'geometry',
+ 4326, 'GEOMETRY', 'XY')))
+ await self.dest.execute(sa.select(sa.func.CreateSpatialIndex(
+ 'placex_place_node_areas', 'geometry')))
+
+ # Remaining indexes.
+ await self.create_spatial_index('country_grid', 'geometry')
+ await self.create_spatial_index('placex', 'geometry')
+ await self.create_spatial_index('osmline', 'linegeo')
+ await self.create_spatial_index('tiger', 'linegeo')
+ await self.create_index('placex', 'place_id')
+ await self.create_index('placex', 'parent_place_id')
+ await self.create_index('placex', 'rank_address')
+ await self.create_index('addressline', 'place_id')
+
+
+ async def create_spatial_index(self, table: str, column: str) -> None:
+ """ Create a spatial index on the given table and column.
+ """
+ await self.dest.execute(sa.select(
+ sa.func.CreateSpatialIndex(getattr(self.dest.t, table).name, column)))
+
+
+ async def create_index(self, table_name: str, column: str) -> None:
+ """ Create a simple index on the given table and column.
+ """
+ table = getattr(self.dest.t, table_name)
+ await self.dest.connection.run_sync(
+ sa.Index(f"idx_{table}_{column}", getattr(table.c, column)).create)
+
+
+ def select_from(self, table: str) -> SaSelect:
+ """ Create the SQL statement to select the source columns and rows.
+ """
+ columns = self.src.t.meta.tables[table].c
+
+ if table == 'placex':
+ # SQLite struggles with Geometries that are larger than 5MB,
+ # so simplify those.
+ return sa.select(*(c for c in columns if not isinstance(c.type, Geometry)),
+ sa.func.ST_AsText(columns.centroid).label('centroid'),
+ sa.func.ST_AsText(
+ sa.case((sa.func.ST_MemSize(columns.geometry) < 5000000,
+ columns.geometry),
+ else_=sa.func.ST_SimplifyPreserveTopology(
+ columns.geometry, 0.0001)
+ )).label('geometry'))
+
+ sql = sa.select(*(sa.func.ST_AsText(c).label(c.name)
+ if isinstance(c.type, Geometry) else c for c in columns))
+
+ return sql
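Driving the export is a single coroutine call. A sketch, assuming an
initialised project directory (both paths are placeholders):

    import asyncio
    from pathlib import Path

    from nominatim.tools import convert_sqlite

    asyncio.run(convert_sqlite.convert(Path('/srv/nominatim-project'),
                                       Path('/tmp/nominatim.sqlite'),
                                       options={'reverse', 'search'}))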
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.tools.exec_utils import run_osm2pgsql
from nominatim.errors import UsageError
-from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, \
+ POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
raise UsageError(f'{module} is too old.')
+def _require_loaded(extension_name: str, conn: Connection) -> None:
+ """ Check that the given extension is loaded. """
+ if not conn.extension_loaded(extension_name):
+ LOG.fatal('Required module %s is not loaded.', extension_name)
+ raise UsageError(f'{extension_name} is not loaded.')
+
+
+def check_existing_database_plugins(dsn: str) -> None:
+ """ Check that the database has the required plugins installed."""
+ with connect(dsn) as conn:
+ _require_version('PostgreSQL server',
+ conn.server_version_tuple(),
+ POSTGRESQL_REQUIRED_VERSION)
+ _require_version('PostGIS',
+ conn.postgis_version_tuple(),
+ POSTGIS_REQUIRED_VERSION)
+ _require_loaded('hstore', conn)
+
+
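A usage sketch (the DSN is a placeholder); the function raises a `UsageError`
as soon as one of the requirements is not met:

    check_existing_database_plugins('dbname=nominatim')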
def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None:
""" Create a new database for Nominatim and populate it with the
essential extensions.
"""
Helper functions for executing external programs.
"""
-from typing import Any, Union, Optional, Mapping, IO
-from pathlib import Path
+from typing import Any, Mapping, IO
import logging
import os
import subprocess
import urllib.request as urlrequest
-from urllib.parse import urlencode
-from nominatim.config import Configuration
from nominatim.typing import StrPath
from nominatim.version import NOMINATIM_VERSION
from nominatim.db.connection import get_pg_env
LOG = logging.getLogger()
-def run_legacy_script(script: StrPath, *args: Union[int, str],
- config: Configuration,
- throw_on_fail: bool = False) -> int:
- """ Run a Nominatim PHP script with the given arguments.
-
- Returns the exit code of the script. If `throw_on_fail` is True
- then throw a `CalledProcessError` on a non-zero exit.
- """
- cmd = ['/usr/bin/env', 'php', '-Cq',
- str(config.lib_dir.php / 'admin' / script)]
- cmd.extend([str(a) for a in args])
-
- env = config.get_os_env()
- env['NOMINATIM_DATADIR'] = str(config.lib_dir.data)
- env['NOMINATIM_SQLDIR'] = str(config.lib_dir.sql)
- env['NOMINATIM_CONFIGDIR'] = str(config.config_dir)
- env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str(config.lib_dir.module)
- if not env['NOMINATIM_OSM2PGSQL_BINARY']:
- env['NOMINATIM_OSM2PGSQL_BINARY'] = str(config.lib_dir.osm2pgsql)
-
- proc = subprocess.run(cmd, cwd=str(config.project_dir), env=env,
- check=throw_on_fail)
-
- return proc.returncode
-
-def run_api_script(endpoint: str, project_dir: Path,
- extra_env: Optional[Mapping[str, str]] = None,
- phpcgi_bin: Optional[Path] = None,
- params: Optional[Mapping[str, Any]] = None) -> int:
- """ Execute a Nominatim API function.
-
- The function needs a project directory that contains the website
- directory with the scripts to be executed. The scripts will be run
- using php_cgi. Query parameters can be added as named arguments.
-
- Returns the exit code of the script.
- """
- log = logging.getLogger()
- webdir = str(project_dir / 'website')
- query_string = urlencode(params or {})
-
- env = dict(QUERY_STRING=query_string,
- SCRIPT_NAME=f'/{endpoint}.php',
- REQUEST_URI=f'/{endpoint}.php?{query_string}',
- CONTEXT_DOCUMENT_ROOT=webdir,
- SCRIPT_FILENAME=f'{webdir}/{endpoint}.php',
- HTTP_HOST='localhost',
- HTTP_USER_AGENT='nominatim-tool',
- REMOTE_ADDR='0.0.0.0',
- DOCUMENT_ROOT=webdir,
- REQUEST_METHOD='GET',
- SERVER_PROTOCOL='HTTP/1.1',
- GATEWAY_INTERFACE='CGI/1.1',
- REDIRECT_STATUS='CGI')
-
- if extra_env:
- env.update(extra_env)
-
- if phpcgi_bin is None:
- cmd = ['/usr/bin/env', 'php-cgi']
- else:
- cmd = [str(phpcgi_bin)]
-
- proc = subprocess.run(cmd, cwd=str(project_dir), env=env,
- stdout=subprocess.PIPE,
- stderr=subprocess.PIPE,
- check=False)
-
- if proc.returncode != 0 or proc.stderr:
- if proc.stderr:
- log.error(proc.stderr.decode('utf-8').replace('\\n', '\n'))
- else:
- log.error(proc.stdout.decode('utf-8').replace('\\n', '\n'))
- return proc.returncode or 1
-
- result = proc.stdout.decode('utf-8')
- content_start = result.find('\r\n\r\n')
-
- print(result[content_start + 4:].replace('\\n', '\n'))
-
- return 0
-
-
def run_php_server(server_address: str, base_dir: StrPath) -> None:
""" Run the built-in server from the given directory.
"""
if options['tablespaces'][key]:
cmd.extend((param, options['tablespaces'][key]))
+ if options['tablespaces']['main_data']:
+ env['NOMINATIM_TABLESPACE_PLACE_DATA'] = options['tablespaces']['main_data']
+ if options['tablespaces']['main_index']:
+ env['NOMINATIM_TABLESPACE_PLACE_INDEX'] = options['tablespaces']['main_index']
+
if options.get('disable_jit', False):
env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0'
('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str),
('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool),
('MapIcon_URL', 'MAPICON_URL', str),
+ ('Search_WithinCountries', 'SEARCH_WITHIN_COUNTRIES', bool),
)
else:
TypeAlias = str
+SaLambdaSelect: TypeAlias = 'Union[sa.Select[Any], sa.StatementLambdaElement]'
SaSelect: TypeAlias = 'sa.Select[Any]'
SaScalarSelect: TypeAlias = 'sa.ScalarSelect[Any]'
SaRow: TypeAlias = 'sa.Row[Any]'
#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2022 by the Nominatim developer community.
+# Copyright (C) 2023 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Version information for Nominatim.
return f"{self.major}.{self.minor}.{self.patch_level}-{self.db_patch_level}"
-NOMINATIM_VERSION = NominatimVersion(4, 2, 99, 2)
+NOMINATIM_VERSION = NominatimVersion(4, 3, 0, 0)
POSTGRESQL_REQUIRED_VERSION = (9, 6)
POSTGIS_REQUIRED_VERSION = (2, 2)
-Subproject commit 4facd1aea451cea220261c361698b8e5f18a9327
+Subproject commit 415de9abdf2d003a5c0a0abe8e8fc139acacc2b5
# EXPERT ONLY. You should usually use the supplied osm2pgsql.
NOMINATIM_OSM2PGSQL_BINARY=
-# Directory where to find US Tiger data files to import.
-# OBSOLETE: use `nominatim add-data --tiger-data <dir>` to explicitly state
-# the directory on import
-NOMINATIM_TIGER_DATA_PATH=
-
# Directory where to find pre-computed Wikipedia importance files.
# When unset, the data is expected to be located in the project directory.
NOMINATIM_WIKIPEDIA_DATA_PATH=
-# Configuration file for special phrase import.
-# OBSOLETE: use `nominatim special-phrases --config <file>` or simply put
-# a custom phrase-settings.json into your project directory.
-NOMINATIM_PHRASE_CONFIG=
-
# Configuration file for rank assignments.
NOMINATIM_ADDRESS_LEVEL_CONFIG=address-levels.json
# under <endpoint>.php
NOMINATIM_SERVE_LEGACY_URLS=yes
+# Maximum number of connections a single API object can use. (Python API only)
+# When running Nominatim as a server, this is the maximum number
+# of connections _per worker_.
+NOMINATIM_API_POOL_SIZE=10
+
+# Timeout in seconds after which a single query to the database is cancelled.
+# The user receives a 503 response when a query times out.
+# When empty, timeouts are disabled.
+NOMINATIM_QUERY_TIMEOUT=10
+
+# Maximum time a single request is allowed to take. When the timeout is
+# exceeded, the available results are returned.
+# When empty, timeouts are disabled.
+NOMINATIM_REQUEST_TIMEOUT=60
+
+# Restrict search results to known countries.
+# If no point within the static grid of countries matches but a geometry
+# of a region is found, do not return the geometry. Return "Unable to
+# geocode" instead.
+NOMINATIM_SEARCH_WITHIN_COUNTRIES=False
+
### Log settings
#
# The following options allow to enable logging of API requests.
NOMINATIM_LOG_FILE=
# Echo raw SQL from SQLAlchemy statements.
-# Works only in command line/library use.
+# EXPERT: Works only in command line/library use.
NOMINATIM_DEBUG_SQL=no
{ column = 'extratags', type = 'hstore' },
{ column = 'geometry', type = 'geometry', projection = 'WGS84', not_null = true },
},
+ data_tablespace = os.getenv("NOMINATIM_TABLESPACE_PLACE_DATA"),
+ index_tablespace = os.getenv("NOMINATIM_TABLESPACE_PLACE_INDEX"),
indexes = {}
}
-- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰] > 0"
-- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵] > 1"
-- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶] > 2"
-- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷] > 3"
-- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸] > 4"
-- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹] > 5"
-- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺] > 6"
-- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻] > 7"
-- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼] > 8"
-- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽] > 9"
-- "[𑜺⑩⑽⒑❿➉➓⓾] > '10'"
+- "[𞥐𐒠߀𖭐꤀𖩠𑓐𑑐𑋰𑄶꩐꘠᱀᭐᮰᠐០᥆༠໐꧰႐᪐᪀᧐𑵐꯰᱐𑱐𑜰𑛀𑙐𑇐꧐꣐෦𑁦0𝟶𝟘𝟬𝟎𝟢₀⓿⓪⁰零] > 0"
+- "[𞥑𐒡߁𖭑꤁𖩡𑓑𑑑𑋱𑄷꩑꘡᱁᭑᮱᠑១᥇༡໑꧱႑᪑᪁᧑𑵑꯱᱑𑱑𑜱𑛁𑙑𑇑꧑꣑෧𑁧1𝟷𝟙𝟭𝟏𝟣₁¹①⑴⒈❶➀➊⓵一] > 1"
+- "[𞥒𐒢߂𖭒꤂𖩢𑓒𑑒𑋲𑄸꩒꘢᱂᭒᮲᠒២᥈༢໒꧲႒᪒᪂᧒𑵒꯲᱒𑱒𑜲𑛂𑙒𑇒꧒꣒෨𑁨2𝟸𝟚𝟮𝟐𝟤₂²②⑵⒉❷➁➋⓶二] > 2"
+- "[𞥓𐒣߃𖭓꤃𖩣𑓓𑑓𑋳𑄹꩓꘣᱃᭓᮳᠓៣᥉༣໓꧳႓᪓᪃᧓𑵓꯳᱓𑱓𑜳𑛃𑙓𑇓꧓꣓෩𑁩3𝟹𝟛𝟯𝟑𝟥₃³③⑶⒊❸➂➌⓷三] > 3"
+- "[𞥔𐒤߄𖭔꤄𖩤𑓔𑑔𑋴𑄺꩔꘤᱄᭔᮴᠔៤᥊༤໔꧴႔᪔᪄᧔𑵔꯴᱔𑱔𑜴𑛄𑙔𑇔꧔꣔෪𑁪4𝟺𝟜𝟰𝟒𝟦₄⁴④⑷⒋❹➃➍⓸四] > 4"
+- "[𞥕𐒥߅𖭕꤅𖩥𑓕𑑕𑋵𑄻꩕꘥᱅᭕᮵᠕៥᥋༥໕꧵႕᪕᪅᧕𑵕꯵᱕𑱕𑜵𑛅𑙕𑇕꧕꣕෫𑁫5𝟻𝟝𝟱𝟓𝟧₅⁵⑤⑸⒌❺➄➎⓹五] > 5"
+- "[𞥖𐒦߆𖭖꤆𖩦𑓖𑑖𑋶𑄼꩖꘦᱆᭖᮶᠖៦᥌༦໖꧶႖᪖᪆᧖𑵖꯶᱖𑱖𑜶𑛆𑙖𑇖꧖꣖෬𑁬6𝟼𝟞𝟲𝟔𝟨₆⁶⑥⑹⒍❻➅➏⓺六] > 6"
+- "[𞥗𐒧߇𖭗꤇𖩧𑓗𑑗𑋷𑄽꩗꘧᱇᭗᮷᠗៧᥍༧໗꧷႗᪗᪇᧗𑵗꯷᱗𑱗𑜷𑛇𑙗𑇗꧗꣗෭𑁭7𝟽𝟟𝟳𝟕𝟩₇⁷⑦⑺⒎❼➆➐⓻七] > 7"
+- "[𞥘𐒨߈𖭘꤈𖩨𑓘𑑘𑋸𑄾꩘꘨᱈᭘᮸᠘៨᥎༨໘꧸႘᪘᪈᧘𑵘꯸᱘𑱘𑜸𑛈𑙘𑇘꧘꣘෮𑁮8𝟾𝟠𝟴𝟖𝟪₈⁸⑧⑻⒏❽➇➑⓼八] > 8"
+- "[𞥙𐒩߉𖭙꤉𖩩𑓙𑑙𑋹𑄿꩙꘩᱉᭙᮹᠙៩᥏༩໙꧹႙᪙᪉᧙𑵙꯹᱙𑱙𑜹𑛉𑙙𑇙꧙꣙෯𑁯9𝟿𝟡𝟵𝟗𝟫₉⁹⑨⑼⒐❾➈➒⓽九] > 9"
+- "[𑜺⑩⑽⒑❿➉➓⓾十] > '10'"
- "[⑪⑾⒒⓫] > '11'"
- "[⑫⑿⒓⓬] > '12'"
- "[⑬⒀⒔⓭] > '13'"
whitelist: [bg,ca,cs,da,de,el,en,es,et,eu,fi,fr,gl,hu,it,ja,mg,ms,nl,no,pl,pt,ro,ru,sk,sl,sv,tr,uk,vi]
use-defaults: all
mode: append
+ - step: tag-japanese
token-analysis:
- analyzer: generic
- id: "@housenumber"
flex.set_main_tags{
boundary = {administrative = 'named'},
- landuse = 'fallback',
- place = 'always'
+ landuse = {residential = 'fallback',
+ farm = 'fallback',
+ farmyard = 'fallback',
+ industrial = 'fallback',
+ commercial = 'fallback',
+ allotments = 'fallback',
+ retail = 'fallback'},
+ place = {county = 'always',
+ district = 'always',
+ municipality = 'always',
+ city = 'always',
+ town = 'always',
+ borough = 'always',
+ village = 'always',
+ suburb = 'always',
+ hamlet = 'always',
+ croft = 'always',
+ subdivision = 'always',
+ allotments = 'always',
+ neighbourhood = 'always',
+ quarter = 'always',
+ isolated_dwelling = 'always',
+ farm = 'always',
+ city_block = 'always',
+ mountain_pass = 'always',
+ square = 'always',
+ locality = 'always'}
}
flex.set_prefilters{delete_keys = {'building', 'source', 'highway',
'addr:housenumber', 'addr:street', 'addr:city',
+ 'addr:interpolation',
'source', '*source', 'type',
'is_in:postcode', '*:wikidata', '*:wikipedia',
'*:prefix', '*:suffix', 'name:prefix:*', 'name:suffix:*',
+@SQLITE
@APIDB
Feature: Localization of search results
Feature: Object details
Testing different parameter options for details API.
+ @SQLITE
Scenario: JSON Details
When sending json details query for W297699560
Then the result is valid json
| type |
| Point |
+ @SQLITE
Scenario: JSON Details with pretty printing
When sending json details query for W297699560
| pretty |
And result has attributes geometry
And result has not attributes keywords,address,linked_places,parentof
+ @SQLITE
Scenario: JSON Details with addressdetails
When sending json details query for W297699560
| addressdetails |
Then the result is valid json
And result has attributes address
+ @SQLITE
Scenario: JSON Details with linkedplaces
When sending json details query for R123924
| linkedplaces |
Then the result is valid json
And result has attributes linked_places
+ @SQLITE
Scenario: JSON Details with hierarchy
When sending json details query for W297699560
| hierarchy |
Then the result is valid json
And result has attributes hierarchy
+ @SQLITE
Scenario: JSON Details with grouped hierarchy
When sending json details query for W297699560
| hierarchy | group_hierarchy |
Then the result is valid json
And result has attributes keywords
+ @SQLITE
Scenario Outline: JSON details with full geometry
When sending json details query for <osmid>
| polygon_geojson |
+@SQLITE
@APIDB
Feature: Object details
Check details page for correctness
+@SQLITE
@APIDB
Feature: Places by osm_type and osm_id Tests
Simple tests for errors in various response formats.
+@SQLITE
@APIDB
Feature: Places by osm_type and osm_id Tests
Simple tests for response format.
+@SQLITE
@APIDB
Feature: Geometries for reverse geocoding
Tests for returning geometries with reverse
| 1 |
Then results contain
| geotext |
- | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226143 47.1379257,9.522615 47.137917,9.5226225 47.1379098,9.5226334 47.1379052,9.5226461 47.1379037,9.5226588 47.1379056,9.5226693 47.1379107,9.5226762 47.1379181,9.5226762 47.1379268,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) |
+ | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5226142 47.1379294, ?9.5226143 47.1379257, ?9.522615 47.137917, ?9.5226225 47.1379098, ?9.5226334 47.1379052, ?9.5226461 47.1379037, ?9.5226588 47.1379056, ?9.5226693 47.1379107, ?9.5226762 47.1379181, ?9.5226762 47.1379268, ?9.5226761 47.1379308, ?9.5227366 47.1379317, ?9.5227352 47.1379753, ?9.5227608 47.1379757, ?9.5227595 47.1380148, ?9.5227355 47.1380145, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
Scenario: Polygons can be slightly simplified
| 1 | 0.00001 |
Then results contain
| geotext |
- | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5226142 47.1379294,9.5226225 47.1379098,9.5226588 47.1379056,9.5226761 47.1379308,9.5227366 47.1379317,9.5227352 47.1379753,9.5227608 47.1379757,9.5227595 47.1380148,9.5227355 47.1380145,9.5227337 47.1380692,9.5225302 47.138066)) |
+ | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5226142 47.1379294, ?9.5226225 47.1379098, ?9.5226588 47.1379056, ?9.5226761 47.1379308, ?9.5227366 47.1379317, ?9.5227352 47.1379753, ?9.5227608 47.1379757, ?9.5227595 47.1380148, ?9.5227355 47.1380145, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
Scenario: Polygons can be much simplified
| 1 | 0.9 |
Then results contain
| geotext |
- | POLYGON((9.5225302 47.138066,9.5225348 47.1379282,9.5227608 47.1379757,9.5227337 47.1380692,9.5225302 47.138066)) |
+ | ^POLYGON\(\(9.5225302 47.138066, ?9.5225348 47.1379282, ?9.5227608 47.1379757, ?9.5227337 47.1380692, ?9.5225302 47.138066\)\) |
Scenario: For polygons return the centroid as center point
+@SQLITE
@APIDB
Feature: Localization of reverse search results
+@SQLITE
@APIDB
Feature: Layer parameter in reverse geocoding
Testing correct function of layer selection while reverse geocoding
@v1-api-python-only
Scenario Outline: Search for mountain peaks begins at level 12
- When sending v1/reverse at 47.08221,9.56769
+ When sending v1/reverse at 47.08293,9.57109
| layer | zoom |
| natural | <zoom> |
Then results contain
@v1-api-python-only
- Scenario Outline: Reverse serach with manmade layers
+ Scenario Outline: Reverse search with manmade layers
When sending v1/reverse at 32.46904,-86.44439
| layer |
| <layer> |
| manmade | leisure | park |
| address | highway | residential |
| poi | leisure | pitch |
- | natural | waterway | stream |
+ | natural | waterway | river |
| natural,manmade | leisure | park |
+@SQLITE
@APIDB
Feature: Reverse geocoding
Testing the reverse function
+@SQLITE
@APIDB
Feature: Geocodejson for Reverse API
Testing correctness of geocodejson output (API version v1).
+@SQLITE
@APIDB
Feature: Geojson for Reverse API
Testing correctness of geojson output (API version v1).
+@SQLITE
@APIDB
Feature: Json output for Reverse API
Testing correctness of json and jsonv2 output (API version v1).
| polygon_text | 1 |
Then results contain
| geotext |
- | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) |
+ | ^LINESTRING\(9.5039353 47.0657546, ?9.5040437 47.0657781, ?9.5040808 47.065787, ?9.5054298 47.0661407\) |
Examples:
| format |
+@SQLITE
@APIDB
Feature: v1/reverse Parameter Tests
Tests for parameter inputs for the v1 reverse endpoint.
+@SQLITE
@APIDB
Feature: XML output for Reverse API
Testing correctness of xml output (API version v1).
| polygon_text | 1 |
Then results contain
| geotext |
- | LINESTRING(9.5039353 47.0657546,9.5040437 47.0657781,9.5040808 47.065787,9.5054298 47.0661407) |
+ | ^LINESTRING\(9.5039353 47.0657546, ?9.5040437 47.0657781, ?9.5040808 47.065787, ?9.5054298 47.0661407\) |
Scenario: Output of SVG
Feature: Search queries
Generic search result correctness
+ Scenario: Search for natural object
+ When sending json search query "Samina"
+ | accept-language |
+ | en |
+ Then results contain
+ | ID | class | type | display_name |
+ | 0 | waterway | river | Samina, Austria |
+
Scenario: House number search for non-street address
When sending json search query "6 Silum, Liechtenstein" with address
| accept-language |
| Liechtenstein |
And results contain
| class | type |
- | amenity | ^(pub)\|(bar) |
+ | amenity | ^(pub)\|(bar)\|(restaurant) |
#176
Scenario: Structured search restricts rank
+@SQLITE
@APIDB
Feature: Status queries
Testing status query
--- /dev/null
+@DB
+Feature: Searches in Japan
+    Tests for searches of Japanese addresses and in the Japanese language.
+ @fail-legacy
+ Scenario: A block house-number is parented to the neighbourhood
+ Given the grid with origin JP
+ | 1 | | | | 2 |
+ | | 3 | | | |
+ | | | 9 | | |
+ | | | | 6 | |
+ And the places
+ | osm | class | type | name | geometry |
+ | W1 | highway | residential | 雉子橋通り | 1,2 |
+ And the places
+ | osm | class | type | housenr | addr+block_number | addr+neighbourhood | geometry |
+ | N3 | amenity | restaurant | 2 | 6 | 2丁目 | 3 |
+ And the places
+ | osm | class | type | name | geometry |
+ | N9 | place | neighbourhood | 2丁目 | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | N6 | place | quarter | 加瀬 | 6 |
+ When importing
+ Then placex contains
+ | object | parent_place_id |
+ | N3 | N9 |
+ When sending search query "2丁目 6-2"
+ Then results contain
+ | osm |
+ | N3 |
When sending search query "399174"
Then results contain
| ID | type | display_name |
- | 0 | postcode | 399174 |
+ | 0 | postcode | 399174, Singapore |
@fail-legacy
When sending search query "3993 DX"
Then results contain
| ID | type | display_name |
- | 0 | postcode | 3993 DX |
+ | 0 | postcode | 3993 DX, Nederland |
When sending search query "3993dx"
Then results contain
| ID | type | display_name |
- | 0 | postcode | 3993 DX |
+ | 0 | postcode | 3993 DX, Nederland |
Examples:
| postcode |
When sending search query "399174"
Then results contain
| ID | type | display_name |
- | 0 | postcode | 399174 |
+ | 0 | postcode | 399174, Singapore |
@fail-legacy
When sending search query "675"
Then results contain
| ID | type | display_name |
- | 0 | postcode | AD675 |
+ | 0 | postcode | AD675, Andorra |
When sending search query "AD675"
Then results contain
| ID | type | display_name |
- | 0 | postcode | AD675 |
+ | 0 | postcode | AD675, Andorra |
Examples:
| postcode |
When sending search query "EH4 7EA"
Then results contain
| type | display_name |
- | postcode | EH4 7EA |
+ | postcode | EH4 7EA, United Kingdom |
When sending search query "E4 7EA"
Then results contain
| type | display_name |
- | postcode | E4 7EA |
+ | postcode | E4 7EA, United Kingdom |
def before_scenario(context, scenario):
- if 'DB' in context.tags:
+    if 'SQLITE' not in context.tags \
+ and context.config.userdata['API_TEST_DB'].startswith('sqlite:'):
+        context.scenario.skip("Not usable with SQLite database.")
+ elif 'DB' in context.tags:
context.nominatim.setup_db(context)
elif 'APIDB' in context.tags:
context.nominatim.setup_api_db()
Then placex contains exactly
| object | type | admin_level |
| R10:boundary | informal | 4 |
+
+
+ Scenario: Main tag and geometry is changed
+ When loading osm data
+ """
+ n1 x40 y40
+ n2 x40.0001 y40
+ n3 x40.0001 y40.0001
+ n4 x40 y40.0001
+ w5 Tbuilding=house,name=Foo Nn1,n2,n3,n4,n1
+ """
+ Then place contains exactly
+ | object | type |
+ | W5:building | house |
+
+ When updating osm data
+ """
+ n1 x39.999 y40
+ w5 Tbuilding=terrace,name=Bar Nn1,n2,n3,n4,n1
+ """
+ Then place contains exactly
+ | object | type |
+ | W5:building | terrace |
be picked up by dotenv and creates a project directory with the
appropriate website scripts.
"""
- dsn = 'pgsql:dbname={}'.format(dbname)
+ if dbname.startswith('sqlite:'):
+ dsn = 'sqlite:dbname={}'.format(dbname[7:])
+ else:
+ dsn = 'pgsql:dbname={}'.format(dbname)
if self.db_host:
dsn += ';host=' + self.db_host
if self.db_port:
"""
self.write_nominatim_config(self.api_test_db)
+ if self.api_test_db.startswith('sqlite:'):
+ return
+
if not self.api_db_done:
self.api_db_done = True
cli.nominatim(module_dir='',
osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
cli_args=cmdline,
- phpcgi_path='',
environ=self.test_env)
import nominatim.api as napi
from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools import convert_sqlite
import nominatim.api.logging as loglib
class APITester:
testapi.async_to_sync(testapi.create_tables())
proc = SQLPreprocessor(temp_db_conn, testapi.api.config)
- proc.run_sql_file(temp_db_conn, 'functions/address_lookup.sql')
proc.run_sql_file(temp_db_conn, 'functions/ranking.sql')
loglib.set_log_output('text')
print(loglib.get_and_disable())
testapi.api.close()
+
+
+@pytest.fixture(params=['postgres_db', 'sqlite_db'])
+def frontend(request, event_loop, tmp_path):
+ if request.param == 'sqlite_db':
+ db = str(tmp_path / 'test_nominatim_python_unittest.sqlite')
+
+ def mkapi(apiobj, options={'reverse'}):
+ event_loop.run_until_complete(convert_sqlite.convert(Path('/invalid'),
+ db, options))
+ return napi.NominatimAPI(Path('/invalid'),
+ {'NOMINATIM_DATABASE_DSN': f"sqlite:dbname={db}",
+ 'NOMINATIM_USE_US_TIGER_DATA': 'yes'})
+ elif request.param == 'postgres_db':
+ def mkapi(apiobj, options=None):
+ return apiobj.api
+
+ return mkapi
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Provides dummy implementations of ASGIAdaptor for testing.
+"""
+from collections import namedtuple
+
+import nominatim.api.v1.server_glue as glue
+from nominatim.config import Configuration
+
+class FakeError(BaseException):
+
+ def __init__(self, msg, status):
+ self.msg = msg
+ self.status = status
+
+ def __str__(self):
+ return f'{self.status} -- {self.msg}'
+
+FakeResponse = namedtuple('FakeResponse', ['status', 'output', 'content_type'])
+
+class FakeAdaptor(glue.ASGIAdaptor):
+
+ def __init__(self, params=None, headers=None, config=None):
+ self.params = params or {}
+ self.headers = headers or {}
+ self._config = config or Configuration(None)
+
+
+ def get(self, name, default=None):
+ return self.params.get(name, default)
+
+
+ def get_header(self, name, default=None):
+ return self.headers.get(name, default)
+
+
+ def error(self, msg, status=400):
+ return FakeError(msg, status)
+
+
+ def create_response(self, status, output, num_results):
+ return FakeResponse(status, output, self.content_type)
+
+
+ def base_uri(self) -> str:
+ return 'http://test'
+
+ def config(self):
+ return self._config
+
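A sketch of the adaptor in a unit test (all values are made up):

    adaptor = FakeAdaptor(params={'q': 'Berlin'},
                          headers={'accept-language': 'en'})
    assert adaptor.get('q') == 'Berlin'
    assert adaptor.get_header('accept-language') == 'en'
    assert isinstance(adaptor.error('boom', 500), FakeError)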
('COUNTRY', 'COUNTRY'),
('POSTCODE', 'POSTCODE')])
def test_phrase_compatible(ptype, ttype):
- assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype])
+ assert query.PhraseType[ptype].compatible_with(query.TokenType[ttype], False)
@pytest.mark.parametrize('ptype', ['COUNTRY', 'POSTCODE'])
def test_phrase_incompatible(ptype):
- assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL)
+ assert not query.PhraseType[ptype].compatible_with(query.TokenType.PARTIAL, True)
def test_query_node_empty():
assert q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL) == []
assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.COUNTRY)) == 1
+
+
+def test_query_struct_amenity_single_word():
+ q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'bar')])
+ q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+ q.add_token(query.TokenRange(0, 1), query.TokenType.PARTIAL, mktoken(1))
+ q.add_token(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM, mktoken(2))
+ q.add_token(query.TokenRange(0, 1), query.TokenType.QUALIFIER, mktoken(3))
+
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 1
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 0
+
+
+def test_query_struct_amenity_two_words():
+ q = query.QueryStruct([query.Phrase(query.PhraseType.AMENITY, 'foo bar')])
+ q.add_node(query.BreakType.WORD, query.PhraseType.AMENITY)
+ q.add_node(query.BreakType.END, query.PhraseType.NONE)
+
+ for trange in [(0, 1), (1, 2)]:
+ q.add_token(query.TokenRange(*trange), query.TokenType.PARTIAL, mktoken(1))
+ q.add_token(query.TokenRange(*trange), query.TokenType.NEAR_ITEM, mktoken(2))
+ q.add_token(query.TokenRange(*trange), query.TokenType.QUALIFIER, mktoken(3))
+
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.PARTIAL)) == 1
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.NEAR_ITEM)) == 0
+ assert len(q.get_tokens(query.TokenRange(0, 1), query.TokenType.QUALIFIER)) == 1
+
+ assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.PARTIAL)) == 1
+ assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.NEAR_ITEM)) == 0
+ assert len(q.get_tokens(query.TokenRange(1, 2), query.TokenType.QUALIFIER)) == 1
+
def make_query(*args):
- q = None
+ q = QueryStruct([Phrase(PhraseType.NONE, '')])
- for tlist in args:
- if q is None:
- q = QueryStruct([Phrase(PhraseType.NONE, '')])
- else:
- q.add_node(BreakType.WORD, PhraseType.NONE)
+ for _ in range(max(inner[0] for tlist in args for inner in tlist)):
+ q.add_node(BreakType.WORD, PhraseType.NONE)
+ q.add_node(BreakType.END, PhraseType.NONE)
- start = len(q.nodes) - 1
+ for start, tlist in enumerate(args):
for end, ttype, tinfo in tlist:
for tid, word in tinfo:
q.add_token(TokenRange(start, end), ttype,
MyToken(0.5 if ttype == TokenType.PARTIAL else 0.0, tid, 1, word, True))
- q.add_node(BreakType.END, PhraseType.NONE)
return q
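With the rewrite, each positional argument describes the tokens starting at
one query node. A sketch of a two-word query (token ids are arbitrary):

    q = make_query([(1, TokenType.COUNTRY, [(2, 'de')])],
                   [(2, TokenType.PARTIAL, [(1, 'a')])])
    # node 0 carries a COUNTRY token for range 0-1,
    # node 1 a PARTIAL token for range 1-2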
assert set(search.countries.values) == {'en'}
-def test_country_search_with_confllicting_country_restriction():
+def test_country_search_with_conflicting_country_restriction():
q = make_query([(1, TokenType.COUNTRY, [(2, 'de'), (3, 'en')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs({'countries': 'fr'}))
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1', 'bounded_viewbox': True},
{'near': '10,10'}])
-def test_category_only(kwargs):
- q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_only(kwargs):
+ q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
- searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+ searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
assert len(searches) == 1
search = searches[0]
assert isinstance(search, dbs.PoiSearch)
- assert search.categories.values == [('this', 'that')]
+ assert search.qualifiers.values == [('this', 'that')]
@pytest.mark.parametrize('kwargs', [{'viewbox': '0,0,1,1'},
{}])
-def test_category_skipped(kwargs):
- q = make_query([(1, TokenType.CATEGORY, [(2, 'foo')])])
+def test_near_item_skipped(kwargs):
+ q = make_query([(1, TokenType.NEAR_ITEM, [(2, 'foo')])])
builder = SearchBuilder(q, SearchDetails.from_kwargs(kwargs))
- searches = list(builder.build(TokenAssignment(category=TokenRange(0, 1))))
+ searches = list(builder.build(TokenAssignment(near_item=TokenRange(0, 1))))
assert len(searches) == 0
def test_name_only_near_search():
- q = make_query([(1, TokenType.CATEGORY, [(88, 'g')])],
+ q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
[(2, TokenType.PARTIAL, [(1, 'a')]),
(2, TokenType.WORD, [(100, 'a')])])
builder = SearchBuilder(q, SearchDetails())
searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
- category=TokenRange(0, 1))))
+ near_item=TokenRange(0, 1))))
assert len(searches) == 1
search = searches[0]
assert len(searches) == 1
search = searches[0]
+ assert isinstance(search, dbs.PlaceSearch)
+ assert search.qualifiers.values == [('foo', 'bar')]
+
+
+def test_name_with_near_item_search_with_category_mismatch():
+ q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+ [(2, TokenType.PARTIAL, [(1, 'a')]),
+ (2, TokenType.WORD, [(100, 'a')])])
+ builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+ searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+ near_item=TokenRange(0, 1))))
+
+ assert len(searches) == 0
+
+
+def test_name_with_near_item_search_with_category_match():
+ q = make_query([(1, TokenType.NEAR_ITEM, [(88, 'g')])],
+ [(2, TokenType.PARTIAL, [(1, 'a')]),
+ (2, TokenType.WORD, [(100, 'a')])])
+ builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+ ('this', 'that')]}))
+
+ searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+ near_item=TokenRange(0, 1))))
+
+ assert len(searches) == 1
+ search = searches[0]
+
assert isinstance(search, dbs.NearSearch)
assert isinstance(search.search, dbs.PlaceSearch)
+def test_name_with_qualifier_search_with_category_mismatch():
+ q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+ [(2, TokenType.PARTIAL, [(1, 'a')]),
+ (2, TokenType.WORD, [(100, 'a')])])
+ builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar')]}))
+
+ searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+ qualifier=TokenRange(0, 1))))
+
+ assert len(searches) == 0
+
+
+def test_name_with_qualifier_search_with_category_match():
+ q = make_query([(1, TokenType.QUALIFIER, [(88, 'g')])],
+ [(2, TokenType.PARTIAL, [(1, 'a')]),
+ (2, TokenType.WORD, [(100, 'a')])])
+ builder = SearchBuilder(q, SearchDetails.from_kwargs({'categories': [('foo', 'bar'),
+ ('this', 'that')]}))
+
+ searches = list(builder.build(TokenAssignment(name=TokenRange(1, 2),
+ qualifier=TokenRange(0, 1))))
+
+ assert len(searches) == 1
+ search = searches[0]
+
+ assert isinstance(search, dbs.PlaceSearch)
+ assert search.qualifiers.values == [('this', 'that')]
+
+
def test_name_only_search_with_countries():
q = make_query([(1, TokenType.PARTIAL, [(1, 'a')]),
(1, TokenType.WORD, [(100, 'a')])])
assert not search.housenumbers.values
-def make_counted_searches(name_part, name_full, address_part, address_full):
+def make_counted_searches(name_part, name_full, address_part, address_full,
+ num_address_parts=1):
q = QueryStruct([Phrase(PhraseType.NONE, '')])
- for i in range(2):
+ for i in range(1 + num_address_parts):
q.add_node(BreakType.WORD, PhraseType.NONE)
q.add_node(BreakType.END, PhraseType.NONE)
MyToken(0.5, 1, name_part, 'name_part', True))
q.add_token(TokenRange(0, 1), TokenType.WORD,
MyToken(0, 101, name_full, 'name_full', True))
- q.add_token(TokenRange(1, 2), TokenType.PARTIAL,
- MyToken(0.5, 2, address_part, 'address_part', True))
- q.add_token(TokenRange(1, 2), TokenType.WORD,
- MyToken(0, 102, address_full, 'address_full', True))
+ for i in range(num_address_parts):
+ q.add_token(TokenRange(i + 1, i + 2), TokenType.PARTIAL,
+ MyToken(0.5, 2, address_part, 'address_part', True))
+ q.add_token(TokenRange(i + 1, i + 2), TokenType.WORD,
+ MyToken(0, 102, address_full, 'address_full', True))
builder = SearchBuilder(q, SearchDetails())
return list(builder.build(TokenAssignment(name=TokenRange(0, 1),
- address=[TokenRange(1, 2)])))
+ address=[TokenRange(1, 1 + num_address_parts)])))
def test_infrequent_partials_in_name():
{('name_vector', 'lookup_all'), ('nameaddress_vector', 'restrict')}
-def test_frequent_partials_in_name_but_not_in_address():
- searches = make_counted_searches(10000, 1, 1, 1)
+def test_frequent_partials_in_name_and_address():
+ searches = make_counted_searches(9999, 1, 9999, 1)
- assert len(searches) == 1
- search = searches[0]
+ assert len(searches) == 2
- assert isinstance(search, dbs.PlaceSearch)
- assert len(search.lookups) == 2
- assert len(search.rankings) == 2
+ assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
+ searches.sort(key=lambda s: s.penalty)
- assert set((l.column, l.lookup_type) for l in search.lookups) == \
- {('nameaddress_vector', 'lookup_all'), ('name_vector', 'restrict')}
+ assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
+ {('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
+ assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
+ {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
-def test_frequent_partials_in_name_and_address():
- searches = make_counted_searches(10000, 1, 10000, 1)
+def test_too_frequent_partials_in_name_and_address():
+ searches = make_counted_searches(20000, 1, 10000, 1)
- assert len(searches) == 2
+ assert len(searches) == 1
assert all(isinstance(s, dbs.PlaceSearch) for s in searches)
searches.sort(key=lambda s: s.penalty)
assert set((l.column, l.lookup_type) for l in searches[0].lookups) == \
{('name_vector', 'lookup_any'), ('nameaddress_vector', 'restrict')}
- assert set((l.column, l.lookup_type) for l in searches[1].lookups) == \
- {('nameaddress_vector', 'lookup_all'), ('name_vector', 'lookup_all')}
assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1
- assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY
+ assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
assert not query.nodes[2].starting
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
- assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
- assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
@pytest.mark.asyncio
assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1
- assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY
+ assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM
assert not query.nodes[2].starting
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
- assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
- assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
@pytest.mark.asyncio
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Test data types for search queries.
+"""
+import pytest
+
+import nominatim.api.search.query as nq
+
+def test_token_range_equal():
+ assert nq.TokenRange(2, 3) == nq.TokenRange(2, 3)
+ assert not (nq.TokenRange(2, 3) != nq.TokenRange(2, 3))
+
+
+@pytest.mark.parametrize('lop,rop', [((1, 2), (3, 4)),
+ ((3, 4), (3, 5)),
+ ((10, 12), (11, 12))])
+def test_token_range_unequal(lop, rop):
+ assert not (nq.TokenRange(*lop) == nq.TokenRange(*rop))
+ assert nq.TokenRange(*lop) != nq.TokenRange(*rop)
+
+
+def test_token_range_lt():
+ assert nq.TokenRange(1, 3) < nq.TokenRange(10, 12)
+ assert nq.TokenRange(5, 6) < nq.TokenRange(7, 8)
+ assert nq.TokenRange(1, 4) < nq.TokenRange(4, 5)
+    assert not (nq.TokenRange(5, 6) < nq.TokenRange(5, 6))
+    assert not (nq.TokenRange(10, 11) < nq.TokenRange(4, 5))
+
+
+def test_token_range_gt():
+    assert nq.TokenRange(3, 4) > nq.TokenRange(1, 2)
+    assert nq.TokenRange(100, 200) > nq.TokenRange(10, 11)
+    assert nq.TokenRange(10, 11) > nq.TokenRange(4, 10)
+    assert not (nq.TokenRange(5, 6) > nq.TokenRange(5, 6))
+    assert not (nq.TokenRange(1, 2) > nq.TokenRange(3, 4))
+    assert not (nq.TokenRange(4, 10) > nq.TokenRange(3, 5))
+
+
+def test_token_range_unimplemented_ops():
+ with pytest.raises(TypeError):
+ nq.TokenRange(1, 3) <= nq.TokenRange(10, 12)
+ with pytest.raises(TypeError):
+ nq.TokenRange(1, 3) >= nq.TokenRange(10, 12)
def test_find_none(apiobj):
assert len(run_search(apiobj, 0.0, ['xx'])) == 0
+
+
+@pytest.mark.parametrize('coord,numres', [((0.5, 1), 1), ((10, 10), 0)])
+def test_find_near(apiobj, coord, numres):
+ apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
+ apiobj.add_country_name('ro', {'name': 'România'})
+
+ results = run_search(apiobj, 0.0, ['ro'],
+ details=SearchDetails(near=napi.Point(*coord),
+ near_radius=0.1))
+
+ assert len(results) == numres
+
+
+class TestCountryParameters:
+
+ @pytest.fixture(autouse=True)
+ def fill_database(self, apiobj):
+ apiobj.add_placex(place_id=55, class_='boundary', type='administrative',
+ rank_search=4, rank_address=4,
+ name={'name': 'Lolaland'},
+ country_code='yw',
+ centroid=(10, 10),
+ geometry='POLYGON((9.5 9.5, 9.5 10.5, 10.5 10.5, 10.5 9.5, 9.5 9.5))')
+ apiobj.add_country('ro', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
+ apiobj.add_country_name('ro', {'name': 'România'})
+
+
+ @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+ napi.GeometryFormat.KML,
+ napi.GeometryFormat.SVG,
+ napi.GeometryFormat.TEXT])
+ @pytest.mark.parametrize('cc', ['yw', 'ro'])
+ def test_return_geometries(self, apiobj, geom, cc):
+ results = run_search(apiobj, 0.5, [cc],
+ details=SearchDetails(geometry_output=geom))
+
+ assert len(results) == 1
+ assert geom.name.lower() in results[0].geometry
+
+
+ @pytest.mark.parametrize('pid,rids', [(76, [55]), (55, [])])
+ def test_exclude_place_id(self, apiobj, pid, rids):
+ results = run_search(apiobj, 0.5, ['yw', 'ro'],
+ details=SearchDetails(excluded=[pid]))
+
+ assert [r.place_id for r in results] == rids
+
+
+ @pytest.mark.parametrize('viewbox,rids', [((9, 9, 11, 11), [55]),
+ ((-10, -10, -3, -3), [])])
+ def test_bounded_viewbox_in_placex(self, apiobj, viewbox, rids):
+ results = run_search(apiobj, 0.5, ['yw'],
+ details=SearchDetails.from_kwargs({'viewbox': viewbox,
+ 'bounded_viewbox': True}))
+
+ assert [r.place_id for r in results] == rids
+
+
+ @pytest.mark.parametrize('viewbox,numres', [((0, 0, 1, 1), 1),
+ ((-10, -10, -3, -3), 0)])
+ def test_bounded_viewbox_in_fallback(self, apiobj, viewbox, numres):
+ results = run_search(apiobj, 0.5, ['ro'],
+ details=SearchDetails.from_kwargs({'viewbox': viewbox,
+ 'bounded_viewbox': True}))
+
+ assert len(results) == numres
FieldLookup, FieldRanking, RankedTokens
-def run_search(apiobj, global_penalty, cat, cat_penalty=None,
+def run_search(apiobj, global_penalty, cat, cat_penalty=None, ccodes=[],
details=SearchDetails()):
class PlaceSearchData:
penalty = 0.0
postcodes = WeightedStrings([], [])
- countries = WeightedStrings([], [])
+ countries = WeightedStrings(ccodes, [0.0] * len(ccodes))
housenumbers = WeightedStrings([], [])
qualifiers = WeightedStrings([], [])
lookups = [FieldLookup('name_vector', [56], 'lookup_all')]
rankings = []
+ if ccodes is not None:
+ details.countries = ccodes
+
place_search = PlaceSearch(0.0, PlaceSearchData(), 2)
if cat_penalty is None:
assert not run_search(apiobj, 0.4, [('this', 'that')])
+def test_no_appropriate_results_inner_query(apiobj):
+ apiobj.add_placex(place_id=100, country_code='us',
+ centroid=(5.6, 4.3),
+ geometry='POLYGON((0.0 0.0, 10.0 0.0, 10.0 2.0, 0.0 2.0, 0.0 0.0))')
+ apiobj.add_search_name(100, names=[56], country_code='us',
+ centroid=(5.6, 4.3))
+ apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994))
+
+ assert not run_search(apiobj, 0.4, [('amenity', 'bank')])
+
+
class TestNearSearch:
@pytest.fixture(autouse=True)
assert [r.place_id for r in results] == [22]
+
+ @pytest.mark.parametrize('cc,rid', [('us', 22), ('mx', 23)])
+ def test_restrict_by_country(self, apiobj, cc, rid):
+ apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994),
+ country_code='us')
+ apiobj.add_placex(place_id=122, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994),
+ country_code='mx')
+ apiobj.add_placex(place_id=23, class_='amenity', type='bank',
+ centroid=(-10.3001, 56.9),
+ country_code='mx')
+ apiobj.add_placex(place_id=123, class_='amenity', type='bank',
+ centroid=(-10.3001, 56.9),
+ country_code='us')
+
+ results = run_search(apiobj, 0.1, [('amenity', 'bank')], ccodes=[cc, 'fr'])
+
+ assert [r.place_id for r in results] == [rid]
+
+
+ @pytest.mark.parametrize('excluded,rid', [(22, 122), (122, 22)])
+ def test_exclude_place_by_id(self, apiobj, excluded, rid):
+ apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994),
+ country_code='us')
+ apiobj.add_placex(place_id=122, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994),
+ country_code='us')
+
+
+ results = run_search(apiobj, 0.1, [('amenity', 'bank')],
+ details=SearchDetails(excluded=[excluded]))
+
+ assert [r.place_id for r in results] == [rid]
+
+
+ @pytest.mark.parametrize('layer,rids', [(napi.DataLayer.POI, [22]),
+ (napi.DataLayer.MANMADE, [])])
+ def test_with_layer(self, apiobj, layer, rids):
+ apiobj.add_placex(place_id=22, class_='amenity', type='bank',
+ centroid=(5.6001, 4.2994),
+ country_code='us')
+
+ results = run_search(apiobj, 0.1, [('amenity', 'bank')],
+ details=SearchDetails(layers=layer))
+
+ assert [r.place_id for r in results] == rids
"""
Tests for running the generic place searcher.
"""
+import json
+
import pytest
import nominatim.api as napi
assert geom.name.lower() in results[0].geometry
+ @pytest.mark.parametrize('factor,npoints', [(0.0, 3), (1.0, 2)])
+ def test_return_simplified_geometry(self, apiobj, factor, npoints):
+ apiobj.add_placex(place_id=333, country_code='us',
+ centroid=(9.0, 9.0),
+ geometry='LINESTRING(8.9 9.0, 9.0 9.0, 9.1 9.0)')
+ apiobj.add_search_name(333, names=[55], country_code='us',
+ centroid=(5.6, 4.3))
+
+ lookup = FieldLookup('name_vector', [55], 'lookup_all')
+ ranking = FieldRanking('name_vector', 0.9, [RankedTokens(0.0, [21])])
+
+ results = run_search(apiobj, 0.1, [lookup], [ranking],
+ details=SearchDetails(geometry_output=napi.GeometryFormat.GEOJSON,
+ geometry_simplification=factor))
+
+ assert len(results) == 1
+ result = results[0]
+ geom = json.loads(result.geometry['geojson'])
+
+ assert result.place_id == 333
+ assert len(geom['coordinates']) == npoints
+
+
@pytest.mark.parametrize('viewbox', ['5.0,4.0,6.0,5.0', '5.7,4.0,6.0,5.0'])
- def test_prefer_viewbox(self, apiobj, viewbox):
+ @pytest.mark.parametrize('wcount,rids', [(2, [100, 101]), (20000, [100])])
+ def test_prefer_viewbox(self, apiobj, viewbox, wcount, rids):
lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
- ranking = FieldRanking('name_vector', 0.9, [RankedTokens(0.0, [21])])
+ ranking = FieldRanking('name_vector', 0.2, [RankedTokens(0.0, [21])])
results = run_search(apiobj, 0.1, [lookup], [ranking])
assert [r.place_id for r in results] == [101, 100]
- results = run_search(apiobj, 0.1, [lookup], [ranking],
+ results = run_search(apiobj, 0.1, [lookup], [ranking], count=wcount,
details=SearchDetails.from_kwargs({'viewbox': viewbox}))
- assert [r.place_id for r in results] == [100, 101]
+ assert [r.place_id for r in results] == rids
- def test_force_viewbox(self, apiobj):
+ @pytest.mark.parametrize('viewbox', ['5.0,4.0,6.0,5.0', '5.55,4.27,5.62,4.31'])
+ def test_force_viewbox(self, apiobj, viewbox):
lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
- details=SearchDetails.from_kwargs({'viewbox': '5.0,4.0,6.0,5.0',
+ details=SearchDetails.from_kwargs({'viewbox': viewbox,
'bounded_viewbox': True})
results = run_search(apiobj, 0.1, [lookup], [], details=details)
assert [r.place_id for r in results] == [100, 101]
- def test_force_near(self, apiobj):
+ @pytest.mark.parametrize('radius', [0.09, 0.11])
+ def test_force_near(self, apiobj, radius):
lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
details=SearchDetails.from_kwargs({'near': '5.6,4.3',
- 'near_radius': 0.11})
+ 'near_radius': radius})
results = run_search(apiobj, 0.1, [lookup], [], details=details)
assert [r.place_id for r in results] == [2, 92, 2000]
+ def test_lookup_only_house_qualifier(self, apiobj):
+ lookup = FieldLookup('name_vector', [1,2], 'lookup_all')
+ ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+ results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+ quals=[('place', 'house')])
+
+ assert [r.place_id for r in results] == [2, 92]
+
+
+ def test_lookup_only_street_qualifier(self, apiobj):
+        lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
+ ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+ results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+ quals=[('highway', 'residential')])
+
+ assert [r.place_id for r in results] == [1000, 2000]
+
+
+ @pytest.mark.parametrize('rank,found', [(26, True), (27, False), (30, False)])
+ def test_lookup_min_rank(self, apiobj, rank, found):
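+        # Streets rank at 26; a min_rank above that removes them and keeps only the house matches.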
+        lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
+ ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+ results = run_search(apiobj, 0.1, [lookup], [ranking], hnrs=['22'],
+ details=SearchDetails(min_rank=rank))
+
+ assert [r.place_id for r in results] == ([2, 92, 1000, 2000] if found else [2, 92])
+
+
@pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
napi.GeometryFormat.KML,
napi.GeometryFormat.SVG,
assert all(geom.name.lower() in r.geometry for r in results)
+def test_very_large_housenumber(apiobj):
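+    # A housenumber well beyond the 32-bit integer range must still be searchable.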
+ apiobj.add_placex(place_id=93, class_='place', type='house',
+ parent_place_id=2000,
+ housenumber='2467463524544', country_code='pt')
+ apiobj.add_placex(place_id=2000, class_='highway', type='residential',
+ rank_search=26, rank_address=26,
+ country_code='pt')
+    apiobj.add_search_name(2000, names=[1, 2],
+ search_rank=26, address_rank=26,
+ country_code='pt')
+
+ lookup = FieldLookup('name_vector', [1, 2], 'lookup_all')
+
+ results = run_search(apiobj, 0.1, [lookup], [], hnrs=['2467463524544'],
+ details=SearchDetails())
+
+ assert results
+ assert [r.place_id for r in results] == [93, 2000]
+
+
+@pytest.mark.parametrize('wcount,rids', [(2, [990, 991]), (30000, [990])])
+def test_name_and_postcode(apiobj, wcount, rids):
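+    # Both streets match the name token; with a high expected count the
+    # postcode 11225 singles out place 990.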
+ apiobj.add_placex(place_id=990, class_='highway', type='service',
+ rank_search=27, rank_address=27,
+ postcode='11225',
+ centroid=(10.0, 10.0),
+ geometry='LINESTRING(9.995 10, 10.005 10)')
+ apiobj.add_search_name(990, names=[111], centroid=(10.0, 10.0),
+ search_rank=27, address_rank=27)
+ apiobj.add_placex(place_id=991, class_='highway', type='service',
+ rank_search=27, rank_address=27,
+ postcode='11221',
+ centroid=(10.1, 10.1),
+ geometry='LINESTRING(9.995 10.1, 10.005 10.1)')
+ apiobj.add_search_name(991, names=[111], centroid=(10.1, 10.1),
+ search_rank=27, address_rank=27)
+ apiobj.add_postcode(place_id=100, country_code='ch', postcode='11225',
+ geometry='POINT(10 10)')
+
+ lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+ results = run_search(apiobj, 0.1, [lookup], [], pcs=['11225'], count=wcount,
+ details=SearchDetails())
+
+ assert results
+ assert [r.place_id for r in results] == rids
+
+
class TestInterpolations:
@pytest.fixture(autouse=True)
assert [r.place_id for r in results] == res + [990]
+ @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+ napi.GeometryFormat.KML,
+ napi.GeometryFormat.SVG,
+ napi.GeometryFormat.TEXT])
+ def test_osmline_with_geometries(self, apiobj, geom):
+ lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+ results = run_search(apiobj, 0.1, [lookup], [], hnrs=['21'],
+ details=SearchDetails(geometry_output=geom))
+
+ assert results[0].place_id == 992
+ assert geom.name.lower() in results[0].geometry
+
+
class TestTiger:
@pytest.fixture(autouse=True)
assert [r.place_id for r in results] == res + [990]
+ @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+ napi.GeometryFormat.KML,
+ napi.GeometryFormat.SVG,
+ napi.GeometryFormat.TEXT])
+ def test_tiger_with_geometries(self, apiobj, geom):
+ lookup = FieldLookup('name_vector', [111], 'lookup_all')
+
+ results = run_search(apiobj, 0.1, [lookup], [], hnrs=['21'],
+ details=SearchDetails(geometry_output=geom))
+
+ assert results[0].place_id == 992
+ assert geom.name.lower() in results[0].geometry
+
+
class TestLayersRank30:
@pytest.fixture(autouse=True)
@pytest.fixture(autouse=True)
def fill_database(self, apiobj):
apiobj.add_postcode(place_id=100, country_code='ch',
- parent_place_id=1000, postcode='12345')
+ parent_place_id=1000, postcode='12345',
+ geometry='POINT(17 5)')
apiobj.add_postcode(place_id=101, country_code='pl',
- parent_place_id=2000, postcode='12345')
+ parent_place_id=2000, postcode='12345',
+ geometry='POINT(-45 7)')
apiobj.add_placex(place_id=1000, class_='place', type='village',
rank_search=22, rank_address=22,
country_code='ch')
assert [r.place_id for r in results] == [100]
+
+ @pytest.mark.parametrize('coord,place_id', [((16.5, 5), 100),
+ ((-45.1, 7.004), 101)])
+ def test_lookup_near(self, apiobj, coord, place_id):
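+        # Each coordinate lies within the search radius of exactly one of the two postcode points.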
+        lookup = FieldLookup('name_vector', [1, 2], 'restrict')
+ ranking = FieldRanking('name_vector', 0.3, [RankedTokens(0.0, [10])])
+
+ results = run_search(apiobj, 0.1, ['12345'],
+ lookup=[lookup], ranking=[ranking],
+ details=SearchDetails(near=napi.Point(*coord),
+ near_radius=0.6))
+
+ assert [r.place_id for r in results] == [place_id]
+
+
+ @pytest.mark.parametrize('geom', [napi.GeometryFormat.GEOJSON,
+ napi.GeometryFormat.KML,
+ napi.GeometryFormat.SVG,
+ napi.GeometryFormat.TEXT])
+ def test_return_geometries(self, apiobj, geom):
+ results = run_search(apiobj, 0.1, ['12345'],
+ details=SearchDetails(geometry_output=geom))
+
+ assert results
+ assert all(geom.name.lower() in r.geometry for r in results)
+
+
+ @pytest.mark.parametrize('viewbox, rids', [('-46,6,-44,8', [101,100]),
+ ('16,4,18,6', [100,101])])
+ def test_prefer_viewbox(self, apiobj, viewbox, rids):
+ results = run_search(apiobj, 0.1, ['12345'],
+ details=SearchDetails.from_kwargs({'viewbox': viewbox}))
+
+ assert [r.place_id for r in results] == rids
+
+
+ @pytest.mark.parametrize('viewbox, rid', [('-46,6,-44,8', 101),
+ ('16,4,18,6', 100)])
+ def test_restrict_to_viewbox(self, apiobj, viewbox, rid):
+ results = run_search(apiobj, 0.1, ['12345'],
+ details=SearchDetails.from_kwargs({'viewbox': viewbox,
+ 'bounded_viewbox': True}))
+
+ assert [r.place_id for r in results] == [rid]
+
+
+ @pytest.mark.parametrize('coord,rids', [((17.05, 5), [100, 101]),
+ ((-45, 7.1), [101, 100])])
+ def test_prefer_near(self, apiobj, coord, rids):
+ results = run_search(apiobj, 0.1, ['12345'],
+ details=SearchDetails(near=napi.Point(*coord)))
+
+ assert [r.place_id for r in results] == rids
+
+
+ @pytest.mark.parametrize('pid,rid', [(100, 101), (101, 100)])
+ def test_exclude(self, apiobj, pid, rid):
+ results = run_search(apiobj, 0.1, ['12345'],
+ details=SearchDetails(excluded=[pid]))
+
+ assert [r.place_id for r in results] == [rid]
def make_query(*args):
- q = None
+ q = QueryStruct([Phrase(args[0][1], '')])
dummy = MyToken(3.0, 45, 1, 'foo', True)
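+    # One query node per argument: the first comes from the QueryStruct itself,
+    # the rest are appended here.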
- for btype, ptype, tlist in args:
- if q is None:
- q = QueryStruct([Phrase(ptype, '')])
- else:
- q.add_node(btype, ptype)
+ for btype, ptype, _ in args[1:]:
+ q.add_node(btype, ptype)
+ q.add_node(BreakType.END, PhraseType.NONE)
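+    # Each argument's token list holds (end, ttype) pairs; the token range
+    # starts at that argument's node index.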
- start = len(q.nodes) - 1
- for end, ttype in tlist:
+ for start, t in enumerate(args):
+ for end, ttype in t[2]:
q.add_token(TokenRange(start, end), ttype, dummy)
- q.add_node(BreakType.END, PhraseType.NONE)
-
return q
def test_single_word_poi_search():
q = make_query((BreakType.START, PhraseType.NONE,
- [(1, TokenType.CATEGORY),
+ [(1, TokenType.NEAR_ITEM),
(1, TokenType.QUALIFIER)]))
res = list(yield_token_assignments(q))
- assert res == [TokenAssignment(category=TokenRange(0, 1))]
+ assert res == [TokenAssignment(near_item=TokenRange(0, 1))]
@pytest.mark.parametrize('btype', [BreakType.WORD, BreakType.PART, BreakType.TOKEN])
@pytest.mark.parametrize('ttype', [TokenType.POSTCODE, TokenType.COUNTRY,
- TokenType.CATEGORY, TokenType.QUALIFIER])
+ TokenType.NEAR_ITEM, TokenType.QUALIFIER])
def test_housenumber_with_only_special_terms(ttype):
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.HOUSENUMBER)]),
(BreakType.WORD, PhraseType.NONE, [(2, ttype)]))
address=[TokenRange(0, 1)]))
-def test_category_at_beginning():
- q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.CATEGORY)]),
+def test_near_item_at_beginning():
+ q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(2, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(1, 2),
- category=TokenRange(0, 1)))
+ near_item=TokenRange(0, 1)))
-def test_category_at_end():
+def test_near_item_at_end():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
- (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]))
+ (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]))
check_assignments(yield_token_assignments(q),
TokenAssignment(penalty=0.1, name=TokenRange(0, 1),
- category=TokenRange(1, 2)))
+ near_item=TokenRange(1, 2)))
-def test_category_in_middle():
+def test_near_item_in_middle():
q = make_query((BreakType.START, PhraseType.NONE, [(1, TokenType.PARTIAL)]),
- (BreakType.WORD, PhraseType.NONE, [(2, TokenType.CATEGORY)]),
+ (BreakType.WORD, PhraseType.NONE, [(2, TokenType.NEAR_ITEM)]),
(BreakType.WORD, PhraseType.NONE, [(3, TokenType.PARTIAL)]))
check_assignments(yield_token_assignments(q))
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for the deletable v1 API call.
+"""
+import json
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+import psycopg2.extras
+
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
+import nominatim.api.v1.server_glue as glue
+import nominatim.api as napi
+
+@pytest_asyncio.fixture
+async def api():
+ api = napi.NominatimAPIAsync(Path('/invalid'))
+ yield api
+ await api.close()
+
+
+class TestDeletableEndPoint:
+
+ @pytest.fixture(autouse=True)
+ def setup_deletable_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
+ psycopg2.extras.register_hstore(temp_db_cursor)
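+        # Register hstore so the name column round-trips as a Python dict.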
+ table_factory('import_polygon_delete',
+ definition='osm_id bigint, osm_type char(1), class text, type text',
+ content=[(345, 'N', 'boundary', 'administrative'),
+ (781, 'R', 'landuse', 'wood'),
+ (781, 'R', 'landcover', 'grass')])
+ table_factory('placex',
+ definition="""place_id bigint, osm_id bigint, osm_type char(1),
+ class text, type text, name HSTORE, country_code char(2)""",
+ content=[(1, 345, 'N', 'boundary', 'administrative', {'old_name': 'Former'}, 'ab'),
+ (2, 781, 'R', 'landuse', 'wood', {'name': 'Wood'}, 'cd'),
+ (3, 781, 'R', 'landcover', 'grass', None, 'cd')])
+
+
+ @pytest.mark.asyncio
+ async def test_deletable(self, api):
+ a = FakeAdaptor()
+
+ resp = await glue.deletable_endpoint(api, a)
+ results = json.loads(resp.output)
+
+ results.sort(key=lambda r: r['place_id'])
+
+ assert results == [{'place_id': 1, 'country_code': 'ab', 'name': None,
+ 'osm_id': 345, 'osm_type': 'N',
+ 'class': 'boundary', 'type': 'administrative'},
+ {'place_id': 2, 'country_code': 'cd', 'name': 'Wood',
+ 'osm_id': 781, 'osm_type': 'R',
+ 'class': 'landuse', 'type': 'wood'},
+ {'place_id': 3, 'country_code': 'cd', 'name': None,
+ 'osm_id': 781, 'osm_type': 'R',
+ 'class': 'landcover', 'type': 'grass'}]
+
@pytest.mark.parametrize('idobj', (napi.PlaceID(332), napi.OsmID('W', 4),
napi.OsmID('W', 4, 'highway')))
-def test_lookup_in_placex(apiobj, idobj):
+def test_lookup_in_placex(apiobj, frontend, idobj):
import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential',
indexed_date=import_date,
geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
- result = apiobj.api.details(idobj)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(idobj)
assert result is not None
assert result.geometry == {'type': 'ST_LineString'}
-def test_lookup_in_placex_minimal_info(apiobj):
+def test_lookup_in_placex_minimal_info(apiobj, frontend):
import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential',
indexed_date=import_date,
geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
- result = apiobj.api.details(napi.PlaceID(332))
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332))
assert result is not None
assert result.geometry == {'type': 'ST_LineString'}
-def test_lookup_in_placex_with_geometry(apiobj):
+def test_lookup_in_placex_with_geometry(apiobj, frontend):
apiobj.add_placex(place_id=332,
geometry='LINESTRING(23 34, 23.1 34)')
- result = apiobj.api.details(napi.PlaceID(332), geometry_output=napi.GeometryFormat.GEOJSON)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), geometry_output=napi.GeometryFormat.GEOJSON)
assert result.geometry == {'geojson': '{"type":"LineString","coordinates":[[23,34],[23.1,34]]}'}
-def test_lookup_placex_with_address_details(apiobj):
+def test_lookup_placex_with_address_details(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential', name='Street',
country_code='pl',
country_code='pl',
rank_search=17, rank_address=16)
- result = apiobj.api.details(napi.PlaceID(332), address_details=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), address_details=True)
assert result.address_rows == [
napi.AddressLine(place_id=332, osm_object=('W', 4),
category=('highway', 'residential'),
names={'name': 'Street'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=26, distance=0.0),
+ rank_address=26, distance=0.0,
+ local_name='Street'),
napi.AddressLine(place_id=1000, osm_object=('N', 3333),
category=('place', 'suburb'),
names={'name': 'Smallplace'}, extratags={},
admin_level=13, fromarea=False, isaddress=True,
- rank_address=23, distance=0.0034),
+ rank_address=23, distance=0.0034,
+ local_name='Smallplace'),
napi.AddressLine(place_id=1001, osm_object=('N', 3334),
category=('place', 'city'),
names={'name': 'Bigplace'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=16, distance=0.0),
+ rank_address=16, distance=0.0,
+ local_name='Bigplace'),
napi.AddressLine(place_id=None, osm_object=None,
category=('place', 'country_code'),
names={'ref': 'pl'}, extratags={},
]
-def test_lookup_place_with_linked_places_none_existing(apiobj):
+def test_lookup_place_with_linked_places_none_existing(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential', name='Street',
country_code='pl', linked_place_id=45,
rank_search=27, rank_address=26)
- result = apiobj.api.details(napi.PlaceID(332), linked_places=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), linked_places=True)
assert result.linked_rows == []
-def test_lookup_place_with_linked_places_existing(apiobj):
+def test_lookup_place_with_linked_places_existing(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential', name='Street',
country_code='pl', linked_place_id=45,
country_code='pl', linked_place_id=332,
rank_search=27, rank_address=26)
- result = apiobj.api.details(napi.PlaceID(332), linked_places=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), linked_places=True)
assert result.linked_rows == [
napi.AddressLine(place_id=1001, osm_object=('W', 5),
]
-def test_lookup_place_with_parented_places_not_existing(apiobj):
+def test_lookup_place_with_parented_places_not_existing(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential', name='Street',
country_code='pl', parent_place_id=45,
rank_search=27, rank_address=26)
- result = apiobj.api.details(napi.PlaceID(332), parented_places=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), parented_places=True)
assert result.parented_rows == []
-def test_lookup_place_with_parented_places_existing(apiobj):
+def test_lookup_place_with_parented_places_existing(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential', name='Street',
country_code='pl', parent_place_id=45,
country_code='pl', parent_place_id=332,
rank_search=27, rank_address=26)
- result = apiobj.api.details(napi.PlaceID(332), parented_places=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(332), parented_places=True)
assert result.parented_rows == [
napi.AddressLine(place_id=1001, osm_object=('N', 5),
@pytest.mark.parametrize('idobj', (napi.PlaceID(4924), napi.OsmID('W', 9928)))
-def test_lookup_in_osmline(apiobj, idobj):
+def test_lookup_in_osmline(apiobj, frontend, idobj):
import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
apiobj.add_osmline(place_id=4924, osm_id=9928,
parent_place_id=12,
indexed_date=import_date,
geometry='LINESTRING(23 34, 23 35)')
- result = apiobj.api.details(idobj)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(idobj)
assert result is not None
assert result.geometry == {'type': 'ST_LineString'}
-def test_lookup_in_osmline_split_interpolation(apiobj):
+def test_lookup_in_osmline_split_interpolation(apiobj, frontend):
apiobj.add_osmline(place_id=1000, osm_id=9,
startnumber=2, endnumber=4, step=1)
apiobj.add_osmline(place_id=1001, osm_id=9,
apiobj.add_osmline(place_id=1002, osm_id=9,
startnumber=11, endnumber=20, step=1)
+ api = frontend(apiobj, options={'details'})
for i in range(1, 6):
- result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+ result = api.details(napi.OsmID('W', 9, str(i)))
assert result.place_id == 1000
for i in range(7, 11):
- result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+ result = api.details(napi.OsmID('W', 9, str(i)))
assert result.place_id == 1001
for i in range(12, 22):
- result = apiobj.api.details(napi.OsmID('W', 9, str(i)))
+ result = api.details(napi.OsmID('W', 9, str(i)))
assert result.place_id == 1002
-def test_lookup_osmline_with_address_details(apiobj):
+def test_lookup_osmline_with_address_details(apiobj, frontend):
apiobj.add_osmline(place_id=9000, osm_id=9,
startnumber=2, endnumber=4, step=1,
parent_place_id=332)
country_code='pl',
rank_search=17, rank_address=16)
- result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(9000), address_details=True)
assert result.address_rows == [
- napi.AddressLine(place_id=None, osm_object=None,
- category=('place', 'house_number'),
- names={'ref': '2'}, extratags={},
- admin_level=None, fromarea=True, isaddress=True,
- rank_address=28, distance=0.0),
napi.AddressLine(place_id=332, osm_object=('W', 4),
category=('highway', 'residential'),
names={'name': 'Street'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=26, distance=0.0),
+ rank_address=26, distance=0.0,
+ local_name='Street'),
napi.AddressLine(place_id=1000, osm_object=('N', 3333),
category=('place', 'suburb'),
names={'name': 'Smallplace'}, extratags={},
admin_level=13, fromarea=False, isaddress=True,
- rank_address=23, distance=0.0034),
+ rank_address=23, distance=0.0034,
+ local_name='Smallplace'),
napi.AddressLine(place_id=1001, osm_object=('N', 3334),
category=('place', 'city'),
names={'name': 'Bigplace'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=16, distance=0.0),
+ rank_address=16, distance=0.0,
+ local_name='Bigplace'),
napi.AddressLine(place_id=None, osm_object=None,
category=('place', 'country_code'),
names={'ref': 'pl'}, extratags={},
]
-def test_lookup_in_tiger(apiobj):
+def test_lookup_in_tiger(apiobj, frontend):
apiobj.add_tiger(place_id=4924,
parent_place_id=12,
startnumber=1, endnumber=4, step=1,
osm_type='W', osm_id=6601223,
geometry='LINESTRING(23 34, 23 35)')
- result = apiobj.api.details(napi.PlaceID(4924))
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(4924))
assert result is not None
assert result.geometry == {'type': 'ST_LineString'}
-def test_lookup_tiger_with_address_details(apiobj):
+def test_lookup_tiger_with_address_details(apiobj, frontend):
apiobj.add_tiger(place_id=9000,
startnumber=2, endnumber=4, step=1,
parent_place_id=332)
country_code='us',
rank_search=17, rank_address=16)
- result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(9000), address_details=True)
assert result.address_rows == [
- napi.AddressLine(place_id=None, osm_object=None,
- category=('place', 'house_number'),
- names={'ref': '2'}, extratags={},
- admin_level=None, fromarea=True, isaddress=True,
- rank_address=28, distance=0.0),
napi.AddressLine(place_id=332, osm_object=('W', 4),
category=('highway', 'residential'),
names={'name': 'Street'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=26, distance=0.0),
+ rank_address=26, distance=0.0,
+ local_name='Street'),
napi.AddressLine(place_id=1000, osm_object=('N', 3333),
category=('place', 'suburb'),
names={'name': 'Smallplace'}, extratags={},
admin_level=13, fromarea=False, isaddress=True,
- rank_address=23, distance=0.0034),
+ rank_address=23, distance=0.0034,
+ local_name='Smallplace'),
napi.AddressLine(place_id=1001, osm_object=('N', 3334),
category=('place', 'city'),
names={'name': 'Bigplace'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=16, distance=0.0),
+ rank_address=16, distance=0.0,
+ local_name='Bigplace'),
napi.AddressLine(place_id=None, osm_object=None,
category=('place', 'country_code'),
names={'ref': 'us'}, extratags={},
]
-def test_lookup_in_postcode(apiobj):
+def test_lookup_in_postcode(apiobj, frontend):
import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0)
apiobj.add_postcode(place_id=554,
parent_place_id=152,
indexed_date=import_date,
geometry='POINT(-9.45 5.6)')
- result = apiobj.api.details(napi.PlaceID(554))
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(554))
assert result is not None
assert result.geometry == {'type': 'ST_Point'}
-def test_lookup_postcode_with_address_details(apiobj):
+def test_lookup_postcode_with_address_details(apiobj, frontend):
apiobj.add_postcode(place_id=9000,
parent_place_id=332,
postcode='34 425',
country_code='gb',
rank_search=17, rank_address=16)
- result = apiobj.api.details(napi.PlaceID(9000), address_details=True)
+ api = frontend(apiobj, options={'details'})
+ result = api.details(napi.PlaceID(9000), address_details=True)
assert result.address_rows == [
+ napi.AddressLine(place_id=9000, osm_object=None,
+ category=('place', 'postcode'),
+ names={'ref': '34 425'}, extratags={},
+ admin_level=15, fromarea=True, isaddress=True,
+ rank_address=25, distance=0.0,
+ local_name='34 425'),
napi.AddressLine(place_id=332, osm_object=('N', 3333),
category=('place', 'suburb'),
names={'name': 'Smallplace'}, extratags={},
admin_level=13, fromarea=True, isaddress=True,
- rank_address=23, distance=0.0),
+ rank_address=23, distance=0.0,
+ local_name='Smallplace'),
napi.AddressLine(place_id=1001, osm_object=('N', 3334),
category=('place', 'city'),
names={'name': 'Bigplace'}, extratags={},
admin_level=15, fromarea=True, isaddress=True,
- rank_address=16, distance=0.0),
- napi.AddressLine(place_id=None, osm_object=None,
- category=('place', 'postcode'),
- names={'ref': '34 425'}, extratags={},
- admin_level=None, fromarea=False, isaddress=True,
- rank_address=5, distance=0.0),
+ rank_address=16, distance=0.0,
+ local_name='Bigplace'),
napi.AddressLine(place_id=None, osm_object=None,
category=('place', 'country_code'),
names={'ref': 'gb'}, extratags={},
@pytest.mark.parametrize('objid', [napi.PlaceID(1736),
napi.OsmID('W', 55),
napi.OsmID('N', 55, 'amenity')])
-def test_lookup_missing_object(apiobj, objid):
+def test_lookup_missing_object(apiobj, frontend, objid):
apiobj.add_placex(place_id=1, osm_type='N', osm_id=55,
class_='place', type='suburb')
- assert apiobj.api.details(objid) is None
+ api = frontend(apiobj, options={'details'})
+ assert api.details(objid) is None
@pytest.mark.parametrize('gtype', (napi.GeometryFormat.KML,
napi.GeometryFormat.SVG,
napi.GeometryFormat.TEXT))
-def test_lookup_unsupported_geometry(apiobj, gtype):
+def test_lookup_unsupported_geometry(apiobj, frontend, gtype):
apiobj.add_placex(place_id=332)
+ api = frontend(apiobj, options={'details'})
with pytest.raises(ValueError):
- apiobj.api.details(napi.PlaceID(332), geometry_output=gtype)
+ api.details(napi.PlaceID(332), geometry_output=gtype)
"""
Tests for lookup API call.
"""
+import json
+
import pytest
import nominatim.api as napi
-def test_lookup_empty_list(apiobj):
- assert apiobj.api.lookup([]) == []
+def test_lookup_empty_list(apiobj, frontend):
+ api = frontend(apiobj, options={'details'})
+ assert api.lookup([]) == []
-def test_lookup_non_existing(apiobj):
- assert apiobj.api.lookup((napi.PlaceID(332), napi.OsmID('W', 4),
- napi.OsmID('W', 4, 'highway'))) == []
+def test_lookup_non_existing(apiobj, frontend):
+ api = frontend(apiobj, options={'details'})
+ assert api.lookup((napi.PlaceID(332), napi.OsmID('W', 4),
+ napi.OsmID('W', 4, 'highway'))) == []
@pytest.mark.parametrize('idobj', (napi.PlaceID(332), napi.OsmID('W', 4),
napi.OsmID('W', 4, 'highway')))
-def test_lookup_single_placex(apiobj, idobj):
+def test_lookup_single_placex(apiobj, frontend, idobj):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential',
name={'name': 'Road'}, address={'city': 'Barrow'},
centroid=(23, 34),
geometry='LINESTRING(23 34, 23.1 34, 23.1 34.1, 23 34)')
- result = apiobj.api.lookup([idobj])
+ api = frontend(apiobj, options={'details'})
+ result = api.lookup([idobj])
assert len(result) == 1
assert result.geometry == {}
-def test_lookup_multiple_places(apiobj):
+def test_lookup_multiple_places(apiobj, frontend):
apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
class_='highway', type='residential',
name={'name': 'Road'}, address={'city': 'Barrow'},
geometry='LINESTRING(23 34, 23 35)')
- result = apiobj.api.lookup((napi.OsmID('W', 1),
- napi.OsmID('W', 4),
- napi.OsmID('W', 9928)))
+ api = frontend(apiobj, options={'details'})
+ result = api.lookup((napi.OsmID('W', 1),
+ napi.OsmID('W', 4),
+ napi.OsmID('W', 9928)))
assert len(result) == 2
assert set(r.place_id for r in result) == {332, 4924}
+
+
+@pytest.mark.parametrize('gtype', list(napi.GeometryFormat))
+def test_simple_place_with_geometry(apiobj, frontend, gtype):
+ apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+ class_='highway', type='residential',
+ name={'name': 'Road'}, address={'city': 'Barrow'},
+ extratags={'surface': 'paved'},
+ parent_place_id=34, linked_place_id=55,
+ admin_level=15, country_code='gb',
+ housenumber='4',
+ postcode='34425', wikipedia='en:Faa',
+ rank_search=27, rank_address=26,
+ importance=0.01,
+ centroid=(23, 34),
+ geometry='POLYGON((23 34, 23.1 34, 23.1 34.1, 23 34))')
+
+ api = frontend(apiobj, options={'details'})
+ result = api.lookup([napi.OsmID('W', 4)], geometry_output=gtype)
+
+ assert len(result) == 1
+ assert result[0].place_id == 332
+
+ if gtype == napi.GeometryFormat.NONE:
+ assert list(result[0].geometry.keys()) == []
+ else:
+ assert list(result[0].geometry.keys()) == [gtype.name.lower()]
+
+
+def test_simple_place_with_geometry_simplified(apiobj, frontend):
+ apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+ class_='highway', type='residential',
+ name={'name': 'Road'}, address={'city': 'Barrow'},
+ extratags={'surface': 'paved'},
+ parent_place_id=34, linked_place_id=55,
+ admin_level=15, country_code='gb',
+ housenumber='4',
+ postcode='34425', wikipedia='en:Faa',
+ rank_search=27, rank_address=26,
+ importance=0.01,
+ centroid=(23, 34),
+ geometry='POLYGON((23 34, 22.999 34, 23.1 34, 23.1 34.1, 23 34))')
+
+ api = frontend(apiobj, options={'details'})
+ result = api.lookup([napi.OsmID('W', 4)],
+ geometry_output=napi.GeometryFormat.GEOJSON,
+ geometry_simplification=0.1)
+
+ assert len(result) == 1
+ assert result[0].place_id == 332
+
+ geom = json.loads(result[0].geometry['geojson'])
+
+ assert geom['type'] == 'Polygon'
+ assert geom['coordinates'] == [[[23, 34], [23.1, 34], [23.1, 34.1], [23, 34]]]
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for the polygons v1 API call.
+"""
+import json
+import datetime as dt
+from pathlib import Path
+
+import pytest
+import pytest_asyncio
+
+import psycopg2.extras
+
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
+import nominatim.api.v1.server_glue as glue
+import nominatim.api as napi
+
+@pytest_asyncio.fixture
+async def api():
+ api = napi.NominatimAPIAsync(Path('/invalid'))
+ yield api
+ await api.close()
+
+
+class TestPolygonsEndPoint:
+
+ @pytest.fixture(autouse=True)
+    def setup_polygons_table(self, temp_db_cursor, table_factory, temp_db_with_extensions):
+ psycopg2.extras.register_hstore(temp_db_cursor)
+
+ self.now = dt.datetime.now()
+ self.recent = dt.datetime.now() - dt.timedelta(days=3)
+
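+        # One row updated three days ago and one just now, so the 'days'
+        # filter can tell them apart.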
+ table_factory('import_polygon_error',
+ definition="""osm_id bigint,
+ osm_type character(1),
+ class text,
+ type text,
+ name hstore,
+ country_code character varying(2),
+ updated timestamp without time zone,
+ errormessage text,
+ prevgeometry geometry(Geometry,4326),
+ newgeometry geometry(Geometry,4326)""",
+ content=[(345, 'N', 'boundary', 'administrative',
+ {'name': 'Foo'}, 'xx', self.recent,
+ 'some text', None, None),
+ (781, 'R', 'landuse', 'wood',
+ None, 'ds', self.now,
+ 'Area reduced by lots', None, None)])
+
+
+ @pytest.mark.asyncio
+ async def test_polygons_simple(self, api):
+ a = FakeAdaptor()
+
+ resp = await glue.polygons_endpoint(api, a)
+ results = json.loads(resp.output)
+
+ results.sort(key=lambda r: (r['osm_type'], r['osm_id']))
+
+ assert results == [{'osm_type': 'N', 'osm_id': 345,
+ 'class': 'boundary', 'type': 'administrative',
+ 'name': 'Foo', 'country_code': 'xx',
+ 'errormessage': 'some text',
+ 'updated': self.recent.isoformat(sep=' ', timespec='seconds')},
+ {'osm_type': 'R', 'osm_id': 781,
+ 'class': 'landuse', 'type': 'wood',
+ 'name': None, 'country_code': 'ds',
+ 'errormessage': 'Area reduced by lots',
+ 'updated': self.now.isoformat(sep=' ', timespec='seconds')}]
+
+
+ @pytest.mark.asyncio
+ async def test_polygons_days(self, api):
+ a = FakeAdaptor()
+ a.params['days'] = '2'
+
+ resp = await glue.polygons_endpoint(api, a)
+ results = json.loads(resp.output)
+
+ assert [r['osm_id'] for r in results] == [781]
+
+
+ @pytest.mark.asyncio
+ async def test_polygons_class(self, api):
+ a = FakeAdaptor()
+ a.params['class'] = 'landuse'
+
+ resp = await glue.polygons_endpoint(api, a)
+ results = json.loads(resp.output)
+
+ assert [r['osm_id'] for r in results] == [781]
+
+
+ @pytest.mark.asyncio
+ async def test_polygons_reduced(self, api):
+ a = FakeAdaptor()
+ a.params['reduced'] = '1'
+
+ resp = await glue.polygons_endpoint(api, a)
+ results = json.loads(resp.output)
+
+ assert [r['osm_id'] for r in results] == [781]
import nominatim.api as napi
-def test_reverse_rank_30(apiobj):
+API_OPTIONS = {'reverse'}
+
+def test_reverse_rank_30(apiobj, frontend):
apiobj.add_placex(place_id=223, class_='place', type='house',
housenumber='1',
centroid=(1.3, 0.7),
geometry='POINT(1.3 0.7)')
- result = apiobj.api.reverse((1.3, 0.7))
+ api = frontend(apiobj, options=API_OPTIONS)
+ result = api.reverse((1.3, 0.7))
assert result is not None
assert result.place_id == 223
@pytest.mark.parametrize('country', ['de', 'us'])
-def test_reverse_street(apiobj, country):
+def test_reverse_street(apiobj, frontend, country):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
country_code=country,
geometry='LINESTRING(9.995 10, 10.005 10)')
- assert apiobj.api.reverse((9.995, 10)).place_id == 990
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((9.995, 10)).place_id == 990
-def test_reverse_ignore_unindexed(apiobj):
+def test_reverse_ignore_unindexed(apiobj, frontend):
apiobj.add_placex(place_id=223, class_='place', type='house',
housenumber='1',
indexed_status=2,
centroid=(1.3, 0.7),
geometry='POINT(1.3 0.7)')
- result = apiobj.api.reverse((1.3, 0.7))
+ api = frontend(apiobj, options=API_OPTIONS)
+ result = api.reverse((1.3, 0.7))
assert result is None
(0.7, napi.DataLayer.RAILWAY, 226),
(0.7, napi.DataLayer.NATURAL, 227),
(0.70003, napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
- (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225)])
-def test_reverse_rank_30_layers(apiobj, y, layer, place_id):
+ (0.70003, napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225),
+ (5, napi.DataLayer.ADDRESS, 229)])
+def test_reverse_rank_30_layers(apiobj, frontend, y, layer, place_id):
apiobj.add_placex(place_id=223, class_='place', type='house',
housenumber='1',
rank_address=30,
rank_address=0,
rank_search=30,
centroid=(1.3, 0.70005))
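+    # A house whose only name is addr:housename; it must still be found
+    # through the ADDRESS layer.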
+ apiobj.add_placex(place_id=229, class_='place', type='house',
+ name={'addr:housename': 'Old Cottage'},
+ rank_address=30,
+ rank_search=30,
+ centroid=(1.3, 5))
- assert apiobj.api.reverse((1.3, y), layers=layer).place_id == place_id
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((1.3, y), layers=layer).place_id == place_id
-def test_reverse_poi_layer_with_no_pois(apiobj):
+def test_reverse_poi_layer_with_no_pois(apiobj, frontend):
apiobj.add_placex(place_id=223, class_='place', type='house',
housenumber='1',
rank_address=30,
rank_search=30,
centroid=(1.3, 0.70001))
- assert apiobj.api.reverse((1.3, 0.70001), max_rank=29,
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((1.3, 0.70001), max_rank=29,
layers=napi.DataLayer.POI) is None
-def test_reverse_housenumber_on_street(apiobj):
+def test_reverse_housenumber_on_street(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
housenumber='23',
centroid=(10.0, 10.00001))
- assert apiobj.api.reverse((10.0, 10.0), max_rank=30).place_id == 991
- assert apiobj.api.reverse((10.0, 10.0), max_rank=27).place_id == 990
- assert apiobj.api.reverse((10.0, 10.00001), max_rank=30).place_id == 991
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((10.0, 10.0), max_rank=30).place_id == 991
+ assert api.reverse((10.0, 10.0), max_rank=27).place_id == 990
+ assert api.reverse((10.0, 10.00001), max_rank=30).place_id == 991
-def test_reverse_housenumber_interpolation(apiobj):
+def test_reverse_housenumber_interpolation(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
centroid=(10.0, 10.00001),
geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
- assert apiobj.api.reverse((10.0, 10.0)).place_id == 992
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((10.0, 10.0)).place_id == 992
-def test_reverse_housenumber_point_interpolation(apiobj):
+def test_reverse_housenumber_point_interpolation(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
centroid=(10.0, 10.00001),
geometry='POINT(10.0 10.00001)')
- res = apiobj.api.reverse((10.0, 10.0))
+ api = frontend(apiobj, options=API_OPTIONS)
+ res = api.reverse((10.0, 10.0))
assert res.place_id == 992
assert res.housenumber == '42'
-def test_reverse_tiger_number(apiobj):
+def test_reverse_tiger_number(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
centroid=(10.0, 10.00001),
geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
- assert apiobj.api.reverse((10.0, 10.0)).place_id == 992
- assert apiobj.api.reverse((10.0, 10.00001)).place_id == 992
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((10.0, 10.0)).place_id == 992
+ assert api.reverse((10.0, 10.00001)).place_id == 992
-def test_reverse_point_tiger(apiobj):
+def test_reverse_point_tiger(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
centroid=(10.0, 10.00001),
geometry='POINT(10.0 10.00001)')
- res = apiobj.api.reverse((10.0, 10.0))
+ api = frontend(apiobj, options=API_OPTIONS)
+ res = api.reverse((10.0, 10.0))
assert res.place_id == 992
assert res.housenumber == '1'
-def test_reverse_low_zoom_address(apiobj):
+def test_reverse_low_zoom_address(apiobj, frontend):
apiobj.add_placex(place_id=1001, class_='place', type='house',
housenumber='1',
rank_address=30,
geometry="""POLYGON((59.3 80.70001, 59.3001 80.70001,
59.3001 80.70101, 59.3 80.70101, 59.3 80.70001))""")
- assert apiobj.api.reverse((59.30005, 80.7005)).place_id == 1001
- assert apiobj.api.reverse((59.30005, 80.7005), max_rank=18).place_id == 1002
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((59.30005, 80.7005)).place_id == 1001
+ assert api.reverse((59.30005, 80.7005), max_rank=18).place_id == 1002
-def test_reverse_place_node_in_area(apiobj):
+def test_reverse_place_node_in_area(apiobj, frontend):
apiobj.add_placex(place_id=1002, class_='place', type='town',
name={'name': 'Town Area'},
rank_address=16,
rank_search=18,
centroid=(59.30004, 80.70055))
- assert apiobj.api.reverse((59.30004, 80.70055)).place_id == 1003
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((59.30004, 80.70055)).place_id == 1003
@pytest.mark.parametrize('layer,place_id', [(napi.DataLayer.MANMADE, 225),
(napi.DataLayer.NATURAL, 227),
(napi.DataLayer.MANMADE | napi.DataLayer.RAILWAY, 225),
(napi.DataLayer.MANMADE | napi.DataLayer.NATURAL, 225)])
-def test_reverse_larger_area_layers(apiobj, layer, place_id):
+def test_reverse_larger_area_layers(apiobj, frontend, layer, place_id):
apiobj.add_placex(place_id=225, class_='man_made', type='dam',
name={'name': 'Dam'},
rank_address=0,
rank_search=16,
centroid=(1.3, 0.70005))
- assert apiobj.api.reverse((1.3, 0.7), layers=layer).place_id == place_id
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((1.3, 0.7), layers=layer).place_id == place_id
-def test_reverse_country_lookup_no_objects(apiobj):
+def test_reverse_country_lookup_no_objects(apiobj, frontend):
apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
- assert apiobj.api.reverse((0.5, 0.5)) is None
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((0.5, 0.5)) is None
@pytest.mark.parametrize('rank', [4, 30])
-def test_reverse_country_lookup_country_only(apiobj, rank):
+def test_reverse_country_lookup_country_only(apiobj, frontend, rank):
apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
apiobj.add_placex(place_id=225, class_='place', type='country',
name={'name': 'My Country'},
country_code='xx',
centroid=(0.7, 0.7))
- assert apiobj.api.reverse((0.5, 0.5), max_rank=rank).place_id == 225
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((0.5, 0.5), max_rank=rank).place_id == 225
-def test_reverse_country_lookup_place_node_inside(apiobj):
+def test_reverse_country_lookup_place_node_inside(apiobj, frontend):
apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
apiobj.add_placex(place_id=225, class_='place', type='state',
osm_type='N',
country_code='xx',
centroid=(0.5, 0.505))
- assert apiobj.api.reverse((0.5, 0.5)).place_id == 225
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((0.5, 0.5)).place_id == 225
@pytest.mark.parametrize('gtype', list(napi.GeometryFormat))
-def test_reverse_geometry_output_placex(apiobj, gtype):
+def test_reverse_geometry_output_placex(apiobj, frontend, gtype):
apiobj.add_country('xx', 'POLYGON((0 0, 0 1, 1 1, 1 0, 0 0))')
apiobj.add_placex(place_id=1001, class_='place', type='house',
housenumber='1',
country_code='xx',
centroid=(0.5, 0.5))
- assert apiobj.api.reverse((59.3, 80.70001), geometry_output=gtype).place_id == 1001
- assert apiobj.api.reverse((0.5, 0.5), geometry_output=gtype).place_id == 1003
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((59.3, 80.70001), geometry_output=gtype).place_id == 1001
+ assert api.reverse((0.5, 0.5), geometry_output=gtype).place_id == 1003
-def test_reverse_simplified_geometry(apiobj):
+def test_reverse_simplified_geometry(apiobj, frontend):
apiobj.add_placex(place_id=1001, class_='place', type='house',
housenumber='1',
rank_address=30,
rank_search=30,
centroid=(59.3, 80.70001))
+ api = frontend(apiobj, options=API_OPTIONS)
details = dict(geometry_output=napi.GeometryFormat.GEOJSON,
geometry_simplification=0.1)
- assert apiobj.api.reverse((59.3, 80.70001), **details).place_id == 1001
+ assert api.reverse((59.3, 80.70001), **details).place_id == 1001
-def test_reverse_interpolation_geometry(apiobj):
+def test_reverse_interpolation_geometry(apiobj, frontend):
apiobj.add_osmline(place_id=992,
parent_place_id=990,
startnumber=1, endnumber=3, step=1,
centroid=(10.0, 10.00001),
geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
- assert apiobj.api.reverse((10.0, 10.0), geometry_output=napi.GeometryFormat.TEXT)\
+ api = frontend(apiobj, options=API_OPTIONS)
+ assert api.reverse((10.0, 10.0), geometry_output=napi.GeometryFormat.TEXT)\
.geometry['text'] == 'POINT(10 10.00001)'
-def test_reverse_tiger_geometry(apiobj):
+def test_reverse_tiger_geometry(apiobj, frontend):
apiobj.add_placex(place_id=990, class_='highway', type='service',
rank_search=27, rank_address=27,
name = {'name': 'My Street'},
centroid=(10.0, 10.00001),
geometry='LINESTRING(9.995 10.00001, 10.005 10.00001)')
- output = apiobj.api.reverse((10.0, 10.0),
+ api = frontend(apiobj, options=API_OPTIONS)
+ output = api.reverse((10.0, 10.0),
geometry_output=napi.GeometryFormat.GEOJSON).geometry['geojson']
assert json.loads(output) == {'coordinates': [10, 10.00001], 'type': 'Point'}
from nominatim.version import NOMINATIM_VERSION, NominatimVersion
import nominatim.api as napi
-def test_status_no_extra_info(apiobj):
- result = apiobj.api.status()
+def test_status_no_extra_info(apiobj, frontend):
+ api = frontend(apiobj)
+ result = api.status()
assert result.status == 0
assert result.message == 'OK'
assert result.data_updated is None
-def test_status_full(apiobj):
+def test_status_full(apiobj, frontend):
import_date = dt.datetime(2022, 12, 7, 14, 14, 46, 0, tzinfo=dt.timezone.utc)
apiobj.add_data('import_status',
[{'lastimportdate': import_date}])
apiobj.add_data('properties',
[{'property': 'database_version', 'value': '99.5.4-2'}])
- result = apiobj.api.status()
+ api = frontend(apiobj)
+ result = api.status()
assert result.status == 0
assert result.message == 'OK'
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for export CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture
+def run_export(tmp_path, capsys):
+ def _exec(args):
+ assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+ osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+ cli_args=['export', '--project-dir', str(tmp_path)]
+ + args)
+ return capsys.readouterr().out.split('\r\n')
+
+ return _exec
+
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj):
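+    # Street with a suburb marked isaddress=False and a city marked
+    # isaddress=True, so the suburb column stays empty in the exported CSV.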
+ apiobj.add_placex(place_id=332, osm_type='W', osm_id=4,
+ class_='highway', type='residential', name='Street',
+ country_code='pl', postcode='55674',
+ rank_search=27, rank_address=26)
+ apiobj.add_address_placex(332, fromarea=False, isaddress=False,
+ distance=0.0034,
+ place_id=1000, osm_type='N', osm_id=3333,
+ class_='place', type='suburb', name='Smallplace',
+ country_code='pl', admin_level=13,
+ rank_search=24, rank_address=23)
+ apiobj.add_address_placex(332, fromarea=True, isaddress=True,
+ place_id=1001, osm_type='N', osm_id=3334,
+ class_='place', type='city', name='Bigplace',
+ country_code='pl',
+ rank_search=17, rank_address=16)
+
+
+def test_export_default(run_export):
+ csv = run_export([])
+
+ assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_output_type(run_export):
+ csv = run_export(['--output-type', 'city'])
+
+ assert csv == ['street,suburb,city,county,state,country', ',,Bigplace,,,', '']
+
+
+def test_export_output_format(run_export):
+ csv = run_export(['--output-format', 'placeid;street;nothing;postcode'])
+
+ assert csv == ['placeid,street,nothing,postcode', '332,Street,,55674', '']
+
+
+def test_export_restrict_to_node_good(run_export):
+ csv = run_export(['--restrict-to-osm-node', '3334'])
+
+ assert csv == ['street,suburb,city,county,state,country', 'Street,,Bigplace,,,', '']
+
+
+def test_export_restrict_to_node_not_address(run_export):
+ csv = run_export(['--restrict-to-osm-node', '3333'])
+
+ assert csv == ['street,suburb,city,county,state,country', '']
import nominatim.api.v1.helpers as helper
-@pytest.mark.parametrize('inp', ['', 'abc', '12 23', 'abc -78.90, 12.456 def'])
+@pytest.mark.parametrize('inp', ['',
+ 'abc',
+ '12 23',
+ 'abc -78.90, 12.456 def',
+ '40 N 60 W'])
def test_extract_coords_no_coords(inp):
query, x, y = helper.extract_coords_from_query(inp)
class FakeRow:
def __init__(self, **kwargs):
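+        # Default parent_place_id to None so every fake row carries the attribute.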
+ if 'parent_place_id' not in kwargs:
+ kwargs['parent_place_id'] = None
for k, v in kwargs.items():
setattr(self, k, v)
self._mapping = kwargs
"""
Tests for the Python web frameworks adaptor, v1 API.
"""
-from collections import namedtuple
import json
import xml.etree.ElementTree as ET
from pathlib import Path
import pytest
-from nominatim.config import Configuration
+from fake_adaptor import FakeAdaptor, FakeError, FakeResponse
+
import nominatim.api.v1.server_glue as glue
import nominatim.api as napi
import nominatim.api.logging as loglib
-class FakeError(BaseException):
-
- def __init__(self, msg, status):
- self.msg = msg
- self.status = status
-
- def __str__(self):
- return f'{self.status} -- {self.msg}'
-
-FakeResponse = namedtuple('FakeResponse', ['status', 'output', 'content_type'])
-
-class FakeAdaptor(glue.ASGIAdaptor):
-
- def __init__(self, params=None, headers=None, config=None):
- self.params = params or {}
- self.headers = headers or {}
- self._config = config or Configuration(None)
-
-
- def get(self, name, default=None):
- return self.params.get(name, default)
-
-
- def get_header(self, name, default=None):
- return self.headers.get(name, default)
-
-
- def error(self, msg, status=400):
- return FakeError(msg, status)
-
-
- def create_response(self, status, output):
- return FakeResponse(status, output, self.content_type)
-
-
- def config(self):
- return self._config
-
# ASGIAdaptor.get_int/bool()
adaptor = FakeAdaptor(params={'format': 'json'})
assert adaptor.parse_format(napi.StatusResult, 'text') == 'json'
- assert adaptor.content_type == 'application/json'
+ assert adaptor.content_type == 'application/json; charset=utf-8'
def test_adaptor_parse_format_invalid_value():
def test_json(self):
- self.adaptor.content_type = 'application/json'
+ self.adaptor.content_type = 'application/json; charset=utf-8'
err = self.run_raise_error('TEST', 501)
assert isinstance(resp, FakeResponse)
assert resp.status == 404
assert resp.output == 'stuff\nmore stuff'
- assert resp.content_type == 'application/json'
+ assert resp.content_type == 'application/json; charset=utf-8'
def test_build_response_jsonp_with_json():
assert isinstance(resp, FakeResponse)
assert resp.status == 200
assert resp.output == 'test.func({})'
- assert resp.content_type == 'application/javascript'
+ assert resp.content_type == 'application/javascript; charset=utf-8'
def test_build_response_jsonp_without_json():
assert isinstance(resp, FakeResponse)
assert resp.status == 200
- assert resp.content_type == 'application/json'
+ assert resp.content_type == 'application/json; charset=utf-8'
@pytest.mark.asyncio
a.params['q'] = 'something'
a.params['city'] = 'ignored'
- res = await glue.search_endpoint(napi.NominatimAPIAsync(Path('/invalid')), a)
-
- assert len(json.loads(res.output)) == 1
+ with pytest.raises(FakeError, match='^400 -- .*cannot be used together'):
+        await glue.search_endpoint(napi.NominatimAPIAsync(Path('/invalid')), a)
@pytest.mark.asyncio
--- /dev/null
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2023 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for warm-up CLI function.
+"""
+import pytest
+
+import nominatim.cli
+
+@pytest.fixture(autouse=True)
+def setup_database_with_context(apiobj, table_factory):
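+    # Minimal word table for the ICU tokenizer: a full-word entry ('W')
+    # and a partial one ('w').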
+ table_factory('word',
+ definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB',
+ content=[(55, 'test', 'W', 'test', None),
+ (2, 'test', 'w', 'test', None)])
+
+ apiobj.add_data('properties',
+ [{'property': 'tokenizer', 'value': 'icu'},
+ {'property': 'tokenizer_import_normalisation', 'value': ':: lower();'},
+ {'property': 'tokenizer_import_transliteration', 'value': "'1' > '/1/'; 'ä' > 'ä '"},
+ ])
+
+
+@pytest.mark.parametrize('args', [['--search-only'], ['--reverse-only']])
+def test_warm_all(tmp_path, args):
+ assert 0 == nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
+ osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
+ cli_args=['admin', '--project-dir', str(tmp_path),
+ '--warm'] + args)
@pytest.fixture
-def cli_call(src_dir):
+def cli_call():
""" Call the nominatim main function with the correct paths set.
Returns a function that can be called with the desired CLI arguments.
"""
def _call_nominatim(*args):
return nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phpcgi_path='/usr/bin/php-cgi',
cli_args=args)
return _call_nominatim
-@pytest.fixture
-def mock_run_legacy(monkeypatch):
- mock = MockParamCapture()
- monkeypatch.setattr(nominatim.cli, 'run_legacy_script', mock)
- return mock
-
-
@pytest.fixture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
assert func.last_kwargs['host'] == '127.0.0.1'
assert func.last_kwargs['port'] == 8088
-def test_cli_export_command(cli_call, mock_run_legacy):
- assert cli_call('export', '--output-all-postcodes') == 0
-
- assert mock_run_legacy.called == 1
- assert mock_run_legacy.last_args[0] == 'export.php'
-
-
-@pytest.mark.parametrize("param,value", [('output-type', 'country'),
- ('output-format', 'street;city'),
- ('language', 'xf'),
- ('restrict-to-country', 'us'),
- ('restrict-to-osm-node', '536'),
- ('restrict-to-osm-way', '727'),
- ('restrict-to-osm-relation', '197532')
- ])
-def test_export_parameters(src_dir, tmp_path, param, value, monkeypatch):
- (tmp_path / 'admin').mkdir()
- (tmp_path / 'admin' / 'export.php').write_text(f"""<?php
- exit(strpos(implode(' ', $_SERVER['argv']), '--{param} {value}') >= 0 ? 0 : 10);
- """)
-
- monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_path)
-
- assert nominatim.cli.nominatim(module_dir='MODULE NOT AVAILABLE',
- osm2pgsql_path='OSM2PGSQL NOT AVAILABLE',
- phpcgi_path='/usr/bin/php-cgi',
- cli_args=['export', '--' + param, value]) == 0
-
-
class TestCliWithDb:
import nominatim.clicmd.admin
-@pytest.mark.parametrize("params", [('--warm', ),
- ('--warm', '--reverse-only'),
- ('--warm', '--search-only')])
-def test_admin_command_legacy(cli_call, mock_func_factory, params):
- mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script')
-
- assert cli_call('admin', *params) == 0
-
- assert mock_run_legacy.called == 1
-
-
def test_admin_command_check_database(cli_call, mock_func_factory):
mock = mock_func_factory(nominatim.tools.check_database, 'check_database')
assert mock.called == 1
+def test_admin_clean_deleted_relations(cli_call, mock_func_factory):
+ mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+ assert cli_call('admin', '--clean-deleted', '1 month') == 0
+ assert mock.called == 1
+
+
+def test_admin_clean_deleted_relations_no_age(cli_call, mock_func_factory):
+    mock = mock_func_factory(nominatim.tools.admin, 'clean_deleted_relations')
+
+    assert cli_call('admin', '--clean-deleted') == 1
+    assert mock.called == 0
+
class TestCliAdminWithDb:
@pytest.fixture(autouse=True)
result = napi.ReverseResult(napi.SourceTable.PLACEX, ('place', 'thing'),
napi.Point(1.0, -3.0),
names={'name':'Name', 'name:fr': 'Nom'},
- extratags={'extra':'Extra'})
+ extratags={'extra':'Extra'},
+ locale_name='Name',
+ display_name='Name')
monkeypatch.setattr(napi.NominatimAPI, 'reverse',
lambda *args, **kwargs: result)
assert out['type'] == 'FeatureCollection'
- def test_reverse_language(self, cli_call, tmp_path, capsys):
- result = cli_call('reverse', '--project-dir', str(tmp_path),
- '--lat', '34', '--lon', '34', '--lang', 'fr')
-
- assert result == 0
-
- out = json.loads(capsys.readouterr().out)
- assert out['name'] == 'Nom'
-
-
class TestCliLookupCall:
@pytest.fixture(autouse=True)
result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
napi.Point(1.0, -3.0),
names={'name':'Name', 'name:fr': 'Nom'},
- extratags={'extra':'Extra'})
+ extratags={'extra':'Extra'},
+ locale_name='Name',
+ display_name='Name')
monkeypatch.setattr(napi.NominatimAPI, 'lookup',
lambda *args, **kwargs: napi.SearchResults([result]))
])
def test_search(cli_call, tmp_path, capsys, monkeypatch, endpoint, params):
result = napi.SearchResult(napi.SourceTable.PLACEX, ('place', 'thing'),
- napi.Point(1.0, -3.0),
- names={'name':'Name', 'name:fr': 'Nom'},
- extratags={'extra':'Extra'})
+ napi.Point(1.0, -3.0),
+ names={'name':'Name', 'name:fr': 'Nom'},
+ extratags={'extra':'Extra'},
+ locale_name='Name',
+ display_name='Name')
monkeypatch.setattr(napi.NominatimAPI, endpoint,
lambda *args, **kwargs: napi.SearchResults([result]))
assert sanitize(country='se', postcode=postcode) == []
+@pytest.mark.parametrize("postcode", ('AD123', '123', 'AD 123', 'AD-123'))
+def test_postcode_andorra_pass(sanitize, postcode):
+ assert sanitize(country='ad', postcode=postcode) == [('postcode', 'AD123')]
+
+
+@pytest.mark.parametrize("postcode", ('AD1234', 'AD AD123', 'XX123'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_andorra_fail(sanitize, postcode):
+ assert sanitize(country='ad', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('AI-2640', '2640', 'AI 2640'))
+def test_postcode_anguilla_pass(sanitize, postcode):
+ assert sanitize(country='ai', postcode=postcode) == [('postcode', 'AI-2640')]
+
+
+@pytest.mark.parametrize("postcode", ('AI-2000', 'AI US-2640', 'AI AI-2640'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_anguilla_fail(sanitize, postcode):
+ assert sanitize(country='ai', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('BN1111', 'BN 1111', 'BN BN1111', 'BN BN 1111'))
+def test_postcode_brunei_pass(sanitize, postcode):
+ assert sanitize(country='bn', postcode=postcode) == [('postcode', 'BN1111')]
+
+
+@pytest.mark.parametrize("postcode", ('BN-1111', 'BNN1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_brunei_fail(sanitize, postcode):
+ assert sanitize(country='bn', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('IM1 1AA', 'IM11AA', 'IM IM11AA'))
+def test_postcode_isle_of_man_pass(sanitize, postcode):
+ assert sanitize(country='im', postcode=postcode) == [('postcode', 'IM1 1AA')]
+
+
+@pytest.mark.parametrize("postcode", ('IZ1 1AA', 'IM1 AA'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_isle_of_man_fail(sanitize, postcode):
+ assert sanitize(country='im', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('JE5 0LA', 'JE50LA', 'JE JE50LA', 'je JE5 0LA'))
+def test_postcode_jersey_pass(sanitize, postcode):
+ assert sanitize(country='je', postcode=postcode) == [('postcode', 'JE5 0LA')]
+
+
+@pytest.mark.parametrize("postcode", ('gb JE5 0LA', 'IM50LA', 'IM5 012'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_jersey_fail(sanitize, postcode):
+ assert sanitize(country='je', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('KY1-1234', '1-1234', 'KY 1-1234'))
+def test_postcode_cayman_islands_pass(sanitize, postcode):
+ assert sanitize(country='ky', postcode=postcode) == [('postcode', 'KY1-1234')]
+
+
+@pytest.mark.parametrize("postcode", ('KY-1234', 'KZ1-1234', 'KY1 1234', 'KY1-123', 'KY KY1-1234'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_cayman_islands_fail(sanitize, postcode):
+ assert sanitize(country='ky', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LC11 222', '11 222', '11222', 'LC 11 222'))
+def test_postcode_saint_lucia_pass(sanitize, postcode):
+ assert sanitize(country='lc', postcode=postcode) == [('postcode', 'LC11 222')]
+
+
+@pytest.mark.parametrize("postcode", ('11 2222', 'LC LC11 222'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_lucia_fail(sanitize, postcode):
+ assert sanitize(country='lc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('LV-1111', '1111', 'LV 1111', 'LV1111',))
+def test_postcode_latvia_pass(sanitize, postcode):
+ assert sanitize(country='lv', postcode=postcode) == [('postcode', 'LV-1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'LV LV-1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_latvia_fail(sanitize, postcode):
+ assert sanitize(country='lv', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('MD-1111', '1111', 'MD 1111', 'MD1111'))
+def test_postcode_moldova_pass(sanitize, postcode):
+ assert sanitize(country='md', postcode=postcode) == [('postcode', 'MD-1111')]
+
+
+@pytest.mark.parametrize("postcode", ("MD MD-1111", "MD MD1111", "MD MD 1111"))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_moldova_fail(sanitize, postcode):
+ assert sanitize(country='md', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VLT 1117', 'GDJ 1234', 'BZN 2222'))
+def test_postcode_malta_pass(sanitize, postcode):
+ assert sanitize(country='mt', postcode=postcode) == [('postcode', postcode)]
+
+
+@pytest.mark.parametrize("postcode", ('MTF 1111', 'MT MTF 1111', 'MTF1111', 'MT MTF1111'))
+def test_postcode_malta_mtarfa_pass(sanitize, postcode):
+ assert sanitize(country='mt', postcode=postcode) == [('postcode', 'MTF 1111')]
+
+
+@pytest.mark.parametrize("postcode", ('1111', 'MTMT 1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_malta_fail(sanitize, postcode):
+ assert sanitize(country='mt', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VC1111', '1111', 'VC-1111', 'VC 1111'))
+def test_postcode_saint_vincent_pass(sanitize, postcode):
+ assert sanitize(country='vc', postcode=postcode) == [('postcode', 'VC1111')]
+
+
+@pytest.mark.parametrize("postcode", ('VC11', 'VC VC1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_saint_vincent_fail(sanitize, postcode):
+ assert sanitize(country='vc', postcode=postcode) == []
+
+
+@pytest.mark.parametrize("postcode", ('VG1111', '1111', 'VG 1111', 'VG-1111'))
+def test_postcode_virgin_islands_pass(sanitize, postcode):
+ assert sanitize(country='vg', postcode=postcode) == [('postcode', 'VG1111')]
+
+
+@pytest.mark.parametrize("postcode", ('111', '11111', 'VG VG1111'))
+@pytest.mark.sanitizer_params(convert_to_address=False)
+def test_postcode_virgin_islands_fail(sanitize, postcode):
+ assert sanitize(country='vg', postcode=postcode) == []
+
+
@pytest.mark.parametrize("postcode", ('AB1', '123-456-7890', '1 as 44'))
@pytest.mark.sanitizer_params(default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_pass(sanitize, postcode):
@pytest.mark.sanitizer_params(convert_to_address=False, default_pattern='[A-Z0-9- ]{3,12}')
def test_postcode_default_pattern_fail(sanitize, postcode):
assert sanitize(country='an', postcode=postcode) == []
-
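All of the per-country cases above exercise the same normalization scheme: strip an optional country prefix, validate what remains against a country-specific pattern, and emit one canonical spelling. A self-contained sketch of that scheme for the Latvian cases (illustrative only, not the sanitizer's actual implementation):

    import re

    # Accept '1111', 'LV 1111', 'LV1111' and 'LV-1111'; normalize all of
    # them to the canonical 'LV-1111'; reject everything else.
    LV_RE = re.compile(r'(?:LV[- ]?)?(\d{4})$')

    def normalize_lv(postcode):
        m = LV_RE.match(postcode.upper())
        return f'LV-{m.group(1)}' if m else None

    assert normalize_lv('lv 1111') == 'LV-1111'
    assert normalize_lv('11111') is None       # too many digits
    assert normalize_lv('LV LV-1111') is None  # doubled prefix, per the fail cases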
--- /dev/null
+from nominatim.data.place_info import PlaceInfo
+from nominatim.data.place_name import PlaceName
+from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
+from typing import Mapping, Optional, List
+import pytest
+
+class TestTagJapanese:
+ @pytest.fixture(autouse=True)
+ def setup_country(self, def_config):
+ self.config = def_config
+
+ def run_sanitizer_on(self, type, **kwargs):
+ place = PlaceInfo({
+ 'address': kwargs,
+ 'country_code': 'jp'
+ })
+ sanitizer_args = {'step': 'tag-japanese'}
+ _, address = PlaceSanitizer([sanitizer_args], self.config).process_names(place)
+ tmp_list = [(p.name, p.kind) for p in address]
+ return sorted(tmp_list)
+
+ def test_on_address(self):
+ res = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')
+ assert res == [('bar','ref'),('baz','ref_abc'),('foo','name')]
+
+ def test_housenumber(self):
+ res = self.run_sanitizer_on('address', housenumber='2')
+ assert res == [('2','housenumber')]
+
+ def test_blocknumber(self):
+ res = self.run_sanitizer_on('address', block_number='6')
+ assert res == [('6','housenumber')]
+
+ def test_neighbourhood(self):
+ res = self.run_sanitizer_on('address', neighbourhood='8')
+ assert res == [('8','place')]
+
+ def test_quarter(self):
+ res = self.run_sanitizer_on('address', quarter='kase')
+ assert res == [('kase','place')]
+
+ def test_housenumber_blocknumber(self):
+ res = self.run_sanitizer_on('address', housenumber='2', block_number='6')
+ assert res == [('6-2','housenumber')]
+
+ def test_quarter_neighbourhood(self):
+ res = self.run_sanitizer_on('address', quarter='kase', neighbourhood='8')
+ assert res == [('kase8','place')]
+
+ def test_blocknumber_housenumber_quarter(self):
+ res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase')
+ assert res == [('6-2','housenumber'),('kase','place')]
+
+ def test_blocknumber_housenumber_quarter_neighbourhood(self):
+ res = self.run_sanitizer_on('address', block_number='6', housenumber='2', neighbourhood='8')
+ assert res == [('6-2','housenumber'),('8','place')]
+
+ def test_blocknumber_quarter_neighbourhood(self):
+ res = self.run_sanitizer_on('address', block_number='6', quarter='kase', neighbourhood='8')
+ assert res == [('6','housenumber'),('kase8','place')]
+
+ def test_blocknumber_quarter(self):
+ res = self.run_sanitizer_on('address', block_number='6', quarter='kase')
+ assert res == [('6','housenumber'),('kase','place')]
+
+ def test_blocknumber_neighbourhood(self):
+ res = self.run_sanitizer_on('address', block_number='6', neighbourhood='8')
+ assert res == [('6','housenumber'),('8','place')]
+
+ def test_housenumber_quarter_neighbourhood(self):
+ res = self.run_sanitizer_on('address', housenumber='2', quarter='kase', neighbourhood='8')
+ assert res == [('2','housenumber'),('kase8','place')]
+
+ def test_housenumber_quarter(self):
+ res = self.run_sanitizer_on('address', housenumber='2', quarter='kase')
+ assert res == [('2','housenumber'),('kase','place')]
+
+ def test_housenumber_blocknumber_neighbourhood_quarter(self):
+ res = self.run_sanitizer_on('address', block_number='6', housenumber='2', quarter='kase', neighbourhood='8')
+ assert res == [('6-2','housenumber'),('kase8','place')]
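The combination matrix above reduces to two rules: block_number and housenumber merge into a single housenumber joined with '-', and quarter and neighbourhood concatenate into a single place. A condensed sketch of just those rules (illustrative, not the sanitizer's code; passthrough of unrelated tags such as name and ref is omitted):

    def combine_japanese(addr):
        # addr: address tags, e.g. {'block_number': '6', 'housenumber': '2'}
        out = {}
        block, house = addr.get('block_number'), addr.get('housenumber')
        if block and house:
            out['housenumber'] = f'{block}-{house}'   # '6' + '2' -> '6-2'
        elif block or house:
            out['housenumber'] = block or house
        quarter, neigh = addr.get('quarter'), addr.get('neighbourhood')
        if quarter and neigh:
            out['place'] = f'{quarter}{neigh}'        # 'kase' + '8' -> 'kase8'
        elif quarter or neigh:
            out['place'] = quarter or neigh
        return out

    assert combine_japanese({'block_number': '6', 'housenumber': '2',
                             'neighbourhood': '8'}) == {'housenumber': '6-2',
                                                        'place': '8'}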
from nominatim.errors import UsageError
from nominatim.tools import admin
from nominatim.tokenizer import factory
+from nominatim.db.sql_preprocessor import SQLPreprocessor
@pytest.fixture(autouse=True)
def create_placex_table(project_env, tokenizer_mock, temp_db_cursor, placex_table):
VALUES(9988, 'N', 10000)""")
admin.analyse_indexing(project_env, osm_id='N10000')
+
+
+class TestAdminCleanDeleted:
+
+ @pytest.fixture(autouse=True)
+ def setup_polygon_delete(self, project_env, table_factory, place_table, osmline_table, temp_db_cursor, temp_db_conn, def_config, src_dir):
+ """ Set up place_force_delete function and related tables
+ """
+ self.project_env = project_env
+ self.temp_db_cursor = temp_db_cursor
+ table_factory('import_polygon_delete',
+ """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL""",
+ ((100, 'N', 'boundary', 'administrative'),
+ (145, 'N', 'boundary', 'administrative'),
+ (175, 'R', 'landcover', 'grass')))
+ temp_db_cursor.execute("""INSERT INTO placex (place_id, osm_id, osm_type, class, type, indexed_date, indexed_status)
+ VALUES(1, 100, 'N', 'boundary', 'administrative', current_date - INTERVAL '1 month', 1),
+ (2, 145, 'N', 'boundary', 'administrative', current_date - INTERVAL '3 months', 1),
+ (3, 175, 'R', 'landcover', 'grass', current_date - INTERVAL '3 months', 1)""")
+ # set up tables and triggers for utils function
+ table_factory('place_to_be_deleted',
+ """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL,
+ deferred BOOLEAN""")
+ table_factory('country_name', 'partition INT')
+ table_factory('import_polygon_error', """osm_id BIGINT,
+ osm_type CHAR(1),
+ class TEXT NOT NULL,
+ type TEXT NOT NULL""")
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION place_delete()
+ RETURNS TRIGGER AS $$
+ BEGIN RETURN NULL; END;
+ $$ LANGUAGE plpgsql;""")
+ temp_db_cursor.execute("""CREATE TRIGGER place_before_delete BEFORE DELETE ON place
+ FOR EACH ROW EXECUTE PROCEDURE place_delete();""")
+ orig_sql = def_config.lib_dir.sql
+ def_config.lib_dir.sql = src_dir / 'lib-sql'
+ sqlproc = SQLPreprocessor(temp_db_conn, def_config)
+ sqlproc.run_sql_file(temp_db_conn, 'functions/utils.sql')
+ def_config.lib_dir.sql = orig_sql
+
+
+ def test_admin_clean_deleted_no_records(self):
+ admin.clean_deleted_relations(self.project_env, age='1 year')
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+ (145, 'N', 'boundary', 'administrative', 1),
+ (175, 'R', 'landcover', 'grass', 1)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 3
+
+
+ @pytest.mark.parametrize('test_age', ['T week', '1 welk', 'P1E'])
+ def test_admin_clean_deleted_bad_age(self, test_age):
+ with pytest.raises(UsageError):
+ admin.clean_deleted_relations(self.project_env, age=test_age)
+
+
+ def test_admin_clean_deleted_partial(self):
+ admin.clean_deleted_relations(self.project_env, age='2 months')
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 1),
+ (145, 'N', 'boundary', 'administrative', 100),
+ (175, 'R', 'landcover', 'grass', 100)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 1
+
+
+ @pytest.mark.parametrize('test_age', ['1 week', 'P3D', '5 hours'])
+ def test_admin_clean_deleted(self, test_age):
+ admin.clean_deleted_relations(self.project_env, age=test_age)
+ assert self.temp_db_cursor.row_set('SELECT osm_id, osm_type, class, type, indexed_status FROM placex') == {(100, 'N', 'boundary', 'administrative', 100),
+ (145, 'N', 'boundary', 'administrative', 100),
+ (175, 'R', 'landcover', 'grass', 100)}
+ assert self.temp_db_cursor.table_rows('import_polygon_delete') == 0
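As the parametrized cases spell out, the age argument takes PostgreSQL interval strings such as '1 week' or '5 hours' as well as ISO 8601 durations such as 'P3D'; malformed values raise UsageError. A minimal usage sketch (config stands in for the project configuration the fixtures call project_env):

    from nominatim.errors import UsageError
    from nominatim.tools import admin

    try:
        # flag relations deleted from the OSM data more than two months ago
        admin.clean_deleted_relations(config, age='2 months')
    except UsageError:
        print('age must be a PostgreSQL interval or an ISO 8601 duration')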
import nominatim.tools.exec_utils as exec_utils
import nominatim.paths
-class TestRunLegacyScript:
-
- @pytest.fixture(autouse=True)
- def setup_nominatim_env(self, tmp_path, monkeypatch):
- tmp_phplib_dir = tmp_path / 'phplib'
- tmp_phplib_dir.mkdir()
- (tmp_phplib_dir / 'admin').mkdir()
-
- monkeypatch.setattr(nominatim.paths, 'PHPLIB_DIR', tmp_phplib_dir)
-
- self.phplib_dir = tmp_phplib_dir
- self.config = Configuration(tmp_path)
- self.config.set_libdirs(module='.', osm2pgsql='default_osm2pgsql',
- php=tmp_phplib_dir)
-
-
- def mk_script(self, code):
- codefile = self.phplib_dir / 'admin' / 't.php'
- codefile.write_text('<?php\n' + code + '\n')
-
- return 't.php'
-
-
- @pytest.mark.parametrize("return_code", (0, 1, 15, 255))
- def test_run_legacy_return_exit_code(self, return_code):
- fname = self.mk_script('exit({});'.format(return_code))
- assert return_code == \
- exec_utils.run_legacy_script(fname, config=self.config)
-
-
- def test_run_legacy_return_throw_on_fail(self):
- fname = self.mk_script('exit(11);')
- with pytest.raises(subprocess.CalledProcessError):
- exec_utils.run_legacy_script(fname, config=self.config,
- throw_on_fail=True)
-
-
- def test_run_legacy_return_dont_throw_on_success(self):
- fname = self.mk_script('exit(0);')
- assert exec_utils.run_legacy_script(fname, config=self.config,
- throw_on_fail=True) == 0
-
- def test_run_legacy_use_given_module_path(self):
- fname = self.mk_script("exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == '' ? 0 : 23);")
-
- assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
- def test_run_legacy_do_not_overwrite_module_path(self, monkeypatch):
- monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'other')
- fname = self.mk_script(
- "exit($_SERVER['NOMINATIM_DATABASE_MODULE_PATH'] == 'other' ? 0 : 1);")
-
- assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
- def test_run_legacy_default_osm2pgsql_binary(self, monkeypatch):
- fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'default_osm2pgsql' ? 0 : 23);")
-
- assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
- def test_run_legacy_override_osm2pgsql_binary(self, monkeypatch):
- monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', 'somethingelse')
-
- fname = self.mk_script("exit($_SERVER['NOMINATIM_OSM2PGSQL_BINARY'] == 'somethingelse' ? 0 : 23);")
-
- assert exec_utils.run_legacy_script(fname, config=self.config) == 0
-
-
-class TestRunApiScript:
-
- @staticmethod
- @pytest.fixture(autouse=True)
- def setup_project_dir(tmp_path):
- webdir = tmp_path / 'website'
- webdir.mkdir()
- (webdir / 'test.php').write_text("<?php\necho 'OK\n';")
-
-
- @staticmethod
- def test_run_api(tmp_path):
- assert exec_utils.run_api_script('test', tmp_path) == 0
-
- @staticmethod
- def test_run_api_execution_error(tmp_path):
- assert exec_utils.run_api_script('badname', tmp_path) != 0
-
- @staticmethod
- def test_run_api_with_extra_env(tmp_path):
- extra_env = dict(SCRIPT_FILENAME=str(tmp_path / 'website' / 'test.php'))
- assert exec_utils.run_api_script('badname', tmp_path, extra_env=extra_env) == 0
-
- @staticmethod
- def test_custom_phpcgi(tmp_path, capfd):
- assert exec_utils.run_api_script('test', tmp_path, phpcgi_bin='env',
- params={'q' : 'Berlin'}) == 0
- captured = capfd.readouterr()
-
- assert '?q=Berlin' in captured.out
-
- @staticmethod
- def test_fail_on_error_output(tmp_path):
- # Starting PHP 8 the PHP CLI no longer has STDERR defined as constant
- php = """
- <?php
- if(!defined('STDERR')) define('STDERR', fopen('php://stderr', 'wb'));
- fwrite(STDERR, 'WARNING'.PHP_EOL);
- """
- (tmp_path / 'website' / 'bad.php').write_text(php)
-
- assert exec_utils.run_api_script('bad', tmp_path) == 1
### run_osm2pgsql
# Now you can install all packages needed for Nominatim:
- sudo apt install -y php-cgi
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
- postgresql-12-postgis-3 \
+ nlohmann-json3-dev postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 python3-pip \
# ---------------------
#
# Tune the postgresql configuration, which is located in
-# `/etc/postgresql/12/main/postgresql.conf`. See section *Postgres Tuning* in
-# [the installation page](../admin/Installation.md#postgresql-tuning)
+# `/etc/postgresql/12/main/postgresql.conf`. See section *Tuning the PostgreSQL database*
+# in [the installation page](../admin/Installation.md#tuning-the-postgresql-database)
# for the parameters to change.
#
# Restart the postgresql service after updating this config file.
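For orientation, the settings that section walks through look roughly like the snippet below; the values are illustrative placeholders only, so size them to your machine following the linked installation page.

    # /etc/postgresql/12/main/postgresql.conf (illustrative values)
    shared_buffers = 2GB
    work_mem = 50MB
    maintenance_work_mem = 10GB
    effective_cache_size = 24GB
    synchronous_commit = off
    # for the initial import only; unsafe in production:
    # fsync = off
    # full_page_writes = off

After editing, restart the service, e.g. with `sudo systemctl restart postgresql`.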
# Now you can install all packages needed for Nominatim:
- sudo apt install -y php-cgi
sudo apt install -y build-essential cmake g++ libboost-dev libboost-system-dev \
libboost-filesystem-dev libexpat1-dev zlib1g-dev \
libbz2-dev libpq-dev liblua5.3-dev lua5.3 lua-dkjson \
- postgresql-server-dev-14 postgresql-14-postgis-3 \
+ nlohmann-json3-dev postgresql-14-postgis-3 \
postgresql-contrib-14 postgresql-14-postgis-3-scripts \
php-cli php-pgsql php-intl libicu-dev python3-dotenv \
python3-psycopg2 python3-psutil python3-jinja2 \
python3-icu python3-datrie python3-sqlalchemy \
- python3-asyncpg git
+ python3-asyncpg python3-yaml git
#
# System Configuration
# ---------------------
#
# Tune the postgresql configuration, which is located in
-# `/etc/postgresql/14/main/postgresql.conf`. See section *Postgres Tuning* in
-# [the installation page](../admin/Installation.md#postgresql-tuning)
+# `/etc/postgresql/14/main/postgresql.conf`. See section *Tuning the PostgreSQL database*
+# in [the installation page](../admin/Installation.md#tuning-the-postgresql-database)
# for the parameters to change.
#
# Restart the postgresql service after updating this config file.