set(BUILD_DOCS on CACHE BOOL "Build documentation")
set(BUILD_MANPAGE on CACHE BOOL "Build Manual Page")
set(BUILD_OSM2PGSQL on CACHE BOOL "Build osm2pgsql (expert only)")
+set(INSTALL_MUNIN_PLUGINS on CACHE BOOL "Install Munin plugins for supervising Nominatim")
#-----------------------------------------------------------------------------
# osm2pgsql (imports/updates only)
#-----------------------------------------------------------------------------
if (BUILD_MANPAGE)
- add_subdirectory(manual)
+ add_subdirectory(man)
endif()
#-----------------------------------------------------------------------------
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
+set(NOMINATIM_MUNINDIR ${CMAKE_INSTALL_FULL_DATADIR}/munin/plugins)
if (BUILD_IMPORTER)
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
settings/import-full.style
settings/import-extratags.style
settings/icu_tokenizer.yaml
- settings/icu-rules/extended-unicode-to-asccii.yaml
settings/country_settings.yaml
DESTINATION ${NOMINATIM_CONFIGDIR})
+
+install(DIRECTORY settings/icu-rules
+ DESTINATION ${NOMINATIM_CONFIGDIR})
+
+if (INSTALL_MUNIN_PLUGINS)
+ install(FILES munin/nominatim_importlag
+ munin/nominatim_query_speed
+ munin/nominatim_requests
+ DESTINATION ${NOMINATIM_MUNINDIR})
+endif()
admin
develop
api
+ customize
index.md
extra.css
styles.css
```
Now change the `NOMINATIM_DATABASE_DSN` to point to your remote server and continue
-to follow the [standard instructions for importing](/admin/Import).
+to follow the [standard instructions for importing](Import.md).
+++ /dev/null
-# Customization of the Database
-
-This section explains in detail how to configure a Nominatim import and
-the various means to use external data.
-
-## External postcode data
-
-Nominatim creates a table of known postcode centroids during import. This table
-is used for searches of postcodes and for adding postcodes to places where the
-OSM data does not provide one. These postcode centroids are mainly computed
-from the OSM data itself. In addition, Nominatim supports reading postcode
-information from an external CSV file, to supplement the postcodes that are
-missing in OSM.
-
-To enable external postcode support, simply put one CSV file per country into
-your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
-two-letter country code for which to apply the file. The file may also be
-gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
-
-The CSV file must use commas as a delimiter and have a header line. Nominatim
-expects three columns to be present: `postcode`, `lat` and `lon`. All other
-columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
-postcode centroids in WGS84.
-
-The postcode files are loaded only when there is data for the given country
-in your database. For example, if there is a `us_postcodes.csv` file in your
-project directory but you import only an excerpt of Italy, then the US postcodes
-will simply be ignored.
-
-As a rule, the external postcode data should be put into the project directory
-**before** starting the initial import. Still, you can add, remove and update the
-external postcode data at any time. Simply
-run:
-
-```
-nominatim refresh --postcodes
-```
-
-to make the changes visible in your database. Be aware, however, that the changes
-only have an immediate effect on searches for postcodes. Postcodes that were
-added to places are only updated, when they are reindexed. That usually happens
-only during replication updates.
-
-## Installing Tiger housenumber data for the US
-
-Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
-address set to complement the OSM house number data in the US. You can add
-TIGER data to your own Nominatim instance by following these steps. The
-entire US adds about 10GB to your database.
-
- 1. Get preprocessed TIGER 2021 data:
-
- cd $PROJECT_DIR
- wget https://nominatim.org/data/tiger2021-nominatim-preprocessed.csv.tar.gz
-
- 2. Import the data into your Nominatim database:
-
- nominatim add-data --tiger-data tiger2021-nominatim-preprocessed.csv.tar.gz
-
- 3. Enable use of the Tiger data in your `.env` by adding:
-
- echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
-
- 4. Apply the new settings:
-
- nominatim refresh --functions
-
-
-See the [developer's guide](../develop/data-sources.md#us-census-tiger) for more
-information on how the data got preprocessed.
-
-## Special phrases import
-
-As described in the [Importation chapter](Import.md), it is possible to
-import special phrases from the wiki with the following command:
-
-```sh
-nominatim special-phrases --import-from-wiki
-```
-
-But, it is also possible to import some phrases from a csv file.
-To do so, you have access to the following command:
-
-```sh
-nominatim special-phrases --import-from-csv <csv file>
-```
-
-Note that the two previous import commands will update the phrases from your database.
-This means that if you import some phrases from a csv file, only the phrases
-present in the csv file will be kept into the database. All other phrases will
-be removed.
-
-If you want to only add new phrases and not update the other ones you can add
-the argument `--no-replace` to the import command. For example:
-
-```sh
-nominatim special-phrases --import-from-csv <csv file> --no-replace
-```
-
-This will add the phrases present in the csv file into the database without
-removing the other ones.
`/usr/lib/systemd/system/httpd.service` contains a line `PrivateTmp=true`. If
so then Apache cannot see the `/tmp/.s.PGSQL.5432` file. It's a good security
feature, so use the
-[preferred solution](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
+[preferred solution](../appendix/Install-on-Centos-7.md#adding-selinux-security-settings).
However, you can solve this the quick and dirty way by commenting out that line and then run
Try `chmod a+r nominatim.so; chmod a+x nominatim.so`.
When running SELinux, make sure that the
-[context is set up correctly](../appendix/Install-on-Centos-7/#adding-selinux-security-settings).
+[context is set up correctly](../appendix/Install-on-Centos-7.md#adding-selinux-security-settings).
When you recently updated your operating system, updated PostgreSQL to
a new version or moved files (e.g. the build directory) you should
settings have a `NOMINATIM_` prefix to avoid conflicts with other environment
variables.
-There are lots of configuration settings you can tweak. Have a look
-at `Nominatim/settings/env.default` for a full list. Most should have a sensible default.
+There are lots of configuration settings you can tweak. A full reference
+can be found in the chapter [Configuration Settings](../customize/Settings.md).
+Most should have a sensible default.
#### Flatnode files
wget https://www.nominatim.org/data/us_postcodes.csv.gz
You can also add your own custom postcode sources, see
-[Customization of postcodes](Customization.md#external-postcode-data).
+[Customization of postcodes](../customize/Postcodes.md).
## Choosing the data to import
are a good way to reduce the database size and import time.
[Geofabrik](https://download.geofabrik.de) offers extracts for most countries.
They even have daily updates which can be used with the update process described
-[in the next section](../Update). There are also
+[in the next section](Update.md). There are also
[other providers for extracts](https://wiki.openstreetmap.org/wiki/Planet.osm#Downloading).
Please be aware that some extracts are not cut exactly along the country
import. So this option is particularly interesting if you plan to transfer the
database or reuse the space later.
+!!! warning
+    The data structures for updates are also required when adding additional data
+ after the import, for example [TIGER housenumber data](../customize/Tiger.md).
+ If you plan to use those, you must not use the `--no-updates` parameter.
+ Do a normal import, add the external data and once you are done with
+ everything run `nominatim freeze`.
+
+
### Reverse-only Imports
If you only want to use the Nominatim database for reverse lookups or
boundaries, places, streets, addresses and POI data. There are also other
import styles available which only read selected data:
-* **settings/import-admin.style**
+* **admin**
Only import administrative boundaries and places.
-* **settings/import-street.style**
+* **street**
Like the admin style but also adds streets.
-* **settings/import-address.style**
+* **address**
Import all data necessary to compute addresses down to house number level.
-* **settings/import-full.style**
+* **full**
Default style that also includes points of interest.
-* **settings/import-extratags.style**
+* **extratags**
Like the full style but also adds most of the OSM tags into the extratags
column.
extratags | 54h | 650 GB | 340 GB
You can also customize the styles further.
-A [description of the style format](../develop/Import.md#configuring-the-import)
-can be found in the development section.
+A [description of the style format](../customize/Import-Styles.md)
+can be found in the customization guide.
## Initial import of the data
```
The **project directory** is the one that you have set up at the beginning.
-See [creating the project directory](Import#creating-the-project-directory).
+See [creating the project directory](#creating-the-project-directory).
### Notes on full planet imports
in terms of RAM usage. osm2pgsql and PostgreSQL are running in parallel at
this point. PostgreSQL blocks at least the part of RAM that has been configured
with the `shared_buffers` parameter during
-[PostgreSQL tuning](Installation#postgresql-tuning)
+[PostgreSQL tuning](Installation.md#postgresql-tuning)
and needs some memory on top of that. osm2pgsql needs at least 2GB of RAM for
its internal data structures, potentially more when it has to process very large
relations. In addition it needs to maintain a cache for node locations. The size
### Testing the installation
-Run this script to verify all required tables and indices got created successfully.
+Run this script to verify that all required tables and indices got created
+successfully.
```sh
nominatim admin --check-database
running this function.
If you want to be able to search for places by their type through
-[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
+[special phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
you also need to import these key phrases like this:
```sh
need internet access for the step.
You can also import special phrases from a csv file, for more
-information please read the [Customization chapter](Customization.md).
+information please see the [Customization part](../customize/Special-Phrases.md).
The debugging UI is no longer directly provided with Nominatim. Instead we
now provide a simple Javascript application. Please refer to
-[Setting up the Nominatim UI](../Setup-Nominatim-UI) for details on how to
+[Setting up the Nominatim UI](Setup-Nominatim-UI.md) for details on how to
set up the UI.
The icons served together with the API responses have been moved to the
[nominatim-ui's Github release page](https://github.com/osm-search/nominatim-ui/releases)
and unpack it. You can use `nominatim-ui-x.x.x.tar.gz` or `nominatim-ui-x.x.x.zip`.
-Copy the example configuration into the right place:
+Next you need to adapt the UI to your installation. Custom settings need to be
+put into `dist/theme/config.theme.js`. At a minimum you need to
+set `Nominatim_API_Endpoint` to point to your Nominatim installation:
cd nominatim-ui
- cp dist/config.example.js dist/config.js
+     echo "Nominatim_Config.Nominatim_API_Endpoint='https://myserver.org/nominatim';" > dist/theme/config.theme.js
-Now adapt the configuration to your needs. You need at least
-to change the `Nominatim_API_Endpoint` to point to your Nominatim installation.
+For the full set of available settings, have a look at `dist/config.defaults.js`.
Then you can just test it locally by spinning up a webserver in the `dist`
directory. For example, with Python:
#### 3. I get different counties/states/countries when I change the zoom parameter in the reverse query. How is that possible?
This is basically the same problem as in the previous answer.
-The zoom level influences at which [search rank](https://wiki.openstreetmap.org/wiki/Nominatim/Development_overview#Country_to_street_level) Nominatim starts looking
+The zoom level influences at which [search rank](../customize/Ranking.md#search-rank) Nominatim starts looking
for the closest object. So the closest house number maybe on one side of the
border while the closest street is on the other. As the address details contain
the address of the closest object found, you might sometimes get one result,
Free-form query string to search for.
Free-form queries are processed first left-to-right and then right-to-left if that fails. So you may search for
- [pilkington avenue, birmingham](//nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
- [birmingham, pilkington avenue](//nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
+ [pilkington avenue, birmingham](https://nominatim.openstreetmap.org/search?q=pilkington+avenue,birmingham) as well as for
+ [birmingham, pilkington avenue](https://nominatim.openstreetmap.org/search?q=birmingham,+pilkington+avenue).
Commas are optional, but improve performance by reducing the complexity of the search.
--- /dev/null
+## Configuring the Import
+
+Which OSM objects are added to the database and which of the tags are used
+can be configured via the import style configuration file. This
+is a JSON file which contains a list of rules which are matched against every
+tag of every object and then assign the tag its specific role.
+
+The style to use is given by the `NOMINATIM_IMPORT_STYLE` configuration
+option. There are a number of default styles, which are explained in detail
+in the [Import section](../admin/Import.md#filtering-imported-data). These
+standard styles may be referenced by their name.
+
+You can also create your own custom style. Put the style file into your
+project directory and then set `NOMINATIM_IMPORT_STYLE` to the name of the file.
+It is always recommended to start with one of the standard styles and customize
+those. You find the standard styles under the name `import-<stylename>.style`
+in the standard Nominatim configuration path (usually `/etc/nominatim` or
+`/usr/local/etc/nominatim`).
+
+The remainder of the page describes the format of the file.
+
+### Configuration Rules
+
+A single rule looks like this:
+
+```json
+{
+ "keys" : ["key1", "key2", ...],
+ "values" : {
+ "value1" : "prop",
+ "value2" : "prop1,prop2"
+ }
+}
+```
+
+A rule first defines a list of keys to apply the rule to. This is always a list
+of strings. The string may have four forms. An empty string matches against
+any key. A string that ends in an asterisk `*` is a prefix match and accordingly
+matches against any key that starts with the given string (minus the `*`). A
+suffix match can be defined similarly with a string that starts with a `*`. Any
+other string constitutes an exact match.
+
+The second part of the rules defines a list of values and the properties that
+apply to a successful match. Value strings may be either empty, which
+means that they match any value, or describe an exact match. Prefix
+or suffix matching of values is not possible.
+
+For a rule to match, it has to find a valid combination of keys and values. The
+resulting property is that of the matched values.
+
+The rules in a configuration file are processed sequentially and the first
+match for each tag wins.
+
+A rule where key and value are the empty string is special. This defines the
+fallback when none of the rules match. The fallback is always used as a last
+resort when nothing else matches, no matter where the rule appears in the file.
+Defining multiple fallback rules is not allowed. What happens in this case,
+is undefined.
+
+### Tag Properties
+
+One or more of the following properties may be given for each tag:
+
+* `main`
+
+ A principal tag. A new row will be added for the object with key and value
+ as `class` and `type`.
+
+* `with_name`
+
+ When the tag is a principal tag (`main` property set): only really add a new
+ row, if there is any name tag found (a reference tag is not sufficient, see
+ below).
+
+* `with_name_key`
+
+ When the tag is a principal tag (`main` property set): only really add a new
+ row, if there is also a name tag that matches the key of the principal tag.
+ For example, if the main tag is `bridge=yes`, then it will only be added as
+ an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
+ If this property is set, all other names that are not domain-specific are
+ ignored.
+
+* `fallback`
+
+ When the tag is a principal tag (`main` property set): only really add a new
+ row, when no other principal tags for this object have been found. Only one
+ fallback tag can win for an object.
+
+* `operator`
+
+ When the tag is a principal tag (`main` property set): also include the
+ `operator` tag in the list of names. This is a special construct for an
+  outdated tagging practice in OSM. Fuel stations and chain restaurants
+ in particular used to have the name of the chain tagged as `operator`.
+ These days the chain can be more commonly found in the `brand` tag but
+ there is still enough old data around to warrant this special case.
+
+* `name`
+
+ Add tag to the list of names.
+
+* `ref`
+
+ Add tag to the list of names as a reference. At the moment this only means
+ that the object is not considered to be named for `with_name`.
+
+* `address`
+
+ Add tag to the list of address tags. If the tag starts with `addr:` or
+ `is_in:`, then this prefix is cut off before adding it to the list.
+
+* `postcode`
+
+ Add the value as a postcode to the address tags. If multiple tags are
+ candidate for postcodes, one wins out and the others are dropped.
+
+* `country`
+
+ Add the value as a country code to the address tags. The value must be a
+ two letter country code, otherwise it is ignored. If there are multiple
+ tags that match, then one wins out and the others are dropped.
+
+* `house`
+
+  If no principal tags can be found for the object, still add the object with
+ `class`=`place` and `type`=`house`. Use this for address nodes that have no
+ other function.
+
+* `interpolation`
+
+ Add this object as an address interpolation (appears as `class`=`place` and
+ `type`=`houses` in the database).
+
+* `extra`
+
+ Add tag to the list of extra tags.
+
+* `skip`
+
+ Skip the tag completely. Useful when a custom default fallback is defined
+ or to define exceptions to rules.
+
+A rule can define as many of these properties for one match as it likes. For
+example, if the property is `"main,extra"` then the tag will open a new row
+but also have the tag appear in the list of extra tags.
+
+### Changing the Style of Existing Databases
+
+There is normally no issue changing the style of a database that is already
+imported and now kept up-to-date with change files. Just be aware that any
+change in the style applies to updates only. If you want to change the data
+that is already in the database, then a reimport is necessary.
--- /dev/null
+Nominatim comes with a predefined set of configuration options that should
+work for most standard installations. If you have special requirements, there
+are many places where the configuration can be adapted. This chapter describes
+the following configurable parts:
+
+* [Global Settings](Settings.md) has a detailed description of all parameters that
+ can be set in your local `.env` configuration
+* [Import styles](Import-Styles.md) explains how to write your own import style
+ in order to control what kind of OSM data will be imported
+* [Place ranking](Ranking.md) describes the configuration around classifying
+ places in terms of their importance and their role in an address
+* [Tokenizers](Tokenizers.md) describes the configuration of the module
+ responsible for analysing and indexing names
+* [Special Phrases](Special-Phrases.md) are common nouns or phrases that
+ can be used in search to identify a class of places
+
+There are also guides for adding the following external data:
+
+* [US house numbers from the TIGER dataset](Tiger.md)
+* [External postcodes](Postcodes.md)
--- /dev/null
+# External postcode data
+
+Nominatim creates a table of known postcode centroids during import. This table
+is used for searches of postcodes and for adding postcodes to places where the
+OSM data does not provide one. These postcode centroids are mainly computed
+from the OSM data itself. In addition, Nominatim supports reading postcode
+information from an external CSV file, to supplement the postcodes that are
+missing in OSM.
+
+To enable external postcode support, simply put one CSV file per country into
+your project directory and name it `<CC>_postcodes.csv`. `<CC>` must be the
+two-letter country code for which to apply the file. The file may also be
+gzipped. Then it must be called `<CC>_postcodes.csv.gz`.
+
+The CSV file must use commas as a delimiter and have a header line. Nominatim
+expects three columns to be present: `postcode`, `lat` and `lon`. All other
+columns are ignored. `lon` and `lat` must describe the x and y coordinates of the
+postcode centroids in WGS84.
+
+The postcode files are loaded only when there is data for the given country
+in your database. For example, if there is a `us_postcodes.csv` file in your
+project directory but you import only an excerpt of Italy, then the US postcodes
+will simply be ignored.
+
+As a rule, the external postcode data should be put into the project directory
+**before** starting the initial import. Still, you can add, remove and update the
+external postcode data at any time. Simply
+run:
+
+```
+nominatim refresh --postcodes
+```
+
+to make the changes visible in your database. Be aware, however, that the changes
+only have an immediate effect on searches for postcodes. Postcodes that were
+added to places are only updated, when they are reindexed. That usually happens
+only during replication updates.
# Place Ranking in Nominatim
Nominatim uses two metrics to rank a place: search rank and address rank.
-Both can be assigned a value between 0 and 30. They serve slightly
-different purposes, which are explained in this chapter.
+This chapter explains what place ranking means and how it can be customized.
## Search rank
--- /dev/null
+This section provides a reference of all configuration parameters that can
+be used with Nominatim.
+
+# Configuring Nominatim
+
+Nominatim uses [dotenv](https://github.com/theskumar/python-dotenv) to manage
+its configuration settings. There are two means to set configuration
+variables: through an `.env` configuration file or through an environment
+variable.
+
+The `.env` configuration file needs to be placed into the
+[project directory](../admin/Import.md#creating-the-project-directory). It
+must contain configuration parameters in `<parameter>=<value>` format.
+Please refer to the dotenv documentation for details.
+
+The configuration options may also be set in the form of shell environment
+variables. This is particularly useful, when you want to temporarily change
+a configuration option. For example, to force the replication service to
+download the next change, you can temporarily disable the update interval:
+
+ NOMINATIM_REPLICATION_UPDATE_INTERVAL=0 nominatim replication --once
+
+If a configuration option is defined both through the .env file and through an
+environment variable, then the latter takes precedence.
+
+## Configuration Parameter Reference
+
+### Import and Database Settings
+
+#### NOMINATIM_DATABASE_DSN
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Database connection string |
+| **Format:** | string: `pgsql:<param1>=<value1>;<param2>=<value2>;...` |
+| **Default:** | pgsql:dbname=nominatim |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Sets the connection parameters for the Nominatim database. At a minimum
+the name of the database (`dbname`) is required. You can set any additional
+parameter that is understood by libpq. See the [Postgres documentation](https://www.postgresql.org/docs/current/libpq-connect.html#LIBPQ-PARAMKEYWORDS) for a full list.
+
+!!! note
+ It is usually recommended not to set the password directly in this
+ configuration parameter. Use a
+ [password file](https://www.postgresql.org/docs/current/libpq-pgpass.html)
+ instead.
+
+
+#### NOMINATIM_DATABASE_WEBUSER
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Database query user |
+| **Format:** | string |
+| **Default:** | www-data |
+| **After Changes:** | cannot be changed after import |
+
+Defines the name of the database user that will run search queries. Usually
+this is the user under which the webserver is executed. When running Nominatim
+via php-fpm, you can also define a separate query user. The Postgres user
+needs to be set up before starting the import.
+
+Nominatim grants minimal rights to this user to all tables that are needed
+for running geocoding queries.
+
+
+#### NOMINATIM_DATABASE_MODULE_PATH
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Directory where to find the PostgreSQL server module |
+| **Format:** | path |
+| **Default:** | _empty_ (use `<project_directory>/module`) |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:** | Legacy tokenizer only |
+
+Defines the directory in which the PostgreSQL server module `nominatim.so`
+is stored. The directory and module must be accessible by the PostgreSQL
+server.
+
+For information on how to use this setting when working with external databases,
+see [Advanced Installations](../admin/Advanced-Installations.md).
+
+The option is only used by the Legacy tokenizer and ignored otherwise.
+
+
+#### NOMINATIM_TOKENIZER
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Tokenizer used for normalizing and parsing queries and names |
+| **Format:** | string |
+| **Default:** | legacy |
+| **After Changes:** | cannot be changed after import |
+
+Sets the tokenizer type to use for the import. For more information on
+available tokenizers and how they are configured, see
+[Tokenizers](../customize/Tokenizers.md).
+
+
+#### NOMINATIM_TOKENIZER_CONFIG
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Configuration file for the tokenizer |
+| **Format:** | path |
+| **Default:** | _empty_ (default file depends on tokenizer) |
+| **After Changes:** | see documentation for each tokenizer |
+
+Points to the file with additional configuration for the tokenizer.
+See the [Tokenizer](../customize/Tokenizers.md) descriptions for details
+on the file format.
+
+#### NOMINATIM_MAX_WORD_FREQUENCY
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Number of occurrences before a word is considered frequent |
+| **Format:** | int |
+| **Default:** | 50000 |
+| **After Changes:** | cannot be changed after import |
+| **Comment:** | Legacy tokenizer only |
+
+The word frequency count is used by the Legacy tokenizer to automatically
+identify _stop words_. Any partial term that occurs more often than what
+is defined in this setting, is effectively ignored during search.
+
+
+#### NOMINATIM_LIMIT_REINDEXING
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Avoid invalidating large areas |
+| **Format:** | bool |
+| **Default:** | yes |
+
+Nominatim computes the address of each place at indexing time. This has the
+advantage of making search faster but also means that more objects need to
+be invalidated when the data changes. For example, changing the name of
+the state of Florida would require recomputing every single address point
+in the state to make the new name searchable in conjunction with addresses.
+
+Setting this option to 'yes' means that Nominatim skips reindexing of contained
+objects when the area becomes too large.
+
+
+#### NOMINATIM_LANGUAGES
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Restrict search languages |
+| **Format:** | comma,separated list of language codes |
+| **Default:** | _empty_ |
+
+Normally Nominatim will include all language variants of name:XX
+in the search index. Set this to a comma separated list of language
+codes, to restrict import to a subset of languages.
+
+Currently only affects the initial import of country names and special phrases.
+
+
+#### NOMINATIM_TERM_NORMALIZATION
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Rules for normalizing terms for comparisons |
+| **Format:** | string: semicolon-separated list of ICU rules |
+| **Default:** | :: NFD (); [[:Nonspacing Mark:] [:Cf:]] >; :: lower (); [[:Punctuation:][:Space:]]+ > ' '; :: NFC (); |
+| **Comment:** | Legacy tokenizer only |
+
+[Special phrases](Special-Phrases.md) have stricter matching requirements than
+normal search terms. They must appear exactly in the query after this term
+normalization has been applied.
+
+Only has an effect on the Legacy tokenizer. For the ICU tokenizer the rules
+defined in the
+[normalization section](Tokenizers.md#normalization-and-transliteration)
+will be used.
+
+
+#### NOMINATIM_USE_US_TIGER_DATA
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Enable searching for Tiger house number data |
+| **Format:** | boolean |
+| **Default:** | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+
+When this setting is enabled, search and reverse queries also take data
+from [Tiger house number data](Tiger.md) into account.
+
+
+#### NOMINATIM_USE_AUX_LOCATION_DATA
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Enable searching in external house number tables |
+| **Format:** | boolean |
+| **Default:** | no |
+| **After Changes:** | run `nominatim refresh --functions` |
+| **Comment:** | Do not use. |
+
+When this setting is enabled, search queries also take data from external
+house number tables into account.
+
+*Warning:* This feature is currently unmaintained and should not be used.
+
+
+#### NOMINATIM_HTTP_PROXY
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Use HTTP proxy when downloading data |
+| **Format:** | boolean |
+| **Default:** | no |
+
+When this setting is enabled and at least
+[NOMINATIM_HTTP_PROXY_HOST](#nominatim_http_proxy_host) and
+[NOMINATIM_HTTP_PROXY_PORT](#nominatim_http_proxy_port) are set, the
+configured proxy will be used, when downloading external data like
+replication diffs.
+
+
+#### NOMINATIM_HTTP_PROXY_HOST
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Host name of the proxy to use |
+| **Format:** | string |
+| **Default:** | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the proxy host name.
+
+
+#### NOMINATIM_HTTP_PROXY_PORT
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Port number of the proxy to use |
+| **Format:** | integer |
+| **Default:** | 3128 |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, this setting
+configures the port number to use with the proxy.
+
+
+#### NOMINATIM_HTTP_PROXY_LOGIN
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Username for proxies that require login |
+| **Format:** | string |
+| **Default:** | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the username for proxies that require a login.
+
+
+#### NOMINATIM_HTTP_PROXY_PASSWORD
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Password for proxies that require login |
+| **Format:** | string |
+| **Default:** | _empty_ |
+
+When [NOMINATIM_HTTP_PROXY](#nominatim_http_proxy) is enabled, use this
+setting to define the password for proxies that require a login.
+
+
+#### NOMINATIM_OSM2PGSQL_BINARY
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Location of the osm2pgsql binary |
+| **Format:** | path |
+| **Default:** | _empty_ (use binary shipped with Nominatim) |
+| **Comment:** | EXPERT ONLY |
+
+Nominatim uses [osm2pgsql](https://osm2pgsql.org) to load the OSM data
+initially into the database. Nominatim comes bundled with a version of
+osm2pgsql that is guaranteed to be compatible. Use this setting to use
+a different binary instead. You should do this only, when you know exactly
+what you are doing. If the osm2pgsql version is not compatible, then the
+result is undefined.
+
+
+#### NOMINATIM_WIKIPEDIA_DATA_PATH
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Directory with the wikipedia importance data |
+| **Format:** | path |
+| **Default:** | _empty_ (project directory) |
+
+Set a custom location for the
+[wikipedia ranking file](../admin/Import.md#wikipediawikidata-rankings). When
+unset, Nominatim expects the data to be saved in the project directory.
+
+#### NOMINATIM_PHRASE_CONFIG
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Configuration file for special phrase imports |
+| **Format:** | path |
+| **Default:** | _empty_ (use default settings) |
+
+The _phrase_config_ file configures black and white lists of tag types,
+so that some of them can be ignored, when loading special phrases from
+the OSM wiki. The default settings can be found in the configuration
+directory as `phrase-settings.json`.
+
+#### NOMINATIM_ADDRESS_LEVEL_CONFIG
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Configuration file for rank assignments |
+| **Format:** | path |
+| **Default:** | _empty_ (use default settings) |
+
+The _address level config_ configures rank assignments for places. See
+[Place Ranking](Ranking.md) for a detailed explanation what rank assignments
+are and what the configuration file must look like. The default configuration
+can be found in the configuration directory as `address-levels.json`.
+
+#### NOMINATIM_IMPORT_STYLE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Configuration to use for the initial OSM data import |
+| **Format:** | string or path |
+| **Default:** | extratags |
+
+The _style configuration_ describes which OSM objects and tags are taken
+into consideration for the search database. This setting may either
+be a string pointing to one of the internal styles or it may be a path
+pointing to a custom style.
+
+See [Import Styles](Import-Styles.md)
+for more information on the available internal styles and the format of the
+configuration file.
+
+#### NOMINATIM_FLATNODE_FILE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Location of osm2pgsql flatnode file |
+| **Format:** | path |
+| **Default:** | _empty_ (do not use a flatnode file) |
+| **After Changes:** | Only change when moving the file physically. |
+
+The `osm2pgsql flatnode file` is a file that efficiently stores the geographic
+location of OSM nodes. For larger imports it can significantly speed up
+the import. When this option is unset, then osm2pgsql uses a PostgreSQL table
+to store the locations.
+
+!!! warning
+
+ The flatnode file is not only used during the initial import but also
+ when adding new data with `nominatim add-data` or `nominatim replication`.
+ Make sure you keep the flatnode file around and this setting unmodified,
+ if you plan to add more data or run regular updates.
+
+
+#### NOMINATIM_TABLESPACE_*
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Group of settings for distributing the database over tablespaces |
+| **Format:** | string |
+| **Default:** | _empty_ (do not use a table space) |
+| **After Changes:** | no effect after initial import |
+
+Nominatim allows the search database to be distributed over up to 10 different
+[PostgreSQL tablespaces](https://www.postgresql.org/docs/current/manage-ag-tablespaces.html).
+If you use this option, make sure that the tablespaces exist before starting
+the import.
+
+The available tablespace groups are:
+
+NOMINATIM_TABLESPACE_SEARCH_DATA
+: Data used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_SEARCH_INDEX
+: Indexes used by the geocoding frontend.
+
+NOMINATIM_TABLESPACE_OSM_DATA
+: Raw OSM data cache used for import and updates.
+
+NOMINATIM_TABLESPACE_OSM_INDEX
+: Indexes on the raw OSM data cache.
+
+NOMINATIM_TABLESPACE_PLACE_DATA
+: Data table with the pre-filtered but still unprocessed OSM data.
+ Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_PLACE_INDEX
+: Indexes on raw data table. Used only during imports and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_DATA
+: Data tables used for computing search terms and addresses of places
+ during import and updates.
+
+NOMINATIM_TABLESPACE_ADDRESS_INDEX
+: Indexes on the data tables for search term and address computation.
+ Used only for import and updates.
+
+NOMINATIM_TABLESPACE_AUX_DATA
+: Auxiliary data tables for non-OSM data, e.g. for Tiger house number data.
+
+NOMINATIM_TABLESPACE_AUX_INDEX
+: Indexes on auxiliary data tables.
+
+
+### Replication Update Settings
+
+#### NOMINATIM_REPLICATION_URL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Base URL of the replication service |
+| **Format:** | url |
+| **Default:** | https://planet.openstreetmap.org/replication/minute |
+| **After Changes:** | run `nominatim replication --init` |
+
+Replication services deliver updates to OSM data. Use this setting to choose
+which replication service to use. See [Updates](../admin/Update.md) for more
+information on how to set up regular updates.
+
+#### NOMINATIM_REPLICATION_MAX_DIFF
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Maximum amount of data to download per update cycle (in MB) |
+| **Format:** | integer |
+| **Default:** | 50 |
+| **After Changes:** | restart the replication process |
+
+At each update cycle Nominatim downloads diffs until either no more diffs
+are available on the server (i.e. the database is up-to-date) or the limit
+given in this setting is exceeded. Nominatim guarantees to download at least
+one diff, if one is available, no matter how small the setting.
+
+The default for this setting is fairly conservative because Nominatim keeps
+all data downloaded in one cycle in RAM. Using large values in a production
+server may interfere badly with the search frontend because it evicts data
+from RAM that is needed for speedy answers to incoming requests. It is usually
+a better idea to keep this setting lower and run multiple update cycles
+to catch up with updates.
+
+When catching up in non-production mode, for example after the initial import,
+the setting can easily be changed temporarily on the command line:
+
+ NOMINATIM_REPLICATION_MAX_DIFF=3000 nominatim replication
+
+
+#### NOMINATIM_REPLICATION_UPDATE_INTERVAL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Publication interval of the replication service (in seconds) |
+| **Format:** | integer |
+| **Default:** | 75 |
+| **After Changes:** | restart the replication process |
+
+This setting determines when Nominatim will attempt to download again a new
+update. The time is computed from the publication date of the last diff
+downloaded. Setting this to a slightly higher value than the actual
+publication interval avoids unnecessary rechecks.
+
+
+#### NOMINATIM_REPLICATION_RECHECK_INTERVAL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Wait time to recheck for a pending update (in seconds) |
+| **Format:** | integer |
+| **Default:** | 60 |
+| **After Changes:** | restart the replication process |
+
+When replication updates are run in continuous mode (using `nominatim replication`),
+this setting determines how long Nominatim waits until it looks for updates
+again when updates were not available on the server.
+
+Note that this is different from
+[NOMINATIM_REPLICATION_UPDATE_INTERVAL](#nominatim_replication_update_interval).
+Nominatim will never attempt to query for new updates for UPDATE_INTERVAL
+seconds after the current database date. Only after the update interval has
+passed it asks for new data. If then no new data is found, it waits for
+RECHECK_INTERVAL seconds before it attempts again.
+
+### API Settings
+
+#### NOMINATIM_CORS_NOACCESSCONTROL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Send permissive CORS access headers |
+| **Format:** | boolean |
+| **Default:** | yes |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When this setting is enabled, API HTTP responses include the HTTP
+[CORS](https://en.wikipedia.org/wiki/CORS) headers
+`access-control-allow-origin: *` and `access-control-allow-methods: OPTIONS,GET`.
+
+#### NOMINATIM_MAPICON_URL
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | URL prefix for static icon images |
+| **Format:** | url |
+| **Default:** | _empty_ |
+| **After Changes:** | run `nominatim refresh --website` |
+
+When a mapicon URL is configured, then Nominatim includes an additional `icon`
+field in the responses, pointing to an appropriate icon for the place type.
+
+Map icons used to be included in Nominatim itself but now have moved to the
+[nominatim-ui](https://github.com/osm-search/nominatim-ui/) project. If you
+want the URL to be included in API responses, make the `/mapicon`
+directory of the project available under a public URL and point this setting
+to the directory.
+
+
+#### NOMINATIM_DEFAULT_LANGUAGE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Language of responses when no language is requested |
+| **Format:** | language code |
+| **Default:** | _empty_ (use the local language of the feature) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim localizes the place names in responses when the corresponding
+translation is available. Users can request a custom language setting through
+the HTTP accept-languages header or through the explicit parameter
+[accept-languages](../api/Search.md#language-of-results). If neither is
+given, it falls back to this setting. If the setting is also empty, then
+the local language (in OSM: the name tag without any language suffix) is
+used.
+
+
+#### NOMINATIM_SEARCH_BATCH_MODE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Enable a special batch query mode |
+| **Format:** | boolean |
+| **Default:** | no |
+| **After Changes:** | run `nominatim refresh --website` |
+
+This feature is currently undocumented and potentially broken.
+
+
+#### NOMINATIM_SEARCH_NAME_ONLY_THRESHOLD
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Threshold for switching the search index lookup strategy |
+| **Format:** | integer |
+| **Default:** | 500 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+This setting defines the threshold over which a name is no longer considered
+as rare. When searching for places with rare names, only the name is used
+for place lookups. Otherwise the name and any address information is used.
+
+This setting only has an effect after `nominatim refresh --word-counts` has
+been called to compute the word frequencies.
+
+
+#### NOMINATIM_LOOKUP_MAX_COUNT
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Maximum number of OSM ids accepted by /lookup |
+| **Format:** | integer |
+| **Default:** | 50 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+The /lookup endpoint accepts a list of ids to look up address details for. This
+setting restricts the number of places a user may look up with a single
+request.
+
+
+#### NOMINATIM_POLYGON_OUTPUT_MAX_TYPES
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Number of different geometry formats that may be returned |
+| **Format:** | integer |
+| **Default:** | 1 |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Nominatim supports returning full geometries of places. The geometries may
+be requested in different formats with one of the
+[`polygon_*` parameters](../api/Search.md#polygon-output). Use this
+setting to restrict the number of geometry types that may be requested
+with a single query.
+
+Setting this parameter to 0 disables polygon output completely.
+
+### Logging Settings
+
+#### NOMINATIM_LOG_DB
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Log requests into the database |
+| **Format:** | boolean |
+| **Default:** | no |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging requests into a database table with this setting. The logs
+can be found in the table `new_query_log`.
+
+When using this logging method, it is advisable to set up a job that
+regularly clears out old logging information. Nominatim will not do that
+on its own.
+
+Can be used at the same time as NOMINATIM_LOG_FILE.
+
+#### NOMINATIM_LOG_FILE
+
+| Summary | |
+| -------------- | --------------------------------------------------- |
+| **Description:** | Log requests into a file |
+| **Format:** | path |
+| **Default:** | _empty_ (logging disabled) |
+| **After Changes:** | run `nominatim refresh --website` |
+
+Enable logging of requests into a file by setting this option to the name of
+the file to log to. The entries in the log file have the following format:
+
+ <request time> <execution time in s> <number of results> <type> "<query string>"
+
+Request time is the time when the request was started. The execution time is
+given in seconds and corresponds to the time the query took executing in PHP.
+type contains the name of the endpoint used.
+
+Can be used at the same time as NOMINATIM_LOG_DB.
--- /dev/null
+# Special phrases
+
+## Importing OSM user-maintained special phrases
+
+As described in the [Import section](../admin/Import.md), it is possible to
+import special phrases from the wiki with the following command:
+
+```sh
+nominatim special-phrases --import-from-wiki
+```
+
+## Importing custom special phrases
+
+It is also possible to import phrases from a csv file.
+To do so, you have access to the following command:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file>
+```
+
+Note that the two previous import commands will replace the phrases in your database.
+This means that if you import some phrases from a csv file, only the phrases
+present in the csv file will be kept in the database. All other phrases will
+be removed.
+
+If you want to only add new phrases and not update the other ones you can add
+the argument `--no-replace` to the import command. For example:
+
+```sh
+nominatim special-phrases --import-from-csv <csv file> --no-replace
+```
+
+This will add the phrases present in the csv file into the database without
+removing the other ones.
--- /dev/null
+# Installing TIGER housenumber data for the US
+
+Nominatim is able to use the official [TIGER](https://www.census.gov/geographies/mapping-files/time-series/geo/tiger-line-file.html)
+address set to complement the OSM house number data in the US. You can add
+TIGER data to your own Nominatim instance by following these steps. The
+entire US adds about 10GB to your database.
+
+ 1. Get preprocessed TIGER 2021 data:
+
+ cd $PROJECT_DIR
+ wget https://nominatim.org/data/tiger2021-nominatim-preprocessed.csv.tar.gz
+
+ 2. Import the data into your Nominatim database:
+
+ nominatim add-data --tiger-data tiger2021-nominatim-preprocessed.csv.tar.gz
+
+ 3. Enable use of the Tiger data in your `.env` by adding:
+
+ echo NOMINATIM_USE_US_TIGER_DATA=yes >> .env
+
+ 4. Apply the new settings:
+
+ nominatim refresh --functions
+
+
+See the [TIGER-data project](https://github.com/osm-search/TIGER-data) for more
+information on how the data got preprocessed.
+
```
This is in particular useful when the database runs on a different server.
-See [Advanced installations](Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
+See [Advanced installations](../admin/Advanced-Installations.md#importing-nominatim-to-an-external-postgresql-database) for details.
There are no other configuration options for the legacy tokenizer. All
normalization functions are hard-coded.
## ICU tokenizer
-!!! danger
- This tokenizer is currently in active development and still subject
- to backwards-incompatible changes.
-
The ICU tokenizer uses the [ICU library](http://site.icu-project.org/) to
normalize names and queries. It also offers configurable decomposition and
abbreviation handling.
Now you can start webserver for local testing
```
-build> mame serve-doc
+build> make serve-doc
[server:296] Serving on http://127.0.0.1:8000
[handlers:62] Start watching changes
```
`highway=motorway` and `bridge=yes`. This way would appear in the `place` table
once with `class` of `highway` and once with a `class` of `bridge`. Thus the
*unique key* for `place` is (`osm_type`, `osm_id`, `class`).
-
-## Configuring the Import
-
-How tags are interpreted and assigned to the different `place` columns can be
-configured via the import style configuration file (`NOMINATIM_IMPORT_STYLE`). This
-is a JSON file which contains a list of rules which are matched against every
-tag of every object and then assign the tag its specific role.
-
-### Configuration Rules
-
-A single rule looks like this:
-
-```json
-{
- "keys" : ["key1", "key2", ...],
- "values" : {
- "value1" : "prop",
- "value2" : "prop1,prop2"
- }
-}
-```
-
-A rule first defines a list of keys to apply the rule to. This is always a list
-of strings. The string may have four forms. An empty string matches against
-any key. A string that ends in an asterisk `*` is a prefix match and accordingly
-matches against any key that starts with the given string (minus the `*`). A
-suffix match can be defined similarly with a string that starts with a `*`. Any
-other string constitutes an exact match.
-
-The second part of the rules defines a list of values and the properties that
-apply to a successful match. Value strings may be either empty, which
-means that they match any value, or describe an exact match. Prefix
-or suffix matching of values is not possible.
-
-For a rule to match, it has to find a valid combination of keys and values. The
-resulting property is that of the matched values.
-
-The rules in a configuration file are processed sequentially and the first
-match for each tag wins.
-
-A rule where key and value are the empty string is special. This defines the
-fallback when none of the rules match. The fallback is always used as a last
-resort when nothing else matches, no matter where the rule appears in the file.
-Defining multiple fallback rules is not allowed. What happens in this case,
-is undefined.
-
-### Tag Properties
-
-One or more of the following properties may be given for each tag:
-
-* `main`
-
- A principal tag. A new row will be added for the object with key and value
- as `class` and `type`.
-
-* `with_name`
-
- When the tag is a principal tag (`main` property set): only really add a new
- row, if there is any name tag found (a reference tag is not sufficient, see
- below).
-
-* `with_name_key`
-
- When the tag is a principal tag (`main` property set): only really add a new
- row, if there is also a name tag that matches the key of the principal tag.
- For example, if the main tag is `bridge=yes`, then it will only be added as
- an extra row, if there is a tag `bridge:name[:XXX]` for the same object.
- If this property is set, all other names that are not domain-specific are
- ignored.
-
-* `fallback`
-
- When the tag is a principal tag (`main` property set): only really add a new
- row, when no other principal tags for this object have been found. Only one
- fallback tag can win for an object.
-
-* `operator`
-
- When the tag is a principal tag (`main` property set): also include the
- `operator` tag in the list of names. This is a special construct for an
- out-dated tagging practise in OSM. Fuel stations and chain restaurants
- in particular used to have the name of the chain tagged as `operator`.
- These days the chain can be more commonly found in the `brand` tag but
- there is still enough old data around to warrant this special case.
-
-* `name`
-
- Add tag to the list of names.
-
-* `ref`
-
- Add tag to the list of names as a reference. At the moment this only means
- that the object is not considered to be named for `with_name`.
-
-* `address`
-
- Add tag to the list of address tags. If the tag starts with `addr:` or
- `is_in:`, then this prefix is cut off before adding it to the list.
-
-* `postcode`
-
- Add the value as a postcode to the address tags. If multiple tags are
- candidate for postcodes, one wins out and the others are dropped.
-
-* `country`
-
- Add the value as a country code to the address tags. The value must be a
- two letter country code, otherwise it is ignored. If there are multiple
- tags that match, then one wins out and the others are dropped.
-
-* `house`
-
- If no principle tags can be found for the object, still add the object with
- `class`=`place` and `type`=`house`. Use this for address nodes that have no
- other function.
-
-* `interpolation`
-
- Add this object as an address interpolation (appears as `class`=`place` and
- `type`=`houses` in the database).
-
-* `extra`
-
- Add tag to the list of extra tags.
-
-* `skip`
-
- Skip the tag completely. Useful when a custom default fallback is defined
- or to define exceptions to rules.
-
-A rule can define as many of these properties for one match as it likes. For
-example, if the property is `"main,extra"` then the tag will open a new row
-but also have the tag appear in the list of extra tags.
-
-There are a number of pre-defined styles in the `settings/` directory. It is
-advisable to start from one of these styles when defining your own.
-
-### Changing the Style of Existing Databases
-
-There is normally no issue changing the style of a database that is already
-imported and now kept up-to-date with change files. Just be aware that any
-change in the style applies to updates only. If you want to change the data
-that is already in the database, then a reimport is necessary.
how tokenizers are expected to work and the public API that needs to be
implemented when creating a new tokenizer. For information on how to configure
a specific tokenizer for a database see the
-[tokenizer chapter in the administration guide](../admin/Tokenizers.md).
+[tokenizer chapter in the Customization Guide](../customize/Tokenizers.md).
## Generic Architecture
Nominatim (from the Latin, 'by name') is a tool to search OSM data by name and address and to generate synthetic addresses of OSM points (reverse geocoding).
-This guide comes in three parts:
+This guide comes in four parts:
* __[API reference](api/Overview.md)__ for users of Nominatim
* __[Administration Guide](admin/Installation.md)__ for those who want
to install their own Nominatim server
+ * __[Customization Guide](customize/Overview.md)__ for those who want to
+ adapt their own installation to their special requirements
* __[Developer's Guide](develop/overview.md)__ for developers of the software
- 'Import' : 'admin/Import.md'
- 'Update' : 'admin/Update.md'
- 'Deploy' : 'admin/Deployment.md'
- - 'Customize Imports' : 'admin/Customization.md'
- - 'Tokenizers' : 'admin/Tokenizers.md'
- 'Nominatim UI' : 'admin/Setup-Nominatim-UI.md'
- 'Advanced Installations' : 'admin/Advanced-Installations.md'
- 'Migration from older Versions' : 'admin/Migration.md'
- 'Troubleshooting' : 'admin/Faq.md'
+ - 'Customization Guide':
+ - 'Overview': 'customize/Overview.md'
+ - 'Import Styles': 'customize/Import-Styles.md'
+ - 'Configuration Settings': 'customize/Settings.md'
+ - 'Place Ranking' : 'customize/Ranking.md'
+ - 'Tokenizers' : 'customize/Tokenizers.md'
+ - 'Special Phrases': 'customize/Special-Phrases.md'
+ - 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
+ - 'External data: Postcodes': 'customize/Postcodes.md'
- 'Developers Guide':
- 'Setup for Development' : 'develop/Development-Environment.md'
- 'Architecture Overview' : 'develop/overview.md'
- 'OSM Data Import' : 'develop/Import.md'
- - 'Place Ranking' : 'develop/Ranking.md'
- 'Tokenizers' : 'develop/Tokenizers.md'
- 'Postcodes' : 'develop/Postcodes.md'
- 'Testing' : 'develop/Testing.md'
markdown_extensions:
- codehilite
- admonition
+ - def_list
- toc:
permalink:
extra_css: [extra.css, styles.css]
+++ /dev/null
-DROP TABLE IF EXISTS word_frequencies;
-CREATE TABLE word_frequencies AS
- SELECT unnest(name_vector) as id, count(*) FROM search_name GROUP BY id;
-
-CREATE INDEX idx_word_frequencies ON word_frequencies(id);
-
-UPDATE word SET search_name_count = count
- FROM word_frequencies
- WHERE word_token like ' %' and word_id = id;
-
-DROP TABLE word_frequencies;
# Creates and installs manual page
-configure_file(${PROJECT_SOURCE_DIR}/manual/create-manpage.tmpl create_manpage.py)
+configure_file(${CMAKE_CURRENT_SOURCE_DIR}/create-manpage.tmpl create_manpage.py)
find_program(ARGPARSEMANPAGE argparse-manpage)
COMMAND ${ARGPARSEMANPAGE} --pyfile ${CMAKE_CURRENT_BINARY_DIR}/create_manpage.py
--function get_parser --project-name Nominatim
--url https://nominatim.org > ${CMAKE_CURRENT_SOURCE_DIR}/nominatim.1
-
- COMMAND sed -i '/.SH AUTHORS/I,+2 d' ${CMAKE_CURRENT_SOURCE_DIR}/nominatim.1
+ --author 'the Nominatim developer community'
+ --author-email info@nominatim.org
)
install(FILES ${CMAKE_CURRENT_SOURCE_DIR}/nominatim.1 DESTINATION share/man/man1 )
[-h] {import,freeze,replication,special-phrases,add-data,index,refresh,admin,export,serve,search,reverse,lookup,details,status} ...
.SH DESCRIPTION
Command\-line tools for importing, updating, administrating and
+.br
querying the Nominatim database.
+.br
.SH OPTIONS
Start a simple web server for serving the API.
.TP
\fBnominatim\fR \fI\,search\/\fR
- Execute API search query.
+ Execute a search query.
.TP
\fBnominatim\fR \fI\,reverse\/\fR
Execute API reverse query.
[--index-noanalyse]
Create a new Nominatim database from an OSM file.
+.br
+
+.br
+ This sub\-command sets up a new Nominatim database from scratch starting
+.br
+ with creating a new database in Postgresql. The user running this command
+.br
+ needs superuser rights on the database.
+.br
.TP
\fB\-\-osm\-file\fR FILE
-OSM file to be imported.
+OSM file to be imported (repeat for importing multiple files)
.TP
\fB\-\-continue\fR {load\-data,indexing,db\-postprocess}
.TP
\fB\-\-index\-noanalyse\fR
-Do not perform analyse operations during index
+Do not perform analyse operations during index (expert only)
.SH OPTIONS 'nominatim freeze'
usage: nominatim freeze [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
Make database read\-only.
+.br
+.br
About half of data in the Nominatim database is kept only to be able to
+.br
keep the data up\-to\-date with new changes made in OpenStreetMap. This
+.br
command drops all this data and only keeps the part needed for geocoding
+.br
itself.
+.br
+.br
This command has the same effect as the `\-\-no\-updates` option for imports.
+.br
[--socket-timeout SOCKET_TIMEOUT]
Update the database using an online replication service.
+.br
+
+.br
+ An OSM replication service is an online service that provides regular
+.br
+ updates (OSM diff files) for the planet or for extracts of it. The OSMF
+.br
+ provides the primary replication service for the full planet at
+.br
+ https://planet.osm.org/replication/ but there are other providers of
+.br
+ extracts of OSM data who provide such a service as well.
+.br
+
+.br
+ This sub\-command allows to set up such a replication service and download
+.br
+ and import updates at regular intervals. You need to call '\-\-init' once to
+.br
+ set up the process or whenever you change the replication configuration
+.br
+ parameters. Without any arguments, the sub\-command will go into a loop and
+.br
+ continuously apply updates as they become available. Giving `\-\-once` just
+.br
+ downloads and imports the next batch of updates.
+.br
.TP
\fB\-\-no\-index\fR
-Do not index the new data. Only applicable together with \-\-once
+Do not index the new data. Only usable together with \-\-once
.TP
\fB\-\-osm2pgsql\-cache\fR SIZE
.TP
\fB\-\-socket\-timeout\fR \fI\,SOCKET_TIMEOUT\/\fR
-Set timeout for file downloads.
+Set timeout for file downloads
.SH OPTIONS 'nominatim special-phrases'
usage: nominatim special-phrases [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
- [--import-from-wiki]
+ [--import-from-wiki] [--import-from-csv FILE]
+ [--no-replace]
Import special phrases.
+.br
+
+.br
+ Special phrases are search terms that narrow down the type of object
+.br
+ that should be searched. For example, you might want to search for
+.br
+ 'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
+.br
+ in many languages, which can be imported with this command.
+.br
+
+.br
+ You can also provide your own phrases in a CSV file. The file needs to have
+.br
+ the following five columns:
+.br
+ * phrase \- the term expected for searching
+.br
+ * class \- the OSM tag key of the object type
+.br
+ * type \- the OSM tag value of the object type
+.br
+ * operator \- the kind of search to be done (one of: in, near, name, \-)
+.br
+ * plural \- whether the term is a plural or not (Y/N)
+.br
+
+.br
+ An example file can be found in the Nominatim sources at
+.br
+ 'test/testdb/full_en_phrases_test.csv'.
+.br
.TP
\fB\-\-import\-from\-wiki\fR
-Import special phrases from the OSM wiki to the database.
+Import special phrases from the OSM wiki to the database
+
+.TP
+\fB\-\-import\-from\-csv\fR FILE
+Import special phrases from a CSV file
+
+.TP
+\fB\-\-no\-replace\fR
+Keep the old phrases and only add the new ones
.SH OPTIONS 'nominatim add-data'
usage: nominatim add-data [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
(--file FILE | --diff FILE | --node ID | --way ID | --relation ID | --tiger-data DIR)
- [--use-main-api]
+ [--use-main-api] [--osm2pgsql-cache SIZE]
+ [--socket-timeout SOCKET_TIMEOUT]
Add additional data from a file or an online source.
-
- Data is only imported, not indexed. You need to call `nominatim index`
- to complete the process.
+.br
+
+.br
+ This command allows to add or update the search data in the database.
+.br
+ The data can come either from an OSM file or single OSM objects can
+.br
+ directly be downloaded from the OSM API. This function only loads the
+.br
+ data into the database. Afterwards it still needs to be integrated
+.br
+ in the search index. Use the `nominatim index` command for that.
+.br
+
+.br
+ The command can also be used to add external non\-OSM data to the
+.br
+ database. At the moment the only supported format is TIGER housenumber
+.br
+ data. See the online documentation at
+.br
+ https://nominatim.org/release\-docs/latest/admin/Import/#installing\-tiger\-housenumber\-data\-for\-the\-us
+.br
+ for more information.
+.br
.TP
\fB\-\-file\fR FILE
-Import data from an OSM file
+Import data from an OSM file or diff file
.TP
\fB\-\-diff\fR FILE
-Import data from an OSM diff file
+Import data from an OSM diff file (deprecated: use \-\-file)
.TP
\fB\-\-node\fR ID
.TP
\fB\-\-tiger\-data\fR DIR
-Add housenumbers from the US TIGER census database.
+Add housenumbers from the US TIGER census database
.TP
\fB\-\-use\-main\-api\fR
Use OSM API instead of Overpass to download objects
+.TP
+\fB\-\-osm2pgsql\-cache\fR SIZE
+Size of cache to be used by osm2pgsql (in MB)
+
+.TP
+\fB\-\-socket\-timeout\fR \fI\,SOCKET_TIMEOUT\/\fR
+Set timeout for file downloads
+
.SH OPTIONS 'nominatim index'
usage: nominatim index [-h] [-q] [-v] [--project-dir DIR] [-j NUM]
[--boundaries-only] [--no-boundaries] [--minrank RANK]
[--maxrank RANK]
Reindex all new and modified data.
+.br
+
+.br
+ Indexing is the process of computing the address and search terms for
+.br
+ the places in the database. Every time data is added or changed, indexing
+.br
+ needs to be run. Imports and replication updates automatically take care
+.br
+ of indexing. For other cases, this function allows to run indexing manually.
+.br
[--enable-debug-statements]
Recompute auxiliary data used by the indexing process.
-
- These functions must not be run in parallel with other update commands.
+.br
+
+.br
+ This sub\-commands updates various static data and functions in the database.
+.br
+ It usually needs to be run after changing various aspects of the
+.br
+ configuration. The configuration documentation will mention the exact
+.br
+ command to use in such case.
+.br
+
+.br
+ Warning: the 'update' command must not be run in parallel with other update
+.br
+ commands like 'replication' or 'add\-data'.
+.br
.TP
\fB\-\-wiki\-data\fR
-Update Wikipedia/data importance numbers.
+Update Wikipedia/data importance numbers
.TP
\fB\-\-importance\fR
[--osm-id OSM_ID | --place-id PLACE_ID]
Analyse and maintain the database.
+.br
.TP
\fB\-\-warm\fR
-Warm database caches for search and reverse queries.
+Warm database caches for search and reverse queries
.TP
\fB\-\-check\-database\fR
-Check that the database is complete and operational.
+Check that the database is complete and operational
.TP
\fB\-\-migrate\fR
-Migrate the database to a new software version.
+Migrate the database to a new software version
.TP
\fB\-\-analyse\-indexing\fR
-Print performance analysis of the indexing process.
+Print performance analysis of the indexing process
.TP
\fB\-\-search\-only\fR
[--restrict-to-osm-relation ID]
Export addresses as CSV file from the database.
+.br
[--server SERVER]
Start a simple web server for serving the API.
+.br
+.br
This command starts the built\-in PHP webserver to serve the website
+.br
from the current project directory. This webserver is only suitable
- for testing and develop. Do not use it in production setups!
+.br
+ for testing and development. Do not use it in production setups!
+.br
+.br
By the default, the webserver can be accessed at: http://127.0.0.1:8088
+.br
[--exclude_place_ids ID,..] [--limit LIMIT]
[--viewbox X1,Y1,X2,Y2] [--bounded] [--no-dedupe]
- Execute API search query.
+ Execute a search query.
+.br
+
+.br
+ This command works exactly the same as if calling the /search endpoint on
+.br
+ the web API. See the online documentation for more details on the
+.br
+ various parameters:
+.br
+ https://nominatim.org/release\-docs/latest/api/Search/
+.br
.TP
\fB\-\-addressdetails\fR
-Include a breakdown of the address into elements.
+Include a breakdown of the address into elements
.TP
\fB\-\-extratags\fR
-Include additional information if available (e.g. wikipedia link, opening hours).
+Include additional information if available (e.g. wikipedia link, opening hours)
.TP
\fB\-\-namedetails\fR
-Include a list of alternative names.
+Include a list of alternative names
.TP
\fB\-\-lang\fR LANGS, \fB\-\-accept\-language\fR LANGS
.TP
\fB\-\-polygon\-output\fR {geojson,kml,svg,text}
-Output geometry of results as a GeoJSON, KML, SVG or WKT.
+Output geometry of results as a GeoJSON, KML, SVG or WKT
.TP
\fB\-\-polygon\-threshold\fR TOLERANCE
.TP
\fB\-\-countrycodes\fR CC,..
-Limit search results to one or more countries.
+Limit search results to one or more countries
.TP
\fB\-\-exclude_place_ids\fR ID,..
[--polygon-threshold TOLERANCE]
Execute API reverse query.
+.br
+
+.br
+ This command works exactly the same as if calling the /reverse endpoint on
+.br
+ the web API. See the online documentation for more details on the
+.br
+ various parameters:
+.br
+ https://nominatim.org/release\-docs/latest/api/Reverse/
+.br
.TP
\fB\-\-addressdetails\fR
-Include a breakdown of the address into elements.
+Include a breakdown of the address into elements
.TP
\fB\-\-extratags\fR
-Include additional information if available (e.g. wikipedia link, opening hours).
+Include additional information if available (e.g. wikipedia link, opening hours)
.TP
\fB\-\-namedetails\fR
-Include a list of alternative names.
+Include a list of alternative names
.TP
\fB\-\-lang\fR LANGS, \fB\-\-accept\-language\fR LANGS
.TP
\fB\-\-polygon\-output\fR {geojson,kml,svg,text}
-Output geometry of results as a GeoJSON, KML, SVG or WKT.
+Output geometry of results as a GeoJSON, KML, SVG or WKT
.TP
\fB\-\-polygon\-threshold\fR TOLERANCE
[--polygon-threshold TOLERANCE]
Execute API lookup query.
+.br
+
+.br
+ This command works exactly the same as if calling the /lookup endpoint on
+.br
+ the web API. See the online documentation for more details on the
+.br
+ various parameters:
+.br
+ https://nominatim.org/release\-docs/latest/api/Lookup/
+.br
.TP
\fB\-\-addressdetails\fR
-Include a breakdown of the address into elements.
+Include a breakdown of the address into elements
.TP
\fB\-\-extratags\fR
-Include additional information if available (e.g. wikipedia link, opening hours).
+Include additional information if available (e.g. wikipedia link, opening hours)
.TP
\fB\-\-namedetails\fR
-Include a list of alternative names.
+Include a list of alternative names
.TP
\fB\-\-lang\fR LANGS, \fB\-\-accept\-language\fR LANGS
.TP
\fB\-\-polygon\-output\fR {geojson,kml,svg,text}
-Output geometry of results as a GeoJSON, KML, SVG or WKT.
+Output geometry of results as a GeoJSON, KML, SVG or WKT
.TP
\fB\-\-polygon\-threshold\fR TOLERANCE
[--lang LANGS]
Execute API details query.
+.br
+
+.br
+ This command works exactly the same as if calling the /details endpoint on
+.br
+ the web API. See the online documentation for more details on the
+.br
+ various parameters:
+.br
+ https://nominatim.org/release\-docs/latest/api/Details/
+.br
.TP
\fB\-\-place_id\fR \fI\,PLACE_ID\/\fR, \fB\-p\fR \fI\,PLACE_ID\/\fR
-Database internal identifier of the OSM object to look up.
+Database internal identifier of the OSM object to look up
.TP
\fB\-\-class\fR \fI\,OBJECT_CLASS\/\fR
.TP
\fB\-\-addressdetails\fR
-Include a breakdown of the address into elements.
+Include a breakdown of the address into elements
.TP
\fB\-\-keywords\fR
-Include a list of name keywords and address keywords.
+Include a list of name keywords and address keywords
.TP
\fB\-\-linkedplaces\fR
-Include a details of places that are linked with this one.
+Include a details of places that are linked with this one
.TP
\fB\-\-hierarchy\fR
-Include details of places lower in the address hierarchy.
+Include details of places lower in the address hierarchy
.TP
\fB\-\-group_hierarchy\fR
-Group the places by type.
+Group the places by type
.TP
\fB\-\-polygon_geojson\fR
-Include geometry of result.
+Include geometry of result
.TP
\fB\-\-lang\fR LANGS, \fB\-\-accept\-language\fR LANGS
[--format {text,json}]
Execute API status query.
+.br
+
+.br
+ This command works exactly the same as if calling the /status endpoint on
+.br
+ the web API. See the online documentation for more details on the
+.br
+ various parameters:
+.br
+ https://nominatim.org/release\-docs/latest/api/Status/
+.br
\fB\-\-format\fR {text,json}
Format of result
+.SH AUTHORS
+.B Nominatim
+was written by the Nominatim developer community <info@nominatim.org>.
.SH DISTRIBUTION
The latest version of Nominatim may be downloaded from
.UR https://nominatim.org
This command starts the built-in PHP webserver to serve the website
from the current project directory. This webserver is only suitable
- for testing and develop. Do not use it in production setups!
+ for testing and development. Do not use it in production setups!
By the default, the webserver can be accessed at: http://127.0.0.1:8088
"""
"""
import logging
+import psutil
+
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
# Using non-top-level imports to avoid eventually unused imports.
"""\
Add additional data from a file or an online source.
- Data is only imported, not indexed. You need to call `nominatim index`
- to complete the process.
+ This command allows to add or update the search data in the database.
+ The data can come either from an OSM file or single OSM objects can
+ directly be downloaded from the OSM API. This function only loads the
+ data into the database. Afterwards it still needs to be integrated
+ in the search index. Use the `nominatim index` command for that.
+
+ The command can also be used to add external non-OSM data to the
+ database. At the moment the only supported format is TIGER housenumber
+ data. See the online documentation at
+ https://nominatim.org/release-docs/latest/admin/Import/#installing-tiger-housenumber-data-for-the-us
+ for more information.
"""
@staticmethod
group.add_argument('--relation', metavar='ID', type=int,
help='Import a single relation from the API')
group.add_argument('--tiger-data', metavar='DIR',
- help='Add housenumbers from the US TIGER census database.')
+ help='Add housenumbers from the US TIGER census database')
group = parser.add_argument_group('Extra arguments')
group.add_argument('--use-main-api', action='store_true',
help='Use OSM API instead of Overpass to download objects')
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
- help='Set timeout for file downloads.')
+ help='Set timeout for file downloads')
@staticmethod
def run(args):
if args.tiger_data:
tokenizer = tokenizer_factory.get_tokenizer_for_db(args.config)
return tiger_data.add_tiger_data(args.tiger_data,
- args.config, args.threads or 1,
+ args.config,
+ args.threads or psutil.cpu_count() or 1,
tokenizer)
osm2pgsql_params = args.osm2pgsql_options(default_cache=1000, default_threads=1)
group = parser.add_argument_group('Admin tasks')
objs = group.add_mutually_exclusive_group(required=True)
objs.add_argument('--warm', action='store_true',
- help='Warm database caches for search and reverse queries.')
+ help='Warm database caches for search and reverse queries')
objs.add_argument('--check-database', action='store_true',
- help='Check that the database is complete and operational.')
+ help='Check that the database is complete and operational')
objs.add_argument('--migrate', action='store_true',
- help='Migrate the database to a new software version.')
+ help='Migrate the database to a new software version')
objs.add_argument('--analyse-indexing', action='store_true',
- help='Print performance analysis of the indexing process.')
+ help='Print performance analysis of the indexing process')
group = parser.add_argument_group('Arguments for cache warming')
group.add_argument('--search-only', action='store_const', dest='target',
const='search',
)
EXTRADATA_PARAMS = (
- ('addressdetails', 'Include a breakdown of the address into elements.'),
+ ('addressdetails', 'Include a breakdown of the address into elements'),
('extratags', ("Include additional information if available "
- "(e.g. wikipedia link, opening hours).")),
- ('namedetails', 'Include a list of alternative names.')
+ "(e.g. wikipedia link, opening hours)")),
+ ('namedetails', 'Include a list of alternative names')
)
DETAILS_SWITCHES = (
- ('addressdetails', 'Include a breakdown of the address into elements.'),
- ('keywords', 'Include a list of name keywords and address keywords.'),
- ('linkedplaces', 'Include a details of places that are linked with this one.'),
- ('hierarchy', 'Include details of places lower in the address hierarchy.'),
- ('group_hierarchy', 'Group the places by type.'),
- ('polygon_geojson', 'Include geometry of result.')
+ ('addressdetails', 'Include a breakdown of the address into elements'),
+ ('keywords', 'Include a list of name keywords and address keywords'),
+ ('linkedplaces', 'Include a details of places that are linked with this one'),
+ ('hierarchy', 'Include details of places lower in the address hierarchy'),
+ ('group_hierarchy', 'Group the places by type'),
+ ('polygon_geojson', 'Include geometry of result')
)
def _add_api_output_arguments(parser):
help='Preferred language order for presenting search results')
group.add_argument('--polygon-output',
choices=['geojson', 'kml', 'svg', 'text'],
- help='Output geometry of results as a GeoJSON, KML, SVG or WKT.')
+ help='Output geometry of results as a GeoJSON, KML, SVG or WKT')
group.add_argument('--polygon-threshold', type=float, metavar='TOLERANCE',
help=("Simplify output geometry."
"Parameter is difference tolerance in degrees."))
class APISearch:
"""\
- Execute API search query.
+ Execute a search query.
+
+ This command works exactly the same as if calling the /search endpoint on
+ the web API. See the online documentation for more details on the
+ various parameters:
+ https://nominatim.org/release-docs/latest/api/Search/
"""
@staticmethod
group = parser.add_argument_group('Result limitation')
group.add_argument('--countrycodes', metavar='CC,..',
- help='Limit search results to one or more countries.')
+ help='Limit search results to one or more countries')
group.add_argument('--exclude_place_ids', metavar='ID,..',
help='List of search object to be excluded')
group.add_argument('--limit', type=int,
class APIReverse:
"""\
Execute API reverse query.
+
+ This command works exactly the same as if calling the /reverse endpoint on
+ the web API. See the online documentation for more details on the
+ various parameters:
+ https://nominatim.org/release-docs/latest/api/Reverse/
"""
@staticmethod
class APILookup:
"""\
Execute API lookup query.
+
+ This command works exactly the same as if calling the /lookup endpoint on
+ the web API. See the online documentation for more details on the
+ various parameters:
+ https://nominatim.org/release-docs/latest/api/Lookup/
"""
@staticmethod
class APIDetails:
"""\
Execute API details query.
+
+ This command works exactly the same as if calling the /details endpoint on
+ the web API. See the online documentation for more details on the
+ various parameters:
+ https://nominatim.org/release-docs/latest/api/Details/
"""
@staticmethod
objs.add_argument('--relation', '-r', type=int,
help="Look up the OSM relation with the given ID.")
objs.add_argument('--place_id', '-p', type=int,
- help='Database internal identifier of the OSM object to look up.')
+ help='Database internal identifier of the OSM object to look up')
group.add_argument('--class', dest='object_class',
help=("Class type to disambiguated multiple entries "
"of the same object."))
class APIStatus:
"""\
Execute API status query.
+
+ This command works exactly the same as if calling the /status endpoint on
+ the web API. See the online documentation for more details on the
+ various parameters:
+ https://nominatim.org/release-docs/latest/api/Status/
"""
@staticmethod
class UpdateIndex:
"""\
Reindex all new and modified data.
+
+ Indexing is the process of computing the address and search terms for
+ the places in the database. Every time data is added or changed, indexing
+ needs to be run. Imports and replication updates automatically take care
+ of indexing. For other cases, this function allows to run indexing manually.
"""
@staticmethod
"""\
Recompute auxiliary data used by the indexing process.
- These functions must not be run in parallel with other update commands.
+ This sub-command updates various static data and functions in the database.
+ It usually needs to be run after changing various aspects of the
+ configuration. The configuration documentation will mention the exact
+ command to use in such case.
+
+ Warning: the 'update' command must not be run in parallel with other update
+ commands like 'replication' or 'add-data'.
"""
def __init__(self):
self.tokenizer = None
group.add_argument('--functions', action='store_true',
help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true',
- help='Update Wikipedia/data importance numbers.')
+ help='Update Wikipedia/data importance numbers')
group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
"Postcode updates on a frozen database is not possible.")
if args.word_counts:
- LOG.warning('Recompute frequency of full-word search terms')
- refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir)
+ LOG.warning('Recompute word statistics')
+ self._get_tokenizer(args.config).update_statistics()
if args.address_levels:
cfg = Path(args.config.ADDRESS_LEVEL_CONFIG)
class UpdateReplication:
"""\
Update the database using an online replication service.
+
+ An OSM replication service is an online service that provides regular
+ updates (OSM diff files) for the planet or regions of it. The OSMF
+ provides the primary replication service for the full planet at
+ https://planet.osm.org/replication/ but there are other providers of
+ extracts of OSM data who provide such a service as well.
+
+ This sub-command allows to set up such a replication service and download
+ and import updates at regular intervals. You need to call '--init' once to
+ set up the process or whenever you change the replication configuration
+ parameters. Without any arguments, the sub-command will go into a loop and
+ continuously apply updates as they become available. Giving `--once` just
+ downloads and imports the next batch of updates.
"""
@staticmethod
help=("Download and apply updates only once. When "
"not set, updates are continuously applied"))
group.add_argument('--no-index', action='store_false', dest='do_index',
- help=("Do not index the new data. Only applicable "
+ help=("Do not index the new data. Only usable "
"together with --once"))
group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
help='Size of cache to be used by osm2pgsql (in MB)')
group = parser.add_argument_group('Download parameters')
group.add_argument('--socket-timeout', dest='socket_timeout', type=int, default=60,
- help='Set timeout for file downloads.')
+ help='Set timeout for file downloads')
@staticmethod
def _init_replication(args):
class SetupAll:
"""\
Create a new Nominatim database from an OSM file.
+
+ This sub-command sets up a new Nominatim database from scratch starting
+ with creating a new database in Postgresql. The user running this command
+ needs superuser rights on the database.
"""
@staticmethod
group = group_name.add_mutually_exclusive_group(required=True)
group.add_argument('--osm-file', metavar='FILE', action='append',
help='OSM file to be imported'
- ' (repeat for importing multiple files.')
+ ' (repeat for importing multiple files)')
group.add_argument('--continue', dest='continue_at',
choices=['load-data', 'indexing', 'db-postprocess'],
help='Continue an import that was interrupted')
group.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
group.add_argument('--index-noanalyse', action='store_true',
- help='Do not perform analyse operations during index')
+ help='Do not perform analyse operations during index (expert only)')
@staticmethod
class ImportSpecialPhrases:
"""\
Import special phrases.
+
+ Special phrases are search terms that narrow down the type of object
+ that should be searched. For example, you might want to search for
+ 'Hotels in Barcelona'. The OSM wiki has a selection of special phrases
+ in many languages, which can be imported with this command.
+
+ You can also provide your own phrases in a CSV file. The file needs to have
+ the following five columns:
+ * phrase - the term expected for searching
+ * class - the OSM tag key of the object type
+ * type - the OSM tag value of the object type
+ * operator - the kind of search to be done (one of: in, near, name, -)
+ * plural - whether the term is a plural or not (Y/N)
+
+ An example file can be found in the Nominatim sources at
+ 'test/testdb/full_en_phrases_test.csv'.
"""
@staticmethod
def add_args(parser):
group = parser.add_argument_group('Input arguments')
group.add_argument('--import-from-wiki', action='store_true',
- help='Import special phrases from the OSM wiki to the database.')
+ help='Import special phrases from the OSM wiki to the database')
group.add_argument('--import-from-csv', metavar='FILE',
- help='Import special phrases from a CSV file.')
+ help='Import special phrases from a CSV file')
group.add_argument('--no-replace', action='store_true',
- help='Keep the old phrases and only add the new ones.')
+ help='Keep the old phrases and only add the new ones')
@staticmethod
def run(args):
pass
+ @abstractmethod
+ def update_statistics(self) -> None:
+ """ Recompute any tokenizer statistics necessary for efficient lookup.
+ This function is meant to be called from time to time by the user
+ to improve performance. However, the tokenizer must not depend on
+ it to be called in order to work.
+ """
+ pass
+
+
@abstractmethod
def name_analyzer(self) -> AbstractAnalyzer:
""" Create a new analyzer for tokenizing names and queries
Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
-from collections import Counter
import itertools
import json
import logging
return None
+ def update_statistics(self):
+ """ Recompute frequencies for all name words.
+ """
+ with connect(self.dsn) as conn:
+ with conn.cursor() as cur:
+ cur.drop_table("word_frequencies")
+ LOG.info("Computing word frequencies")
+ cur.execute("""CREATE TEMP TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*)
+ FROM search_name GROUP BY id""")
+ cur.execute("CREATE INDEX ON word_frequencies(id)")
+ LOG.info("Update word table with recomputed frequencies")
+ cur.execute("""UPDATE word
+ SET info = info || jsonb_build_object('count', count)
+ FROM word_frequencies WHERE word_id = id""")
+ cur.drop_table("word_frequencies")
+ conn.commit()
+
+
def name_analyzer(self):
""" Create a new analyzer for tokenizing names and queries
using this tokinzer. Analyzers are context managers and should
sqlp.run_sql_file(conn, 'tokenizer/icu_tokenizer_tables.sql')
conn.commit()
- LOG.warning("Precomputing word tokens")
-
- # get partial words and their frequencies
- words = self._count_partial_terms(conn)
-
- # copy them back into the word table
- with CopyBuffer() as copystr:
- for term, cnt in words.items():
- copystr.add('w', term, json.dumps({'count': cnt}))
-
- with conn.cursor() as cur:
- copystr.copy_out(cur, 'word',
- columns=['type', 'word_token', 'info'])
- cur.execute("""UPDATE word SET word_id = nextval('seq_word')
- WHERE word_id is null and type = 'w'""")
-
- conn.commit()
-
- def _count_partial_terms(self, conn):
- """ Count the partial terms from the names in the place table.
- """
- words = Counter()
- analysis = self.loader.make_token_analysis()
-
- with conn.cursor(name="words") as cur:
- cur.execute(""" SELECT v, count(*) FROM
- (SELECT svals(name) as v FROM place)x
- WHERE length(v) < 75 GROUP BY v""")
-
- for name, cnt in cur:
- word = analysis.search.transliterate(name)
- if word and ' ' in word:
- for term in set(word.split()):
- words[term] += cnt
-
- return words
-
class LegacyICUNameAnalyzer(AbstractAnalyzer):
""" The legacy analyzer uses the ICU library for splitting names.
self._save_config(conn, config)
+ def update_statistics(self):
+ """ Recompute the frequency of full words.
+ """
+ with connect(self.dsn) as conn:
+ with conn.cursor() as cur:
+ cur.drop_table("word_frequencies")
+ LOG.info("Computing word frequencies")
+ cur.execute("""CREATE TEMP TABLE word_frequencies AS
+ SELECT unnest(name_vector) as id, count(*)
+ FROM search_name GROUP BY id""")
+ cur.execute("CREATE INDEX ON word_frequencies(id)")
+ LOG.info("Update word table with recomputed frequencies")
+ cur.execute("""UPDATE word SET search_name_count = count
+ FROM word_frequencies
+ WHERE word_token like ' %' and word_id = id""")
+ cur.drop_table("word_frequencies")
+ conn.commit()
+
def name_analyzer(self):
""" Create a new analyzer for tokenizing names and queries
using this tokinzer. Analyzers are context managers and should
LOG = logging.getLogger()
-def recompute_word_counts(dsn, sql_dir):
- """ Compute the frequency of full-word search terms.
- """
- execute_file(dsn, sql_dir / 'words_from_search_name.sql')
-
-
def _add_address_level_rows_from_entry(rows, entry):
""" Converts a single entry from the JSON format for address rank
descriptions into a flat format suitable for inserting into a
# Currently available tokenizers: legacy
NOMINATIM_TOKENIZER="legacy"
-# Number of occurances of a word before it is considered frequent.
+# Number of occurrences of a word before it is considered frequent.
# Similar to the concept of stop words. Frequent partial words get ignored
# or handled differently during search.
# Changing this value requires a reimport.
# Configuration file for the tokenizer.
# The content depends on the tokenizer used. If left empty the default settings
-# for the chooseen tokenizer will be used. The configuration can only be set
+# for the chosen tokenizer will be used. The configuration can only be set
# on import and not be changed afterwards.
NOMINATIM_TOKENIZER_CONFIG=
# Changing this value requires to run ./utils/setup --create-functions --setup-website.
NOMINATIM_USE_US_TIGER_DATA=no
-# Search in the auxilary housenumber table.
+# Search in the auxiliary housenumber table.
# Changing this value requires to run ./utils/setup --create-functions --setup-website.
NOMINATIM_USE_AUX_LOCATION_DATA=no
# The following settings allow to set a proxy to use when remotely downloading
# data. Host and port are required. Login and password are optional.
NOMINATIM_HTTP_PROXY=no
-NOMINATIM_HTTP_PROXY_HOST=proxy.mydomain.com
+NOMINATIM_HTTP_PROXY_HOST=
NOMINATIM_HTTP_PROXY_PORT=3128
NOMINATIM_HTTP_PROXY_LOGIN=
NOMINATIM_HTTP_PROXY_PASSWORD=
NOMINATIM_DEFAULT_LANGUAGE=
# Enable a special batch query mode.
-# This features is currently undocumented and potentially broken.
+# This feature is currently undocumented and potentially broken.
NOMINATIM_SEARCH_BATCH_MODE=no
# Threshold for searches by name only.
def __init__(self, *args, **kwargs):
self.update_sql_functions_called = False
self.finalize_import_called = False
+ self.update_statistics_called = False
def update_sql_functions(self, *args):
self.update_sql_functions_called = True
def finalize_import(self, *args):
self.finalize_import_called = True
+ def update_statistics(self):
+ self.update_statistics_called = True
+
+
tok = DummyTokenizer()
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db',
lambda *args: tok)
assert func.called == 1
@pytest.mark.parametrize("command,func", [
- ('word-counts', 'recompute_word_counts'),
('address-levels', 'load_address_levels_from_file'),
('wiki-data', 'import_wikipedia_articles'),
('importance', 'recompute_importance'),
assert func_mock.called == 1
+ def test_refresh_word_count(self):
+ assert self.call_nominatim('refresh', '--word-count') == 0
+ assert self.tokenizer_mock.update_statistics_called
+
+
def test_refresh_postcodes(self, mock_func_factory, place_table):
func_mock = mock_func_factory(nominatim.tools.postcodes, 'update_postcodes')
idx_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_postcodes')
assert db_prop(icu_tokenizer.DBCFG_TERM_NORMALIZATION) == ':: lower();'
-def test_init_word_table(tokenizer_factory, test_config, place_row, word_table):
+def test_init_word_table(tokenizer_factory, test_config, place_row, temp_db_cursor):
place_row(names={'name' : 'Test Area', 'ref' : '52'})
place_row(names={'name' : 'No Area'})
place_row(names={'name' : 'Holzstrasse'})
tok = tokenizer_factory()
tok.init_new_db(test_config)
- assert word_table.get_partial_words() == {('test', 1),
- ('no', 1), ('area', 2)}
+ assert temp_db_cursor.table_exists('word')
def test_init_from_project(monkeypatch, test_config, tokenizer_factory):