The file is about 400MB and adds around 4GB to the Nominatim database.
!!! tip
- If you forgot to download the wikipedia rankings, you can also add
- importances after the import. Download the files, then run
- `nominatim refresh --wiki-data --importance`. Updating importances for
- a planet can take a couple of hours.
+    If you forgot to download the wikipedia rankings, you can also add
+    importances after the import. Download the SQL files, then run
+    `nominatim refresh --wiki-data --importance`. Updating importances
+    for a planet will take a couple of hours.
### External postcodes
database or reuse the space later.
!!! warning
- The datastructure for updates are also required when adding additional data
+    The data structures for updates are also required when adding additional data
after the import, for example [TIGER housenumber data](../customize/Tiger.md).
If you plan to use those, you must not use the `--no-updates` parameter.
Do a normal import, add the external data and once you are done with
--- /dev/null
+## Importance
+
+Search requests can yield multiple results that match the original query
+equally well. In such cases Nominatim needs to order the results
+according to a different criterion: importance. This is a measure of how
+likely it is that a user will search for a given place. This section explains
+the sources Nominatim uses for computing the importance of a place and how to
+customize them.
+
+### How importance is computed
+
+The main value for importance is derived from page ranking values of the
+Wikipedia pages for a place. For places that do not have their own
+Wikipedia page, a formula is used that derives a static importance from the
+place's [search rank](../customize/Ranking.md#search-rank).
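+
+For places without a Wikipedia page, the default `compute_importance()`
+function (shown further below) falls back to a linear formula over the
+search rank. As a sketch:
+
+```
+-- Sketch only: fallback importance for a place with search rank 30
+SELECT 0.75001 - (30::float / 40) AS importance;  -- yields 0.00001
+```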
+
+In a second step, a secondary importance value is added which is meant to
+represent how well-known the general area is where the place is located. It
+functions as a tie-breaker between places with very similar primary
+importance values.
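+
+As a sketch of the weighting: the raster stores 16-bit pixel values, and the
+default `compute_importance()` function scales them so that the secondary
+value adds at most 0.0001 to the primary importance:
+
+```
+-- Sketch only: maximum possible tie-breaker contribution
+SELECT 65535::float / 655350000 AS max_secondary_boost;  -- 0.0001
+```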
+
+nominatim.org has preprocessed importance tables for the
+[primary Wikipedia rankings](https://nominatim.org/data/wikimedia-importance.sql.gz)
+and for a secondary importance based on the number of tile views on openstreetmap.org.
+
+### Customizing secondary importance
+
+The secondary importance is implemented as a simple
+[Postgis raster](https://postgis.net/docs/raster.html) table, where Nominatim
+looks up the value for the coordinates of the centroid of a place. You can
+provide your own secondary importance raster in the form of an SQL file named
+`secondary_importance.sql.gz` in your project directory.
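+
+Conceptually, the lookup is a raster point query. Nominatim's default
+importance function runs a query along these lines, where `centroid` stands
+for the centroid geometry of the place being ranked:
+
+```
+SELECT ST_Value(rast, centroid)
+  FROM secondary_importance
+ WHERE ST_Intersects(ST_ConvexHull(rast), centroid)
+ LIMIT 1
+```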
+
+The SQL file needs to drop and (re)create a table `secondary_importance` which
+must at a minimum contain a column `rast` of type `raster`. The raster must
+be in EPSG:4326 and contain 16-bit unsigned ints
+(`raster_constraint_pixel_types(rast) = '{16BUI}'`). Any other columns in the
+table will be ignored. You must furthermore create an index as follows:
+
+```
+CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(rast))
+```
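+
+Putting the requirements together, a hand-written SQL file might contain DDL
+along these lines (a sketch only; the actual raster tiles still need to be
+loaded, for example with the `raster2pgsql` command shown below):
+
+```
+DROP TABLE IF EXISTS secondary_importance;
+CREATE TABLE secondary_importance (rast raster);
+-- ... load the EPSG:4326 16BUI raster tiles into the rast column ...
+CREATE INDEX ON secondary_importance USING gist(ST_ConvexHull(rast));
+```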
+
+The following raster2pgsql command will create a table that conforms to
+the requirements:
+
+```
+raster2pgsql -I -C -Y -d -t 128x128 input.tiff public.secondary_importance
+```
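+
+One way to sanity-check an imported table (a sketch; requires the
+`postgis_raster` extension):
+
+```
+SELECT ST_SRID(rast) AS srid,                     -- expect 4326
+       raster_constraint_pixel_types(rast) AS px  -- expect {16BUI}
+  FROM secondary_importance LIMIT 1
+```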
- 'Configuration Settings': 'customize/Settings.md'
- 'Per-Country Data': 'customize/Country-Settings.md'
- 'Place Ranking' : 'customize/Ranking.md'
+ - 'Importance' : 'customize/Importance.md'
- 'Tokenizers' : 'customize/Tokenizers.md'
- 'Special Phrases': 'customize/Special-Phrases.md'
- 'External data: US housenumbers from TIGER': 'customize/Tiger.md'
CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
- osm_type varchar(1), osm_id BIGINT)
+ rank_search SMALLINT,
+ centroid GEOMETRY)
RETURNS place_importance
AS $$
DECLARE
match RECORD;
result place_importance;
BEGIN
- FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code)
- WHERE language is not NULL
+ -- add importance by wikipedia article if the place has one
+ FOR match IN
+ SELECT * FROM get_wikipedia_match(extratags, country_code)
+ WHERE language is not NULL
LOOP
result.importance := match.importance;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
- IF extratags ? 'wikidata' THEN
+ -- Nothing? Then try with the wikidata tag.
+ IF result.importance is null AND extratags ? 'wikidata' THEN
FOR match IN SELECT * FROM wikipedia_article
WHERE wd_page_title = extratags->'wikidata'
- ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP
+ ORDER BY language = 'en' DESC, langcount DESC LIMIT 1
+ LOOP
result.importance := match.importance;
result.wikipedia := match.language || ':' || match.title;
RETURN result;
END LOOP;
END IF;
- RETURN null;
+ -- Still nothing? Fall back to a default.
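+  -- The fallback maps the search rank linearly: rank 0 yields 0.75001, rank 30 yields 0.00001.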
+ IF result.importance is null THEN
+ result.importance := 0.75001 - (rank_search::float / 40);
+ END IF;
+
+{% if 'secondary_importance' in db.tables %}
+ FOR match IN
+ SELECT ST_Value(rast, centroid) as importance
+ FROM secondary_importance
+ WHERE ST_Intersects(ST_ConvexHull(rast), centroid) LIMIT 1
+ LOOP
+ -- Secondary importance as tie breaker with 0.0001 weight.
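+      -- 655350000 = 65535 (max 16BUI pixel value) * 10000, so the boost is at most 0.0001.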
+ result.importance := result.importance + match.importance::float / 655350000;
+ END LOOP;
+{% endif %}
+
+ RETURN result;
END;
$$
LANGUAGE plpgsql;
NEW.importance := null;
SELECT wikipedia, importance
- FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id)
+ FROM compute_importance(NEW.extratags, NEW.country_code, NEW.rank_search, NEW.centroid)
INTO NEW.wikipedia,NEW.importance;
{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %}
IF linked_place is not null THEN
-- Recompute the ranks here as the ones from the linked place might
-- have been shifted to accommodate surrounding boundaries.
- SELECT place_id, osm_id, class, type, extratags,
+ SELECT place_id, osm_id, class, type, extratags, rank_search,
centroid, geometry,
(compute_place_rank(country_code, osm_type, class, type, admin_level,
(extratags->'capital') = 'yes', null)).*
SELECT wikipedia, importance
FROM compute_importance(location.extratags, NEW.country_code,
- 'N', location.osm_id)
+ location.rank_search, NEW.centroid)
INTO linked_wikipedia,linked_importance;
-- Use the maximum importance if one could be computed from the linked object.
-- null table so it won't error
-- Deliberately no DROP: importing the table is expensive and the data is static, so keep it if it already exists.
-CREATE TABLE wikipedia_article (
+CREATE TABLE IF NOT EXISTS wikipedia_article (
language text NOT NULL,
title text NOT NULL,
langcount integer,
wd_page_title text,
instance_of text
);
-ALTER TABLE ONLY wikipedia_article ADD CONSTRAINT wikipedia_article_pkey PRIMARY KEY (language, title);
-CREATE INDEX idx_wikipedia_article_osm_id ON wikipedia_article USING btree (osm_type, osm_id);
-CREATE TABLE wikipedia_redirect (
+CREATE TABLE IF NOT EXISTS wikipedia_redirect (
language text,
from_title text,
to_title text
);
-ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMARY KEY (language, from_title);
-- osm2pgsql does not create indexes on the middle tables for Nominatim
-- Add one for lookup of associated street relations.
address_levels: bool
functions: bool
wiki_data: bool
+ secondary_importance: bool
importance: bool
website: bool
diffs: bool
help='Update the PL/pgSQL functions in the database')
group.add_argument('--wiki-data', action='store_true',
help='Update Wikipedia/data importance numbers')
+ group.add_argument('--secondary-importance', action='store_true',
+ help='Update secondary importance raster data')
group.add_argument('--importance', action='store_true',
help='Recompute place importances (expensive!)')
group.add_argument('--website', action='store_true',
help='Enable debug warning statements in functions')
- def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches
+ def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements
from ..tools import refresh, postcodes
from ..indexer.indexer import Indexer
with connect(args.config.get_libpq_dsn()) as conn:
refresh.load_address_levels_from_config(conn, args.config)
+ # Attention: must come BEFORE functions
+ if args.secondary_importance:
+ with connect(args.config.get_libpq_dsn()) as conn:
+ # If the table did not exist before, then the importance code
+ # needs to be enabled.
+ if not conn.table_exists('secondary_importance'):
+ args.functions = True
+
+ LOG.warning('Import secondary importance raster data from %s', args.project_dir)
+ if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+ args.project_dir) > 0:
+                LOG.fatal('FATAL: Cannot update secondary importance raster data')
+ return 1
+
if args.functions:
LOG.warning('Create functions')
with connect(args.config.get_libpq_dsn()) as conn:
help="Do not keep tables that are only needed for "
"updating the database later")
group2.add_argument('--offline', action='store_true',
- help="Do not attempt to load any additional data from the internet")
+ help="Do not attempt to load any additional data from the internet")
group3 = parser.add_argument_group('Expert options')
group3.add_argument('--ignore-errors', action='store_true',
help='Continue import even when errors in SQL are present')
drop=args.no_updates,
ignore_errors=args.ignore_errors)
- self._setup_tables(args.config, args.reverse_only)
-
LOG.warning('Importing wikipedia importance data')
data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir)
if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(),
data_path) > 0:
LOG.error('Wikipedia importance dump file not found. '
- 'Will be using default importances.')
+                  'Importance values for locations will be calculated '
+                  'without Wikipedia importance data.')
+
+ LOG.warning('Importing secondary importance raster data')
+ if refresh.import_secondary_importance(args.config.get_libpq_dsn(),
+ args.project_dir) != 0:
+ LOG.error('Secondary importance file not imported. '
+ 'Falling back to default ranking.')
+
+ self._setup_tables(args.config, args.reverse_only)
if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')
with conn.cursor() as cur:
cur.execute('CREATE EXTENSION IF NOT EXISTS hstore')
cur.execute('CREATE EXTENSION IF NOT EXISTS postgis')
+
+ postgis_version = conn.postgis_version_tuple()
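+            # Since PostGIS 3, raster support lives in the separate postgis_raster extension.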
+ if postgis_version[0] >= 3:
+ cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster')
+
conn.commit()
_require_version('PostGIS',
from psycopg2 import sql as pysql
from nominatim.config import Configuration
-from nominatim.db.connection import Connection
+from nominatim.db.connection import Connection, connect
from nominatim.db.utils import execute_file
from nominatim.db.sql_preprocessor import SQLPreprocessor
from nominatim.version import version_str
return 0
+def import_secondary_importance(dsn: str, data_path: Path, ignore_errors: bool = False) -> int:
+ """ Replaces the secondary importance raster data table with new data.
+
+        Returns 0 if all was well, 1 if the raster SQL file could not be
+        found and 2 if the PostGIS version is too old for raster support.
+        Throws an exception if there was an error reading the file.
+ """
+ datafile = data_path / 'secondary_importance.sql.gz'
+ if not datafile.exists():
+ return 1
+
+ with connect(dsn) as conn:
+ postgis_version = conn.postgis_version_tuple()
+ if postgis_version[0] < 3:
+ LOG.error('PostGIS version is too old for using OSM raster data.')
+ return 2
+
+ execute_file(dsn, datafile, ignore_errors=ignore_errors)
+
+ return 0
def recompute_importance(conn: Connection) -> None:
""" Recompute wikipedia links and importance for all entries in placex.
cur.execute("""
UPDATE placex SET (wikipedia, importance) =
(SELECT wikipedia, importance
- FROM compute_importance(extratags, country_code, osm_type, osm_id))
+                FROM compute_importance(extratags, country_code, rank_search, centroid))
""")
cur.execute("""
UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance
-Subproject commit b0352aa8f15e2739ba36d72561854a2738123770
+Subproject commit 6a5d2500e9689f55485d186306aadc55560085fd
| 0 |
Then there are duplicates
+ @fail-legacy
Scenario: Search with bounded viewbox in right area
- When sending json search query "bar" with address
+ When sending json search query "post" with address
| bounded | viewbox |
| 1 | 9,47,10,48 |
Then result addresses contain
| ID | town |
| 0 | Vaduz |
- When sending json search query "bar" with address
+ When sending json search query "post" with address
| bounded | viewbox |
| 1 | 9.49712,47.17122,9.52605,47.16242 |
Then result addresses contain
Then result has centroid in 9.49712,47.16242,9.52605,47.17122
Scenario: Prefer results within viewbox
- When sending json search query "Gässle" with address
- | accept-language |
- | en |
- Then result addresses contain
- | ID | town |
- | 0 | Balzers |
When sending json search query "Gässle" with address
| accept-language | viewbox |
| en | 9.52413,47.10759,9.53140,47.10539 |
Then result addresses contain
| ID | village |
| 0 | Triesen |
+ When sending json search query "Gässle" with address
+ | accept-language | viewbox |
+ | en | 9.45949,47.08421,9.54094,47.05466 |
+ Then result addresses contain
+ | ID | town |
+ | 0 | Balzers |
Scenario: viewboxes cannot be points
When sending json search query "foo"
Scenario: Limit number of search results
When sending json search query "landstr"
+ | dedupe |
+ | 0 |
Then more than 4 results are returned
When sending json search query "landstr"
- | limit |
- | 4 |
+ | limit | dedupe |
+ | 4 | 0 |
Then exactly 4 results are returned
Scenario: Limit parameter must be a number
self.api_db_done = True
if not self._reuse_or_drop_db(self.api_test_db):
- testdata = Path('__file__') / '..' / '..' / 'testdb'
- self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve())
+ testdata = (Path(__file__) / '..' / '..' / '..' / 'testdb').resolve()
+ self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata)
+ simp_file = Path(self.website_dir.name) / 'secondary_importance.sql.gz'
+ simp_file.symlink_to(testdata / 'secondary_importance.sql.gz')
try:
self.run_nominatim('import', '--osm-file', str(self.api_test_file))
- self.run_nominatim('add-data', '--tiger-data', str((testdata / 'tiger').resolve()))
+ self.run_nominatim('add-data', '--tiger-data', str(testdata / 'tiger'))
self.run_nominatim('freeze')
if self.tokenizer == 'legacy':
- phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve())
+ phrase_file = str(testdata / 'specialphrases_testdb.sql')
run_script(['psql', '-d', self.api_test_db, '-f', phrase_file])
else:
- csv_path = str((testdata / 'full_en_phrases_test.csv').resolve())
+ csv_path = str(testdata / 'full_en_phrases_test.csv')
self.run_nominatim('special-phrases', '--import-from-csv', csv_path)
except:
self.db_drop_database(self.api_test_db)
mock_func_factory(nominatim.data.country_info, 'setup_country_tables'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
+ mock_func_factory(nominatim.tools.refresh, 'import_secondary_importance'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_tables'),
assert self.call_nominatim('refresh', '--wiki-data') == 1
+ def test_refresh_secondary_importance_file_not_found(self):
+ assert self.call_nominatim('refresh', '--secondary-importance') == 1
+
+
+ def test_refresh_secondary_importance_new_table(self, mock_func_factory):
+ mocks = [mock_func_factory(nominatim.tools.refresh, 'import_secondary_importance'),
+ mock_func_factory(nominatim.tools.refresh, 'create_functions')]
+
+ assert self.call_nominatim('refresh', '--secondary-importance') == 0
+ assert mocks[0].called == 1
+ assert mocks[1].called == 1
+
def test_refresh_importance_computed_after_wiki_import(self, monkeypatch):
calls = []
assert refresh.import_wikipedia_articles(dsn, Path('.')) == 1
+def test_refresh_import_secondary_importance_non_existing(dsn):
+ assert refresh.import_secondary_importance(dsn, Path('.')) == 1
+
+def test_refresh_import_secondary_importance_testdb(dsn, src_dir, temp_db_conn, temp_db_cursor):
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+
+ if temp_db_conn.postgis_version_tuple()[0] < 3:
+ assert refresh.import_secondary_importance(dsn, src_dir / 'test' / 'testdb') > 0
+ else:
+ temp_db_cursor.execute('CREATE EXTENSION postgis_raster')
+ assert refresh.import_secondary_importance(dsn, src_dir / 'test' / 'testdb') == 0
+
+ assert temp_db_conn.table_exists('secondary_importance')
+
+
@pytest.mark.parametrize("replace", (True, False))
def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, replace):
if replace:
temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE,
country_code varchar(2),
-                                  osm_type varchar(1), osm_id BIGINT,
+                                  rank_search SMALLINT,
+                                  centroid GEOMETRY,
OUT importance FLOAT,
OUT wikipedia TEXT)
AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")