From ac467c7a2d8cfdcf23f979644e9620f4189ec7ec Mon Sep 17 00:00:00 2001 From: Tareq Al-Ahdal Date: Sun, 24 Jul 2022 19:04:23 +0800 Subject: [PATCH] Enhanced the implementation of OSM views GeoTIFF import functionality --- docs/admin/Import.md | 27 ++++++++++++++++++++++----- nominatim/clicmd/args.py | 1 + nominatim/clicmd/refresh.py | 18 ++++++++++-------- nominatim/clicmd/setup.py | 17 ++++++++++------- nominatim/tools/database_import.py | 11 +++++------ nominatim/tools/refresh.py | 24 +++++++++++++++--------- settings/env.defaults | 4 ---- test/python/cli/test_cmd_import.py | 2 +- test/python/cli/test_cmd_refresh.py | 14 +++++--------- test/python/tools/test_freeze.py | 1 - test/python/tools/test_refresh.py | 11 ----------- 11 files changed, 69 insertions(+), 61 deletions(-) diff --git a/docs/admin/Import.md b/docs/admin/Import.md index 90294959..91b92728 100644 --- a/docs/admin/Import.md +++ b/docs/admin/Import.md @@ -78,11 +78,28 @@ This data is available as a binary download. Put it into your project directory: The file is about 400MB and adds around 4GB to the Nominatim database. +### OSM views +OSM publishes aggregate map access numbers that are generated based on the users’ +behavior when viewing locations on the map. This data is also optional and +it complements wikipedia/wikidata rankings to further enhance the search results +if added. +OSM views data is available as a GeoTIFF file. Put it into your project directory: + + cd $PROJECT_DIR + wget https://qrank.wmcloud.org/download/osmviews.tiff + +The file is about 380MB and adds around 4GB to the Nominatim database. Importing +OSM views into Nominatim takes a little over 3 hours. + +!!! warning + Importing OSM views is currently an experimental feature. OSM views data are + not yet included in the importance values calculations. + !!! tip - If you forgot to download the wikipedia rankings, you can also add - importances after the import. 
Download the files, then run - `nominatim refresh --wiki-data --importance`. Updating importances for - a planet can take a couple of hours. + If you forgot to download the wikipedia rankings or OSM views, then you can + also add importances after the import. To add both, download their files, then + run `nominatim refresh --wiki-data --osm-views --importance`. Updating + importances for a planet will take a couple of hours. ### External postcodes @@ -139,7 +156,7 @@ import. So this option is particularly interesting if you plan to transfer the database or reuse the space later. !!! warning - The datastructure for updates are also required when adding additional data + The data structures for updates are also required when adding additional data after the import, for example [TIGER housenumber data](../customize/Tiger.md). If you plan to use those, you must not use the `--no-updates` parameter. Do a normal import, add the external data and once you are done with diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index 4457db5f..6edfda7b 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -115,6 +115,7 @@ class NominatimArgs: address_levels: bool functions: bool wiki_data: bool + osm_views: bool importance: bool website: bool diffs: bool diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 6f307a65..8838a740 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -85,7 +85,7 @@ class UpdateRefresh: help='Enable debug warning statements in functions') - def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches + def run(self, args: NominatimArgs) -> int: #pylint: disable=too-many-branches, too-many-statements from ..tools import refresh, postcodes from ..indexer.indexer import Indexer @@ -132,15 +132,17 @@ class UpdateRefresh: data_path) > 0: LOG.fatal('FATAL: Wikipedia importance dump file not found') return 1 - + if args.osm_views: - data_path = 
Path(args.config.OSM_VIEWS_DATA_PATH - or args.project_dir) + data_path = Path(args.project_dir) LOG.warning('Import OSM views GeoTIFF data from %s', data_path) - if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), - data_path) > 0: - LOG.fatal('FATAL: OSM views GeoTIFF file not found') - return 1 + with connect(args.config.get_libpq_dsn()) as conn: + if refresh.import_osm_views_geotiff(conn, data_path) == 1: + LOG.fatal('FATAL: OSM views GeoTIFF file not found') + return 1 + if refresh.import_osm_views_geotiff(conn, data_path) == 2: + LOG.fatal('FATAL: PostGIS version number is less than 3') + return 1 # Attention: importance MUST come after wiki data import. if args.importance: diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index c7366c3a..c1cbab21 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -105,14 +105,17 @@ class SetupAll: if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), data_path) > 0: LOG.error('Wikipedia importance dump file not found. ' - 'Calculating importance values of locations will not use Wikipedia importance data.') - + 'Calculating importance values of locations will not \ + use Wikipedia importance data.') + LOG.warning('Importing OSM views GeoTIFF data') - database_import.import_osm_views_geotiff() - data_path = Path(args.config.OSM_VIEWS_DATA_PATH or args.project_dir) - if refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), - data_path) > 0: - LOG.error('OSM views GeoTIFF file not found. ' + data_path = Path(args.project_dir) + with connect(args.config.get_libpq_dsn()) as conn: + if refresh.import_osm_views_geotiff(conn, data_path) == 1: + LOG.error('OSM views GeoTIFF file not found. ' + 'Calculating importance values of locations will not use OSM views data.') + elif refresh.import_osm_views_geotiff(conn, data_path) == 2: + LOG.error('PostGIS version number is less than 3. 
' 'Calculating importance values of locations will not use OSM views data.') if args.continue_at is None or args.continue_at == 'load-data': diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 20883b96..cb620d41 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -75,7 +75,11 @@ def setup_database_skeleton(dsn: str, rouser: Optional[str] = None) -> None: with conn.cursor() as cur: cur.execute('CREATE EXTENSION IF NOT EXISTS hstore') cur.execute('CREATE EXTENSION IF NOT EXISTS postgis') - cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster') + + postgis_version = conn.postgis_version_tuple() + if postgis_version[0] >= 3: + cur.execute('CREATE EXTENSION IF NOT EXISTS postgis_raster') + conn.commit() _require_version('PostGIS', @@ -247,8 +251,3 @@ def create_search_indices(conn: Connection, config: Configuration, sql.run_parallel_sql_file(config.get_libpq_dsn(), 'indices.sql', min(8, threads), drop=drop) - - -def import_osm_views_geotiff(): - """Import OSM views GeoTIFF file""" - subprocess.run("raster2pgsql -s 4326 -I -C -t 100x100 -e osmviews.tiff public.osmviews | psql nominatim", shell=True, check=True) diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index a3b6c4f0..1bb801f5 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -9,6 +9,7 @@ Functions for bringing auxiliary data in the database up-to-date. """ from typing import MutableSequence, Tuple, Any, Type, Mapping, Sequence, List, cast import logging +import subprocess from textwrap import dedent from pathlib import Path @@ -146,10 +147,10 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F return 0 -def import_osm_views_geotiff(dsn, data_path, ignore_errors=False): +def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int: """ Replaces the OSM views table with new data. 
- - Returns 0 if all was well and 1 if the GeoTIFF file could not + + Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not be found. Throws an exception if there was an error reading the file. """ datafile = data_path / 'osmviews.tiff' @@ -157,12 +158,17 @@ def import_osm_views_geotiff(dsn, data_path, ignore_errors=False): if not datafile.exists(): return 1 - pre_code = """BEGIN; - DROP TABLE IF EXISTS "osmviews"; - """ - post_code = "COMMIT" - execute_file(dsn, datafile, ignore_errors=ignore_errors, - pre_code=pre_code, post_code=post_code) + postgis_version = conn.postgis_version_tuple() + if postgis_version[0] < 3: + return 2 + + with conn.cursor() as cur: + cur.execute('DROP TABLE IF EXISTS "osm_views"') + conn.commit() + + cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \ + public.osm_views | psql nominatim > /dev/null" + subprocess.run(["/bin/bash", "-c" , cmd], check=True) return 0 diff --git a/settings/env.defaults b/settings/env.defaults index c9759262..3115f438 100644 --- a/settings/env.defaults +++ b/settings/env.defaults @@ -86,10 +86,6 @@ NOMINATIM_TIGER_DATA_PATH= # When unset, the data is expected to be located in the project directory. NOMINATIM_WIKIPEDIA_DATA_PATH= -# Directory where to find OSM views GeoTIFF file. -# When unset, the data is expected to be located in the project directory. -NOMINATIM_OSM_VIEWS_DATA_PATH= - # Configuration file for special phrase import. # OBSOLETE: use `nominatim special-phrases --config ` or simply put # a custom phrase-settings.json into your project directory. 
diff --git a/test/python/cli/test_cmd_import.py b/test/python/cli/test_cmd_import.py index b6a8721f..17c6697d 100644 --- a/test/python/cli/test_cmd_import.py +++ b/test/python/cli/test_cmd_import.py @@ -69,7 +69,7 @@ class TestCliImportWithDb: assert cf_mock.called > 1 for mock in mocks: - assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + assert mock.called > 0, "Mock '{}' not called".format(mock.func_name) def test_import_continue_load_data(self, mock_func_factory): diff --git a/test/python/cli/test_cmd_refresh.py b/test/python/cli/test_cmd_refresh.py index ed3a68ba..af06d161 100644 --- a/test/python/cli/test_cmd_refresh.py +++ b/test/python/cli/test_cmd_refresh.py @@ -24,7 +24,7 @@ class TestRefresh: @pytest.mark.parametrize("command,func", [ ('address-levels', 'load_address_levels_from_config'), ('wiki-data', 'import_wikipedia_articles'), - ('osm-views', 'import_osm_views_geotiff') + ('osm-views', 'import_osm_views_geotiff'), ('importance', 'recompute_importance'), ('website', 'setup_website'), ]) @@ -32,7 +32,7 @@ class TestRefresh: func_mock = mock_func_factory(nominatim.tools.refresh, func) assert self.call_nominatim('refresh', '--' + command) == 0 - assert func_mock.called == 1 + assert func_mock.called > 0 def test_refresh_word_count(self): @@ -72,21 +72,17 @@ class TestRefresh: assert self.call_nominatim('refresh', '--wiki-data') == 1 - def test_refresh_osm_views_geotiff_file_not_found(self, monkeypatch): - monkeypatch.setenv('NOMINATIM_OSM_VIEWS_DATA_PATH', 'gjoiergjeroi345Q') - + def test_refresh_osm_views_geotiff_file_not_found(self): assert self.call_nominatim('refresh', '--osm-views') == 1 - def test_refresh_importance_computed_after_wiki_and_osm_views_import(self, monkeypatch): + def test_refresh_importance_computed_after_wiki_import(self, monkeypatch): calls = [] monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles', lambda *args, **kwargs: calls.append('import') or 0) - 
monkeypatch.setattr(nominatim.tools.refresh, 'import_osm_views_geotiff', - lambda *args, **kwargs: calls.append('import') or 0) monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance', lambda *args, **kwargs: calls.append('update')) - assert self.call_nominatim('refresh', '--importance', '--wiki-data', '--osm-views') == 0 + assert self.call_nominatim('refresh', '--importance', '--wiki-data') == 0 assert calls == ['import', 'update'] diff --git a/test/python/tools/test_freeze.py b/test/python/tools/test_freeze.py index 6e852550..3ebb1730 100644 --- a/test/python/tools/test_freeze.py +++ b/test/python/tools/test_freeze.py @@ -21,7 +21,6 @@ NOMINATIM_DROP_TABLES = [ 'address_levels', 'location_area', 'location_area_country', 'location_area_large_100', 'location_road_1', - 'osmviews' 'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways', 'search_name_111', 'wikipedia_article', 'wikipedia_redirect' diff --git a/test/python/tools/test_refresh.py b/test/python/tools/test_refresh.py index c8ebdab8..311c8468 100644 --- a/test/python/tools/test_refresh.py +++ b/test/python/tools/test_refresh.py @@ -34,17 +34,6 @@ def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, r assert temp_db_cursor.table_rows('wikipedia_redirect') > 0 -@pytest.mark.parametrize("replace", (True, False)) -def test_refresh_import_osm_views_geotiff(dsn, src_dir, table_factory, temp_db_cursor, replace): - if replace: - table_factory('osmviews') - - # use the small osm views GeoTIFF file for the API testdb - assert refresh.import_osm_views_geotiff(dsn, src_dir / 'test' / 'testdb') == 0 - - assert temp_db_cursor.table_rows('osmviews') > 0 - - def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db_cursor): temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE, country_code varchar(2), -- 2.39.5