From: Tareq Al-Ahdal Date: Thu, 25 Aug 2022 07:45:18 +0000 (+0200) Subject: Integrated OSM views into importance computation X-Git-Tag: v4.2.0~25^2~6 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/0ab0f0ea44c2e93043f00e90c2935f7e742ed9e4 Integrated OSM views into importance computation --- diff --git a/lib-sql/functions/importance.sql b/lib-sql/functions/importance.sql index ac3aa7f8..30e778d1 100644 --- a/lib-sql/functions/importance.sql +++ b/lib-sql/functions/importance.sql @@ -98,19 +98,74 @@ $$ LANGUAGE plpgsql STABLE; +CREATE OR REPLACE FUNCTION get_osm_views(centroid GEOMETRY) + RETURNS BIGINT + AS $$ +DECLARE + result BIGINT; +BEGIN + SELECT ST_Value(osm_views.rast, centroid) + FROM osm_views + WHERE ST_Intersects(ST_ConvexHull(osm_views.rast), centroid) LIMIT 1 INTO result; + + return COALESCE(result, 0); +END; +$$ +LANGUAGE plpgsql STABLE; + + +CREATE OR REPLACE FUNCTION normalize_osm_views(views BIGINT) + RETURNS FLOAT + AS $$ + DECLARE + normalized_osm_views FLOAT; + max_views BIGINT; + BEGIN + IF views > 0 THEN + -- Get the highest view count to use it in normalizing the data + SELECT max_views_count FROM osm_views_stat INTO max_views; + normalized_osm_views := (LOG(views))/(LOG(max_views)); + ELSE + normalized_osm_views := 0.0; + END IF; + + RETURN normalized_osm_views; + END; +$$ +LANGUAGE plpgsql; + + CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE, country_code varchar(2), - osm_type varchar(1), osm_id BIGINT) + osm_type varchar(1), osm_id BIGINT, + centroid GEOMETRY) RETURNS place_importance AS $$ DECLARE match RECORD; result place_importance; + osm_views_exists BIGINT; + views BIGINT; BEGIN + -- check if osm_views table exists + SELECT COUNT(table_name) + INTO osm_views_exists + FROM information_schema.tables + WHERE table_schema LIKE 'public' AND + table_type LIKE 'BASE TABLE' AND + table_name = 'osm_views'; + + -- add importance by OSM views if osm_views table exists + IF osm_views_exists THEN + views := get_osm_views(centroid); + result.importance := normalize_osm_views(views) * 0.35; + END IF; + + -- add importance by wiki data if the place has one FOR match IN SELECT * FROM get_wikipedia_match(extratags, country_code) WHERE language is not NULL LOOP - result.importance := match.importance; + result.importance := COALESCE(result.importance, 0) + match.importance * 0.65; result.wikipedia := match.language || ':' || match.title; RETURN result; END LOOP; @@ -119,13 +174,13 @@ BEGIN FOR match IN SELECT * FROM wikipedia_article WHERE wd_page_title = extratags->'wikidata' ORDER BY language = 'en' DESC, langcount DESC LIMIT 1 LOOP - result.importance := match.importance; + result.importance := COALESCE(result.importance, 0) + match.importance * 0.65; result.wikipedia := match.language || ':' || match.title; RETURN result; END LOOP; END IF; - RETURN null; + RETURN result; END; $$ LANGUAGE plpgsql; diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index a2276f07..4432fa24 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -965,7 +965,7 @@ BEGIN NEW.importance := null; SELECT wikipedia, importance - FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id) + FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id, NEW.centroid) INTO NEW.wikipedia,NEW.importance; {% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %} @@ -1088,7 +1088,7 @@ BEGIN SELECT wikipedia, importance FROM compute_importance(location.extratags, NEW.country_code, - 'N', location.osm_id) + 'N', location.osm_id, NEW.centroid) INTO linked_wikipedia,linked_importance; -- Use the maximum importance if one could be computed from the linked object. diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 8838a740..d03d682f 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -136,13 +136,13 @@ class UpdateRefresh: if args.osm_views: data_path = Path(args.project_dir) LOG.warning('Import OSM views GeoTIFF data from %s', data_path) - with connect(args.config.get_libpq_dsn()) as conn: - if refresh.import_osm_views_geotiff(conn, data_path) == 1: - LOG.fatal('FATAL: OSM views GeoTIFF file not found') - return 1 - if refresh.import_osm_views_geotiff(conn, data_path) == 2: - LOG.fatal('FATAL: PostGIS version number is less than 3') - return 1 + num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path) + if num == 1: + LOG.fatal('FATAL: OSM views GeoTIFF file not found') + return 1 + if num == 2: + LOG.fatal('FATAL: PostGIS version number is less than 3') + return 1 # Attention: importance MUST come after wiki data import. if args.importance: diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index c1cbab21..c870fb17 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -105,18 +105,18 @@ class SetupAll: if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), data_path) > 0: LOG.error('Wikipedia importance dump file not found. ' - 'Calculating importance values of locations will not \ - use Wikipedia importance data.') + 'Calculating importance values of locations will not ' + 'use Wikipedia importance data.') LOG.warning('Importing OSM views GeoTIFF data') data_path = Path(args.project_dir) - with connect(args.config.get_libpq_dsn()) as conn: - if refresh.import_osm_views_geotiff(conn, data_path) == 1: - LOG.error('OSM views GeoTIFF file not found. ' - 'Calculating importance values of locations will not use OSM views data.') - elif refresh.import_osm_views_geotiff(conn, data_path) == 2: - LOG.error('PostGIS version number is less than 3. ' - 'Calculating importance values of locations will not use OSM views data.') + num = refresh.import_osm_views_geotiff(args.config.get_libpq_dsn(), data_path) + if num == 1: + LOG.error('OSM views GeoTIFF file not found. ' + 'Calculating importance values of locations will not use OSM views data.') + elif num == 2: + LOG.error('PostGIS version number is less than 3. ' + 'Calculating importance values of locations will not use OSM views data.') if args.continue_at is None or args.continue_at == 'load-data': LOG.warning('Initialise tables') diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 1bb801f5..531de14d 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -16,7 +16,7 @@ from pathlib import Path from psycopg2 import sql as pysql from nominatim.config import Configuration -from nominatim.db.connection import Connection +from nominatim.db.connection import Connection, connect from nominatim.db.utils import execute_file from nominatim.db.sql_preprocessor import SQLPreprocessor from nominatim.version import version_str @@ -147,28 +147,48 @@ def import_wikipedia_articles(dsn: str, data_path: Path, ignore_errors: bool = F return 0 -def import_osm_views_geotiff(conn: Connection, data_path: Path) -> int: +def import_osm_views_geotiff(dsn: str, data_path: Path) -> int: """ Replaces the OSM views table with new data. Returns 0 if all was well and 1 if the OSM views GeoTIFF file could not be found. Throws an exception if there was an error reading the file. """ datafile = data_path / 'osmviews.tiff' - if not datafile.exists(): return 1 - - postgis_version = conn.postgis_version_tuple() - if postgis_version[0] < 3: - return 2 - - with conn.cursor() as cur: - cur.execute('DROP TABLE IF EXISTS "osm_views"') - conn.commit() - - cmd = f"raster2pgsql -s 4326 -I -C -t 100x100 {datafile} \ - public.osm_views | psql nominatim > /dev/null" - subprocess.run(["/bin/bash", "-c" , cmd], check=True) + with connect(dsn) as conn: + + postgis_version = conn.postgis_version_tuple() + if postgis_version[0] < 3: + return 2 + + with conn.cursor() as cur: + cur.drop_table("osm_views") + cur.drop_table("osm_views_stat") + + # -ovr: 6 -> zoom 12, 5 -> zoom 13, 4 -> zoom 14, 3 -> zoom 15 + reproject_geotiff = f"gdalwarp -q -multi -ovr 3 -overwrite \ + -co COMPRESS=LZW -tr 0.01 0.01 -t_srs EPSG:4326 {datafile} raster2import.tiff" + subprocess.run(["/bin/bash", "-c" , reproject_geotiff], check=True) + + tile_size = 256 + import_geotiff = f"raster2pgsql -I -C -Y -t {tile_size}x{tile_size} raster2import.tiff \ + public.osm_views | psql {dsn} > /dev/null" + subprocess.run(["/bin/bash", "-c" , import_geotiff], check=True) + + cleanup = "rm raster2import.tiff" + subprocess.run(["/bin/bash", "-c" , cleanup], check=True) + + # To normalize osm views data, the max view value is needed + cur.execute(f""" + CREATE TABLE osm_views_stat AS ( + SELECT MAX(ST_Value(osm_views.rast, 1, x, y)) AS max_views_count + FROM osm_views CROSS JOIN + generate_series(1, {tile_size}) As x + CROSS JOIN generate_series(1, {tile_size}) As y + WHERE x <= ST_Width(rast) AND y <= ST_Height(rast)); + """) + conn.commit() return 0 @@ -182,7 +202,7 @@ def recompute_importance(conn: Connection) -> None: cur.execute(""" UPDATE placex SET (wikipedia, importance) = (SELECT wikipedia, importance - FROM compute_importance(extratags, country_code, osm_type, osm_id)) + FROM compute_importance(extratags, country_code, osm_type, osm_id, centroid)) """) cur.execute(""" UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance diff --git a/test/python/cli/test_cmd_import.py b/test/python/cli/test_cmd_import.py index 17c6697d..b6a8721f 100644 --- a/test/python/cli/test_cmd_import.py +++ b/test/python/cli/test_cmd_import.py @@ -69,7 +69,7 @@ class TestCliImportWithDb: assert cf_mock.called > 1 for mock in mocks: - assert mock.called > 0, "Mock '{}' not called".format(mock.func_name) + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) def test_import_continue_load_data(self, mock_func_factory): diff --git a/test/python/cli/test_cmd_refresh.py b/test/python/cli/test_cmd_refresh.py index af06d161..211c1d64 100644 --- a/test/python/cli/test_cmd_refresh.py +++ b/test/python/cli/test_cmd_refresh.py @@ -32,7 +32,7 @@ class TestRefresh: func_mock = mock_func_factory(nominatim.tools.refresh, func) assert self.call_nominatim('refresh', '--' + command) == 0 - assert func_mock.called > 0 + assert func_mock.called == 1 def test_refresh_word_count(self): diff --git a/test/python/tools/test_refresh.py b/test/python/tools/test_refresh.py index 311c8468..fa6e216e 100644 --- a/test/python/tools/test_refresh.py +++ b/test/python/tools/test_refresh.py @@ -38,6 +38,7 @@ def test_recompute_importance(placex_table, table_factory, temp_db_conn, temp_db temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION compute_importance(extratags HSTORE, country_code varchar(2), osm_type varchar(1), osm_id BIGINT, + centroid GEOMETRY, OUT importance FLOAT, OUT wikipedia TEXT) AS $$ SELECT 0.1::float, 'foo'::text $$ LANGUAGE SQL""")