From: Sarah Hoffmann Date: Tue, 14 May 2024 21:08:52 +0000 (+0200) Subject: add tests for new importance CSV import X-Git-Tag: deploy~5^2~1^2~1 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/5b02cd22b9d9bdf9d3cd32d9e4cfcb971a92c606 add tests for new importance CSV import --- diff --git a/lib-sql/functions/importance.sql b/lib-sql/functions/importance.sql index 22a87240..1de5899c 100644 --- a/lib-sql/functions/importance.sql +++ b/lib-sql/functions/importance.sql @@ -65,7 +65,7 @@ BEGIN RETURN NULL; END; $$ -LANGUAGE plpgsql IMMUTABLE STRICT; +LANGUAGE plpgsql IMMUTABLE; {% else %} diff --git a/test/python/mocks.py b/test/python/mocks.py index a2fff677..32b6e6df 100644 --- a/test/python/mocks.py +++ b/test/python/mocks.py @@ -54,16 +54,17 @@ class MockPlacexTable: def add(self, osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None, admin_level=None, address=None, extratags=None, geom='POINT(10 4)', - country=None, housenumber=None): + country=None, housenumber=None, rank_search=30): with self.conn.cursor() as cur: psycopg2.extras.register_hstore(cur) cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class, type, name, admin_level, address, - housenumber, + housenumber, rank_search, extratags, geometry, country_code) - VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", + VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""", (osm_type, osm_id or next(self.idseq), cls, typ, names, - admin_level, address, housenumber, extratags, 'SRID=4326;' + geom, + admin_level, address, housenumber, rank_search, + extratags, 'SRID=4326;' + geom, country)) self.conn.commit() diff --git a/test/python/tools/test_refresh.py b/test/python/tools/test_refresh.py index 3e0a2801..f7621ab1 100644 --- a/test/python/tools/test_refresh.py +++ b/test/python/tools/test_refresh.py @@ -35,8 +35,7 @@ def test_refresh_import_secondary_importance_testdb(dsn, src_dir, temp_db_conn, @pytest.mark.parametrize("replace", (True, False)) def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, replace): if replace: - table_factory('wikipedia_article') - table_factory('wikipedia_redirect') + table_factory('wikimedia_importance') # use the small wikipedia file for the API testdb assert refresh.import_wikipedia_articles(dsn, src_dir / 'test' / 'testdb') == 0 diff --git a/test/python/tools/test_refresh_wiki_data.py b/test/python/tools/test_refresh_wiki_data.py new file mode 100644 index 00000000..c10a7757 --- /dev/null +++ b/test/python/tools/test_refresh_wiki_data.py @@ -0,0 +1,63 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. +""" +Tests for correctly assigning wikipedia pages to places. +""" +import gzip +import csv + +import pytest + +from nominatim.tools.refresh import import_wikipedia_articles, recompute_importance, create_functions + +@pytest.fixture +def wiki_csv(tmp_path, sql_preprocessor): + def _import(data): + with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd: + writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title', + 'importance', 'wikidata_id'], + delimiter='\t', quotechar='|') + writer.writeheader() + for lang, title, importance, wd in data: + writer.writerow({'language': lang, 'type': 'a', + 'title': title, 'importance': str(importance), + 'wikidata_id' : wd}) + return tmp_path + + return _import + + +@pytest.mark.parametrize('extra', [{'wikipedia:en': 'Test'}, + {'wikipedia': 'en:Test'}, + {'wikidata': 'Q123'}]) +def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv, placex_table, extra): + import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')])) + create_functions(temp_db_conn, def_config) + + content = temp_db_cursor.row_set( + 'SELECT language, title, importance, wikidata FROM wikimedia_importance') + assert content == set([('en', 'Test', 0.3, 'Q123')]) + + placex_table.add(osm_id=12, extratags=extra) + + recompute_importance(temp_db_conn) + + content = temp_db_cursor.row_set('SELECT wikipedia, importance FROM placex') + assert content == set([('en:Test', 0.3)]) + + +def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv, + placex_table): + import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')])) + create_functions(temp_db_conn, def_config) + + placex_table.add(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10) + + recompute_importance(temp_db_conn) + + content = temp_db_cursor.row_set('SELECT wikipedia, importance FROM placex') + assert list(content) == [(None, pytest.approx(0.26667666))]