git.openstreetmap.org Git - nominatim.git/commitdiff
add tests for new importance CSV import
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 14 May 2024 21:08:52 +0000 (23:08 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 16 May 2024 13:23:54 +0000 (15:23 +0200)
lib-sql/functions/importance.sql
test/python/mocks.py
test/python/tools/test_refresh.py
test/python/tools/test_refresh_wiki_data.py [new file with mode: 0644]

index 22a87240bd87a2a88b3faa191e9033129079863a..1de5899ca2950c1b3fc3fee3750164eb3db52091 100644 (file)
@@ -65,7 +65,7 @@ BEGIN
   RETURN NULL;
 END;
 $$
-LANGUAGE plpgsql IMMUTABLE STRICT;
+LANGUAGE plpgsql IMMUTABLE;
 
 {% else %}
 
index a2fff67794b482decc1a6883e9858b57425e8a80..32b6e6dfa5321fe656cd55dcd744b22fa5690776 100644 (file)
@@ -54,16 +54,17 @@ class MockPlacexTable:
 
     def add(self, osm_type='N', osm_id=None, cls='amenity', typ='cafe', names=None,
             admin_level=None, address=None, extratags=None, geom='POINT(10 4)',
-            country=None, housenumber=None):
+            country=None, housenumber=None, rank_search=30):
         with self.conn.cursor() as cur:
             psycopg2.extras.register_hstore(cur)
             cur.execute("""INSERT INTO placex (place_id, osm_type, osm_id, class,
                                                type, name, admin_level, address,
-                                               housenumber,
+                                               housenumber, rank_search,
                                                extratags, geometry, country_code)
-                            VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
+                            VALUES(nextval('seq_place'), %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)""",
                         (osm_type, osm_id or next(self.idseq), cls, typ, names,
-                         admin_level, address, housenumber, extratags, 'SRID=4326;' + geom,
+                         admin_level, address, housenumber, rank_search,
+                         extratags, 'SRID=4326;' + geom,
                          country))
         self.conn.commit()
 
index 3e0a280127a1e38b1f7122592bad922882734bc5..f7621ab180f54d2733184076ff805eb59a9c5799 100644 (file)
@@ -35,8 +35,7 @@ def test_refresh_import_secondary_importance_testdb(dsn, src_dir, temp_db_conn,
 @pytest.mark.parametrize("replace", (True, False))
 def test_refresh_import_wikipedia(dsn, src_dir, table_factory, temp_db_cursor, replace):
     if replace:
-        table_factory('wikipedia_article')
-        table_factory('wikipedia_redirect')
+        table_factory('wikimedia_importance')
 
     # use the small wikipedia file for the API testdb
     assert refresh.import_wikipedia_articles(dsn, src_dir / 'test' / 'testdb') == 0
diff --git a/test/python/tools/test_refresh_wiki_data.py b/test/python/tools/test_refresh_wiki_data.py
new file mode 100644 (file)
index 0000000..c10a775
--- /dev/null
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Tests for correctly assigning wikipedia pages to places.
+"""
+import gzip
+import csv
+
+import pytest
+
+from nominatim.tools.refresh import import_wikipedia_articles, recompute_importance, create_functions
+
+@pytest.fixture
+def wiki_csv(tmp_path, sql_preprocessor):
+    def _import(data):
+        with gzip.open(tmp_path / 'wikimedia-importance.csv.gz', mode='wt') as fd:
+            writer = csv.DictWriter(fd, fieldnames=['language', 'type', 'title',
+                                                    'importance', 'wikidata_id'],
+                                    delimiter='\t', quotechar='|')
+            writer.writeheader()
+            for lang, title, importance, wd in data:
+                writer.writerow({'language': lang, 'type': 'a',
+                                 'title': title, 'importance': str(importance),
+                                 'wikidata_id' : wd})
+        return tmp_path
+
+    return _import
+
+
+@pytest.mark.parametrize('extra', [{'wikipedia:en': 'Test'},
+                                   {'wikipedia': 'en:Test'},
+                                   {'wikidata': 'Q123'}])
+def test_wikipedia(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv, placex_table, extra):
+    import_wikipedia_articles(dsn, wiki_csv([('en', 'Test', 0.3, 'Q123')]))
+    create_functions(temp_db_conn, def_config)
+
+    content = temp_db_cursor.row_set(
+        'SELECT language, title, importance, wikidata FROM wikimedia_importance')
+    assert content == set([('en', 'Test', 0.3, 'Q123')])
+
+    placex_table.add(osm_id=12, extratags=extra)
+
+    recompute_importance(temp_db_conn)
+
+    content = temp_db_cursor.row_set('SELECT wikipedia, importance FROM placex')
+    assert content == set([('en:Test', 0.3)])
+
+
+def test_wikipedia_no_match(dsn, temp_db_conn, temp_db_cursor, def_config, wiki_csv,
+                            placex_table):
+    import_wikipedia_articles(dsn, wiki_csv([('de', 'Test', 0.3, 'Q123')]))
+    create_functions(temp_db_conn, def_config)
+
+    placex_table.add(osm_id=12, extratags={'wikipedia': 'en:Test'}, rank_search=10)
+
+    recompute_importance(temp_db_conn)
+
+    content = temp_db_cursor.row_set('SELECT wikipedia, importance FROM placex')
+    assert list(content) == [(None, pytest.approx(0.26667666))]