git.openstreetmap.org Git - nominatim.git/commitdiff
add unit tests for postcode parser
author: Sarah Hoffmann <lonvia@denofr.de>
Tue, 4 Mar 2025 15:25:00 +0000 (16:25 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Tue, 4 Mar 2025 15:25:00 +0000 (16:25 +0100)
test/python/api/search/test_postcode_parser.py [new file with mode: 0644]

diff --git a/test/python/api/search/test_postcode_parser.py b/test/python/api/search/test_postcode_parser.py
new file mode 100644 (file)
index 0000000..f691a58
--- /dev/null
@@ -0,0 +1,133 @@
+
+# SPDX-License-Identifier: GPL-3.0-or-later
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2025 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Test for parsing of postcodes in queries.
+"""
+import re
+from itertools import zip_longest
+
+import pytest
+
+from nominatim_api.search.postcode_parser import PostcodeParser
+from nominatim_api.search.query import QueryStruct, PHRASE_ANY
+
+@pytest.fixture
+def pc_config(project_env):
+    """ Provide a project environment with a reduced country configuration.
+
+        Writes a small 'country_settings.yaml' into the project directory
+        of `project_env` and returns `project_env` itself. The file only
+        declares postcode patterns (some with an 'output' template) for a
+        handful of country codes; the tests in this file are written
+        against exactly these patterns.
+    """
+    # Raw string, so that the backreferences in the 'output' templates
+    # (\1, \2) end up in the YAML file unchanged.
+    settings = r"""
+ab:
+  postcode:
+    pattern: "ddddd ll"
+ba:
+  postcode:
+    pattern: "ddddd"
+de:
+  postcode:
+    pattern: "ddddd"
+gr:
+  postcode:
+    pattern: "(ddd) ?(dd)"
+    output: \1 \2
+in:
+  postcode:
+    pattern: "(ddd) ?(ddd)"
+    output: \1\2
+mc:
+  postcode:
+    pattern: "980dd"
+mz:
+  postcode:
+    pattern: "(dddd)(?:-dd)?"
+bn:
+  postcode:
+    pattern: "(ll) ?(dddd)"
+    output: \1\2
+ky:
+  postcode:
+    pattern: "(d)-(dddd)"
+    output: KY\1-\2
+    """
+
+    settings_path = project_env.project_dir / 'country_settings.yaml'
+    settings_path.write_text(settings)
+
+    return project_env
+
+def mk_query(inp):
+    """ Build a QueryStruct from the plain query string `inp`.
+
+        The string is split at the characters ' ', ',', ':', "'" and '-'.
+        Every resulting word is added as one node, with the separator
+        that follows the word passed as the first argument of
+        `add_node()`. The last word has no following separator and
+        gets '>' instead.
+    """
+    # The capturing group makes re.split() keep the separators, so the
+    # result alternates word, separator, word, ..., word.
+    parts = re.split(r"([ ,:'-])", inp)
+    words = parts[0::2]
+    separators = parts[1::2]
+
+    query = QueryStruct([])
+    # There is always one word more than there are separators, hence
+    # the fill value only ever applies to the final word.
+    for word, breakchar in zip_longest(words, separators, fillvalue='>'):
+        query.add_node(breakchar, PHRASE_ANY, 0.1, word, word)
+
+    return query
+
+
+@pytest.mark.parametrize('query,pos', [
+    ('45325 Berlin', 0),
+    ('45325:Berlin', 0),
+    ('45325,Berlin', 0),
+    ('Berlin 45325', 1),
+    ('Berlin,45325', 1),
+    ('Berlin:45325', 1),
+    ('Hansastr,45325 Berlin', 1),
+    ('Hansastr 45325 Berlin', 1),
+])
+def test_simple_postcode(pc_config, query, pos):
+    """ A five-digit term is found as a postcode at node position `pos`,
+        whether it is set off by space, comma or colon.
+    """
+    # Two spellings are expected for the same span: the plain one and
+    # the spaced one (presumably produced by the 'gr' output template).
+    expected = {(pos, pos + 1, variant) for variant in ('45325', '453 25')}
+
+    found = PostcodeParser(pc_config).parse(mk_query(query))
+
+    assert found == expected
+
+def test_contained_postcode(pc_config):
+    """ A postcode forming the start of a longer postcode is reported
+        together with the longer one.
+    """
+    found = PostcodeParser(pc_config).parse(mk_query('12345 dx'))
+
+    assert found == {
+        (0, 1, '12345'),
+        (0, 1, '123 45'),
+        (0, 2, '12345 DX'),
+    }
+
+
+
+@pytest.mark.parametrize('query,frm,to', [
+    ('345987', 0, 1),
+    ('345 987', 0, 2),
+    ('Aina 345 987', 1, 3),
+    ('Aina 23 345 987 ff', 2, 4),
+])
+def test_postcode_with_space(pc_config, query, frm, to):
+    """ A six-digit postcode is found with or without a space in the
+        middle and is always returned in its joined form.
+    """
+    expected = {(frm, to, '345987')}
+
+    found = PostcodeParser(pc_config).parse(mk_query(query))
+
+    assert found == expected
+
+def test_overlapping_postcode(pc_config):
+    """ Two postcodes sharing the middle term of the query are both
+        reported, each with its own node range.
+    """
+    found = PostcodeParser(pc_config).parse(mk_query('123 456 78'))
+
+    assert found == {
+        (0, 2, '123456'),
+        (1, 3, '456 78'),
+    }
+
+
+# Note the comma after '45325Berlin': without it Python joins the two
+# adjacent string literals into the single case '45325Berlin345-987'
+# and neither of the intended queries gets tested.
+@pytest.mark.parametrize('query', ['45325-Berlin', "45325'Berlin",
+                                   'Berlin-45325', "Berlin'45325", '45325Berlin',
+                                   '345-987', "345'987", '345,987', '345:987'])
+def test_not_a_postcode(pc_config, query):
+    """ Digit sequences that are joined to other text by a hyphen or an
+        apostrophe, glued directly to a word, or split by a separator
+        other than a space must not be reported as postcodes.
+    """
+    parser = PostcodeParser(pc_config)
+
+    assert not parser.parse(mk_query(query))
+
+
+@pytest.mark.parametrize('query', ['ba 12233', 'ba-12233'])
+def test_postcode_with_country_prefix(pc_config, query):
+    """ A country code in front of the postcode, separated by a space or
+        a hyphen, is part of the matched node range but not of the
+        returned postcode.
+    """
+    found = PostcodeParser(pc_config).parse(mk_query(query))
+
+    # Only membership is checked; other matches may be present as well.
+    assert (0, 2, '12233') in found
+
+
+def test_postcode_with_joined_country_prefix(pc_config):
+    """ A country code written directly in front of the postcode is
+        stripped and the remaining postcode is the only result.
+    """
+    found = PostcodeParser(pc_config).parse(mk_query('ba12233'))
+
+    assert found == {(0, 1, '12233')}
+
+
+def test_postcode_with_non_matching_country_prefix(pc_config):
+    """ A postcode prefixed with a country code whose configured pattern
+        it does not fit yields no result.
+    """
+    found = PostcodeParser(pc_config).parse(mk_query('ky12233'))
+
+    assert not found
+