2 # SPDX-License-Identifier: GPL-3.0-or-later
4 # This file is part of Nominatim. (https://nominatim.org)
6 # Copyright (C) 2025 by the Nominatim developer community.
7 # For a full list of authors see the git log.
9 Test for parsing of postcodes in queries.
12 from itertools import zip_longest
16 from nominatim_api.search.postcode_parser import PostcodeParser
17 from nominatim_api.search.query import QueryStruct, PHRASE_ANY, PHRASE_POSTCODE, PHRASE_STREET
# Writes a 'country_settings.yaml' file into `project_env.project_dir`; the
# raw string passed to write_text() holds several postcode `pattern:` entries.
# NOTE(review): the tests take `pc_config` as an argument and hand it to
# PostcodeParser, so this is presumably a pytest fixture - the decorator, the
# remainder of the YAML text and the return value are not visible in this
# excerpt; confirm against the full file.
20 def pc_config(project_env):
21 country_file = project_env.project_dir / 'country_settings.yaml'
22 country_file.write_text(r"""
34 pattern: "(ddd) ?(dd)"
38 pattern: "(ddd) ?(ddd)"
45 pattern: "(dddd)(?:-dd)?"
48 pattern: "(ll) ?(dddd)"
# NOTE(review): presumably the body of the `mk_query` helper called by the
# tests; its `def` line and return statement are not visible in this excerpt.
# Create the QueryStruct that receives one node per word of `inp`.
59 query = QueryStruct([])
# The capturing group makes re.split() keep the separators, so the list
# alternates: word, break character, word, break character, ...
60 phrase_split = re.split(r"([ ,:'-])", inp)
# Zipping one shared iterator with itself walks the list in
# (word, break character) pairs; the last word has no separator after it
# and is paired with the fill value '>'.
62 for word, breakchar in zip_longest(*[iter(phrase_split)]*2, fillvalue='>'):
63 query.add_node(breakchar, PHRASE_ANY, 0.1, word, word)
@pytest.mark.parametrize('query,pos', [
    ('45325 Berlin', 0),
    ('Hansastr,45325 Berlin', 1),
    ('Hansastr 45325 Berlin', 1),
])
def test_simple_postcode(pc_config, query, pos):
    """Check that '45325' is found at the word where it occurs.

    Two variants are expected for the one-word span starting at `pos`:
    the plain '45325' and the spaced '453 25'.
    """
    end = pos + 1
    expected = {(pos, end, '45325'), (pos, end, '453 25')}

    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert found == expected
# Parses the two-word query '12345 dx' and compares the result with a set of
# (start, end, postcode) tuples.
# NOTE(review): the expected set is cut off in this excerpt after its first
# two entries; the remaining entries and the closing brace are not visible.
83 def test_contained_postcode(pc_config):
84 parser = PostcodeParser(pc_config)
86 assert parser.parse(mk_query('12345 dx')) == {(0, 1, '12345'), (0, 1, '123 45'),
@pytest.mark.parametrize('query,frm,to', [
    ('345987', 0, 1),
    ('345 987', 0, 2),
    ('Aina 345 987', 1, 3),
    ('Aina 23 345 987 ff', 2, 4),
])
def test_postcode_with_space(pc_config, query, frm, to):
    """Check that a six-digit postcode is found with or without a space.

    Whether the digits form one word or two words separated by a space,
    the only expected result is '345987' covering the span `frm`..`to`.
    """
    postcodes = PostcodeParser(pc_config).parse(mk_query(query))

    assert postcodes == {(frm, to, '345987')}
def test_overlapping_postcode(pc_config):
    """Check that two postcodes sharing the middle word are both reported.

    For '123 456 78' the expected results are '123456' over words 0..2
    and '456 78' over words 1..3.
    """
    expected = {(0, 2, '123456'), (1, 3, '456 78')}

    found = PostcodeParser(pc_config).parse(mk_query('123 456 78'))

    assert found == expected
# Every case needs its own trailing comma: adjacent string literals are
# concatenated silently, which would merge two queries into a single case.
@pytest.mark.parametrize('query', ['45325-Berlin', "45325'Berlin",
                                   'Berlin-45325', "Berlin'45325", '45325Berlin',
                                   '345-987', "345'987", '345,987', '345:987'])
def test_not_a_postcode(pc_config, query):
    """Check that none of the listed queries yields a postcode.

    The cases cover digits glued to a word, digits attached to a word by
    '-' or "'", and two digit groups joined by '-', "'", ',' or ':'.
    The parser is expected to return an empty result for each of them.
    """
    parser = PostcodeParser(pc_config)

    assert not parser.parse(mk_query(query))
@pytest.mark.parametrize('query', ['ba 12233', 'ba-12233'])
def test_postcode_with_country_prefix(pc_config, query):
    """Check that 'ba' followed by '12233' is matched as one postcode.

    With either a space or a hyphen between the two words, the result
    must contain '12233' spanning both words (0..2). Other entries in
    the result are allowed.
    """
    found = PostcodeParser(pc_config).parse(mk_query(query))

    assert (0, 2, '12233') in found
def test_postcode_with_joined_country_prefix(pc_config):
    """Check that the single word 'ba12233' yields exactly '12233'."""
    expected = {(0, 1, '12233')}

    found = PostcodeParser(pc_config).parse(mk_query('ba12233'))

    assert found == expected
def test_postcode_with_non_matching_country_prefix(pc_config):
    """Check that the single word 'ky12233' yields no postcode at all."""
    found = PostcodeParser(pc_config).parse(mk_query('ky12233'))

    assert not found
def test_postcode_inside_postcode_phrase(pc_config):
    """Check the result for a query mixing street and postcode phrases.

    The query is built by hand: '12345' is added with PHRASE_STREET,
    then 'xz' and '4444' with PHRASE_POSTCODE. The only expected result
    is '4444' at span 2..3; neither '12345' nor 'xz' may appear.
    """
    parser = PostcodeParser(pc_config)

    query = QueryStruct([])
    query.nodes[-1].ptype = PHRASE_STREET
    # (break character, phrase type, term) for each node, in query order.
    node_specs = (
        (',', PHRASE_STREET, '12345'),
        (',', PHRASE_POSTCODE, 'xz'),
        ('>', PHRASE_POSTCODE, '4444'),
    )
    for break_char, phrase_type, term in node_specs:
        query.add_node(break_char, phrase_type, 0.1, term, term)

    found = parser.parse(query)

    assert found == {(2, 3, '4444')}
def test_partial_postcode_in_postcode_phrase(pc_config):
    """Check that a postcode phrase of '2224' and '12345' yields nothing.

    All nodes carry PHRASE_POSTCODE and the two words are separated by
    a space; the parser is expected to return an empty result.
    """
    parser = PostcodeParser(pc_config)

    query = QueryStruct([])
    query.nodes[-1].ptype = PHRASE_POSTCODE
    # (break character, term) for each node, in query order.
    for break_char, term in ((' ', '2224'), ('>', '12345')):
        query.add_node(break_char, PHRASE_POSTCODE, 0.1, term, term)

    found = parser.parse(query)

    assert not found