]> git.openstreetmap.org Git - nominatim.git/blob - test/python/test_tools_postcodes.py
Merge pull request #2342 from lonvia/icu-tokenizer-ci
[nominatim.git] / test / python / test_tools_postcodes.py
1 """
2 Tests for functions to maintain the artificial postcode table.
3 """
4 import subprocess
5
6 import pytest
7
8 from nominatim.tools import postcodes
9 import dummy_tokenizer
10
class MockPostcodeTable:
    """ A location_postcode table for testing.

        Creating an instance creates the table and a pass-through
        token_normalized_postcode() SQL function on the given connection
        and commits both.
    """
    def __init__(self, conn):
        self.conn = conn
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE location_postcode (
                               place_id BIGINT,
                               parent_place_id BIGINT,
                               rank_search SMALLINT,
                               rank_address SMALLINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               country_code varchar(2),
                               postcode TEXT,
                               geometry GEOMETRY(Geometry, 4326))""")
            cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
                           RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                        """)
        conn.commit()

    def add(self, country, postcode, x, y):
        """ Insert a row for the given country and postcode with a point
            geometry at (x, y) and indexed_status 1, then commit.

            The place_id is drawn from the sequence 'seq_place', which is
            not created by this class and must already exist.
        """
        with self.conn.cursor() as cur:
            # Build the point from proper bound parameters. Placeholders
            # inside a quoted SQL literal only work as long as the driver
            # pastes the values into the query as plain text.
            cur.execute("""INSERT INTO location_postcode (place_id, indexed_status,
                                                          country_code, postcode,
                                                          geometry)
                           VALUES (nextval('seq_place'), 1, %s, %s,
                                   ST_SetSRID(ST_MakePoint(%s, %s), 4326))""",
                        (country, postcode, x, y))
        self.conn.commit()


    @property
    def row_set(self):
        """ Return the table content as a set of tuples
            (country_code, postcode, x, y).
        """
        with self.conn.cursor() as cur:
            cur.execute("""SELECT country_code, postcode,
                                  ST_X(geometry), ST_Y(geometry)
                           FROM location_postcode""")
            return {tuple(row) for row in cur}
50
51
@pytest.fixture
def tokenizer():
    """ Provide a DummyTokenizer built with None for both constructor
        arguments.
    """
    dummy = dummy_tokenizer.DummyTokenizer(None, None)
    return dummy
55
@pytest.fixture
def postcode_table(temp_db_conn, placex_table, word_table):
    """ Provide a MockPostcodeTable created on the temporary database
        connection.

        placex_table and word_table are not used directly; presumably they
        are requested so that those tables exist as well (to be confirmed
        against the fixture definitions).
    """
    table = MockPostcodeTable(temp_db_conn)
    return table
59
60
def test_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer):
    """ An update without any rows added by the test leaves the postcode
        table empty.
    """
    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert len(remaining) == 0
65
66
def test_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ A postcode found in placex is added, while an entry with the same
        postcode for a different country is dropped.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': '9486'})
    postcode_table.add('yy', '9486', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', '9486', 10, 12)}
    assert postcode_table.row_set == expected
75
76
def test_postcodes_replace_coordinates(dsn, placex_table, postcode_table,
                                       tmp_path, tokenizer):
    """ An existing postcode at a different position is moved to the
        position found in placex.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
86
87
def test_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table,
                                             tmp_path, tokenizer):
    """ An existing postcode whose position is only marginally off from
        the placex position keeps its stored coordinates.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 10, 11.99999)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 11.99999)}
    assert postcode_table.row_set == expected
97
98
def test_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ An existing postcode that does not appear in placex is removed,
        the one from placex is added.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'badname', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
107
108
def test_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ A placex entry without a country code does not produce a postcode.
    """
    placex_table.add(country=None, geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert len(remaining) == 0
116
117
def test_postcodes_remove_all(dsn, postcode_table, tmp_path, tokenizer):
    """ With nothing added to placex, an existing postcode is removed and
        the table ends up empty.
    """
    postcode_table.add('ch', '5613', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert len(remaining) == 0
124
125
def test_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ Postcodes are kept per country: the same postcode in two countries
        results in two separate rows.
    """
    places = (('de', 'POINT(10 12)', '54451'),
              ('cc', 'POINT(100 56)', 'DD23 T'),
              ('de', 'POINT(10.3 11.0)', '54452'),
              ('cc', 'POINT(10.3 11.0)', '54452'))
    for country, geom, postcode in places:
        placex_table.add(country=country, geom=geom,
                         address={'postcode': postcode})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('de', '54451', 10, 12),
        ('de', '54452', 10.3, 11.0),
        ('cc', '54452', 10.3, 11.0),
        ('cc', 'DD23 T', 100, 56),
    }
    assert postcode_table.row_set == expected
142
143
@pytest.mark.parametrize("gzipped", [True, False])
def test_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
                          tokenizer, gzipped):
    """ Postcodes from an external CSV file, plain or gzipped, are added.
        A postcode that is also present in placex keeps the placex position.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address=dict(postcode='AB 4511'))

    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")

    if gzipped:
        # check=True makes a failing gzip call stop the test right here
        # with the real error instead of surfacing indirectly further down.
        subprocess.run(['gzip', str(extfile)], check=True)
        # gzip replaces the original file with the compressed one.
        assert not extfile.is_file()

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
                                      ('xx', 'CD 4511', -10, -5)}
161
162
def test_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
                                     tmp_path, tokenizer):
    """ An external CSV file whose header lacks a 'postcode' column (here
        misspelled as 'postode') contributes no postcodes.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})

    csv_content = "postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10"
    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text(csv_content)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
174
175
def test_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
                                     tmp_path, tokenizer):
    """ Rows of the external CSV file with unusable coordinates (a NaN
        longitude, a latitude of 200) are left out, the valid row is added.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address={'postcode': 'AB 4511'})

    csv_content = "postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0"
    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text(csv_content)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('xx', 'AB 4511', 10, 12),
        ('xx', 'CD 4511', -10, -5),
    }
    assert postcode_table.row_set == expected