]> git.openstreetmap.org Git - nominatim.git/blob - test/python/test_tools_postcodes.py
Merge pull request #2326 from lonvia/wokerpool-for-tiger-data
[nominatim.git] / test / python / test_tools_postcodes.py
1 """
2 Tests for functions to maintain the artificial postcode table.
3 """
4 import subprocess
5
6 import pytest
7
8 from nominatim.tools import postcodes
9 import dummy_tokenizer
10
class MockPostcodeTable:
    """ A location_postcode table for testing.

        Instantiating the class creates the table together with a
        pass-through `token_normalized_postcode()` SQL function and
        commits both on the given connection.
    """

    def __init__(self, conn):
        """ Create the table and the SQL helper function through `conn`.

            `conn` needs to offer `cursor()` (returning a context manager)
            and `commit()`.
        """
        self.conn = conn
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE location_postcode (
                               place_id BIGINT,
                               parent_place_id BIGINT,
                               rank_search SMALLINT,
                               rank_address SMALLINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               country_code varchar(2),
                               postcode TEXT,
                               geometry GEOMETRY(Geometry, 4326))""")
            # The stand-in normalisation function returns its input unchanged.
            cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
                           RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;
                        """)
        conn.commit()

    def add(self, country, postcode, x, y):
        """ Insert a postcode row with a point geometry at (`x`, `y`) in
            SRID 4326 and commit.

            The row is created with `indexed_status` 1 and a `place_id`
            taken from the `seq_place` sequence. That sequence is not
            created by this class and must already exist.
        """
        with self.conn.cursor() as cur:
            # Build the point from bound parameters. Placeholders placed
            # inside a quoted SQL literal only work as long as the driver
            # interpolates plain numbers as text on the client side.
            cur.execute("""INSERT INTO location_postcode (place_id, indexed_status,
                                                          country_code, postcode,
                                                          geometry)
                           VALUES (nextval('seq_place'), 1, %s, %s,
                                   ST_SetSRID(ST_MakePoint(%s, %s), 4326))""",
                        (country, postcode, x, y))
        self.conn.commit()

    @property
    def row_set(self):
        """ Return the current table content as a set of
            (country_code, postcode, x, y) tuples.
        """
        with self.conn.cursor() as cur:
            cur.execute("""SELECT country_code, postcode,
                                  ST_X(geometry), ST_Y(geometry)
                           FROM location_postcode""")
            return {tuple(row) for row in cur}
50
51
@pytest.fixture
def tokenizer():
    """ Provide a DummyTokenizer instance created with two `None`
        arguments.
    """
    dummy = dummy_tokenizer.DummyTokenizer(None, None)
    return dummy
55
@pytest.fixture
def postcode_table(temp_db_conn, placex_table, word_table):
    """ Provide a MockPostcodeTable set up on the temporary database
        connection.

        `placex_table` and `word_table` are requested but not used in the
        body - presumably so that those tables exist first (TODO confirm
        against the fixture definitions).
    """
    table = MockPostcodeTable(temp_db_conn)
    return table
59
60
def test_import_postcodes_empty(dsn, postcode_table, tmp_path, tokenizer):
    """ With no rows added beforehand, the postcode table is expected to
        be empty after the update.
    """
    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
65
66
def test_import_postcodes_add_new(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ A postcode present in placex is expected to show up in the
        postcode table, while an existing row with the same postcode but
        another country is expected to disappear.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': '9486'})
    postcode_table.add('yy', '9486', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', '9486', 10, 12)}
    assert postcode_table.row_set == expected
75
76
def test_import_postcodes_replace_coordinates(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ An existing postcode row whose position is far from the placex
        one is expected to receive the placex coordinates.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
85
86
def test_import_postcodes_replace_coordinates_close(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ An existing postcode row that lies very close to the placex
        position is expected to keep its own coordinates.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 10, 11.99999)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 11.99999)}
    assert postcode_table.row_set == expected
95
96
def test_import_postcodes_remove(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ A postcode row without a counterpart in placex is expected to be
        removed, leaving only the postcode that placex provides.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': 'AB 4511'})
    postcode_table.add('xx', 'badname', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
105
106
def test_import_postcodes_ignore_empty_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ A placex entry added with `country=None` is expected not to
        produce a postcode row.
    """
    placex_table.add(country=None, geom='POINT(10 12)', address={'postcode': 'AB 4511'})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
114
115
def test_import_postcodes_remove_all(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ With nothing added to placex, the single existing postcode row is
        expected to be gone after the update.
    """
    postcode_table.add('ch', '5613', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
122
123
def test_import_postcodes_multi_country(dsn, placex_table, postcode_table, tmp_path, tokenizer):
    """ Postcodes are expected to be kept per country: the same postcode
        in two countries yields two separate rows.
    """
    # (country, geometry, postcode), inserted in exactly this order.
    places = (
        ('de', 'POINT(10 12)', '54451'),
        ('cc', 'POINT(100 56)', 'DD23 T'),
        ('de', 'POINT(10.3 11.0)', '54452'),
        ('cc', 'POINT(10.3 11.0)', '54452'),
    )
    for country_code, point, code in places:
        placex_table.add(country=country_code, geom=point, address={'postcode': code})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('de', '54451', 10, 12),
        ('de', '54452', 10.3, 11.0),
        ('cc', '54452', 10.3, 11.0),
        ('cc', 'DD23 T', 100, 56),
    }
    assert postcode_table.row_set == expected
140
141
@pytest.mark.parametrize("gzipped", [True, False])
def test_import_postcodes_extern(dsn, placex_table, postcode_table, tmp_path,
                                 tokenizer, gzipped):
    """ Postcodes from an external `xx_postcodes.csv` file in the project
        directory are expected to be added, both for a plain and a
        gzipped file.

        A postcode that is also present in placex is expected to keep the
        placex coordinates. For the external-only postcode the `lon`
        column ends up as x and the `lat` column as y.
    """
    placex_table.add(country='xx', geom='POINT(10 12)',
                     address=dict(postcode='AB 4511'))

    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")

    if gzipped:
        # check=True turns a non-zero gzip exit status into a
        # CalledProcessError instead of silently continuing.
        subprocess.run(['gzip', str(extfile)], check=True)
        # gzip is expected to have replaced the plain file.
        assert not extfile.is_file()

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
                                      ('xx', 'CD 4511', -10, -5)}
159
160
def test_import_postcodes_extern_bad_column(dsn, placex_table, postcode_table,
                                            tmp_path, tokenizer):
    """ An external file whose header lacks a `postcode` column is
        expected to contribute no rows; only the placex postcode remains.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': 'AB 4511'})

    # The header deliberately misspells the postcode column.
    csv_path = tmp_path / 'xx_postcodes.csv'
    csv_path.write_text("postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
172
173
def test_import_postcodes_extern_bad_number(dsn, placex_table, postcode_table,
                                            tmp_path, tokenizer):
    """ External rows with a `NaN` longitude or a latitude of 200 are
        expected to be skipped while the valid row is still imported.
    """
    placex_table.add(country='xx', geom='POINT(10 12)', address={'postcode': 'AB 4511'})

    csv_path = tmp_path / 'xx_postcodes.csv'
    csv_path.write_text("postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0")

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('xx', 'AB 4511', 10, 12),
        ('xx', 'CD 4511', -10, -5),
    }
    assert postcode_table.row_set == expected