git.openstreetmap.org Git - nominatim.git/blob - test/python/test_tools_postcodes.py
Improved performance of the postcodes query and some code cleaning
[nominatim.git] / test / python / test_tools_postcodes.py
1 """
2 Tests for functions to maintain the artificial postcode table.
3 """
4 import subprocess
5
6 import pytest
7
8 from nominatim.tools import postcodes
9 import dummy_tokenizer
10
class MockPostcodeTable:
    """ A location_postcode table for testing.

        Instantiating the class creates the `location_postcode` table on
        the given connection together with two stub SQL functions:
        `token_normalized_postcode()`, which returns its argument unchanged,
        and `get_country_code()`, which always returns NULL.
    """
    def __init__(self, conn):
        """ Create the table and the stub functions using `conn` and
            commit the result.
        """
        self.conn = conn
        with conn.cursor() as cur:
            cur.execute("""CREATE TABLE location_postcode (
                               place_id BIGINT,
                               parent_place_id BIGINT,
                               rank_search SMALLINT,
                               rank_address SMALLINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               country_code varchar(2),
                               postcode TEXT,
                               geometry GEOMETRY(Geometry, 4326))""")
            cur.execute("""CREATE OR REPLACE FUNCTION token_normalized_postcode(postcode TEXT)
                           RETURNS TEXT AS $$ BEGIN RETURN postcode; END; $$ LANGUAGE plpgsql;

                           CREATE OR REPLACE FUNCTION get_country_code(place geometry)
                           RETURNS TEXT AS $$ BEGIN 
                           RETURN null;
                           END; $$ LANGUAGE plpgsql;
                        """)
        conn.commit()

    def add(self, country, postcode, x, y):
        """ Insert a postcode row with indexed_status 1 and a point
            geometry at (`x`, `y`) in SRID 4326, then commit.

            The place_id is drawn from the `seq_place` sequence, which is
            not created by this class.
        """
        with self.conn.cursor() as cur:
            # Build the point from real query parameters instead of
            # substituting placeholders inside a quoted WKT literal, which
            # only works when the driver pastes bare numbers into the SQL.
            cur.execute("""INSERT INTO location_postcode (place_id, indexed_status,
                                                          country_code, postcode,
                                                          geometry)
                           VALUES (nextval('seq_place'), 1, %s, %s,
                                   ST_SetSRID(ST_MakePoint(%s, %s), 4326))""",
                        (country, postcode, x, y))
        self.conn.commit()


    @property
    def row_set(self):
        """ Return the table content as a set of
            (country_code, postcode, x, y) tuples.
        """
        with self.conn.cursor() as cur:
            cur.execute("""SELECT country_code, postcode,
                                  ST_X(geometry), ST_Y(geometry)
                           FROM location_postcode""")
            return {tuple(row) for row in cur}
55
56
@pytest.fixture
def tokenizer():
    """ Provide a DummyTokenizer created with two `None` arguments.
    """
    dummy = dummy_tokenizer.DummyTokenizer(None, None)
    return dummy
60
@pytest.fixture
def postcode_table(temp_db_conn, placex_table, word_table):
    """ Provide a MockPostcodeTable bound to the temporary database
        connection.

        `placex_table` and `word_table` are requested as fixtures but
        their values are not used in the body.
    """
    table = MockPostcodeTable(temp_db_conn)
    return table
64
65
def test_postcodes_empty(dsn, postcode_table, place_table,
                         tmp_path, tokenizer):
    """ Updating when the test has inserted no data must leave the
        postcode table without rows.
    """
    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
71
72
def test_postcodes_add_new(dsn, postcode_table, tmp_path,
                           insert_implicit_postcode, tokenizer):
    """ A postcode from the place data for country 'xx' is expected in the
        table after the update, while the pre-existing row for country
        'yy' is expected to be gone.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': '9486'})
    postcode_table.add('yy', '9486', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', '9486', 10, 12)}
    assert postcode_table.row_set == expected
81
82
def test_postcodes_replace_coordinates(dsn, postcode_table, tmp_path,
                                       insert_implicit_postcode, tokenizer):
    """ An existing postcode row at a different position is expected to
        end up with the coordinates of the place it is derived from.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 99, 34)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
91
92
def test_postcodes_replace_coordinates_close(dsn, postcode_table, tmp_path,
                                             insert_implicit_postcode, tokenizer):
    """ An existing postcode row that differs only slightly from the
        place position (11.99999 vs. 12) is expected to keep its
        coordinates.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': 'AB 4511'})
    postcode_table.add('xx', 'AB 4511', 10, 11.99999)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 11.99999)}
    assert postcode_table.row_set == expected
101
102
def test_postcodes_remove(dsn, postcode_table, tmp_path,
                          insert_implicit_postcode, tokenizer):
    """ A table row whose postcode ('badname') does not appear in the
        place data is expected to be removed by the update.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': 'AB 4511'})
    postcode_table.add('xx', 'badname', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
111
112
def test_postcodes_ignore_empty_country(dsn, postcode_table, tmp_path,
                                        insert_implicit_postcode, tokenizer):
    """ A place inserted with country `None` is expected to produce no
        postcode row.
    """
    insert_implicit_postcode(1, None, 'POINT(10 12)', {'postcode': 'AB 4511'})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
118
119
def test_postcodes_remove_all(dsn, postcode_table, place_table,
                              tmp_path, tokenizer):
    """ With no places inserted by the test, the single pre-existing
        postcode row is expected to be removed.
    """
    postcode_table.add('ch', '5613', 10, 12)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    remaining = postcode_table.row_set
    assert not remaining
126
127
def test_postcodes_multi_country(dsn, postcode_table, tmp_path,
                                 insert_implicit_postcode, tokenizer):
    """ Postcodes of places in two countries are expected as separate
        rows, including the postcode '54452' which is used in both
        countries.
    """
    places = (
        (1, 'de', 'POINT(10 12)', '54451'),
        (2, 'cc', 'POINT(100 56)', 'DD23 T'),
        (3, 'de', 'POINT(10.3 11.0)', '54452'),
        (4, 'cc', 'POINT(10.3 11.0)', '54452'),
    )
    for osm_id, country, geometry, postcode in places:
        insert_implicit_postcode(osm_id, country, geometry, {'postcode': postcode})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('de', '54451', 10, 12),
        ('de', '54452', 10.3, 11.0),
        ('cc', '54452', 10.3, 11.0),
        ('cc', 'DD23 T', 100, 56),
    }
    assert postcode_table.row_set == expected
141
142
@pytest.mark.parametrize("gzipped", [True, False])
def test_postcodes_extern(dsn, postcode_table, tmp_path,
                          insert_implicit_postcode, tokenizer, gzipped):
    """ Postcodes listed in a `xx_postcodes.csv` file in the directory
        passed to update_postcodes() are expected to be added, both for
        the plain and the gzipped file.

        'AB 4511' exists in the place data as well as in the file and is
        expected to keep the position of the place. 'CD 4511' only exists
        in the file and is expected at the lon/lat given there.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', dict(postcode='AB 4511'))

    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text("postcode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10")

    if gzipped:
        # check=True makes a failing gzip abort the test right here with
        # the exit status instead of being noticed only indirectly.
        subprocess.run(['gzip', str(extfile)], check=True)
        assert not extfile.is_file()

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    assert postcode_table.row_set == {('xx', 'AB 4511', 10, 12),
                                      ('xx', 'CD 4511', -10, -5)}
159
160
def test_postcodes_extern_bad_column(dsn, postcode_table, tmp_path,
                                     insert_implicit_postcode, tokenizer):
    """ A postcode file whose header lacks a `postcode` column (it is
        misspelled as `postode`) is expected to contribute no rows.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': 'AB 4511'})

    csv_content = "postode,lat,lon\nAB 4511,-4,-1\nCD 4511,-5, -10"
    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text(csv_content)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('xx', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
171
172
def test_postcodes_extern_bad_number(dsn, insert_implicit_postcode,
                                     postcode_table, tmp_path, tokenizer):
    """ Lines of a postcode file with unusable coordinates (a `NaN`
        longitude, a latitude of 200) are expected to be skipped while
        the remaining line is still imported.
    """
    insert_implicit_postcode(1, 'xx', 'POINT(10 12)', {'postcode': 'AB 4511'})

    csv_content = "postcode,lat,lon\nXX 4511,-4,NaN\nCD 4511,-5, -10\n34,200,0"
    extfile = tmp_path / 'xx_postcodes.csv'
    extfile.write_text(csv_content)

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {
        ('xx', 'AB 4511', 10, 12),
        ('xx', 'CD 4511', -10, -5),
    }
    assert postcode_table.row_set == expected
184
def test_can_compute(dsn, table_factory):
    """ can_compute() must be false before the `place` table has been
        created and true afterwards.
    """
    before = postcodes.can_compute(dsn)
    assert not before

    table_factory('place')

    after = postcodes.can_compute(dsn)
    assert after
189
def test_no_placex_entry(dsn, tmp_path, temp_db_cursor, place_row, postcode_table, tokenizer):
    """ A place added only through `place_row` is expected to yield a
        postcode row whose country code comes from get_country_code().
    """
    # Redefine get_country_code() to return 'fr', so the expected row
    # below verifies that the function was executed.
    country_stub_sql = """
        CREATE OR REPLACE FUNCTION get_country_code(place geometry)
        RETURNS TEXT AS $$ BEGIN 
        RETURN 'fr';
        END; $$ LANGUAGE plpgsql;
    """
    temp_db_cursor.execute(country_stub_sql)
    place_row(geom='SRID=4326;POINT(10 12)', address={'postcode': 'AB 4511'})

    postcodes.update_postcodes(dsn, tmp_path, tokenizer)

    expected = {('fr', 'AB 4511', 10, 12)}
    assert postcode_table.row_set == expected
202
@pytest.fixture
def insert_implicit_postcode(placex_table, place_row):
    """ Provide a function that inserts one place into both the placex
        and the place table, so that a postcode can be computed from it.

        The returned function takes `osm_id`, `country`, `geometry`
        and `address`. The geometry is handed to the placex table as
        given and to the place table prefixed with 'SRID=4326;'.
    """
    def _add_place(osm_id, country, geometry, address):
        placex_table.add(osm_id=osm_id, country=country, geom=geometry)
        ewkt = 'SRID=4326;' + geometry
        place_row(osm_id=osm_id, geom=ewkt, address=address)

    return _add_place