2 Tests for running the indexing.
8 from nominatim.indexer import indexer
9 from nominatim.tokenizer import factory
13 def __init__(self, conn):
14 self.placex_id = itertools.count(100000)
15 self.osmline_id = itertools.count(500000)
16 self.postcode_id = itertools.count(700000)
19 self.conn.set_isolation_level(0)
20 with self.conn.cursor() as cur:
21 cur.execute('CREATE EXTENSION hstore')
22 cur.execute("""CREATE TABLE placex (place_id BIGINT,
25 rank_address SMALLINT,
27 indexed_status SMALLINT,
28 indexed_date TIMESTAMP,
33 geometry_sector INTEGER)""")
34 cur.execute("""CREATE TABLE location_property_osmline (
39 indexed_status SMALLINT,
40 indexed_date TIMESTAMP,
41 geometry_sector INTEGER)""")
42 cur.execute("""CREATE TABLE location_postcode (
44 indexed_status SMALLINT,
45 indexed_date TIMESTAMP,
46 country_code varchar(2),
48 cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
51 IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
52 NEW.indexed_date = now();
55 END; $$ LANGUAGE plpgsql;""")
56 cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
59 OUT country_feature VARCHAR)
65 $$ LANGUAGE plpgsql STABLE;
67 cur.execute("""CREATE OR REPLACE FUNCTION get_interpolation_address(in_address HSTORE, wayid BIGINT)
72 $$ LANGUAGE plpgsql STABLE;
75 for table in ('placex', 'location_property_osmline', 'location_postcode'):
76 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
77 FOR EACH ROW EXECUTE PROCEDURE date_update()
def scalar(self, query):
    """Run `query` and return the first column of the first result row.

    Intended for single-value statements such as `SELECT count(*) ...`.

    Args:
        query: SQL text handed unchanged to the cursor.

    Returns:
        Element 0 of the row returned by `fetchone()`.
    """
    with self.conn.cursor() as cur:
        # The statement has to be executed before a row can be fetched;
        # otherwise the `query` argument would be silently ignored.
        cur.execute(query)
        return cur.fetchone()[0]
def add_place(self, cls='place', typ='locality',
              rank_search=30, rank_address=30, sector=20):
    """Insert one row into `placex` with indexed_status 1.

    Args:
        cls: value for the `class` column.
        typ: value for the `type` column.
        rank_search: value for the `rank_search` column.
        rank_address: value for the `rank_address` column.
        sector: value for the `geometry_sector` column.

    Returns:
        The place_id of the new row, drawn from the `placex_id` counter.
    """
    next_id = next(self.placex_id)
    with self.conn.cursor() as cur:
        cur.execute("""INSERT INTO placex
                         (place_id, class, type, rank_search, rank_address,
                          indexed_status, geometry_sector)
                          VALUES (%s, %s, %s, %s, %s, 1, %s)""",
                    (next_id, cls, typ, rank_search, rank_address, sector))
    # Hand the generated id back: add_admin() forwards this return value,
    # and callers need it to refer to the row they just created.
    return next_id
def add_admin(self, **kwargs):
    """Add a placex row of class 'boundary' and type 'administrative'.

    All keyword arguments are forwarded to `add_place()`; any `cls` or
    `typ` supplied by the caller is overridden. Returns whatever
    `add_place()` returns.
    """
    forced = {'cls': 'boundary', 'typ': 'administrative'}
    return self.add_place(**{**kwargs, **forced})
def add_osmline(self, sector=20):
    """Insert one row into `location_property_osmline` with indexed_status 1.

    The value drawn from the `osmline_id` counter is used for both the
    `place_id` and the `osm_id` column.

    Args:
        sector: value for the `geometry_sector` column.

    Returns:
        The place_id of the new row.
    """
    next_id = next(self.osmline_id)
    with self.conn.cursor() as cur:
        cur.execute("""INSERT INTO location_property_osmline
                         (place_id, osm_id, indexed_status, geometry_sector)
                          VALUES (%s, %s, 1, %s)""",
                    (next_id, next_id, sector))
    # Return the generated id, consistent with add_place(), so callers
    # can refer to the new row.
    return next_id
def add_postcode(self, country, postcode):
    """Insert one row into `location_postcode` with indexed_status 1.

    Args:
        country: value for the `country_code` column.
        postcode: value for the `postcode` column.

    Returns:
        The place_id of the new row, drawn from the `postcode_id` counter.
    """
    next_id = next(self.postcode_id)
    with self.conn.cursor() as cur:
        cur.execute("""INSERT INTO location_postcode
                        (place_id, indexed_status, country_code, postcode)
                        VALUES (%s, 1, %s, %s)""",
                    (next_id, country, postcode))
    # Return the generated id, consistent with add_place(), so callers
    # can refer to the new row.
    return next_id
def placex_unindexed(self):
    """Return the number of placex rows whose indexed_status is above 0."""
    pending_query = 'SELECT count(*) from placex where indexed_status > 0'
    return self.scalar(pending_query)
def osmline_unindexed(self):
    """Return the number of location_property_osmline rows whose
    indexed_status is above 0.
    """
    pending_query = 'SELECT count(*) from location_property_osmline where indexed_status > 0'
    return self.scalar(pending_query)
def test_db(temp_db_conn):
    """Yield an IndexerTestDB built on the `temp_db_conn` connection."""
    database = IndexerTestDB(temp_db_conn)
    yield database
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
    """Return a tokenizer created by the factory for `def_config`, after
    pointing the configuration's project directory at `tmp_path`.
    """
    # NOTE(review): tokenizer_mock is never referenced in the body;
    # presumably it is requested for its side effect - confirm in conftest.
    setattr(def_config, 'project_dir', tmp_path)
    tokenizer = factory.create_tokenizer(def_config)
    return tokenizer
@pytest.mark.parametrize("threads", [1, 15])
def test_index_all_by_rank(test_db, threads, test_tokenizer):
    """Index ranks 0 to 30 in one call and check that every row was
    processed and that the indexing timestamps follow the expected order.
    """
    # One placex row for each rank 0..30 plus a single interpolation line.
    for level in range(31):
        test_db.add_place(rank_search=level, rank_address=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(0, 30)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0

    # Each query counts the rows violating one expectation, so every
    # single one of them has to come back as 0.
    violation_queries = (
        # indexed rows without an indexing timestamp
        """SELECT count(*) from placex
                             WHERE indexed_status = 0 and indexed_date is null""",
        # rows with rank_address > 0 stamped at or after the earliest
        # row of any higher rank_address
        """
                    SELECT count(*) FROM placex p WHERE rank_address > 0
                      AND indexed_date >= (SELECT min(indexed_date) FROM placex o
                                           WHERE p.rank_address < o.rank_address)""",
        # rows below rank 30 stamped after the first interpolation
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""",
        # rank 30 rows stamped before the last interpolation
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""",
        # rows below rank 30 stamped after the first rank 0 row
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""",
        # rank 30 rows stamped before the last rank 0 row
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""",
    )
    for query in violation_queries:
        assert test_db.scalar(query) == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_without_30(test_db, threads, test_tokenizer):
    """Index only ranks 4 to 15 and check that nothing outside that
    range, including the interpolation line, was touched.
    """
    for level in range(31):
        test_db.add_place(rank_search=level, rank_address=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest',
                             test_tokenizer, threads)
    runner.index_by_rank(4, 15)

    # 19 of the 31 placex rows and the interpolation line are still pending.
    assert test_db.placex_unindexed() == 19
    assert test_db.osmline_unindexed() == 1

    indexed_outside_range = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND not rank_address between 4 and 15""")
    assert indexed_outside_range == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_with_30(test_db, threads, test_tokenizer):
    """Index ranks 28 to 30 and check that the interpolation line is
    indexed as well while ranks 1 to 27 stay untouched.
    """
    for level in range(31):
        test_db.add_place(rank_search=level, rank_address=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    # NOTE(review): 4 of the 31 rows get indexed; presumably ranks 28-30
    # plus rank 0, which the range check below leaves out - confirm
    # against the indexer.
    assert test_db.placex_unindexed() == 27
    assert test_db.osmline_unindexed() == 0

    indexed_below_range = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND rank_address between 1 and 27""")
    assert indexed_below_range == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_boundaries(test_db, threads, test_tokenizer):
    """Run boundary indexing over ranks 0 to 30 and check that only rows
    of class 'boundary' end up indexed.
    """
    # Six administrative boundaries (ranks 4..9) ...
    for level in range(4, 10):
        test_db.add_admin(rank_search=level, rank_address=level)
    # ... 31 ordinary places (ranks 0..30) and one interpolation line.
    for level in range(31):
        test_db.add_place(rank_search=level, rank_address=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 37
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_boundaries(0, 30)

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    indexed_non_boundaries = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND class != 'boundary'""")
    assert indexed_non_boundaries == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads, test_tokenizer):
    """Index 2000 postcodes of two countries and check that none of them
    is left with a non-zero indexed_status.
    """
    batches = (('de', range(1000)), ('us', range(32000, 33000)))
    for country, codes in batches:
        for code in codes:
            test_db.add_postcode(country, code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_postcodes()

    still_pending = test_db.scalar("""SELECT count(*) FROM location_postcode
                                      WHERE indexed_status != 0""")
    assert still_pending == 0
@pytest.mark.parametrize("analyse", [True, False])
def test_index_full(test_db, analyse, test_tokenizer):
    """Run the full indexing with and without `analyse` and check that
    places, interpolation lines and postcodes are all indexed afterwards.
    """
    for level in range(4, 10):
        test_db.add_admin(rank_search=level, rank_address=level)
    for level in range(31):
        test_db.add_place(rank_search=level, rank_address=level)
    test_db.add_osmline()
    for code in range(1000):
        test_db.add_postcode('de', code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
    runner.index_full(analyse=analyse)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0
    pending_postcodes = test_db.scalar("""SELECT count(*) FROM location_postcode
                                          WHERE indexed_status != 0""")
    assert pending_postcodes == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
    """Index 1000 rank-30 places with WorkerPool.REOPEN_CONNECTIONS_AFTER
    patched down to 15 and check that every row still gets indexed.
    """
    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)

    place_count = 1000
    for _ in range(place_count):
        test_db.add_place(rank_search=30, rank_address=30)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 0