2 Tests for running the indexing.
import pytest

from nominatim.indexer import indexer
from nominatim.tokenizer import factory
13 def __init__(self, conn):
14 self.placex_id = itertools.count(100000)
15 self.osmline_id = itertools.count(500000)
16 self.postcode_id = itertools.count(700000)
19 self.conn.set_isolation_level(0)
20 with self.conn.cursor() as cur:
21 cur.execute('CREATE EXTENSION hstore')
22 cur.execute("""CREATE TABLE placex (place_id BIGINT,
25 rank_address SMALLINT,
27 indexed_status SMALLINT,
28 indexed_date TIMESTAMP,
32 geometry_sector INTEGER)""")
33 cur.execute("""CREATE TABLE location_property_osmline (
35 indexed_status SMALLINT,
36 indexed_date TIMESTAMP,
37 geometry_sector INTEGER)""")
38 cur.execute("""CREATE TABLE location_postcode (
40 indexed_status SMALLINT,
41 indexed_date TIMESTAMP,
42 country_code varchar(2),
44 cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
47 IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
48 NEW.indexed_date = now();
51 END; $$ LANGUAGE plpgsql;""")
52 cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
55 OUT country_feature VARCHAR)
61 $$ LANGUAGE plpgsql STABLE;
63 for table in ('placex', 'location_property_osmline', 'location_postcode'):
64 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
65 FOR EACH ROW EXECUTE PROCEDURE date_update()
def scalar(self, query):
    """Execute `query` and return the first column of its first result row.

    `query` is handed unchanged to the cursor's `execute()`. The statement
    is expected to yield at least one row (e.g. a `count(*)`).
    """
    with self.conn.cursor() as cur:
        # The statement has to be executed before a row can be fetched.
        cur.execute(query)
        return cur.fetchone()[0]
def add_place(self, cls='place', typ='locality',
              rank_search=30, rank_address=30, sector=20):
    """Insert one row with indexed_status 1 into placex.

    The place_id is taken from the `placex_id` counter of the instance.
    `cls` and `typ` are written to the `class` and `type` columns, `sector`
    to `geometry_sector`.

    Returns the place_id of the inserted row.
    """
    next_id = next(self.placex_id)
    with self.conn.cursor() as cur:
        cur.execute("""INSERT INTO placex
                         (place_id, class, type, rank_search, rank_address,
                          indexed_status, geometry_sector)
                       VALUES (%s, %s, %s, %s, %s, 1, %s)""",
                    (next_id, cls, typ, rank_search, rank_address, sector))
    # add_admin() forwards this value, so hand the new id back to the caller.
    return next_id
def add_admin(self, **kwargs):
    """Insert a place of class 'boundary' and type 'administrative'.

    All keyword arguments are passed on to `add_place()`; any `cls` or
    `typ` given by the caller is overridden. Returns whatever
    `add_place()` returns.
    """
    boundary_args = dict(kwargs, cls='boundary', typ='administrative')
    return self.add_place(**boundary_args)
def add_osmline(self, sector=20):
    """Insert one row with indexed_status 1 into location_property_osmline.

    The place_id is taken from the `osmline_id` counter of the instance and
    `sector` is written to `geometry_sector`.

    Returns the place_id of the inserted row.
    """
    next_id = next(self.osmline_id)
    with self.conn.cursor() as cur:
        # Two placeholders in the statement: the new id and the sector.
        cur.execute("""INSERT INTO location_property_osmline
                         (place_id, indexed_status, geometry_sector)
                       VALUES (%s, 1, %s)""",
                    (next_id, sector))
    return next_id
def add_postcode(self, country, postcode):
    """Insert one row with indexed_status 1 into location_postcode.

    The place_id is taken from the `postcode_id` counter of the instance;
    `country` goes into `country_code` and `postcode` into `postcode`.

    Returns the place_id of the inserted row.
    """
    next_id = next(self.postcode_id)
    with self.conn.cursor() as cur:
        cur.execute("""INSERT INTO location_postcode
                         (place_id, indexed_status, country_code, postcode)
                       VALUES (%s, 1, %s, %s)""",
                    (next_id, country, postcode))
    # Hand the allocated id back so that callers can refer to the new row.
    return next_id
def placex_unindexed(self):
    """Return the number of placex rows whose indexed_status is above 0."""
    pending_sql = 'SELECT count(*) from placex where indexed_status > 0'
    return self.scalar(pending_sql)
def osmline_unindexed(self):
    """Return the number of location_property_osmline rows whose
    indexed_status is above 0.
    """
    pending_sql = 'SELECT count(*) from location_property_osmline where indexed_status > 0'
    return self.scalar(pending_sql)
@pytest.fixture
def test_db(temp_db_conn):
    """Fixture yielding an IndexerTestDB built on the `temp_db_conn` connection.

    It must be registered as a fixture because the tests in this module
    request `test_db` as a parameter.
    """
    yield IndexerTestDB(temp_db_conn)
@pytest.fixture
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
    """Fixture returning a tokenizer created by the tokenizer factory.

    The project directory of `def_config` is pointed at `tmp_path` before
    the configuration is passed to `factory.create_tokenizer()`.
    `tokenizer_mock` is only requested, never used directly here.
    """
    def_config.project_dir = tmp_path
    return factory.create_tokenizer(def_config)
@pytest.mark.parametrize("threads", [1, 15])
def test_index_all_by_rank(test_db, threads, test_tokenizer):
    """Index ranks 0 to 30 in one run and check completeness and ordering.

    One place per rank 0..30 and one interpolation line are created. After
    `index_by_rank(0, 30)` nothing may remain unindexed and the indexing
    dates must reflect the expected processing order.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_by_rank(0, 30)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0

    # no indexed place may be left without an indexing date
    undated = test_db.scalar("""SELECT count(*) from placex
                                WHERE indexed_status = 0 and indexed_date is null""")
    assert undated == 0

    # ranks come in order of rank address
    out_of_order = test_db.scalar("""
        SELECT count(*) FROM placex p WHERE rank_address > 0
          AND indexed_date >= (SELECT min(indexed_date) FROM placex o
                               WHERE p.rank_address < o.rank_address)""")
    assert out_of_order == 0

    # placex rank < 30 objects come before interpolations
    late_low_ranks = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""")
    assert late_low_ranks == 0

    # placex rank = 30 objects come after interpolations
    early_rank30 = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""")
    assert early_rank30 == 0

    # rank 0 comes after rank 29 and before rank 30
    after_rank0 = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""")
    assert after_rank0 == 0
    before_rank0 = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""")
    assert before_rank0 == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_without_30(test_db, threads, test_tokenizer):
    """Index only ranks 4 to 15 and check that nothing else is touched.

    Of the 31 places (one per rank 0..30), 19 must remain unindexed, and
    the interpolation line must remain unindexed as well.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_by_rank(4, 15)

    assert test_db.placex_unindexed() == 19
    assert test_db.osmline_unindexed() == 1

    # nothing outside the requested rank range may have been indexed
    indexed_outside = test_db.scalar("""
        SELECT count(*) FROM placex
        WHERE indexed_status = 0 AND not rank_address between 4 and 15""")
    assert indexed_outside == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_with_30(test_db, threads, test_tokenizer):
    """Index ranks 28 to 30 and check what else gets indexed with them.

    Of the 31 places (one per rank 0..30), 27 must remain unindexed. The
    interpolation line must be indexed, and no place with rank 1..27 may
    have been indexed.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 27
    assert test_db.osmline_unindexed() == 0

    indexed_below_28 = test_db.scalar("""
        SELECT count(*) FROM placex
        WHERE indexed_status = 0 AND rank_address between 1 and 27""")
    assert indexed_below_28 == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_boundaries(test_db, threads, test_tokenizer):
    """Run the boundary indexing and check that only boundaries are indexed.

    Six administrative boundaries (ranks 4..9), 31 ordinary places (ranks
    0..30) and one interpolation line are created. Afterwards the 31
    ordinary places and the interpolation line must still be unindexed.
    """
    for level in range(4, 10):
        test_db.add_admin(rank_address=level, rank_search=level)
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 37
    assert test_db.osmline_unindexed() == 1

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_boundaries(0, 30)

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    indexed_non_boundaries = test_db.scalar("""
        SELECT count(*) FROM placex
        WHERE indexed_status = 0 AND class != 'boundary'""")
    assert indexed_non_boundaries == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads, test_tokenizer):
    """Index 1000 'de' and 1000 'us' postcodes and check none is left over."""
    batches = (('de', range(1000)), ('us', range(32000, 33000)))
    for country, codes in batches:
        for code in codes:
            test_db.add_postcode(country, code)

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_postcodes()

    remaining = test_db.scalar("""SELECT count(*) FROM location_postcode
                                  WHERE indexed_status != 0""")
    assert remaining == 0
@pytest.mark.parametrize("analyse", [True, False])
def test_index_full(test_db, analyse, test_tokenizer):
    """Run the full indexing with 4 threads, with and without `analyse`.

    Boundaries, ordinary places, one interpolation line and 1000 postcodes
    are created; after `index_full()` none of them may remain unindexed.
    """
    for level in range(4, 10):
        test_db.add_admin(rank_address=level, rank_search=level)
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()
    for code in range(1000):
        test_db.add_postcode('de', code)

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, 4)
    runner.index_full(analyse=analyse)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0
    remaining_postcodes = test_db.scalar("""SELECT count(*) FROM location_postcode
                                            WHERE indexed_status != 0""")
    assert remaining_postcodes == 0
@pytest.mark.parametrize("threads", [1, 15])
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
    """Index 1000 rank-30 places with REOPEN_CONNECTIONS_AFTER patched to 15.

    The WorkerPool attribute is lowered via monkeypatch before the run;
    afterwards no place may remain unindexed.
    """
    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)

    place_count = 1000
    for _ in range(place_count):
        test_db.add_place(rank_address=30, rank_search=30)

    dsn = 'dbname=test_nominatim_python_unittest'
    runner = indexer.Indexer(dsn, test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 0