]> git.openstreetmap.org Git - nominatim.git/blob - test/python/test_indexing.py
require tokenizer for indexer
[nominatim.git] / test / python / test_indexing.py
1 """
2 Tests for running the indexing.
3 """
4 import itertools
5 import psycopg2
6 import pytest
7
8 from nominatim.indexer import indexer
9 from nominatim.tokenizer import factory
10
class IndexerTestDB:
    """ Cut-down version of the database schema the indexer operates on.

        On construction, simplified `placex`, `location_property_osmline`
        and `location_postcode` tables are created together with the
        functions and triggers the tests rely on. The remaining methods
        insert unindexed rows and count what still needs indexing.
    """

    def __init__(self, conn):
        """ Set up the test schema through the database connection `conn`.
        """
        self.conn = conn
        # Level 0 corresponds to psycopg2's ISOLATION_LEVEL_AUTOCOMMIT.
        self.conn.set_isolation_level(0)

        # Every table draws its place ids from its own counter.
        self.placex_id = itertools.count(100000)
        self.osmline_id = itertools.count(500000)
        self.postcode_id = itertools.count(700000)

        with self.conn.cursor() as cur:
            # The address column of placex is of type HSTORE.
            cur.execute('CREATE EXTENSION hstore')
            cur.execute("""CREATE TABLE placex (place_id BIGINT,
                                                class TEXT,
                                                type TEXT,
                                                rank_address SMALLINT,
                                                rank_search SMALLINT,
                                                indexed_status SMALLINT,
                                                indexed_date TIMESTAMP,
                                                partition SMALLINT,
                                                admin_level SMALLINT,
                                                address HSTORE,
                                                geometry_sector INTEGER)""")
            cur.execute("""CREATE TABLE location_property_osmline (
                               place_id BIGINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               geometry_sector INTEGER)""")
            cur.execute("""CREATE TABLE location_postcode (
                               place_id BIGINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               country_code varchar(2),
                               postcode TEXT)""")
            # Trigger function: record the time at which a row switches
            # to indexed_status 0, so that tests can check the order in
            # which rows were processed.
            cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
                           AS $$
                           BEGIN
                             IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
                               NEW.indexed_date = now();
                             END IF;
                             RETURN NEW;
                           END; $$ LANGUAGE plpgsql;""")
            # Simplified placex_prepare_update(): hands back the address
            # column as both name and address, country_feature stays unset.
            cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
                                                      OUT name HSTORE,
                                                      OUT address HSTORE,
                                                      OUT country_feature VARCHAR)
                           AS $$
                           BEGIN
                            address := p.address;
                            name := p.address;
                           END;
                           $$ LANGUAGE plpgsql STABLE;
                        """)
            # All three tables get the same timestamping update trigger.
            trigger_sql = """CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                               FOR EACH ROW EXECUTE PROCEDURE date_update()
                            """
            for table in ('placex', 'location_property_osmline', 'location_postcode'):
                cur.execute(trigger_sql.format(table))

    def scalar(self, query):
        """ Run `query` and return the first column of the first result row.
        """
        with self.conn.cursor() as cur:
            cur.execute(query)
            first_row = cur.fetchone()
        return first_row[0]

    def add_place(self, cls='place', typ='locality',
                  rank_search=30, rank_address=30, sector=20):
        """ Insert a placex row with indexed_status 1 and return the
            place id that was assigned to it.
        """
        place_id = next(self.placex_id)
        values = (place_id, cls, typ, rank_search, rank_address, sector)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO placex
                              (place_id, class, type, rank_search, rank_address,
                               indexed_status, geometry_sector)
                              VALUES (%s, %s, %s, %s, %s, 1, %s)""",
                        values)
        return place_id

    def add_admin(self, **kwargs):
        """ Insert a placex row of class 'boundary' and type 'administrative'.
            All other keyword arguments are passed on to add_place().
            Returns the new place id.
        """
        return self.add_place(**dict(kwargs, cls='boundary', typ='administrative'))

    def add_osmline(self, sector=20):
        """ Insert a location_property_osmline row with indexed_status 1
            and return its place id.
        """
        place_id = next(self.osmline_id)
        values = (place_id, sector)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO location_property_osmline
                              (place_id, indexed_status, geometry_sector)
                              VALUES (%s, 1, %s)""",
                        values)
        return place_id

    def add_postcode(self, country, postcode):
        """ Insert a location_postcode row with indexed_status 1 for the
            given country code and postcode and return its place id.
        """
        place_id = next(self.postcode_id)
        values = (place_id, country, postcode)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO location_postcode
                            (place_id, indexed_status, country_code, postcode)
                            VALUES (%s, 1, %s, %s)""",
                        values)
        return place_id

    def placex_unindexed(self):
        """ Return the number of placex rows with indexed_status above 0.
        """
        pending = self.scalar('SELECT count(*) from placex where indexed_status > 0')
        return pending

    def osmline_unindexed(self):
        """ Return the number of location_property_osmline rows with
            indexed_status above 0.
        """
        pending = self.scalar(
            'SELECT count(*) from location_property_osmline where indexed_status > 0')
        return pending
112
113
@pytest.fixture
def test_db(temp_db_conn):
    """ Provide an IndexerTestDB whose schema is created through the
        connection supplied by the `temp_db_conn` fixture.
    """
    database = IndexerTestDB(temp_db_conn)
    yield database
117
118
@pytest.fixture
def test_tokenizer(tokenizer_mock, def_config, tmp_path):
    """ Return a tokenizer built by the tokenizer factory from the default
        configuration, with pytest's `tmp_path` set as project directory.

        `tokenizer_mock` is not used directly in the body; presumably it
        is requested for the setup it performs.
    """
    def_config.project_dir = tmp_path
    tokenizer = factory.create_tokenizer(def_config)
    return tokenizer
123
124
@pytest.mark.parametrize("threads", [1, 15])
def test_index_all_by_rank(test_db, threads, test_tokenizer):
    """ Indexing ranks 0 to 30 must leave no placex row and no
        interpolation line unindexed and must process the rows in the
        expected order.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(0, 30)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0

    # every indexed row must carry an indexing date
    missing_date = test_db.scalar("""SELECT count(*) from placex
                               WHERE indexed_status = 0 and indexed_date is null""")
    assert missing_date == 0

    # ranks come in order of rank address
    out_of_order = test_db.scalar("""
        SELECT count(*) FROM placex p WHERE rank_address > 0
          AND indexed_date >= (SELECT min(indexed_date) FROM placex o
                               WHERE p.rank_address < o.rank_address)""")
    assert out_of_order == 0

    # placex rank < 30 objects come before interpolations
    after_osmline = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""")
    assert after_osmline == 0

    # placex rank = 30 objects come after interpolations
    before_osmline = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""")
    assert before_osmline == 0

    # rank 0 comes after rank 29 and before rank 30
    after_rank0 = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""")
    assert after_rank0 == 0
    before_rank0 = test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""")
    assert before_rank0 == 0
162
163
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_without_30(test_db, threads, test_tokenizer):
    """ Indexing ranks 4 to 15 must only touch placex rows within that
        rank range and must leave the interpolation line unindexed.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest',
                             test_tokenizer, threads)
    runner.index_by_rank(4, 15)

    # 12 of the 31 places (ranks 4 to 15) are done now
    assert test_db.placex_unindexed() == 19
    assert test_db.osmline_unindexed() == 1

    # nothing outside the requested rank range may have been indexed
    indexed_outside = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND not rank_address between 4 and 15""")
    assert indexed_outside == 0
183
184
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_with_30(test_db, threads, test_tokenizer):
    """ Indexing ranks 28 to 30 must also index the interpolation line
        while leaving the placex rows of rank 1 to 27 untouched.
    """
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 27
    assert test_db.osmline_unindexed() == 0

    # none of the lower address ranks may have been indexed
    indexed_below = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND rank_address between 1 and 27""")
    assert indexed_below == 0
203
@pytest.mark.parametrize("threads", [1, 15])
def test_index_boundaries(test_db, threads, test_tokenizer):
    """ index_boundaries() must index the administrative boundary rows
        and nothing else.
    """
    for level in range(4, 10):
        test_db.add_admin(rank_address=level, rank_search=level)
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()

    # 6 boundaries plus 31 ordinary places
    assert test_db.placex_unindexed() == 37
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_boundaries(0, 30)

    # only the ordinary places and the interpolation line remain
    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    indexed_non_boundaries = test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND class != 'boundary'""")
    assert indexed_non_boundaries == 0
224
225
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads, test_tokenizer):
    """ index_postcodes() must index all rows of location_postcode.
    """
    # 1000 postcodes each for two countries, inserted country by country
    postcode_ranges = (('de', range(1000)), ('us', range(32000, 33000)))
    for country, codes in postcode_ranges:
        for code in codes:
            test_db.add_postcode(country, code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_postcodes()

    remaining = test_db.scalar("""SELECT count(*) FROM location_postcode
                                  WHERE indexed_status != 0""")
    assert remaining == 0
238
239
@pytest.mark.parametrize("analyse", [True, False])
def test_index_full(test_db, analyse, test_tokenizer):
    """ index_full() must leave nothing unindexed in placex,
        location_property_osmline and location_postcode, both with
        and without the `analyse` option.
    """
    for level in range(4, 10):
        test_db.add_admin(rank_address=level, rank_search=level)
    for level in range(31):
        test_db.add_place(rank_address=level, rank_search=level)
    test_db.add_osmline()
    for code in range(1000):
        test_db.add_postcode('de', code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4)
    runner.index_full(analyse=analyse)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0
    remaining_postcodes = test_db.scalar("""SELECT count(*) FROM location_postcode
                                  WHERE indexed_status != 0""")
    assert remaining_postcodes == 0
257
258
@pytest.mark.parametrize("threads", [1, 15])
def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer):
    """ Indexing must still complete when WorkerPool.REOPEN_CONNECTIONS_AFTER
        is lowered to 15 while 1000 places need to be processed.
    """
    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)

    place_count = 1000
    for _ in range(place_count):
        test_db.add_place(rank_address=30, rank_search=30)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 0