]> git.openstreetmap.org Git - nominatim.git/blob - test/python/test_indexing.py
introduce tokenizer modules
[nominatim.git] / test / python / test_indexing.py
1 """
2 Tests for running the indexing.
3 """
4 import itertools
5 import psycopg2
6 import pytest
7
8 from nominatim.indexer import indexer
9
10 class IndexerTestDB:
11
12     def __init__(self, conn):
13         self.placex_id = itertools.count(100000)
14         self.osmline_id = itertools.count(500000)
15         self.postcode_id = itertools.count(700000)
16
17         self.conn = conn
18         self.conn.set_isolation_level(0)
19         with self.conn.cursor() as cur:
20             cur.execute("""CREATE TABLE placex (place_id BIGINT,
21                                                 class TEXT,
22                                                 type TEXT,
23                                                 rank_address SMALLINT,
24                                                 rank_search SMALLINT,
25                                                 indexed_status SMALLINT,
26                                                 indexed_date TIMESTAMP,
27                                                 partition SMALLINT,
28                                                 admin_level SMALLINT,
29                                                 geometry_sector INTEGER)""")
30             cur.execute("""CREATE TABLE location_property_osmline (
31                                place_id BIGINT,
32                                indexed_status SMALLINT,
33                                indexed_date TIMESTAMP,
34                                geometry_sector INTEGER)""")
35             cur.execute("""CREATE TABLE location_postcode (
36                                place_id BIGINT,
37                                indexed_status SMALLINT,
38                                indexed_date TIMESTAMP,
39                                country_code varchar(2),
40                                postcode TEXT)""")
41             cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
42                            AS $$
43                            BEGIN
44                              IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
45                                NEW.indexed_date = now();
46                              END IF;
47                              RETURN NEW;
48                            END; $$ LANGUAGE plpgsql;""")
49             for table in ('placex', 'location_property_osmline', 'location_postcode'):
50                 cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
51                                FOR EACH ROW EXECUTE PROCEDURE date_update()
52                             """.format(table))
53
54     def scalar(self, query):
55         with self.conn.cursor() as cur:
56             cur.execute(query)
57             return cur.fetchone()[0]
58
59     def add_place(self, cls='place', typ='locality',
60                   rank_search=30, rank_address=30, sector=20):
61         next_id = next(self.placex_id)
62         with self.conn.cursor() as cur:
63             cur.execute("""INSERT INTO placex
64                               (place_id, class, type, rank_search, rank_address,
65                                indexed_status, geometry_sector)
66                               VALUES (%s, %s, %s, %s, %s, 1, %s)""",
67                         (next_id, cls, typ, rank_search, rank_address, sector))
68         return next_id
69
70     def add_admin(self, **kwargs):
71         kwargs['cls'] = 'boundary'
72         kwargs['typ'] = 'administrative'
73         return self.add_place(**kwargs)
74
75     def add_osmline(self, sector=20):
76         next_id = next(self.osmline_id)
77         with self.conn.cursor() as cur:
78             cur.execute("""INSERT INTO location_property_osmline
79                               (place_id, indexed_status, geometry_sector)
80                               VALUES (%s, 1, %s)""",
81                         (next_id, sector))
82         return next_id
83
84     def add_postcode(self, country, postcode):
85         next_id = next(self.postcode_id)
86         with self.conn.cursor() as cur:
87             cur.execute("""INSERT INTO location_postcode
88                             (place_id, indexed_status, country_code, postcode)
89                             VALUES (%s, 1, %s, %s)""",
90                         (next_id, country, postcode))
91         return next_id
92
93     def placex_unindexed(self):
94         return self.scalar('SELECT count(*) from placex where indexed_status > 0')
95
96     def osmline_unindexed(self):
97         return self.scalar('SELECT count(*) from location_property_osmline where indexed_status > 0')
98
99
100 @pytest.fixture
101 def test_db(temp_db_conn):
102     yield IndexerTestDB(temp_db_conn)
103
104
105 @pytest.mark.parametrize("threads", [1, 15])
106 def test_index_all_by_rank(test_db, threads):
107     for rank in range(31):
108         test_db.add_place(rank_address=rank, rank_search=rank)
109     test_db.add_osmline()
110
111     assert 31 == test_db.placex_unindexed()
112     assert 1 == test_db.osmline_unindexed()
113
114     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
115     idx.index_by_rank(0, 30)
116
117     assert 0 == test_db.placex_unindexed()
118     assert 0 == test_db.osmline_unindexed()
119
120     assert 0 == test_db.scalar("""SELECT count(*) from placex
121                                WHERE indexed_status = 0 and indexed_date is null""")
122     # ranks come in order of rank address
123     assert 0 == test_db.scalar("""
124         SELECT count(*) FROM placex p WHERE rank_address > 0
125           AND indexed_date >= (SELECT min(indexed_date) FROM placex o
126                                WHERE p.rank_address < o.rank_address)""")
127     # placex rank < 30 objects come before interpolations
128     assert 0 == test_db.scalar(
129         """SELECT count(*) FROM placex WHERE rank_address < 30
130              AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""")
131     # placex rank = 30 objects come after interpolations
132     assert 0 == test_db.scalar(
133         """SELECT count(*) FROM placex WHERE rank_address = 30
134              AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""")
135     # rank 0 comes after rank 29 and before rank 30
136     assert 0 == test_db.scalar(
137         """SELECT count(*) FROM placex WHERE rank_address < 30
138              AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""")
139     assert 0 == test_db.scalar(
140         """SELECT count(*) FROM placex WHERE rank_address = 30
141              AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""")
142
143
144 @pytest.mark.parametrize("threads", [1, 15])
145 def test_index_partial_without_30(test_db, threads):
146     for rank in range(31):
147         test_db.add_place(rank_address=rank, rank_search=rank)
148     test_db.add_osmline()
149
150     assert 31 == test_db.placex_unindexed()
151     assert 1 == test_db.osmline_unindexed()
152
153     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
154     idx.index_by_rank(4, 15)
155
156     assert 19 == test_db.placex_unindexed()
157     assert 1 == test_db.osmline_unindexed()
158
159     assert 0 == test_db.scalar("""
160                     SELECT count(*) FROM placex
161                       WHERE indexed_status = 0 AND not rank_address between 4 and 15""")
162
163
164 @pytest.mark.parametrize("threads", [1, 15])
165 def test_index_partial_with_30(test_db, threads):
166     for rank in range(31):
167         test_db.add_place(rank_address=rank, rank_search=rank)
168     test_db.add_osmline()
169
170     assert 31 == test_db.placex_unindexed()
171     assert 1 == test_db.osmline_unindexed()
172
173     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
174     idx.index_by_rank(28, 30)
175
176     assert 27 == test_db.placex_unindexed()
177     assert 0 == test_db.osmline_unindexed()
178
179     assert 0 == test_db.scalar("""
180                     SELECT count(*) FROM placex
181                       WHERE indexed_status = 0 AND rank_address between 1 and 27""")
182
183 @pytest.mark.parametrize("threads", [1, 15])
184 def test_index_boundaries(test_db, threads):
185     for rank in range(4, 10):
186         test_db.add_admin(rank_address=rank, rank_search=rank)
187     for rank in range(31):
188         test_db.add_place(rank_address=rank, rank_search=rank)
189     test_db.add_osmline()
190
191     assert 37 == test_db.placex_unindexed()
192     assert 1 == test_db.osmline_unindexed()
193
194     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
195     idx.index_boundaries(0, 30)
196
197     assert 31 == test_db.placex_unindexed()
198     assert 1 == test_db.osmline_unindexed()
199
200     assert 0 == test_db.scalar("""
201                     SELECT count(*) FROM placex
202                       WHERE indexed_status = 0 AND class != 'boundary'""")
203
204
205 @pytest.mark.parametrize("threads", [1, 15])
206 def test_index_postcodes(test_db, threads):
207     for postcode in range(1000):
208         test_db.add_postcode('de', postcode)
209     for postcode in range(32000, 33000):
210         test_db.add_postcode('us', postcode)
211
212     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
213     idx.index_postcodes()
214
215     assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
216                                   WHERE indexed_status != 0""")
217
218
219 @pytest.mark.parametrize("analyse", [True, False])
220 def test_index_full(test_db, analyse):
221     for rank in range(4, 10):
222         test_db.add_admin(rank_address=rank, rank_search=rank)
223     for rank in range(31):
224         test_db.add_place(rank_address=rank, rank_search=rank)
225     test_db.add_osmline()
226     for postcode in range(1000):
227         test_db.add_postcode('de', postcode)
228
229     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
230     idx.index_full(analyse=analyse)
231
232     assert 0 == test_db.placex_unindexed()
233     assert 0 == test_db.osmline_unindexed()
234     assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode
235                                   WHERE indexed_status != 0""")
236
237
238 @pytest.mark.parametrize("threads", [1, 15])
239 def test_index_reopen_connection(test_db, threads, monkeypatch):
240     monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)
241
242     for _ in range(1000):
243         test_db.add_place(rank_address=30, rank_search=30)
244
245     idx = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
246     idx.index_by_rank(28, 30)
247
248     assert 0 == test_db.placex_unindexed()