git.openstreetmap.org Git - nominatim.git/blob - test/python/test_indexing.py
223a599ec4fc818c0bd3ad1a3712251b6ab8ec7a
[nominatim.git] / test / python / test_indexing.py
1 """
2 Tests for running the indexing.
3 """
4 import itertools
5 import psycopg2
6 import pytest
7
8 from nominatim.indexer import indexer
9
class IndexerTestDB:
    """Scaffold database used by the indexer tests.

    On construction it creates cut-down `placex`,
    `location_property_osmline` and `location_postcode` tables, the
    `date_update()` trigger function, a `placex_prepare_update()` function
    and one BEFORE UPDATE trigger per table. The `add_*` helpers insert rows
    with `indexed_status = 1`; the `*_unindexed` helpers count rows whose
    `indexed_status` is still greater than zero.
    """

    def __init__(self, conn):
        """Create the test schema through the connection `conn`."""
        self.conn = conn
        # Level 0 is psycopg2's autocommit isolation level.
        self.conn.set_isolation_level(0)

        # Each table hands out place ids from its own numeric range.
        self.placex_id = itertools.count(100000)
        self.osmline_id = itertools.count(500000)
        self.postcode_id = itertools.count(700000)

        with self.conn.cursor() as cur:
            cur.execute('CREATE EXTENSION hstore')

            # Tables reduced to the columns declared here.
            cur.execute("""CREATE TABLE placex (place_id BIGINT,
                                                class TEXT,
                                                type TEXT,
                                                rank_address SMALLINT,
                                                rank_search SMALLINT,
                                                indexed_status SMALLINT,
                                                indexed_date TIMESTAMP,
                                                partition SMALLINT,
                                                admin_level SMALLINT,
                                                address HSTORE,
                                                geometry_sector INTEGER)""")
            cur.execute("""CREATE TABLE location_property_osmline (
                               place_id BIGINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               geometry_sector INTEGER)""")
            cur.execute("""CREATE TABLE location_postcode (
                               place_id BIGINT,
                               indexed_status SMALLINT,
                               indexed_date TIMESTAMP,
                               country_code varchar(2),
                               postcode TEXT)""")

            # Trigger function: stamp indexed_date when indexed_status
            # changes from a non-zero value to 0.
            cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER
                           AS $$
                           BEGIN
                             IF NEW.indexed_status = 0 and OLD.indexed_status != 0 THEN
                               NEW.indexed_date = now();
                             END IF;
                             RETURN NEW;
                           END; $$ LANGUAGE plpgsql;""")

            # Stand-in that returns the row's address for both the name and
            # the address output; country_feature is never assigned.
            cur.execute("""CREATE OR REPLACE FUNCTION placex_prepare_update(p placex,
                                                      OUT name HSTORE,
                                                      OUT address HSTORE,
                                                      OUT country_feature VARCHAR)
                           AS $$
                           BEGIN
                            address := p.address;
                            name := p.address;
                           END;
                           $$ LANGUAGE plpgsql STABLE;
                        """)

            # Attach the date_update() trigger to every table.
            tables = ('placex', 'location_property_osmline', 'location_postcode')
            for table in tables:
                cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0}
                               FOR EACH ROW EXECUTE PROCEDURE date_update()
                            """.format(table))

    def scalar(self, query):
        """Execute `query` and return the first column of its first row."""
        with self.conn.cursor() as cur:
            cur.execute(query)
            first_row = cur.fetchone()
            return first_row[0]

    def add_place(self, cls='place', typ='locality',
                  rank_search=30, rank_address=30, sector=20):
        """Insert a placex row with indexed_status 1 and return its place id."""
        place_id = next(self.placex_id)
        values = (place_id, cls, typ, rank_search, rank_address, sector)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO placex
                              (place_id, class, type, rank_search, rank_address,
                               indexed_status, geometry_sector)
                              VALUES (%s, %s, %s, %s, %s, 1, %s)""",
                        values)
        return place_id

    def add_admin(self, **kwargs):
        """Insert a place forced to class 'boundary', type 'administrative'.

        Remaining keyword arguments are handed on to `add_place()`.
        """
        kwargs.update(cls='boundary', typ='administrative')
        return self.add_place(**kwargs)

    def add_osmline(self, sector=20):
        """Insert a location_property_osmline row with indexed_status 1.

        Returns the place id given to the new row.
        """
        line_id = next(self.osmline_id)
        values = (line_id, sector)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO location_property_osmline
                              (place_id, indexed_status, geometry_sector)
                              VALUES (%s, 1, %s)""",
                        values)
        return line_id

    def add_postcode(self, country, postcode):
        """Insert a location_postcode row with indexed_status 1.

        Returns the place id given to the new row.
        """
        postcode_id = next(self.postcode_id)
        values = (postcode_id, country, postcode)
        with self.conn.cursor() as cur:
            cur.execute("""INSERT INTO location_postcode
                            (place_id, indexed_status, country_code, postcode)
                            VALUES (%s, 1, %s, %s)""",
                        values)
        return postcode_id

    def placex_unindexed(self):
        """Return the number of placex rows with indexed_status > 0."""
        query = 'SELECT count(*) from placex where indexed_status > 0'
        return self.scalar(query)

    def osmline_unindexed(self):
        """Return the number of osmline rows with indexed_status > 0."""
        query = 'SELECT count(*) from location_property_osmline where indexed_status > 0'
        return self.scalar(query)
111
112
@pytest.fixture
def test_db(temp_db_conn):
    """Yield an `IndexerTestDB` set up on the `temp_db_conn` connection."""
    indexer_db = IndexerTestDB(temp_db_conn)
    yield indexer_db
116
117
@pytest.mark.parametrize("threads", [1, 15])
def test_index_all_by_rank(test_db, threads):
    """Index ranks 0 to 30 and check the order in which rows were stamped."""
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()

    # Precondition: nothing has been indexed yet.
    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_by_rank(0, 30)

    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0

    # Every indexed row must carry an indexing date.
    assert test_db.scalar("""SELECT count(*) from placex
                               WHERE indexed_status = 0 and indexed_date is null""") == 0
    # Lower address ranks are stamped strictly before higher ones.
    assert test_db.scalar("""
        SELECT count(*) FROM placex p WHERE rank_address > 0
          AND indexed_date >= (SELECT min(indexed_date) FROM placex o
                               WHERE p.rank_address < o.rank_address)""") == 0
    # Places below rank 30 are done no later than the interpolations.
    assert test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""") == 0
    # Rank 30 places are done no earlier than the interpolations.
    assert test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM location_property_osmline)""") == 0
    # Rank 0 sits between rank 29 and rank 30.
    assert test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address < 30
             AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
    assert test_db.scalar(
        """SELECT count(*) FROM placex WHERE rank_address = 30
             AND indexed_date < (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""") == 0
155
156
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_without_30(test_db, threads):
    """Index ranks 4 to 15 only; all other rows must stay unindexed."""
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_by_rank(4, 15)

    # 12 of the 31 places fall into the range; the interpolation is untouched.
    assert test_db.placex_unindexed() == 19
    assert test_db.osmline_unindexed() == 1

    # Nothing outside the requested range may have been indexed.
    assert test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND not rank_address between 4 and 15""") == 0
175
176
@pytest.mark.parametrize("threads", [1, 15])
def test_index_partial_with_30(test_db, threads):
    """Index ranks 28 to 30; the interpolation line must be indexed as well."""
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()

    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_by_rank(28, 30)

    # Exactly the 27 places of ranks 1 to 27 remain, so rank 0 must have been
    # indexed together with ranks 28 to 30.
    assert test_db.placex_unindexed() == 27
    assert test_db.osmline_unindexed() == 0

    assert test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND rank_address between 1 and 27""") == 0
195
@pytest.mark.parametrize("threads", [1, 15])
def test_index_boundaries(test_db, threads):
    """`index_boundaries()` must only touch rows of class 'boundary'."""
    for rank in range(4, 10):
        test_db.add_admin(rank_address=rank, rank_search=rank)
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()

    # 6 administrative boundaries plus 31 ordinary places.
    assert test_db.placex_unindexed() == 37
    assert test_db.osmline_unindexed() == 1

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_boundaries(0, 30)

    # Only the 6 boundaries are done; places and interpolations remain.
    assert test_db.placex_unindexed() == 31
    assert test_db.osmline_unindexed() == 1

    assert test_db.scalar("""
                    SELECT count(*) FROM placex
                      WHERE indexed_status = 0 AND class != 'boundary'""") == 0
216
217
@pytest.mark.parametrize("threads", [1, 15])
def test_index_postcodes(test_db, threads):
    """`index_postcodes()` must leave no postcode row unindexed."""
    for code in range(1000):
        test_db.add_postcode('de', code)
    for code in range(32000, 33000):
        test_db.add_postcode('us', code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_postcodes()

    assert test_db.scalar("""SELECT count(*) FROM location_postcode
                                  WHERE indexed_status != 0""") == 0
230
231
@pytest.mark.parametrize("analyse", [True, False])
def test_index_full(test_db, analyse):
    """`index_full()` must index places, interpolations and postcodes alike."""
    for rank in range(4, 10):
        test_db.add_admin(rank_address=rank, rank_search=rank)
    for rank in range(31):
        test_db.add_place(rank_address=rank, rank_search=rank)
    test_db.add_osmline()
    for code in range(1000):
        test_db.add_postcode('de', code)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', 4)
    runner.index_full(analyse=analyse)

    # No table may have rows left that still wait for indexing.
    assert test_db.placex_unindexed() == 0
    assert test_db.osmline_unindexed() == 0
    assert test_db.scalar("""SELECT count(*) FROM location_postcode
                                  WHERE indexed_status != 0""") == 0
249
250
@pytest.mark.parametrize("threads", [1, 15])
def test_index_reopen_connection(test_db, threads, monkeypatch):
    """Indexing must complete with `REOPEN_CONNECTIONS_AFTER` lowered to 15.

    NOTE(review): presumably the low limit forces the worker pool to reopen
    its connections several times while the 1000 places are processed;
    confirm against `indexer.WorkerPool`.
    """
    monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15)

    for _ in range(1000):
        test_db.add_place(rank_address=30, rank_search=30)

    runner = indexer.Indexer('dbname=test_nominatim_python_unittest', threads)
    runner.index_by_rank(28, 30)

    assert test_db.placex_unindexed() == 0