From cbb474999659836173d410b38e65533e465fa140 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 31 May 2022 14:16:06 +0200 Subject: [PATCH] change indexing order for interpolations Interpolations are now indexed after rank 30 objects. The housenumber nodes no longer need information from the interpolations while the interpolations can make use of precomputed postcodes. --- nominatim/indexer/indexer.py | 7 ++----- test/python/indexer/test_indexing.py | 17 ++++------------- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 98bb5211..555f8704 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -160,15 +160,12 @@ class Indexer: minrank, maxrank, self.num_threads) with self.tokenizer.name_analyzer() as analyzer: - for rank in range(max(1, minrank), maxrank): - self._index(runners.RankRunner(rank, analyzer)) + for rank in range(max(1, minrank), maxrank + 1): + self._index(runners.RankRunner(rank, analyzer), 20 if rank == 30 else 1) if maxrank == 30: self._index(runners.RankRunner(0, analyzer)) self._index(runners.InterpolationRunner(analyzer), 20) - self._index(runners.RankRunner(30, analyzer), 20) - else: - self._index(runners.RankRunner(maxrank, analyzer)) def index_postcodes(self): diff --git a/test/python/indexer/test_indexing.py b/test/python/indexer/test_indexing.py index e303f381..45c68a33 100644 --- a/test/python/indexer/test_indexing.py +++ b/test/python/indexer/test_indexing.py @@ -177,25 +177,16 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer): SELECT count(*) FROM placex p WHERE rank_address > 0 AND indexed_date >= (SELECT min(indexed_date) FROM placex o WHERE p.rank_address < o.rank_address)""") == 0 - # placex rank < 30 objects come before interpolations + # placex address ranked objects come before interpolations assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address < 30 + """SELECT count(*) FROM placex WHERE rank_address > 0 AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""") == 0 - # placex rank = 30 objects come after interpolations + # rank 0 comes after all other placex objects assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address = 30 - AND indexed_date < - (SELECT max(indexed_date) FROM location_property_osmline)""") == 0 - # rank 0 comes after rank 29 and before rank 30 - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address < 30 + """SELECT count(*) FROM placex WHERE rank_address > 0 AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address = 30 - AND indexed_date < - (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 @pytest.mark.parametrize("threads", [1, 15]) -- 2.39.5