X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b90e719da595b6760b39b7cd64ee29447de2d5e8..0add25e33500110a36458a0bb3fa292d408d2b85:/test/python/indexer/test_indexing.py?ds=inline diff --git a/test/python/indexer/test_indexing.py b/test/python/indexer/test_indexing.py index 4c9d940d..fe65b69c 100644 --- a/test/python/indexer/test_indexing.py +++ b/test/python/indexer/test_indexing.py @@ -1,11 +1,18 @@ +# SPDX-License-Identifier: GPL-3.0-or-later +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2024 by the Nominatim developer community. +# For a full list of authors see the git log. """ Tests for running the indexing. """ import itertools import pytest +import pytest_asyncio -from nominatim.indexer import indexer -from nominatim.tokenizer import factory +from nominatim_db.indexer import indexer +from nominatim_db.tokenizer import factory class IndexerTestDB: @@ -15,9 +22,8 @@ class IndexerTestDB: self.postcode_id = itertools.count(700000) self.conn = conn - self.conn.set_isolation_level(0) + self.conn.autocommit = True with self.conn.cursor() as cur: - cur.execute('CREATE EXTENSION hstore') cur.execute("""CREATE TABLE placex (place_id BIGINT, name HSTORE, class TEXT, @@ -145,13 +151,13 @@ def test_db(temp_db_conn): @pytest.fixture -def test_tokenizer(tokenizer_mock, def_config, tmp_path): - def_config.project_dir = tmp_path - return factory.create_tokenizer(def_config) +def test_tokenizer(tokenizer_mock, project_env): + return factory.create_tokenizer(project_env) @pytest.mark.parametrize("threads", [1, 15]) -def test_index_all_by_rank(test_db, threads, test_tokenizer): +@pytest.mark.asyncio +async def test_index_all_by_rank(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -160,7 +166,7 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer): assert test_db.osmline_unindexed() == 1 idx = 
indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - idx.index_by_rank(0, 30) + await idx.index_by_rank(0, 30) assert test_db.placex_unindexed() == 0 assert test_db.osmline_unindexed() == 0 @@ -172,29 +178,21 @@ def test_index_all_by_rank(test_db, threads, test_tokenizer): SELECT count(*) FROM placex p WHERE rank_address > 0 AND indexed_date >= (SELECT min(indexed_date) FROM placex o WHERE p.rank_address < o.rank_address)""") == 0 - # placex rank < 30 objects come before interpolations + # placex address ranked objects come before interpolations assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address < 30 + """SELECT count(*) FROM placex WHERE rank_address > 0 AND indexed_date > (SELECT min(indexed_date) FROM location_property_osmline)""") == 0 - # placex rank = 30 objects come after interpolations + # rank 0 comes after all other placex objects assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address = 30 - AND indexed_date < - (SELECT max(indexed_date) FROM location_property_osmline)""") == 0 - # rank 0 comes after rank 29 and before rank 30 - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address < 30 + """SELECT count(*) FROM placex WHERE rank_address > 0 AND indexed_date > (SELECT min(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 - assert test_db.scalar( - """SELECT count(*) FROM placex WHERE rank_address = 30 - AND indexed_date < - (SELECT max(indexed_date) FROM placex WHERE rank_address = 0)""") == 0 @pytest.mark.parametrize("threads", [1, 15]) -def test_index_partial_without_30(test_db, threads, test_tokenizer): +@pytest.mark.asyncio +async def test_index_partial_without_30(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -204,7 +202,7 @@ def test_index_partial_without_30(test_db, threads, test_tokenizer): idx = indexer.Indexer('dbname=test_nominatim_python_unittest', 
test_tokenizer, threads) - idx.index_by_rank(4, 15) + await idx.index_by_rank(4, 15) assert test_db.placex_unindexed() == 19 assert test_db.osmline_unindexed() == 1 @@ -215,7 +213,8 @@ def test_index_partial_without_30(test_db, threads, test_tokenizer): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_partial_with_30(test_db, threads, test_tokenizer): +@pytest.mark.asyncio +async def test_index_partial_with_30(test_db, threads, test_tokenizer): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -224,7 +223,7 @@ def test_index_partial_with_30(test_db, threads, test_tokenizer): assert test_db.osmline_unindexed() == 1 idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - idx.index_by_rank(28, 30) + await idx.index_by_rank(28, 30) assert test_db.placex_unindexed() == 27 assert test_db.osmline_unindexed() == 0 @@ -234,7 +233,8 @@ def test_index_partial_with_30(test_db, threads, test_tokenizer): WHERE indexed_status = 0 AND rank_address between 1 and 27""") == 0 @pytest.mark.parametrize("threads", [1, 15]) -def test_index_boundaries(test_db, threads, test_tokenizer): +@pytest.mark.asyncio +async def test_index_boundaries(test_db, threads, test_tokenizer): for rank in range(4, 10): test_db.add_admin(rank_address=rank, rank_search=rank) for rank in range(31): @@ -245,7 +245,7 @@ def test_index_boundaries(test_db, threads, test_tokenizer): assert test_db.osmline_unindexed() == 1 idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - idx.index_boundaries(0, 30) + await idx.index_boundaries(0, 30) assert test_db.placex_unindexed() == 31 assert test_db.osmline_unindexed() == 1 @@ -256,21 +256,23 @@ def test_index_boundaries(test_db, threads, test_tokenizer): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_postcodes(test_db, threads, test_tokenizer): +@pytest.mark.asyncio +async def test_index_postcodes(test_db, threads, 
test_tokenizer): for postcode in range(1000): test_db.add_postcode('de', postcode) for postcode in range(32000, 33000): test_db.add_postcode('us', postcode) idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - idx.index_postcodes() + await idx.index_postcodes() assert test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") == 0 @pytest.mark.parametrize("analyse", [True, False]) -def test_index_full(test_db, analyse, test_tokenizer): +@pytest.mark.asyncio +async def test_index_full(test_db, analyse, test_tokenizer): for rank in range(4, 10): test_db.add_admin(rank_address=rank, rank_search=rank) for rank in range(31): @@ -280,22 +282,9 @@ def test_index_full(test_db, analyse, test_tokenizer): test_db.add_postcode('de', postcode) idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, 4) - idx.index_full(analyse=analyse) + await idx.index_full(analyse=analyse) assert test_db.placex_unindexed() == 0 assert test_db.osmline_unindexed() == 0 assert test_db.scalar("""SELECT count(*) FROM location_postcode WHERE indexed_status != 0""") == 0 - - -@pytest.mark.parametrize("threads", [1, 15]) -def test_index_reopen_connection(test_db, threads, monkeypatch, test_tokenizer): - monkeypatch.setattr(indexer.WorkerPool, "REOPEN_CONNECTIONS_AFTER", 15) - - for _ in range(1000): - test_db.add_place(rank_address=30, rank_search=30) - - idx = indexer.Indexer('dbname=test_nominatim_python_unittest', test_tokenizer, threads) - idx.index_by_rank(28, 30) - - assert test_db.placex_unindexed() == 0