X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d3a575319feaf32009e0abe3efe4d8f2dc196e28..31412e06740727695c5d9512e0cd59c0dd683322:/test/python/api/search/test_icu_query_analyzer.py diff --git a/test/python/api/search/test_icu_query_analyzer.py b/test/python/api/search/test_icu_query_analyzer.py index 6a17e32a..84d8b393 100644 --- a/test/python/api/search/test_icu_query_analyzer.py +++ b/test/python/api/search/test_icu_query_analyzer.py @@ -2,20 +2,19 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2023 by the Nominatim developer community. +# Copyright (C) 2024 by the Nominatim developer community. # For a full list of authors see the git log. """ Tests for query analyzer for ICU tokenizer. """ -from pathlib import Path - import pytest import pytest_asyncio -from nominatim.api import NominatimAPIAsync -from nominatim.api.search.query import Phrase, PhraseType, TokenType, BreakType -import nominatim.api.search.icu_tokenizer as tok -from nominatim.api.logging import set_log_output, get_and_disable +from nominatim_api import NominatimAPIAsync +from nominatim_api.search.query import Phrase, PhraseType +import nominatim_api.search.query as qmod +import nominatim_api.search.icu_tokenizer as tok +from nominatim_api.logging import set_log_output, get_and_disable async def add_word(conn, word_id, word_token, wtype, word, info = None): t = conn.t.meta.tables['word'] @@ -40,10 +39,9 @@ async def conn(table_factory): table_factory('word', definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB') - api = NominatimAPIAsync(Path('/invalid'), {}) - async with api.begin() as conn: - yield conn - await api.close() + async with NominatimAPIAsync() as api: + async with api.begin() as conn: + yield conn @pytest.mark.asyncio @@ -99,12 +97,12 @@ async def test_splitting_in_transliteration(conn): assert query.num_token_slots() == 2 assert query.nodes[0].starting assert query.nodes[1].starting - assert query.nodes[1].btype == BreakType.TOKEN + assert query.nodes[1].btype == qmod.BREAK_TOKEN @pytest.mark.asyncio -@pytest.mark.parametrize('term,order', [('23456', ['POSTCODE', 'HOUSENUMBER', 'WORD', 'PARTIAL']), - ('3', ['HOUSENUMBER', 'POSTCODE', 'WORD', 'PARTIAL']) +@pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W', 'w']), + ('3', ['H', 'P', 'W', 'w']) ]) async def test_penalty_postcodes_and_housenumbers(conn, term, order): ana = await tok.create_query_analyzer(conn) @@ -118,7 +116,7 @@ async def test_penalty_postcodes_and_housenumbers(conn, term, order): assert query.num_token_slots() == 1 - torder = [(tl.tokens[0].penalty, tl.ttype.name) for tl in query.nodes[0].starting] + torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting] torder.sort() assert [t[1] for t in torder] == order @@ -134,10 +132,23 @@ async def test_category_words_only_at_beginning(conn): assert query.num_token_slots() == 3 assert len(query.nodes[0].starting) == 1 - assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM assert not query.nodes[2].starting +@pytest.mark.asyncio +async def test_freestanding_qualifier_words_become_category(conn): + ana = await tok.create_query_analyzer(conn) + + await add_word(conn, 1, 'foo', 'S', 'FOO', {'op': '-'}) + + query = await ana.analyze_query(make_phrase('foo')) + + assert query.num_token_slots() == 1 + assert len(query.nodes[0].starting) == 1 + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM + + @pytest.mark.asyncio async def test_qualifier_words(conn): ana = await tok.create_query_analyzer(conn) @@ -148,9 +159,9 @@ async def test_qualifier_words(conn): query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) assert query.num_token_slots() == 5 - assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER} - assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} - assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER} + assert set(t.ttype for t in query.nodes[0].starting) == {qmod.TOKEN_QUALIFIER} + assert set(t.ttype for t in query.nodes[2].starting) == {qmod.TOKEN_QUALIFIER} + assert set(t.ttype for t in query.nodes[4].starting) == {qmod.TOKEN_QUALIFIER} @pytest.mark.asyncio @@ -162,10 +173,10 @@ async def test_add_unknown_housenumbers(conn): query = await ana.analyze_query(make_phrase('466 23 99834 34a')) assert query.num_token_slots() == 4 - assert query.nodes[0].starting[0].ttype == TokenType.HOUSENUMBER + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_HOUSENUMBER assert len(query.nodes[0].starting[0].tokens) == 1 assert query.nodes[0].starting[0].tokens[0].token == 0 - assert query.nodes[1].starting[0].ttype == TokenType.HOUSENUMBER + assert query.nodes[1].starting[0].ttype == qmod.TOKEN_HOUSENUMBER assert len(query.nodes[1].starting[0].tokens) == 1 assert query.nodes[1].starting[0].tokens[0].token == 1 assert not query.nodes[2].starting