X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/f448423727ce8f96723820b6ad1aacc2e5310843..2aba0ad4bc0a924337a2541e983d61a26cf67ce6:/test/python/api/search/test_icu_query_analyzer.py?ds=sidebyside diff --git a/test/python/api/search/test_icu_query_analyzer.py b/test/python/api/search/test_icu_query_analyzer.py index a88ca8b8..eb453fda 100644 --- a/test/python/api/search/test_icu_query_analyzer.py +++ b/test/python/api/search/test_icu_query_analyzer.py @@ -2,20 +2,19 @@ # # This file is part of Nominatim. (https://nominatim.org) # -# Copyright (C) 2023 by the Nominatim developer community. +# Copyright (C) 2024 by the Nominatim developer community. # For a full list of authors see the git log. """ Tests for query analyzer for ICU tokenizer. """ -from pathlib import Path - import pytest import pytest_asyncio -from nominatim.api import NominatimAPIAsync -from nominatim.api.search.query import Phrase, PhraseType, TokenType, BreakType -import nominatim.api.search.icu_tokenizer as tok -from nominatim.api.logging import set_log_output, get_and_disable +from nominatim_api import NominatimAPIAsync +from nominatim_api.search.query import Phrase +import nominatim_api.search.query as qmod +import nominatim_api.search.icu_tokenizer as tok +from nominatim_api.logging import set_log_output, get_and_disable async def add_word(conn, word_id, word_token, wtype, word, info = None): t = conn.t.meta.tables['word'] @@ -27,7 +26,7 @@ async def add_word(conn, word_id, word_token, wtype, word, info = None): def make_phrase(query): - return [Phrase(PhraseType.NONE, s) for s in query.split(',')] + return [Phrase(qmod.PHRASE_ANY, s) for s in query.split(',')] @pytest_asyncio.fixture async def conn(table_factory): @@ -40,10 +39,9 @@ async def conn(table_factory): table_factory('word', definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB') - api = NominatimAPIAsync(Path('/invalid'), {}) - async with api.begin() as conn: - yield conn - await api.close() + async with NominatimAPIAsync() as api: + async with api.begin() as conn: + yield conn @pytest.mark.asyncio @@ -65,7 +63,7 @@ async def test_single_phrase_with_unknown_terms(conn): query = await ana.analyze_query(make_phrase('foo BAR')) assert len(query.source) == 1 - assert query.source[0].ptype == PhraseType.NONE + assert query.source[0].ptype == qmod.PHRASE_ANY assert query.source[0].text == 'foo bar' assert query.num_token_slots() == 2 @@ -99,12 +97,12 @@ async def test_splitting_in_transliteration(conn): assert query.num_token_slots() == 2 assert query.nodes[0].starting assert query.nodes[1].starting - assert query.nodes[1].btype == BreakType.TOKEN + assert query.nodes[1].btype == qmod.BREAK_TOKEN @pytest.mark.asyncio -@pytest.mark.parametrize('term,order', [('23456', ['POSTCODE', 'HOUSENUMBER', 'WORD', 'PARTIAL']), - ('3', ['HOUSENUMBER', 'POSTCODE', 'WORD', 'PARTIAL']) +@pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W', 'w']), + ('3', ['H', 'P', 'W', 'w']) ]) async def test_penalty_postcodes_and_housenumbers(conn, term, order): ana = await tok.create_query_analyzer(conn) @@ -118,7 +116,7 @@ async def test_penalty_postcodes_and_housenumbers(conn, term, order): assert query.num_token_slots() == 1 - torder = [(tl.tokens[0].penalty, tl.ttype.name) for tl in query.nodes[0].starting] + torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting] torder.sort() assert [t[1] for t in torder] == order @@ -134,10 +132,23 @@ async def test_category_words_only_at_beginning(conn): assert query.num_token_slots() == 3 assert len(query.nodes[0].starting) == 1 - assert query.nodes[0].starting[0].ttype == TokenType.NEAR_ITEM + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM assert not query.nodes[2].starting +@pytest.mark.asyncio +async def test_freestanding_qualifier_words_become_category(conn): + ana = await tok.create_query_analyzer(conn) + + await add_word(conn, 1, 'foo', 'S', 'FOO', {'op': '-'}) + + query = await ana.analyze_query(make_phrase('foo')) + + assert query.num_token_slots() == 1 + assert len(query.nodes[0].starting) == 1 + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM + + @pytest.mark.asyncio async def test_qualifier_words(conn): ana = await tok.create_query_analyzer(conn) @@ -148,9 +159,9 @@ async def test_qualifier_words(conn): query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) assert query.num_token_slots() == 5 - assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} - assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} - assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} + assert set(t.ttype for t in query.nodes[0].starting) == {qmod.TOKEN_QUALIFIER} + assert set(t.ttype for t in query.nodes[2].starting) == {qmod.TOKEN_QUALIFIER} + assert set(t.ttype for t in query.nodes[4].starting) == {qmod.TOKEN_QUALIFIER} @pytest.mark.asyncio @@ -162,10 +173,10 @@ async def test_add_unknown_housenumbers(conn): query = await ana.analyze_query(make_phrase('466 23 99834 34a')) assert query.num_token_slots() == 4 - assert query.nodes[0].starting[0].ttype == TokenType.HOUSENUMBER + assert query.nodes[0].starting[0].ttype == qmod.TOKEN_HOUSENUMBER assert len(query.nodes[0].starting[0].tokens) == 1 assert query.nodes[0].starting[0].tokens[0].token == 0 - assert query.nodes[1].starting[0].ttype == TokenType.HOUSENUMBER + assert query.nodes[1].starting[0].ttype == qmod.TOKEN_HOUSENUMBER assert len(query.nodes[1].starting[0].tokens) == 1 assert query.nodes[1].starting[0].tokens[0].token == 1 assert not query.nodes[2].starting