#
# This file is part of Nominatim. (https://nominatim.org)
#
-# Copyright (C) 2023 by the Nominatim developer community.
+# Copyright (C) 2024 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Tests for query analyzer for ICU tokenizer.
"""
-from pathlib import Path
-
import pytest
import pytest_asyncio
-from nominatim.api import NominatimAPIAsync
-from nominatim.api.search.query import Phrase, PhraseType, TokenType, BreakType
-import nominatim.api.search.icu_tokenizer as tok
-from nominatim.api.logging import set_log_output, get_and_disable
+from nominatim_api import NominatimAPIAsync
+from nominatim_api.search.query import Phrase
+import nominatim_api.search.query as qmod
+import nominatim_api.search.icu_tokenizer as tok
+from nominatim_api.logging import set_log_output, get_and_disable
async def add_word(conn, word_id, word_token, wtype, word, info = None):
t = conn.t.meta.tables['word']
def make_phrase(query):
- return [Phrase(PhraseType.NONE, s) for s in query.split(',')]
+ # Test helper: every comma-separated part of `query` becomes one Phrase
+ # tagged with qmod.PHRASE_ANY.
+ return [Phrase(qmod.PHRASE_ANY, s) for s in query.split(',')]
@pytest_asyncio.fixture
async def conn(table_factory):
table_factory('word',
definition='word_id INT, word_token TEXT, type TEXT, word TEXT, info JSONB')
- api = NominatimAPIAsync(Path('/invalid'), {})
- async with api.begin() as conn:
- yield conn
- await api.close()
+ # The 'word' table is created above; the connection is yielded from inside
+ # both `async with` blocks, so the API object and the connection remain
+ # open for the duration of the test that uses this fixture.
+ async with NominatimAPIAsync() as api:
+ async with api.begin() as conn:
+ yield conn
@pytest.mark.asyncio
query = await ana.analyze_query(make_phrase('foo BAR'))
assert len(query.source) == 1
- assert query.source[0].ptype == PhraseType.NONE
+ assert query.source[0].ptype == qmod.PHRASE_ANY
assert query.source[0].text == 'foo bar'
assert query.num_token_slots() == 2
assert query.num_token_slots() == 2
assert query.nodes[0].starting
assert query.nodes[1].starting
- assert query.nodes[1].btype == BreakType.TOKEN
+ assert query.nodes[1].btype == qmod.BREAK_TOKEN
@pytest.mark.asyncio
-@pytest.mark.parametrize('term,order', [('23456', ['POSTCODE', 'HOUSENUMBER', 'WORD', 'PARTIAL']),
- ('3', ['HOUSENUMBER', 'POSTCODE', 'WORD', 'PARTIAL'])
+@pytest.mark.parametrize('term,order', [('23456', ['P', 'H', 'W', 'w']),
+ ('3', ['H', 'P', 'W', 'w'])
])
async def test_penalty_postcodes_and_housenumbers(conn, term, order):
ana = await tok.create_query_analyzer(conn)
assert query.num_token_slots() == 1
- torder = [(tl.tokens[0].penalty, tl.ttype.name) for tl in query.nodes[0].starting]
+ torder = [(tl.tokens[0].penalty, tl.ttype) for tl in query.nodes[0].starting]
torder.sort()
assert [t[1] for t in torder] == order
assert query.num_token_slots() == 3
assert len(query.nodes[0].starting) == 1
- assert query.nodes[0].starting[0].ttype == TokenType.CATEGORY
+ assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM
assert not query.nodes[2].starting
+@pytest.mark.asyncio
+async def test_freestanding_qualifier_words_become_category(conn):
+ # A query consisting only of a type-'S' word registered with op '-' must
+ # produce exactly one starting token list, typed qmod.TOKEN_NEAR_ITEM.
+ ana = await tok.create_query_analyzer(conn)
+
+ # Register 'foo' as a type-'S' word whose info carries the '-' operator.
+ await add_word(conn, 1, 'foo', 'S', 'FOO', {'op': '-'})
+
+ query = await ana.analyze_query(make_phrase('foo'))
+
+ assert query.num_token_slots() == 1
+ assert len(query.nodes[0].starting) == 1
+ assert query.nodes[0].starting[0].ttype == qmod.TOKEN_NEAR_ITEM
+
+
@pytest.mark.asyncio
async def test_qualifier_words(conn):
ana = await tok.create_query_analyzer(conn)
query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
assert query.num_token_slots() == 5
- assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
- assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
- assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.CATEGORY, TokenType.QUALIFIER}
+ assert set(t.ttype for t in query.nodes[0].starting) == {qmod.TOKEN_QUALIFIER}
+ assert set(t.ttype for t in query.nodes[2].starting) == {qmod.TOKEN_QUALIFIER}
+ assert set(t.ttype for t in query.nodes[4].starting) == {qmod.TOKEN_QUALIFIER}
@pytest.mark.asyncio
query = await ana.analyze_query(make_phrase('466 23 99834 34a'))
assert query.num_token_slots() == 4
- assert query.nodes[0].starting[0].ttype == TokenType.HOUSENUMBER
+ assert query.nodes[0].starting[0].ttype == qmod.TOKEN_HOUSENUMBER
assert len(query.nodes[0].starting[0].tokens) == 1
assert query.nodes[0].starting[0].tokens[0].token == 0
- assert query.nodes[1].starting[0].ttype == TokenType.HOUSENUMBER
+ assert query.nodes[1].starting[0].ttype == qmod.TOKEN_HOUSENUMBER
assert len(query.nodes[1].starting[0].tokens) == 1
assert query.nodes[1].starting[0].tokens[0].token == 1
assert not query.nodes[2].starting