From: Sarah Hoffmann Date: Sat, 6 Jan 2024 16:49:58 +0000 (+0100) Subject: do not run near queries on qualifier words X-Git-Tag: v4.4.0~31^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/10a5424a71022a787902b86ddcefedb8688bb2b5 do not run near queries on qualifier words There is too much potential for confusion (e.g. 'Rio Grande' read as 'river near Grande') fir too little gain. Use near phrases instead. --- diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index eabd329d..72e0f547 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -8,7 +8,6 @@ Implementation of query analysis for the ICU tokenizer. """ from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast -from copy import copy from collections import defaultdict import dataclasses import difflib @@ -188,10 +187,6 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): query.add_token(trange, qmod.TokenType.NEAR_ITEM, token) else: query.add_token(trange, qmod.TokenType.QUALIFIER, token) - if trange.start == 0 or trange.end == query.num_token_slots(): - token = copy(token) - token.penalty += 0.1 * (query.num_token_slots()) - query.add_token(trange, qmod.TokenType.NEAR_ITEM, token) else: query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token) diff --git a/test/python/api/search/test_icu_query_analyzer.py b/test/python/api/search/test_icu_query_analyzer.py index a88ca8b8..6a17e32a 100644 --- a/test/python/api/search/test_icu_query_analyzer.py +++ b/test/python/api/search/test_icu_query_analyzer.py @@ -148,9 +148,9 @@ async def test_qualifier_words(conn): query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo')) assert query.num_token_slots() == 5 - assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} + assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER} assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER} - assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER} + assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER} @pytest.mark.asyncio