]> git.openstreetmap.org Git - nominatim.git/commitdiff
do not run near queries on qualifier words
authorSarah Hoffmann <lonvia@denofr.de>
Sat, 6 Jan 2024 16:49:58 +0000 (17:49 +0100)
committerSarah Hoffmann <lonvia@denofr.de>
Sun, 7 Jan 2024 10:33:11 +0000 (11:33 +0100)
There is too much potential for confusion (e.g. 'Rio Grande' read
as 'river near Grande') fir too little gain. Use near phrases
instead.

nominatim/api/search/icu_tokenizer.py
test/python/api/search/test_icu_query_analyzer.py

index eabd329d57e08cac6d8b6cbc8c0a8c3c0fcf8fdd..72e0f547bcbaf9f5bb0798b8d26ce8b228b22249 100644 (file)
@@ -8,7 +8,6 @@
 Implementation of query analysis for the ICU tokenizer.
 """
 from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
 Implementation of query analysis for the ICU tokenizer.
 """
 from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
-from copy import copy
 from collections import defaultdict
 import dataclasses
 import difflib
 from collections import defaultdict
 import dataclasses
 import difflib
@@ -188,10 +187,6 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
                             query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                     else:
                         query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                             query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                     else:
                         query.add_token(trange, qmod.TokenType.QUALIFIER, token)
-                        if trange.start == 0 or trange.end == query.num_token_slots():
-                            token = copy(token)
-                            token.penalty += 0.1 * (query.num_token_slots())
-                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 else:
                     query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
 
                 else:
                     query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
 
index a88ca8b82e4facc800aa23c507f309f57c4c8311..6a17e32abab37475d8ab5178d2a5ee5fc2fffe1e 100644 (file)
@@ -148,9 +148,9 @@ async def test_qualifier_words(conn):
     query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
 
     assert query.num_token_slots() == 5
     query = await ana.analyze_query(make_phrase('foo BAR foo BAR foo'))
 
     assert query.num_token_slots() == 5
-    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[0].starting) == {TokenType.QUALIFIER}
     assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
     assert set(t.ttype for t in query.nodes[2].starting) == {TokenType.QUALIFIER}
-    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.NEAR_ITEM, TokenType.QUALIFIER}
+    assert set(t.ttype for t in query.nodes[4].starting) == {TokenType.QUALIFIER}
 
 
 @pytest.mark.asyncio
 
 
 @pytest.mark.asyncio