git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/api/search/icu_tokenizer.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / nominatim / api / search / icu_tokenizer.py
index 72e0f547bcbaf9f5bb0798b8d26ce8b228b22249..76a1a2e5d362688d5388044b511a9d3f0ae4a13c 100644 (file)
@@ -186,7 +186,10 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
                         if trange.start == 0:
                             query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                     else:
                         if trange.start == 0:
                             query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                     else:
-                        query.add_token(trange, qmod.TokenType.QUALIFIER, token)
+                        if trange.start == 0 and trange.end == query.num_token_slots():
+                            query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
+                        else:
+                            query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                 else:
                     query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
 
                 else:
                     query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
 
@@ -203,7 +206,12 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
-        return cast(str, self.normalizer.transliterate(text))
+        norm = cast(str, self.normalizer.transliterate(text))
+        numspaces = norm.count(' ')
+        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
+            return ''
+
+        return norm
 
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
 
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: