X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/bd2c64876f7ddc99da14ea78a652f797e17134f4..9a84adef5981d1a8357fd1775358742aae83867b:/nominatim/api/search/legacy_tokenizer.py?ds=sidebyside

diff --git a/nominatim/api/search/legacy_tokenizer.py b/nominatim/api/search/legacy_tokenizer.py
index 96975704..e7984ee4 100644
--- a/nominatim/api/search/legacy_tokenizer.py
+++ b/nominatim/api/search/legacy_tokenizer.py
@@ -107,15 +107,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
         for row in await self.lookup_in_db(lookup_words):
             for trange in words[row.word_token.strip()]:
                 token, ttype = self.make_token(row)
-                if ttype == qmod.TokenType.CATEGORY:
+                if ttype == qmod.TokenType.NEAR_ITEM:
                     if trange.start == 0:
-                        query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 elif ttype == qmod.TokenType.QUALIFIER:
                     query.add_token(trange, qmod.TokenType.QUALIFIER, token)
                     if trange.start == 0 or trange.end == query.num_token_slots():
                         token = copy(token)
                         token.penalty += 0.1 * (query.num_token_slots())
-                        query.add_token(trange, qmod.TokenType.CATEGORY, token)
+                        query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                 elif ttype != qmod.TokenType.PARTIAL or trange.start + 1 == trange.end:
                     query.add_token(trange, ttype, token)
 
@@ -127,6 +127,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
         return query
 
 
+    def normalize_text(self, text: str) -> str:
+        """ Return the given text in a normalized form: a lower-cased copy.
+
+            Only case is removed here, so the result may still differ from
+            the normalization in the phrase.
+        """
+        return text.lower()
+
+
     def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
                                                             Dict[str, List[qmod.TokenRange]]]:
         """ Transliterate the phrases and split them into tokens.
@@ -186,7 +195,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
                 ttype = qmod.TokenType.POSTCODE
                 lookup_word = row.word_token[1:]
             else:
-                ttype = qmod.TokenType.CATEGORY if row.operator in ('in', 'near')\
+                ttype = qmod.TokenType.NEAR_ITEM if row.operator in ('in', 'near')\
                         else qmod.TokenType.QUALIFIER
                 lookup_word = row.word
         elif row.word_token.startswith(' '):
@@ -233,12 +242,11 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer):
                        and (repl.ttype != qmod.TokenType.HOUSENUMBER
                             or len(tlist.tokens[0].lookup_word) > 4):
                         repl.add_penalty(0.39)
-            elif tlist.ttype == qmod.TokenType.HOUSENUMBER:
+            elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
+                 and len(tlist.tokens[0].lookup_word) <= 3:
                 if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
                     for repl in node.starting:
-                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER \
-                           and (repl.ttype != qmod.TokenType.HOUSENUMBER
-                                or len(tlist.tokens[0].lookup_word) <= 3):
+                        if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
                             repl.add_penalty(0.5 - tlist.tokens[0].penalty)