X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d8240f9ee475daef1412f0e2cdf36efeba6666f3..d8ed565bce27c638074fbc6f1961dfc0d160e312:/nominatim/api/search/legacy_tokenizer.py diff --git a/nominatim/api/search/legacy_tokenizer.py b/nominatim/api/search/legacy_tokenizer.py index 96975704..26e4c126 100644 --- a/nominatim/api/search/legacy_tokenizer.py +++ b/nominatim/api/search/legacy_tokenizer.py @@ -127,6 +127,15 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer): return query + def normalize_text(self, text: str) -> str: + """ Bring the given text into a normalized form. + + This only removes case, so some difference with the normalization + in the phrase remains. + """ + return text.lower() + + def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str], Dict[str, List[qmod.TokenRange]]]: """ Transliterate the phrases and split them into tokens. @@ -233,12 +242,11 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer): and (repl.ttype != qmod.TokenType.HOUSENUMBER or len(tlist.tokens[0].lookup_word) > 4): repl.add_penalty(0.39) - elif tlist.ttype == qmod.TokenType.HOUSENUMBER: + elif tlist.ttype == qmod.TokenType.HOUSENUMBER \ + and len(tlist.tokens[0].lookup_word) <= 3: if any(c.isdigit() for c in tlist.tokens[0].lookup_word): for repl in node.starting: - if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER \ - and (repl.ttype != qmod.TokenType.HOUSENUMBER - or len(tlist.tokens[0].lookup_word) <= 3): + if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER: repl.add_penalty(0.5 - tlist.tokens[0].penalty)