return query
+ def normalize_text(self, text: str) -> str:
+ """ Bring the given text into a normalized form.
+
+ This only removes case, so some difference with the normalization
+ in the phrase remains.
+ """
+ return text.lower()
+
+
def split_query(self, query: qmod.QueryStruct) -> Tuple[List[str],
Dict[str, List[qmod.TokenRange]]]:
""" Transliterate the phrases and split them into tokens.
and (repl.ttype != qmod.TokenType.HOUSENUMBER
or len(tlist.tokens[0].lookup_word) > 4):
repl.add_penalty(0.39)
- elif tlist.ttype == qmod.TokenType.HOUSENUMBER:
+ elif tlist.ttype == qmod.TokenType.HOUSENUMBER \
+ and len(tlist.tokens[0].lookup_word) <= 3:
if any(c.isdigit() for c in tlist.tokens[0].lookup_word):
for repl in node.starting:
- if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER \
- and (repl.ttype != qmod.TokenType.HOUSENUMBER
- or len(tlist.tokens[0].lookup_word) <= 3):
+ if repl.end == tlist.end and repl.ttype != qmod.TokenType.HOUSENUMBER:
repl.add_penalty(0.5 - tlist.tokens[0].penalty)