normalized = list(filter(lambda p: p.text,
(qmod.Phrase(p.ptype, self.normalize_text(p.text))
for p in phrases)))
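+ # Heuristic: if the whole query is a single phrase with more than three
+ # spaces in which every token is shorter than three characters, treat it
+ # as unsearchable noise and clear it, so the empty-query check below
+ # can bail out early.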
+ if len(normalized) == 1 \
+ and normalized[0].text.count(' ') > 3 \
+ and max(len(s) for s in normalized[0].text.split()) < 3:
+ normalized = []
query = qmod.QueryStruct(normalized)
log().var_dump('Normalized query', query.source)
if not query.source:
standardized form search will work with. All information removed
at this stage is inevitably lost.
"""
- norm = cast(str, self.normalizer.transliterate(text))
- numspaces = norm.count(' ')
- if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
- return ''
-
- return norm
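+ # The rejection of queries made up of many very short words now happens
+ # at the phrase level in the caller (see above); normalization itself is
+ # reduced to a single call to the ICU normalizer.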
+ return cast(str, self.normalizer.transliterate(text))
def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
""" Transliterate the phrases and split them into tokens.