From: Sarah Hoffmann Date: Sat, 12 Aug 2023 15:06:38 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~56 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/56f9535aa9cf3f9f500471603e5876cd3e0ddb0b?hp=-c Merge remote-tracking branch 'upstream/master' --- 56f9535aa9cf3f9f500471603e5876cd3e0ddb0b diff --combined nominatim/api/search/icu_tokenizer.py index ad08294e,7bf516e3..d3e34537 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@@ -83,7 -83,7 +83,7 @@@ class ICUToken(qmod.Token) seq = difflib.SequenceMatcher(a=self.lookup_word, b=norm) distance = 0 for tag, afrom, ato, bfrom, bto in seq.get_opcodes(): - if tag == 'delete' and (afrom == 0 or ato == len(self.lookup_word)): + if tag in ('delete', 'insert') and (afrom == 0 or ato == len(self.lookup_word)): distance += 1 elif tag == 'replace': distance += max((ato-afrom), (bto-bfrom)) @@@ -192,12 -192,7 +192,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: