git.openstreetmap.org Git - nominatim.git/commitdiff
strip normalisation results of normal and special spaces
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
src/nominatim_api/query_preprocessing/normalize.py
src/nominatim_api/search/icu_tokenizer.py

index afb93ded3087c8e0c592fbb41efcb9241c713746..0bb0c8ed744d00e1e97c3c114b05ed9d887729d2 100644 (file)
@@ -27,5 +27,5 @@ def create(config: QueryConfig) -> QueryProcessingFunc:
 
     return lambda phrases: list(
         filter(lambda p: p.text,
-               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)))
+               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)).strip('-: '))
                 for p in phrases)))
index 6f1dcf7902ab65e5a4481d6a3d7b65e6274deab8..487dd1710354c9c9801cef9ce22033c1a5169e87 100644 (file)
@@ -244,7 +244,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
             standardized form search will work with. All information removed
             at this stage is inevitably lost.
         """
-        return cast(str, self.normalizer.transliterate(text))
+        return cast(str, self.normalizer.transliterate(text)).strip('-: ')
 
     def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
         """ Transliterate the phrases and split them into tokens.