strip normalisation results of normal and special spaces

author Sarah Hoffmann <lonvia@denofr.de>

Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)

committer Sarah Hoffmann <lonvia@denofr.de>

Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
diff --git a/src/nominatim_api/query_preprocessing/normalize.py b/src/nominatim_api/query_preprocessing/normalize.py

index afb93ded3087c8e0c592fbb41efcb9241c713746..0bb0c8ed744d00e1e97c3c114b05ed9d887729d2 100644 (file)
--- a/src/nominatim_api/query_preprocessing/normalize.py
+++ b/src/nominatim_api/query_preprocessing/normalize.py
@@ -27,5 +27,5 @@ def create(config: QueryConfig) -> QueryProcessingFunc:
  
      return lambda phrases: list(
          filter(lambda p: p.text,
-               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)))
+               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)).strip('-: '))
                  for p in phrases)))
diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py

index 6f1dcf7902ab65e5a4481d6a3d7b65e6274deab8..487dd1710354c9c9801cef9ce22033c1a5169e87 100644 (file)
--- a/src/nominatim_api/search/icu_tokenizer.py
+++ b/src/nominatim_api/search/icu_tokenizer.py
@@ -244,7 +244,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
-        return cast(str, self.normalizer.transliterate(text))
+        return cast(str, self.normalizer.transliterate(text)).strip('-: ')
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
          """ Transliterate the phrases and split them into tokens.
author	Sarah Hoffmann <lonvia@denofr.de>
	Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
src/nominatim_api/query_preprocessing/normalize.py		patch | blob | history
src/nominatim_api/search/icu_tokenizer.py		patch | blob | history