strip normalisation results of normal and special spaces

author Sarah Hoffmann <lonvia@denofr.de>

Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)

committer Sarah Hoffmann <lonvia@denofr.de>

Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
diff --git a/src/nominatim_api/query_preprocessing/normalize.py b/src/nominatim_api/query_preprocessing/normalize.py

index afb93ded3087c8e0c592fbb41efcb9241c713746..0bb0c8ed744d00e1e97c3c114b05ed9d887729d2 100644 (file)
--- a/src/nominatim_api/query_preprocessing/normalize.py
+++ b/src/nominatim_api/query_preprocessing/normalize.py
@@ -27,5 +27,5 @@ def create(config: QueryConfig) -> QueryProcessingFunc:
  
      return lambda phrases: list(
          filter(lambda p: p.text,
-               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)))
+               (Phrase(p.ptype, cast(str, normalizer.transliterate(p.text)).strip('-: '))
                  for p in phrases)))
diff --git a/src/nominatim_api/search/icu_tokenizer.py b/src/nominatim_api/search/icu_tokenizer.py

index 6f1dcf7902ab65e5a4481d6a3d7b65e6274deab8..487dd1710354c9c9801cef9ce22033c1a5169e87 100644 (file)
--- a/src/nominatim_api/search/icu_tokenizer.py
+++ b/src/nominatim_api/search/icu_tokenizer.py
@@ -244,7 +244,7 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer):
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
-        return cast(str, self.normalizer.transliterate(text))
+        return cast(str, self.normalizer.transliterate(text)).strip('-: ')
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
          """ Transliterate the phrases and split them into tokens.
author	Sarah Hoffmann <lonvia@denofr.de>
	Wed, 19 Feb 2025 09:26:06 +0000 (10:26 +0100)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Wed, 19 Feb 2025 13:40:35 +0000 (14:40 +0100)
src/nominatim_api/query_preprocessing/normalize.py		patch | blob | history
src/nominatim_api/search/icu_tokenizer.py		patch | blob | history