git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author: Sarah Hoffmann <lonvia@denofr.de>
Wed, 13 Nov 2024 18:35:54 +0000 (19:35 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Wed, 13 Nov 2024 18:35:54 +0000 (19:35 +0100)
1  2 
src/nominatim_api/search/db_search_builder.py
src/nominatim_api/search/icu_tokenizer.py
src/nominatim_db/tokenizer/icu_tokenizer.py

index 0d7487a4c04fac42a0248ab97acacaf2e11c02e0,632270ef04176f394a10e29d9397141bdeb5a457..1fbb7168bb44a963f31e83bfd99f6f534bcf9be5
@@@ -230,8 -221,8 +221,8 @@@ class SearchBuilder
          if name_fulls:
              fulls_count = sum(t.count for t in name_fulls)
  
 -            if fulls_count < 50000 or addr_count < 30000:
 +            if fulls_count < 80000 or addr_count < 50000:
-                 yield penalty,fulls_count / (2**len(addr_tokens)), \
+                 yield penalty, fulls_count / (2**len(addr_tokens)), \
                      self.get_full_name_ranking(name_fulls, addr_partials,
                                                 fulls_count > 30000 / max(1, len(addr_tokens)))
  
index c2a265105a69d08eb3d7d8a75331e4a8c4d61dc9,fa14531aed0d6c07cf79c277255324495b1b063d..c18dd8be62ed1190284e9c0751464b5e54091a47
@@@ -208,14 -202,8 +202,13 @@@ class ICUQueryAnalyzer(AbstractQueryAna
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
          """ Transliterate the phrases and split them into tokens.