git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author: Sarah Hoffmann <lonvia@denofr.de>
Mon, 18 Mar 2024 13:37:25 +0000 (14:37 +0100)
committer: Sarah Hoffmann <lonvia@denofr.de>
Mon, 18 Mar 2024 13:37:25 +0000 (14:37 +0100)
1  2 
nominatim/api/search/icu_tokenizer.py

index 76a1a2e5d362688d5388044b511a9d3f0ae4a13c,05ec7690c8ac0a34d8436fd08e641f4cb19bd680..23cfa5a166c003a1b5638f0334d10636a335d935
@@@ -97,6 -97,7 +97,7 @@@ class ICUToken(qmod.Token)
          """ Create a ICUToken from the row of the word table.
          """
          count = 1 if row.info is None else row.info.get('count', 1)
+         addr_count = 1 if row.info is None else row.info.get('addr_count', 1)
  
          penalty = 0.0
          if row.type == 'w':
  
          return ICUToken(penalty=penalty, token=row.word_id, count=count,
                          lookup_word=lookup_word, is_indexed=True,
-                         word_token=row.word_token, info=row.info)
+                         word_token=row.word_token, info=row.info,
+                         addr_count=addr_count)
  
  
  
@@@ -206,12 -208,7 +208,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
              if len(part.token) <= 4 and part[0].isdigit()\
                 and not node.has_tokens(i+1, qmod.TokenType.HOUSENUMBER):
                  query.add_token(qmod.TokenRange(i, i+1), qmod.TokenType.HOUSENUMBER,
-                                 ICUToken(0.5, 0, 1, part.token, True, part.token, None))
+                                 ICUToken(0.5, 0, 1, 1, part.token, True, part.token, None))
  
  
      def rerank_tokens(self, query: qmod.QueryStruct, parts: QueryParts) -> None: