git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Sun, 7 Jan 2024 14:24:30 +0000 (15:24 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Sun, 7 Jan 2024 14:24:30 +0000 (15:24 +0100)
1  2 
nominatim/api/search/icu_tokenizer.py

index ff1c3feed40069328d4fdc01aec77745356a70bf,72e0f547bcbaf9f5bb0798b8d26ce8b228b22249..6f3e09e88d2dc503ed0471c2eaa2f3c05f903562
@@@ -8,7 -8,6 +8,6 @@@
  Implementation of query analysis for the ICU tokenizer.
  """
  from typing import Tuple, Dict, List, Optional, NamedTuple, Iterator, Any, cast
- from copy import copy
  from collections import defaultdict
  import dataclasses
  import difflib
@@@ -188,10 -187,6 +187,6 @@@ class ICUQueryAnalyzer(AbstractQueryAna
                              query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                      else:
                          query.add_token(trange, qmod.TokenType.QUALIFIER, token)
-                         if trange.start == 0 or trange.end == query.num_token_slots():
-                             token = copy(token)
-                             token.penalty += 0.1 * (query.num_token_slots())
-                             query.add_token(trange, qmod.TokenType.NEAR_ITEM, token)
                  else:
                      query.add_token(trange, DB_TO_TOKEN_TYPE[row.type], token)
  
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: