git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
1  2 
lib-sql/functions/address_lookup.sql
nominatim/api/search/icu_tokenizer.py

index a32bfe710419e97bb65ba951b17b29502450fd87,26ce20738d301d4e3b33c43ff1571fd52a4e9a45..cba11dbf3400d7fcc45a74f9c7fb85ed1a2f4c69
@@@ -232,7 -232,7 +232,7 @@@ BEGI
    FOR location IN
      SELECT placex.place_id, osm_type, osm_id, name, class, type,
             coalesce(extratags->'linked_place', extratags->'place') as place_type,
 -           admin_level, fromarea, isaddress,
 +           admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
             CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
             distance, country_code, postcode
        FROM place_addressline join placex on (address_place_id = placex.place_id)
        -- If the place had a postcode assigned, take this one only
        -- into consideration when it is an area and the place does not have
        -- a postcode itself.
-       IF location.fromarea AND location.isaddress
+       IF location.fromarea AND location_isaddress
           AND (place.address is null or not place.address ? 'postcode')
        THEN
          place.postcode := null; -- remove the less exact postcode
index d2cdd96e16432d62f2b02d0fcbe390e060d905e1,196fde2a8444e69d5d74a0a0310dd94812425d96..14203e0081eb1df470025f6285b7b46896223123
@@@ -101,10 -101,16 +101,16 @@@ class ICUToken(qmod.Token)
          penalty = 0.0
          if row.type == 'w':
              penalty = 0.3
+         elif row.type == 'W':
+             if len(row.word_token) == 1 and row.word_token == row.word:
+                 penalty = 0.2 if row.word.isdigit() else 0.3
          elif row.type == 'H':
              penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
              if all(not c.isdigit() for c in row.word_token):
                  penalty += 0.2 * (len(row.word_token) - 1)
+         elif row.type == 'C':
+             if len(row.word_token) == 1:
+                 penalty = 0.3
  
          if row.info is None:
              lookup_word = row.word
@@@ -201,12 -207,7 +207,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna
              standardized form search will work with. All information removed
              at this stage is inevitably lost.
          """
 -        return cast(str, self.normalizer.transliterate(text))
 +        norm = cast(str, self.normalizer.transliterate(text))
 +        numspaces = norm.count(' ')
 +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
 +            return ''
 +
 +        return norm
  
  
      def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: