Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)

committer Sarah Hoffmann <lonvia@denofr.de>

Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
diff --combined lib-sql/functions/address_lookup.sql

index a32bfe710419e97bb65ba951b17b29502450fd87,26ce20738d301d4e3b33c43ff1571fd52a4e9a45..cba11dbf3400d7fcc45a74f9c7fb85ed1a2f4c69
--- 1/lib-sql/functions/address_lookup.sql
--- 2/lib-sql/functions/address_lookup.sql
+++ b/lib-sql/functions/address_lookup.sql
@@@ -232,7 -232,7 +232,7 @@@ BEGI
     FOR location IN
       SELECT placex.place_id, osm_type, osm_id, name, class, type,
              coalesce(extratags->'linked_place', extratags->'place') as place_type,
- -           admin_level, fromarea, isaddress,
+ +           admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress,
              CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address,
              distance, country_code, postcode
         FROM place_addressline join placex on (address_place_id = placex.place_id)
@@@ -262,7 -262,7 +262,7 @@@
         -- If the place had a postcode assigned, take this one only
         -- into consideration when it is an area and the place does not have
         -- a postcode itself.
-       IF location.fromarea AND location.isaddress
+       IF location.fromarea AND location_isaddress
            AND (place.address is null or not place.address ? 'postcode')
         THEN
           place.postcode := null; -- remove the less exact postcode
diff --combined nominatim/api/search/icu_tokenizer.py

index d2cdd96e16432d62f2b02d0fcbe390e060d905e1,196fde2a8444e69d5d74a0a0310dd94812425d96..14203e0081eb1df470025f6285b7b46896223123
--- 1/nominatim/api/search/icu_tokenizer.py
--- 2/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@@ -101,10 -101,16 +101,16 @@@ class ICUToken(qmod.Token)
           penalty = 0.0
           if row.type == 'w':
               penalty = 0.3
+         elif row.type == 'W':
+             if len(row.word_token) == 1 and row.word_token == row.word:
+                 penalty = 0.2 if row.word.isdigit() else 0.3
           elif row.type == 'H':
               penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
               if all(not c.isdigit() for c in row.word_token):
                   penalty += 0.2 * (len(row.word_token) - 1)
+         elif row.type == 'C':
+             if len(row.word_token) == 1:
+                 penalty = 0.3
   
           if row.info is None:
               lookup_word = row.word
@@@ -201,12 -207,7 +207,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna
               standardized form search will work with. All information removed
               at this stage is inevitably lost.
           """
- -        return cast(str, self.normalizer.transliterate(text))
+ +        norm = cast(str, self.normalizer.transliterate(text))
+ +        numspaces = norm.count(' ')
+ +        if numspaces > 4 and len(norm) <= (numspaces + 1) * 3:
+ +            return ''
+ +
+ +        return norm
   
   
       def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]:
author	Sarah Hoffmann <lonvia@denofr.de>
	Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Thu, 23 Nov 2023 11:06:17 +0000 (12:06 +0100)
		1	2
lib-sql/functions/address_lookup.sql	patch |	diff1 |	diff2 |	blob | history
nominatim/api/search/icu_tokenizer.py	patch |	diff1 |	diff2 |	blob | history