From: Sarah Hoffmann Date: Thu, 23 Nov 2023 11:06:17 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~35 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/8c82f6ceb3b77ebadc0bfc08422652902fc437a8?hp=-c Merge remote-tracking branch 'upstream/master' --- 8c82f6ceb3b77ebadc0bfc08422652902fc437a8 diff --combined lib-sql/functions/address_lookup.sql index a32bfe71,26ce2073..cba11dbf --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@@ -232,7 -232,7 +232,7 @@@ BEGI FOR location IN SELECT placex.place_id, osm_type, osm_id, name, class, type, coalesce(extratags->'linked_place', extratags->'place') as place_type, - admin_level, fromarea, isaddress, + admin_level, fromarea, isaddress and linked_place_id is NULL as isaddress, CASE WHEN rank_address = 11 THEN 5 ELSE rank_address END as rank_address, distance, country_code, postcode FROM place_addressline join placex on (address_place_id = placex.place_id) @@@ -262,7 -262,7 +262,7 @@@ -- If the place had a postcode assigned, take this one only -- into consideration when it is an area and the place does not have -- a postcode itself. - IF location.fromarea AND location.isaddress + IF location.fromarea AND location_isaddress AND (place.address is null or not place.address ? 'postcode') THEN place.postcode := null; -- remove the less exact postcode diff --combined nominatim/api/search/icu_tokenizer.py index d2cdd96e,196fde2a..14203e00 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@@ -101,10 -101,16 +101,16 @@@ class ICUToken(qmod.Token) penalty = 0.0 if row.type == 'w': penalty = 0.3 + elif row.type == 'W': + if len(row.word_token) == 1 and row.word_token == row.word: + penalty = 0.2 if row.word.isdigit() else 0.3 elif row.type == 'H': penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit()) if all(not c.isdigit() for c in row.word_token): penalty += 0.2 * (len(row.word_token) - 1) + elif row.type == 'C': + if len(row.word_token) == 1: + penalty = 0.3 if row.info is None: lookup_word = row.word @@@ -201,12 -207,7 +207,12 @@@ class ICUQueryAnalyzer(AbstractQueryAna standardized form search will work with. All information removed at this stage is inevitably lost. """ - return cast(str, self.normalizer.transliterate(text)) + norm = cast(str, self.normalizer.transliterate(text)) + numspaces = norm.count(' ') + if numspaces > 4 and len(norm) <= (numspaces + 1) * 3: + return '' + + return norm def split_query(self, query: qmod.QueryStruct) -> Tuple[QueryParts, WordDict]: