From: Sarah Hoffmann
Date: Thu, 23 Nov 2023 09:51:58 +0000 (+0100)
Subject: increase penalty for one-letter words
X-Git-Tag: v4.4.0~78^2
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/4e4d29f653d4929f49536255314ec19264166ec6?ds=inline;hp=-c

increase penalty for one-letter words
---

4e4d29f653d4929f49536255314ec19264166ec6
diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py
index b68e8d10..196fde2a 100644
--- a/nominatim/api/search/icu_tokenizer.py
+++ b/nominatim/api/search/icu_tokenizer.py
@@ -101,10 +101,16 @@ class ICUToken(qmod.Token):
         penalty = 0.0
         if row.type == 'w':
             penalty = 0.3
+        elif row.type == 'W':
+            if len(row.word_token) == 1 and row.word_token == row.word:
+                penalty = 0.2 if row.word.isdigit() else 0.3
         elif row.type == 'H':
             penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit())
             if all(not c.isdigit() for c in row.word_token):
                 penalty += 0.2 * (len(row.word_token) - 1)
+        elif row.type == 'C':
+            if len(row.word_token) == 1:
+                penalty = 0.3
 
         if row.info is None:
             lookup_word = row.word