From 78c19bc006d5957f183968d086226be4b2b65fe3 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 1 Apr 2024 14:25:51 +0200 Subject: [PATCH 1/1] minimum counts for tokens should always be 1 to avoid accidental division by 0. --- nominatim/api/search/icu_tokenizer.py | 4 ++-- nominatim/api/search/legacy_tokenizer.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index 05ec7690..eb90c122 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -122,10 +122,10 @@ class ICUToken(qmod.Token): else: lookup_word = row.word_token - return ICUToken(penalty=penalty, token=row.word_id, count=count, + return ICUToken(penalty=penalty, token=row.word_id, count=max(1, count), lookup_word=lookup_word, is_indexed=True, word_token=row.word_token, info=row.info, - addr_count=addr_count) + addr_count=max(1, addr_count)) diff --git a/nominatim/api/search/legacy_tokenizer.py b/nominatim/api/search/legacy_tokenizer.py index bd17706e..031f2456 100644 --- a/nominatim/api/search/legacy_tokenizer.py +++ b/nominatim/api/search/legacy_tokenizer.py @@ -209,7 +209,7 @@ class LegacyQueryAnalyzer(AbstractQueryAnalyzer): is_indexed = False return LegacyToken(penalty=penalty, token=row.word_id, - count=row.search_name_count or 1, + count=max(1, row.search_name_count or 1), addr_count=1, # not supported lookup_word=lookup_word, word_token=row.word_token.strip(), -- 2.39.5