X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3d0bc85b4d33c8e88321785762edb50e531aac55..d8ed565bce27c638074fbc6f1961dfc0d160e312:/nominatim/api/search/icu_tokenizer.py?ds=sidebyside diff --git a/nominatim/api/search/icu_tokenizer.py b/nominatim/api/search/icu_tokenizer.py index 7bf516e3..196fde2a 100644 --- a/nominatim/api/search/icu_tokenizer.py +++ b/nominatim/api/search/icu_tokenizer.py @@ -101,10 +101,16 @@ class ICUToken(qmod.Token): penalty = 0.0 if row.type == 'w': penalty = 0.3 + elif row.type == 'W': + if len(row.word_token) == 1 and row.word_token == row.word: + penalty = 0.2 if row.word.isdigit() else 0.3 elif row.type == 'H': penalty = sum(0.1 for c in row.word_token if c != ' ' and not c.isdigit()) if all(not c.isdigit() for c in row.word_token): penalty += 0.2 * (len(row.word_token) - 1) + elif row.type == 'C': + if len(row.word_token) == 1: + penalty = 0.3 if row.info is None: lookup_word = row.word @@ -133,10 +139,19 @@ class ICUQueryAnalyzer(AbstractQueryAnalyzer): async def setup(self) -> None: """ Set up static data structures needed for the analysis. """ - rules = await self.conn.get_property('tokenizer_import_normalisation') - self.normalizer = Transliterator.createFromRules("normalization", rules) - rules = await self.conn.get_property('tokenizer_import_transliteration') - self.transliterator = Transliterator.createFromRules("transliteration", rules) + async def _make_normalizer() -> Any: + rules = await self.conn.get_property('tokenizer_import_normalisation') + return Transliterator.createFromRules("normalization", rules) + + self.normalizer = await self.conn.get_cached_value('ICUTOK', 'normalizer', + _make_normalizer) + + async def _make_transliterator() -> Any: + rules = await self.conn.get_property('tokenizer_import_transliteration') + return Transliterator.createFromRules("transliteration", rules) + + self.transliterator = await self.conn.get_cached_value('ICUTOK', 'transliterator', + _make_transliterator) if 'word' not in self.conn.t.meta.tables: sa.Table('word', self.conn.t.meta,