X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/44cfce1ca4caf9fb716ff7c37682427d9bd3d31e..fc038261b9fe7f7d89a4000496185e72ac100a2a:/nominatim/tokenizer/icu_tokenizer.py diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index f8f6af2e..90caec1c 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -416,12 +416,11 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer): elif item.kind in ('housenumber', 'streetnumber', 'conscriptionnumber'): hnrs.append(item.name) elif item.kind == 'street': - token = self._retrieve_full_token(item.name) - if token: - streets.append(token) + streets.extend(self._retrieve_full_tokens(item.name)) elif item.kind == 'place': - token_info.add_place(self._compute_partial_tokens(item.name)) - elif not item.kind.startswith('_') and \ + if not item.suffix: + token_info.add_place(self._compute_partial_tokens(item.name)) + elif not item.kind.startswith('_') and not item.suffix and \ item.kind not in ('country', 'full'): addr_terms.append((item.kind, self._compute_partial_tokens(item.name))) @@ -464,25 +463,20 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer): return tokens - def _retrieve_full_token(self, name): + def _retrieve_full_tokens(self, name): """ Get the full name token for the given name, if it exists. The name is only retrived for the standard analyser. """ - norm_name = self._normalized(name) + norm_name = self._search_normalized(name) # return cached if possible if norm_name in self._cache.fulls: return self._cache.fulls[norm_name] - # otherwise compute - full, _ = self._cache.names.get(norm_name, (None, None)) - - if full is None: - with self.conn.cursor() as cur: - cur.execute("SELECT word_id FROM word WHERE word = %s and type = 'W' LIMIT 1", - (norm_name, )) - if cur.rowcount > 0: - full = cur.fetchone()[0] + with self.conn.cursor() as cur: + cur.execute("SELECT word_id FROM word WHERE word_token = %s and type = 'W'", + (norm_name, )) + full = [row[0] for row in cur] self._cache.fulls[norm_name] = full