X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b9fbfeff67b420905a4176f4f5e9312746d0c42e..bffbe68ec3ad93aa6188083441677ab0fdd54659:/nominatim/tokenizer/legacy_icu_tokenizer.py diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index 6bf409cc..c585c5af 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -163,7 +163,9 @@ class LegacyICUTokenizer: words = Counter() name_proc = ICUNameProcessor(self.naming_rules) with conn.cursor(name="words") as cur: - cur.execute("SELECT svals(name) as v, count(*) FROM place GROUP BY v") + cur.execute(""" SELECT v, count(*) FROM + (SELECT svals(name) as v FROM place)x + WHERE length(v) < 75 GROUP BY v""") for name, cnt in cur: terms = set()