git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/legacy_icu_tokenizer.py
restrict partial word counting to names of reasonable length
[nominatim.git] / nominatim / tokenizer / legacy_icu_tokenizer.py
index 6bf409cca3ab3674b41605b06e8dfe49eda40e41..c585c5afe0bf28bfa24590ed05cb165f6fd2dd01 100644 (file)
@@ -163,7 +163,9 @@ class LegacyICUTokenizer:
             words = Counter()
             name_proc = ICUNameProcessor(self.naming_rules)
             with conn.cursor(name="words") as cur:
             words = Counter()
             name_proc = ICUNameProcessor(self.naming_rules)
             with conn.cursor(name="words") as cur:
-                cur.execute("SELECT svals(name) as v, count(*) FROM place GROUP BY v")
+                cur.execute(""" SELECT v, count(*) FROM
+                                  (SELECT svals(name) as v FROM place)x
+                                WHERE length(v) < 75 GROUP BY v""")
 
                 for name, cnt in cur:
                     terms = set()
 
                 for name, cnt in cur:
                     terms = set()