X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d7a3039c2a4bd26d05f08ee3140b8dfaecd68f02..d2853de47cac812193083f9b144a0bc818205dd6:/nominatim/tokenizer/legacy_tokenizer.py?ds=sidebyside diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index e09700d9..1b68a494 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -256,6 +256,16 @@ class LegacyTokenizer(AbstractTokenizer): return LegacyNameAnalyzer(self.dsn, normalizer) + def most_frequent_words(self, conn: Connection, num: int) -> List[str]: + """ Return a list of the `num` most frequent full words + in the database. + """ + with conn.cursor() as cur: + cur.execute(""" SELECT word FROM word WHERE word is not null + ORDER BY search_name_count DESC LIMIT %s""", (num,)) + return list(s[0] for s in cur) + + def _install_php(self, config: Configuration, overwrite: bool = True) -> None: """ Install the php script for the tokenizer. """