return LegacyNameAnalyzer(self.dsn, normalizer)
+ def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+ """ Return a list of the `num` most frequent full words
+ in the database.
+ """
+ with conn.cursor() as cur:
+ cur.execute(""" SELECT word FROM word WHERE word is not null
+ ORDER BY search_name_count DESC LIMIT %s""", (num,))
+ return list(s[0] for s in cur)
+
+
def _install_php(self, config: Configuration, overwrite: bool = True) -> None:
""" Install the php script for the tokenizer.
"""
def add_street(self, conn: Connection, street: str) -> None:
""" Add addr:street match terms.
"""
- def _get_street(name: str) -> List[int]:
+ def _get_street(name: str) -> Optional[str]:
with conn.cursor() as cur:
- return cast(List[int],
+ return cast(Optional[str],
cur.scalar("SELECT word_ids_from_name(%s)::text", (name, )))
tokens = self.cache.streets.get(street, _get_street)
- if tokens:
- self.data['street'] = tokens
+ self.data['street'] = tokens or '{}'
def add_place(self, conn: Connection, place: str) -> None: