+
+
+ @abstractmethod
+ def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+ """ Return a list of the most frequent full words in the database.
+
+ Arguments:
+ conn: Open connection to the database which may be used to
+ retrieve the words.
+ num: Maximum number of words to return.
+ """
+
+
class TokenizerModule(Protocol):
    """ Interface that must be exported by modules that implement their
        own tokenizer.
    """

    # `self` only appears because protocol members are declared as
    # methods. A module that conforms to this protocol provides a plain
    # module-level function `create(dsn, data_dir)`.
    def create(self, dsn: str, data_dir: Path) -> AbstractTokenizer:
        """ Factory for new tokenizers.

            Arguments:
              dsn: String naming the data source the tokenizer works on
                   (presumably the database connection string; to be
                   confirmed against the callers).
              data_dir: Path handed to the new tokenizer (presumably a
                        directory; its use is not defined by this
                        interface).

            Returns:
              A new tokenizer object implementing AbstractTokenizer.
        """