exclude country-level searches with non-address layers

[nominatim.git] / nominatim / tokenizer / base.py
diff --git a/nominatim/tokenizer/base.py b/nominatim/tokenizer/base.py

index afbd1914b35d84219812afdd64f3061d306944f5..061cff36b99f22273e55e350d410d4291c425b91 100644 (file)
--- a/nominatim/tokenizer/base.py
+++ b/nominatim/tokenizer/base.py
@@ -13,6 +13,7 @@ from typing import List, Tuple, Dict, Any, Optional, Iterable
  from pathlib import Path
  
  from nominatim.config import Configuration
  from pathlib import Path
  
  from nominatim.config import Configuration
+from nominatim.db.connection import Connection
  from nominatim.data.place_info import PlaceInfo
  from nominatim.typing import Protocol
  
  from nominatim.data.place_info import PlaceInfo
  from nominatim.typing import Protocol
  
@@ -52,8 +53,8 @@ class AbstractAnalyzer(ABC):
  
              Returns:
                  The function returns the list of all tuples that could be
  
              Returns:
                  The function returns the list of all tuples that could be
-                found for the given words. Each list entry is a tuple of
-                (original word, word token, word id).
+                    found for the given words. Each list entry is a tuple of
+                    (original word, word token, word id).
          """
  
  
          """
  
  
@@ -117,7 +118,7 @@ class AbstractAnalyzer(ABC):
  
              Returns:
                  A JSON-serialisable structure that will be handed into
  
              Returns:
                  A JSON-serialisable structure that will be handed into
-                the database via the `token_info` field.
+                    the database via the `token_info` field.
          """
  
  
          """
  
  
@@ -143,8 +144,6 @@ class AbstractTokenizer(ABC):
                  tables should be skipped. This option is only required for
                  migration purposes and can be safely ignored by custom
                  tokenizers.
                  tables should be skipped. This option is only required for
                  migration purposes and can be safely ignored by custom
                  tokenizers.
-
-            TODO: can we move the init_db parameter somewhere else?
          """
  
  
          """
  
  
@@ -196,8 +195,8 @@ class AbstractTokenizer(ABC):
  
              Returns:
                If an issue was found, return an error message with the
  
              Returns:
                If an issue was found, return an error message with the
-              description of the issue as well as hints for the user on
-              how to resolve the issue. If everything is okay, return `None`.
+                  description of the issue as well as hints for the user on
+                  how to resolve the issue. If everything is okay, return `None`.
          """
  
  
          """
  
  
@@ -233,6 +232,17 @@ class AbstractTokenizer(ABC):
          """
  
  
          """
  
  
+    @abstractmethod
+    def most_frequent_words(self, conn: Connection, num: int) -> List[str]:
+        """ Return a list of the most frequent full words in the database.
+
+            Arguments:
+              conn: Open connection to the database which may be used to
+                    retrieve the words.
+              num: Maximum number of words to return.
+        """
+
+
  class TokenizerModule(Protocol):
      """ Interface that must be exported by modules that implement their
          own tokenizer.
  class TokenizerModule(Protocol):
      """ Interface that must be exported by modules that implement their
          own tokenizer.