mainly for documentation purposes.
"""
from abc import ABC, abstractmethod
-from typing import List, Tuple, Dict, Any, Optional
+from typing import List, Tuple, Dict, Any, Optional, Iterable
from pathlib import Path
from typing_extensions import Protocol
@abstractmethod
- def update_special_phrases(self, phrases: List[Tuple[str, str, str, str]],
+ def update_special_phrases(self,
+ phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Update the tokenizer's special phrase tokens from the given
list of special phrases.
Tokenizer implementing normalisation as used before Nominatim 4 but using
libICU instead of the PostgreSQL module.
"""
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, Dict, Set, Iterable
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, cast, \
+ Dict, Set, Iterable
import itertools
import json
import logging
- def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+ def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
            If `should_replace` is True, then the previous set of phrases will be
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
-from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, cast, Dict, Set
+from typing import Optional, Sequence, List, Tuple, Mapping, Any, Callable, \
+ cast, Dict, Set, Iterable
from collections import OrderedDict
import logging
from pathlib import Path
- def update_special_phrases(self, phrases: Sequence[Tuple[str, str, str, str]],
+ def update_special_phrases(self, phrases: Iterable[Tuple[str, str, str, str]],
should_replace: bool) -> None:
""" Replace the search index for special phrases with the new phrases.
"""
LOG = logging.getLogger()
class SpecialPhrasesImporterStatistics():
- # pylint: disable-msg=too-many-instance-attributes
"""
Class handling statistics of the import
process of special phrases.
"""
- def __init__(self):
+ def __init__(self) -> None:
self._intialize_values()
- def _intialize_values(self):
+ def _intialize_values(self) -> None:
"""
Set all counts for the global
import to 0.
self.tables_ignored = 0
self.invalids = 0
- def notify_one_phrase_invalid(self):
+ def notify_one_phrase_invalid(self) -> None:
"""
Add +1 to the count of invalid entries
fetched from the wiki.
"""
self.invalids += 1
- def notify_one_table_created(self):
+ def notify_one_table_created(self) -> None:
"""
Add +1 to the count of created tables.
"""
self.tables_created += 1
- def notify_one_table_deleted(self):
+ def notify_one_table_deleted(self) -> None:
"""
Add +1 to the count of deleted tables.
"""
self.tables_deleted += 1
- def notify_one_table_ignored(self):
+ def notify_one_table_ignored(self) -> None:
"""
Add +1 to the count of ignored tables.
"""
self.tables_ignored += 1
- def notify_import_done(self):
+ def notify_import_done(self) -> None:
"""
Print stats for the whole import process
and reset all values.
The class allows to load phrases from a csv file.
"""
+from typing import Iterable
import csv
import os
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
"""
Handles loading of special phrases from external csv file.
"""
- def __init__(self, csv_path):
- super().__init__()
+ def __init__(self, csv_path: str) -> None:
self.csv_path = csv_path
- def generate_phrases(self):
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
- def _check_csv_validity(self):
+ def _check_csv_validity(self) -> None:
"""
Check that the csv file has the right extension.
"""
The phrases already present in the database which are not
valid anymore are removed.
"""
+from typing import Iterable, Tuple, Mapping, Sequence, Optional, Set
import logging
import re
+from typing_extensions import Protocol
+
from psycopg2.sql import Identifier, SQL
+
+from nominatim.config import Configuration
+from nominatim.db.connection import Connection
from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
+from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
+from nominatim.tokenizer.base import AbstractTokenizer
LOG = logging.getLogger()
-def _classtype_table(phrase_class, phrase_type):
+def _classtype_table(phrase_class: str, phrase_type: str) -> str:
""" Return the name of the table for the given class and type.
"""
return f'place_classtype_{phrase_class}_{phrase_type}'
+
+class SpecialPhraseLoader(Protocol):
+ """ Protocol for classes implementing a loader for special phrases.
+ """
+
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
+ """ Generates all special phrase terms this loader can produce.
+ """
+
+
class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
Take an sp loader which loads the phrases from an external source.
"""
- def __init__(self, config, db_connection, sp_loader):
+ def __init__(self, config: Configuration, conn: Connection,
+ sp_loader: SpecialPhraseLoader) -> None:
self.config = config
- self.db_connection = db_connection
+ self.db_connection = conn
self.sp_loader = sp_loader
self.statistics_handler = SpecialPhrasesImporterStatistics()
self.black_list, self.white_list = self._load_white_and_black_lists()
self.sanity_check_pattern = re.compile(r'^\w+$')
# This set will contain all existing phrases to be added.
# It contains tuples with the following format: (label, class, type, operator)
- self.word_phrases = set()
+ self.word_phrases: Set[Tuple[str, str, str, str]] = set()
# This set will contain all existing place_classtype tables which don't match any
# special phrases class/type on the wiki.
- self.table_phrases_to_delete = set()
+ self.table_phrases_to_delete: Set[str] = set()
- def import_phrases(self, tokenizer, should_replace):
+ def import_phrases(self, tokenizer: AbstractTokenizer, should_replace: bool) -> None:
"""
Iterate through all SpecialPhrases extracted from the
loader and import them into the database.
if result:
class_type_pairs.add(result)
- self._create_place_classtype_table_and_indexes(class_type_pairs)
+ self._create_classtype_table_and_indexes(class_type_pairs)
if should_replace:
self._remove_non_existent_tables_from_db()
self.db_connection.commit()
self.statistics_handler.notify_import_done()
- def _fetch_existing_place_classtype_tables(self):
+ def _fetch_existing_place_classtype_tables(self) -> None:
"""
Fetch existing place_classtype tables.
Fill the table_phrases_to_delete set of the class.
for row in db_cursor:
self.table_phrases_to_delete.add(row[0])
- def _load_white_and_black_lists(self):
+ def _load_white_and_black_lists(self) \
+ -> Tuple[Mapping[str, Sequence[str]], Mapping[str, Sequence[str]]]:
"""
Load white and black lists from phrases-settings.json.
"""
return settings['blackList'], settings['whiteList']
- def _check_sanity(self, phrase):
+ def _check_sanity(self, phrase: SpecialPhrase) -> bool:
"""
Check sanity of given inputs in case somebody added garbage in the wiki.
If a bad class/type is detected the system will exit with an error.
return False
return True
- def _process_phrase(self, phrase):
+ def _process_phrase(self, phrase: SpecialPhrase) -> Optional[Tuple[str, str]]:
"""
Processes the given phrase by checking black and white list
and sanity.
return (phrase.p_class, phrase.p_type)
- def _create_place_classtype_table_and_indexes(self, class_type_pairs):
+ def _create_classtype_table_and_indexes(self,
+ class_type_pairs: Iterable[Tuple[str, str]]) -> None:
"""
Create table place_classtype for each given pair.
Also create indexes on place_id and centroid.
db_cursor.execute("DROP INDEX idx_placex_classtype")
- def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
+ def _create_place_classtype_table(self, sql_tablespace: str,
+ phrase_class: str, phrase_type: str) -> None:
"""
Create table place_classtype of the given phrase_class/phrase_type
if it doesn't exist.
(phrase_class, phrase_type))
- def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
+ def _create_place_classtype_indexes(self, sql_tablespace: str,
+ phrase_class: str, phrase_type: str) -> None:
"""
Create indexes on centroid and place_id for the place_classtype table.
"""
SQL(sql_tablespace)))
- def _grant_access_to_webuser(self, phrase_class, phrase_type):
+ def _grant_access_to_webuser(self, phrase_class: str, phrase_type: str) -> None:
"""
Grant access on read to the table place_classtype for the webuser.
"""
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
- def _remove_non_existent_tables_from_db(self):
+ def _remove_non_existent_tables_from_db(self) -> None:
"""
Remove special phrases which don't exist on the wiki anymore.
Delete the place_classtype tables.
"""
Module containing the SPWikiLoader class.
"""
+from typing import Iterable
import re
import logging
+
+from nominatim.config import Configuration
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-def _get_wiki_content(lang):
+def _get_wiki_content(lang: str) -> str:
"""
Request and return the wiki page's content
corresponding to special phrases for a given lang.
"""
Handles loading of special phrases from the wiki.
"""
- def __init__(self, config):
- super().__init__()
+ def __init__(self, config: Configuration) -> None:
self.config = config
# Compile the regex here to improve performance.
self.occurence_pattern = re.compile(
)
# Hack around a bug where building=yes was imported with quotes into the wiki
self.type_fix_pattern = re.compile(r'\"|"')
- self._load_languages()
+
+ self.languages = self.config.get_str_list('LANGUAGES') or \
+ ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+ 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+ 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+ 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- def generate_phrases(self):
+ def generate_phrases(self) -> Iterable[SpecialPhrase]:
""" Download the wiki pages for the configured languages
and extract the phrases from the page.
"""
match[1],
self.type_fix_pattern.sub('', match[2]),
match[3])
-
-
- def _load_languages(self):
- """
- Get list of all languages from env config file
- or default if there is no languages configured.
- The system will extract special phrases only from all specified languages.
- """
- if self.config.LANGUAGES:
- self.languages = self.config.get_str_list('LANGUAGES')
- else:
- self.languages = [
- 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
- 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
- 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
- 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
This class is a model used to transfer a special phrase through
the process of load and importation.
"""
+from typing import Any
+
class SpecialPhrase:
"""
Model representing a special phrase.
"""
- def __init__(self, p_label, p_class, p_type, p_operator):
+ def __init__(self, p_label: str, p_class: str, p_type: str, p_operator: str) -> None:
self.p_label = p_label.strip()
self.p_class = p_class.strip()
- # Hack around a bug where building=yes was imported with quotes into the wiki
self.p_type = p_type.strip()
# Needed if some operator in the wiki are not written in english
p_operator = p_operator.strip().lower()
self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator
- def __eq__(self, other):
+ def __eq__(self, other: Any) -> bool:
if not isinstance(other, SpecialPhrase):
return False
and self.p_type == other.p_type \
and self.p_operator == other.p_operator
- def __hash__(self):
+ def __hash__(self) -> int:
return hash((self.p_label, self.p_class, self.p_type, self.p_operator))
"""
pairs = set([('class1', 'type1'), ('class2', 'type2')])
- sp_importer._create_place_classtype_table_and_indexes(pairs)
+ sp_importer._create_classtype_table_and_indexes(pairs)
for pair in pairs:
assert check_table_exist(temp_db_conn, pair[0], pair[1])