do not check for extra housenumber index for reverse-only

[nominatim.git] / nominatim / tools / special_phrases.py
diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py

index b8d5a5dcbd6e95768ac51634ffbda80a084939da..f4eec260c529c13e3a1d0235eaa5230669f2f16f 100644 (file)
--- a/nominatim/tools/special_phrases.py
+++ b/nominatim/tools/special_phrases.py
@@ -3,20 +3,21 @@
  """
  import logging
  import os
+from os.path import isfile
  from pathlib import Path
  import re
  import subprocess
  import json
-from os.path import isfile
+
  from icu import Transliterator
  from psycopg2.sql import Identifier, Literal, SQL
+
  from nominatim.tools.exec_utils import get_url
  from nominatim.errors import UsageError
  
  LOG = logging.getLogger()
  class SpecialPhrasesImporter():
      # pylint: disable-msg=too-many-instance-attributes
-    # pylint: disable-msg=too-few-public-methods
      """
          Class handling the process of special phrases importations.
      """
@@ -27,7 +28,7 @@ class SpecialPhrasesImporter():
          self.black_list, self.white_list = self._load_white_and_black_lists()
          #Compile the regex here to increase performances.
          self.occurence_pattern = re.compile(
-            r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
+            r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
          )
          self.sanity_check_pattern = re.compile(r'^\w+$')
          self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
@@ -43,7 +44,6 @@ class SpecialPhrasesImporter():
          #This set will contain all existing place_classtype tables which doesn't match any
          #special phrases class/type on the wiki.
          self.table_phrases_to_delete = set()
-        self.table_phrases = set()
  
      def import_from_wiki(self, languages=None):
          """
@@ -53,8 +53,8 @@ class SpecialPhrasesImporter():
          if languages is not None and not isinstance(languages, list):
              raise TypeError('The \'languages\' argument should be of type list.')
  
-        self.fetch_existing_words_phrases()
-        self.fetch_existing_place_classtype_tables()
+        self._fetch_existing_words_phrases()
+        self._fetch_existing_place_classtype_tables()
  
          #Get all languages to process.
          languages = self._load_languages() if not languages else languages
@@ -68,11 +68,11 @@ class SpecialPhrasesImporter():
              class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
  
          self._create_place_classtype_table_and_indexes(class_type_pairs)
-        self.remove_non_existent_phrases_from_db()
+        self._remove_non_existent_phrases_from_db()
          self.db_connection.commit()
          LOG.warning('Import done.')
  
-    def fetch_existing_words_phrases(self):
+    def _fetch_existing_words_phrases(self):
          """
              Fetch existing special phrases from the word table.
              Fill the word_phrases_to_delete set of the class.
@@ -92,7 +92,7 @@ class SpecialPhrasesImporter():
                      (row[0], row[1], row[2], row[3])
                  )
  
-    def fetch_existing_place_classtype_tables(self):
+    def _fetch_existing_place_classtype_tables(self):
          """
              Fetch existing place_classtype tables.
              Fill the table_phrases_to_delete set of the class.
@@ -117,7 +117,7 @@ class SpecialPhrasesImporter():
          if self.config.PHRASE_CONFIG:
              settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
  
-        with open(settings_path, "r") as json_settings:
+        with settings_path.open("r") as json_settings:
              settings = json.load(json_settings)
          return settings['blackList'], settings['whiteList']
  
@@ -153,9 +153,11 @@ class SpecialPhrasesImporter():
          type_matchs = self.sanity_check_pattern.findall(phrase_type)
          class_matchs = self.sanity_check_pattern.findall(phrase_class)
  
-        if len(class_matchs) < 1 or len(type_matchs) < 1:
-            raise UsageError("Bad class/type for language {}: {}={}".format(
-                lang, phrase_class, phrase_type))
+        if not class_matchs or not type_matchs:
+            LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported",
+                        lang, phrase_class, phrase_type)
+            return False
+        return True
  
      def _process_xml_content(self, xml_content, lang):
          """
@@ -206,7 +208,8 @@ class SpecialPhrasesImporter():
                  continue
  
              #sanity check, in case somebody added garbage in the wiki
-            self._check_sanity(lang, phrase_class, phrase_type)
+            if not self._check_sanity(lang, phrase_class, phrase_type):
+                continue
  
              class_type_pairs.add((phrase_class, phrase_type))
  
@@ -299,7 +302,7 @@ class SpecialPhrasesImporter():
          """
          index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
          base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
-        #Index on centroidself.table_phrases_to_delete.add(row)
+        #Index on centroid
          if not self.db_connection.index_exists(index_prefix + 'centroid'):
              with self.db_connection.cursor() as db_cursor:
                  db_cursor.execute(SQL("""
@@ -326,7 +329,7 @@ class SpecialPhrasesImporter():
                                .format(Identifier(table_name),
                                        Identifier(self.config.DATABASE_WEBUSER)))
  
-    def remove_non_existent_phrases_from_db(self):
+    def _remove_non_existent_phrases_from_db(self):
          """
              Remove special phrases which doesn't exist on the wiki anymore.
              Delete from the word table and delete the place_classtype tables.