git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2291 from AntoJvlt/special-phrases-statistics
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 27 Apr 2021 09:57:05 +0000 (11:57 +0200)
committer GitHub <noreply@github.com>
Tue, 27 Apr 2021 09:57:05 +0000 (11:57 +0200)
Special phrases statistics

nominatim/clicmd/special_phrases.py
nominatim/tools/__init__.py
nominatim/tools/special_phrases/__init__.py [new file with mode: 0644]
nominatim/tools/special_phrases/importer_statistics.py [new file with mode: 0644]
nominatim/tools/special_phrases/special_phrases_importer.py [moved from nominatim/tools/special_phrases.py with 95% similarity]
test/python/test_tools_import_special_phrases.py

diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py
index 99e825925ad63443b305543624cde60572740fee..1eb25bdc156ea8dc9bc0ce53441a1d598620de0c 100644 (file)
@@ -2,7 +2,7 @@
     Implementation of the 'special-phrases' command.
 """
 import logging
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
 from nominatim.db.connection import connect
 
 LOG = logging.getLogger()
diff --git a/nominatim/tools/__init__.py b/nominatim/tools/__init__.py
index cab6fb8b89ce541f1054ad7887cd5aafcdd0d5c1..cc5d3e9ba2890ac52993bac07cf57472e128a5d3 100644 (file)
@@ -2,3 +2,5 @@
 Module with functions for importing, updating Nominatim databases
 as well as general maintenance helpers.
 """
+
+from nominatim.tools.special_phrases.special_phrases_importer import SpecialPhrasesImporter
diff --git a/nominatim/tools/special_phrases/__init__.py b/nominatim/tools/special_phrases/__init__.py
new file mode 100644 (file)
index 0000000..e69de29
diff --git a/nominatim/tools/special_phrases/importer_statistics.py b/nominatim/tools/special_phrases/importer_statistics.py
new file mode 100644 (file)
index 0000000..46965c4
--- /dev/null
@@ -0,0 +1,138 @@
+"""
+    Contains the class which handles statistics for the
+    import of special phrases.
+"""
+import logging
+LOG = logging.getLogger()
+
+class SpecialPhrasesImporterStatistics():
+    # pylint: disable=too-many-instance-attributes
+    """
+        Counters for the import of special phrases: totals for the whole
+        import plus phrase counts for the current lang, reported via LOG.
+    """
+    def __init__(self):
+        self._set_lang_values_to_0()
+        self._set_global_values_to_0()
+
+    def _set_global_values_to_0(self):
+        """
+            Set the table counts and the global
+            phrase counts to 0.
+        """
+        self.tables_created = 0
+        self.tables_deleted = 0
+        self.tables_ignored = 0
+        self.global_phrases_invalid = 0
+        self.global_phrases_added = 0
+        self.global_phrases_ignored = 0
+        self.global_phrases_deleted = 0
+
+    def _set_lang_values_to_0(self):
+        """
+            Set the phrase counts of the
+            current lang to 0.
+        """
+        self.lang_phrases_invalid = 0
+        self.lang_phrases_added = 0
+        self.lang_phrases_ignored = 0
+
+    def notify_one_phrase_invalid(self):
+        """
+            Add +1 to both the current lang and the
+            global count of invalid phrases.
+        """
+        self.lang_phrases_invalid += 1
+        self.global_phrases_invalid += 1
+
+    def notify_one_phrase_added(self):
+        """
+            Add +1 to both the current lang and the
+            global count of phrases added to the db.
+        """
+        self.lang_phrases_added += 1
+        self.global_phrases_added += 1
+
+    def notify_one_phrase_ignored(self):
+        """
+            Add +1 to both the current lang and the global
+            count of phrases ignored as already in the db.
+        """
+        self.lang_phrases_ignored += 1
+        self.global_phrases_ignored += 1
+
+    def notify_one_phrase_deleted(self):
+        """
+            Add +1 to the global count of phrases deleted
+            from the database (no per-lang count is kept).
+        """
+        self.global_phrases_deleted += 1
+
+    def notify_one_table_created(self):
+        """
+            Add +1 to the count of created tables.
+        """
+        self.tables_created += 1
+
+    def notify_one_table_deleted(self):
+        """
+            Add +1 to the count of deleted tables.
+        """
+        self.tables_deleted += 1
+
+    def notify_one_table_ignored(self):
+        """
+            Add +1 to the count of ignored tables.
+        """
+        self.tables_ignored += 1
+
+
+    def notify_import_done(self):
+        """
+            Log stats for the whole import process (plus a warning if phrases
+            were invalid), then reset the global values; lang values are kept.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Final statistics of the import:')
+        LOG.info('- %s phrases were invalid.', self.global_phrases_invalid)
+        if self.global_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s phrases were ignored as they are already in the database',
+                 self.global_phrases_ignored)
+        LOG.info('- %s phrases were added to the database', self.global_phrases_added)
+        LOG.info('- %s phrases were deleted from the database', self.global_phrases_deleted)
+        if self.global_phrases_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+        LOG.info('- %s tables were ignored as they already exist on the database',
+                 self.tables_ignored)
+        LOG.info('- %s tables were created', self.tables_created)
+        LOG.info('- %s tables were deleted from the database', self.tables_deleted)
+        if self.tables_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+
+        if self.global_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
+                        self.global_phrases_invalid)
+
+        self._set_global_values_to_0()
+
+    def notify_current_lang_done(self, lang):
+        """
+            Log stats for the given lang (plus a warning if phrases
+            were invalid), then reset the lang values only.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Statistics for the import of %s:', lang)
+        LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid)
+        if self.lang_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s phrases were ignored as they are already in the database',
+                 self.lang_phrases_ignored)
+        LOG.info('- %s phrases were added to the database', self.lang_phrases_added)
+        LOG.info('====================================================================')
+
+        if self.lang_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
+                        self.lang_phrases_invalid, lang)
+
+        self._set_lang_values_to_0()
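diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases/special_phrases_importer.py
similarity index 95%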
rename from nominatim/tools/special_phrases.py
rename to nominatim/tools/special_phrases/special_phrases_importer.py
index f4eec260c529c13e3a1d0235eaa5230669f2f16f..a8e780c39ba4515568e5103b208ef0822af8ff6c 100644 (file)
@@ -14,6 +14,7 @@ from psycopg2.sql import Identifier, Literal, SQL
 
 from nominatim.tools.exec_utils import get_url
 from nominatim.errors import UsageError
+from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
 
 LOG = logging.getLogger()
 class SpecialPhrasesImporter():
@@ -22,6 +23,7 @@ class SpecialPhrasesImporter():
         Class handling the process of special phrases importations.
     """
     def __init__(self, config, phplib_dir, db_connection) -> None:
+        self.statistics_handler = SpecialPhrasesImporterStatistics()
         self.db_connection = db_connection
         self.config = config
         self.phplib_dir = phplib_dir
@@ -63,14 +65,16 @@ class SpecialPhrasesImporter():
         class_type_pairs = set()
 
         for lang in languages:
-            LOG.warning('Import phrases for lang: %s', lang)
+            LOG.warning('Importing phrases for lang: %s...', lang)
             wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
             class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
+            self.statistics_handler.notify_current_lang_done(lang)
 
         self._create_place_classtype_table_and_indexes(class_type_pairs)
         self._remove_non_existent_phrases_from_db()
         self.db_connection.commit()
         LOG.warning('Import done.')
+        self.statistics_handler.notify_import_done()
 
     def _fetch_existing_words_phrases(self):
         """
@@ -204,11 +208,13 @@ class SpecialPhrasesImporter():
                     (normalized_label, phrase_class, phrase_type, phrase_operator)
                 )
                 class_type_pairs.add((phrase_class, phrase_type))
+                self.statistics_handler.notify_one_phrase_ignored()
                 #Dont need to add this phrase as it already exists in the word table.
                 continue
 
             #sanity check, in case somebody added garbage in the wiki
             if not self._check_sanity(lang, phrase_class, phrase_type):
+                self.statistics_handler.notify_one_phrase_invalid()
                 continue
 
             class_type_pairs.add((phrase_class, phrase_type))
@@ -217,6 +223,7 @@ class SpecialPhrasesImporter():
                 phrase_label, normalized_label, phrase_class,
                 phrase_type, phrase_operator
             )
+            self.statistics_handler.notify_one_phrase_added()
 
         return class_type_pairs
 
@@ -263,6 +270,7 @@ class SpecialPhrasesImporter():
             table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
 
             if table_name in self.table_phrases_to_delete:
+                self.statistics_handler.notify_one_table_ignored()
                 #Remove this table from the ones to delete as it match a class/type
                 #still existing on the special phrases of the wiki.
                 self.table_phrases_to_delete.remove(table_name)
@@ -278,6 +286,8 @@ class SpecialPhrasesImporter():
             #Grant access on read to the web user.
             self._grant_access_to_webuser(phrase_class, phrase_type)
 
+            self.statistics_handler.notify_one_table_created()
+
         with self.db_connection.cursor() as db_cursor:
             db_cursor.execute("DROP INDEX idx_placex_classtype")
 
@@ -341,6 +351,7 @@ class SpecialPhrasesImporter():
 
         #Delete phrases from the word table which are not on the wiki anymore.
         for phrase_to_delete in self.words_phrases_to_delete:
+            self.statistics_handler.notify_one_phrase_deleted()
             if phrase_to_delete[3] == '-':
                 query = """
                     DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
@@ -357,6 +368,7 @@ class SpecialPhrasesImporter():
 
         #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
         for table in self.table_phrases_to_delete:
+            self.statistics_handler.notify_one_table_deleted()
             query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
             queries_parameters.append((query, ()))
 
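diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py
index 4890e0b22be519e4c53a94880eeacec7e4075d3e..cb82f6b78c3d6ad53333673773dd9294ce4c2493 100644 (file)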
@@ -2,13 +2,12 @@
     Tests for import special phrases methods
     of the class SpecialPhrasesImporter.
 """
-from mocks import MockParamCapture
 from nominatim.errors import UsageError
 from pathlib import Path
 import tempfile
 from shutil import copyfile
 import pytest
-from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.tools import SpecialPhrasesImporter
 
 TEST_BASE_DIR = Path(__file__) / '..' / '..'
 
@@ -304,7 +303,7 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases
             CREATE TABLE place_classtype_amenity_animal_shelter();
             CREATE TABLE place_classtype_wrongclass_wrongtype();""")
 
-    monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
+    monkeypatch.setattr('nominatim.tools.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
     special_phrases_importer.import_from_wiki(['en'])
 
     class_test = 'aerialway'