--- /dev/null
+"""
+ Contains the class which handles statistics for the
+ import of special phrases.
+"""
+import logging
+LOG = logging.getLogger()
+
+class SpecialPhrasesImporterStatistics():
+    # pylint: disable=too-many-instance-attributes
+    """
+    Class handling statistics of the import
+    process of special phrases.
+
+    Two families of counters are kept:
+      * ``lang_*`` counters (invalid/added/ignored phrases) cover the
+        language currently being processed and are reset by
+        ``notify_current_lang_done()``.
+      * ``global_*`` and ``tables_*`` counters cover the whole import
+        and are reset by ``notify_import_done()``.
+
+    The ``notify_one_*`` methods each add one to the matching counter(s);
+    the two ``notify_*_done`` methods write a report to the module logger.
+    """
+    def __init__(self):
+        self._set_lang_values_to_0()
+        self._set_global_values_to_0()
+
+    def _set_global_values_to_0(self):
+        """
+        Set all counts for the global
+        import to 0.
+        """
+        self.tables_created = 0
+        self.tables_deleted = 0
+        self.tables_ignored = 0
+        self.global_phrases_invalid = 0
+        self.global_phrases_added = 0
+        self.global_phrases_ignored = 0
+        self.global_phrases_deleted = 0
+
+    def _set_lang_values_to_0(self):
+        """
+        Set all counts for the current
+        lang to 0.
+        """
+        self.lang_phrases_invalid = 0
+        self.lang_phrases_added = 0
+        self.lang_phrases_ignored = 0
+
+    def notify_one_phrase_invalid(self):
+        """
+        Add +1 to the count of invalid entries
+        fetched from the wiki (both the current-lang
+        and the global counter).
+        """
+        self.lang_phrases_invalid += 1
+        self.global_phrases_invalid += 1
+
+    def notify_one_phrase_added(self):
+        """
+        Add +1 to the count of entries
+        added to the db (both the current-lang
+        and the global counter).
+        """
+        self.lang_phrases_added += 1
+        self.global_phrases_added += 1
+
+    def notify_one_phrase_ignored(self):
+        """
+        Add +1 to the count of ignored
+        entries as it was already in the db (both the
+        current-lang and the global counter).
+        """
+        self.lang_phrases_ignored += 1
+        self.global_phrases_ignored += 1
+
+    def notify_one_phrase_deleted(self):
+        """
+        Add +1 to the count of phrases deleted
+        from the database. Only a global counter
+        exists for deletions.
+        """
+        self.global_phrases_deleted += 1
+
+    def notify_one_table_created(self):
+        """
+        Add +1 to the count of created tables.
+        """
+        self.tables_created += 1
+
+    def notify_one_table_deleted(self):
+        """
+        Add +1 to the count of deleted tables.
+        """
+        self.tables_deleted += 1
+
+    def notify_one_table_ignored(self):
+        """
+        Add +1 to the count of ignored tables.
+        """
+        self.tables_ignored += 1
+
+    def notify_import_done(self):
+        """
+        Print stats for the whole import process
+        and reset the global values. The current-lang
+        values are not touched here; they are reset
+        by notify_current_lang_done().
+        """
+        LOG.info('====================================================================')
+        LOG.info('Final statistics of the import:')
+        LOG.info('- %s phrases were invalid.', self.global_phrases_invalid)
+        if self.global_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s phrases were ignored as they are already in the database',
+                 self.global_phrases_ignored)
+        LOG.info('- %s phrases were added to the database', self.global_phrases_added)
+        LOG.info('- %s phrases were deleted from the database', self.global_phrases_deleted)
+        if self.global_phrases_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+        LOG.info('- %s tables were ignored as they already exist on the database',
+                 self.tables_ignored)
+        LOG.info('- %s tables were created', self.tables_created)
+        LOG.info('- %s tables were deleted from the database', self.tables_deleted)
+        if self.tables_deleted > 0:
+            LOG.info('  They were deleted as they are not valid anymore.')
+
+        # Repeat the invalid count at warning level, in addition to the
+        # info-level report above.
+        if self.global_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped during the whole process.',
+                        self.global_phrases_invalid)
+
+        self._set_global_values_to_0()
+
+    def notify_current_lang_done(self, lang):
+        """
+        Print stats for the current lang
+        and then reset lang values.
+
+        `lang` is only interpolated into the log messages.
+        """
+        LOG.info('====================================================================')
+        LOG.info('Statistics for the import of %s:', lang)
+        LOG.info('- %s phrases were invalid.', self.lang_phrases_invalid)
+        if self.lang_phrases_invalid > 0:
+            LOG.info('  Those invalid phrases have been skipped.')
+        LOG.info('- %s phrases were ignored as they are already in the database',
+                 self.lang_phrases_ignored)
+        LOG.info('- %s phrases were added to the database', self.lang_phrases_added)
+        LOG.info('====================================================================')
+
+        # Repeat the invalid count at warning level, in addition to the
+        # info-level report above.
+        if self.lang_phrases_invalid > 0:
+            LOG.warning('%s phrases were invalid and have been skipped for the import of lang %s.',
+                        self.lang_phrases_invalid, lang)
+
+        self._set_lang_values_to_0()
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
+from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics
LOG = logging.getLogger()
class SpecialPhrasesImporter():
Class handling the process of special phrases importations.
"""
def __init__(self, config, phplib_dir, db_connection) -> None:
+ self.statistics_handler = SpecialPhrasesImporterStatistics()
self.db_connection = db_connection
self.config = config
self.phplib_dir = phplib_dir
class_type_pairs = set()
for lang in languages:
- LOG.warning('Import phrases for lang: %s', lang)
+ LOG.warning('Importing phrases for lang: %s...', lang)
wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
+ self.statistics_handler.notify_current_lang_done(lang)
self._create_place_classtype_table_and_indexes(class_type_pairs)
self._remove_non_existent_phrases_from_db()
self.db_connection.commit()
LOG.warning('Import done.')
+ self.statistics_handler.notify_import_done()
def _fetch_existing_words_phrases(self):
"""
(normalized_label, phrase_class, phrase_type, phrase_operator)
)
class_type_pairs.add((phrase_class, phrase_type))
+ self.statistics_handler.notify_one_phrase_ignored()
#Dont need to add this phrase as it already exists in the word table.
continue
#sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(lang, phrase_class, phrase_type):
+ self.statistics_handler.notify_one_phrase_invalid()
continue
class_type_pairs.add((phrase_class, phrase_type))
phrase_label, normalized_label, phrase_class,
phrase_type, phrase_operator
)
+ self.statistics_handler.notify_one_phrase_added()
return class_type_pairs
table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
if table_name in self.table_phrases_to_delete:
+ self.statistics_handler.notify_one_table_ignored()
#Remove this table from the ones to delete as it match a class/type
#still existing on the special phrases of the wiki.
self.table_phrases_to_delete.remove(table_name)
#Grant access on read to the web user.
self._grant_access_to_webuser(phrase_class, phrase_type)
+ self.statistics_handler.notify_one_table_created()
+
with self.db_connection.cursor() as db_cursor:
db_cursor.execute("DROP INDEX idx_placex_classtype")
#Delete phrases from the word table which are not on the wiki anymore.
for phrase_to_delete in self.words_phrases_to_delete:
+ self.statistics_handler.notify_one_phrase_deleted()
if phrase_to_delete[3] == '-':
query = """
DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
#Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
for table in self.table_phrases_to_delete:
+ self.statistics_handler.notify_one_table_deleted()
query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
queries_parameters.append((query, ()))