class SPImporter():
# pylint: disable-msg=too-many-instance-attributes
"""
- Class handling the process of special phrases importations into the database.
+ Class handling the process of special phrases importation into the database.
- Take a SPLoader which load the phrases from an external source.
+ Takes an SP loader which loads the phrases from an external source.
"""
def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None:
self.config = config
#special phrases class/type on the wiki.
self.table_phrases_to_delete = set()
- def import_phrases(self, tokenizer):
+ def import_phrases(self, tokenizer, should_replace):
"""
- Iterate through all specified languages and
- extract corresponding special phrases from the wiki.
+ Iterate through all SpecialPhrases extracted from the
+ loader and import them into the database.
+
+ If should_replace is set to True, only the loaded phrases
+ will be kept in the database. All other phrases already
+ in the database will be removed.
"""
LOG.warning('Special phrases importation starting')
self._fetch_existing_place_classtype_tables()
class_type_pairs.update(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
- self._remove_non_existent_tables_from_db()
+ if should_replace:
+ self._remove_non_existent_tables_from_db()
self.db_connection.commit()
with tokenizer.name_analyzer() as analyzer:
- analyzer.update_special_phrases(self.word_phrases)
+ analyzer.update_special_phrases(self.word_phrases, should_replace)
LOG.warning('Import done.')
self.statistics_handler.notify_import_done()
"""
#blacklisting: disallow certain class/type combinations
- if (
- phrase.p_class in self.black_list.keys() and
- phrase.p_type in self.black_list[phrase.p_class]
- ): return None
+ if phrase.p_class in self.black_list.keys() \
+ and phrase.p_type in self.black_list[phrase.p_class]:
+ return None
#whitelisting: if class is in whitelist, allow only tags in the list
- if (
- phrase.p_class in self.white_list.keys() and
- phrase.p_type not in self.white_list[phrase.p_class]
- ): return None
+ if phrase.p_class in self.white_list.keys() \
+ and phrase.p_type not in self.white_list[phrase.p_class]:
+ return None
#sanity check, in case somebody added garbage in the wiki
if not self._check_sanity(phrase):