X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/00959fac57d6c1b35d009e68ad857989ae71de6c..925195725dfcb7f1a6795c50244c1df6cb7242ce:/nominatim/tools/special_phrases/sp_importer.py diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py index 1b42cb00..791f4dc3 100644 --- a/nominatim/tools/special_phrases/sp_importer.py +++ b/nominatim/tools/special_phrases/sp_importer.py @@ -20,12 +20,18 @@ from nominatim.errors import UsageError from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics LOG = logging.getLogger() + +def _classtype_table(phrase_class, phrase_type): + """ Return the name of the table for the given class and type. + """ + return f'place_classtype_{phrase_class}_{phrase_type}' + class SPImporter(): # pylint: disable-msg=too-many-instance-attributes """ - Class handling the process of special phrases importations into the database. + Class handling the process of special phrases importation into the database. - Take a SPLoader which load the phrases from an external source. + Take a sp loader which load the phrases from an external source. """ def __init__(self, config, phplib_dir, db_connection, sp_loader) -> None: self.config = config @@ -38,33 +44,38 @@ class SPImporter(): # This set will contain all existing phrases to be added. # It contains tuples with the following format: (lable, class, type, operator) self.word_phrases = set() - #This set will contain all existing place_classtype tables which doesn't match any - #special phrases class/type on the wiki. + # This set will contain all existing place_classtype tables which doesn't match any + # special phrases class/type on the wiki. self.table_phrases_to_delete = set() - def import_phrases(self, tokenizer): + def import_phrases(self, tokenizer, should_replace): """ - Iterate through all specified languages and - extract corresponding special phrases from the wiki. + Iterate through all SpecialPhrases extracted from the + loader and import them into the database. + + If should_replace is set to True only the loaded phrases + will be kept into the database. All other phrases already + in the database will be removed. """ LOG.warning('Special phrases importation starting') self._fetch_existing_place_classtype_tables() - #Store pairs of class/type for further processing + # Store pairs of class/type for further processing class_type_pairs = set() for loaded_phrases in self.sp_loader: for phrase in loaded_phrases: result = self._process_phrase(phrase) if result: - class_type_pairs.update(result) + class_type_pairs.add(result) self._create_place_classtype_table_and_indexes(class_type_pairs) - self._remove_non_existent_tables_from_db() + if should_replace: + self._remove_non_existent_tables_from_db() self.db_connection.commit() with tokenizer.name_analyzer() as analyzer: - analyzer.update_special_phrases(self.word_phrases) + analyzer.update_special_phrases(self.word_phrases, should_replace) LOG.warning('Import done.') self.statistics_handler.notify_import_done() @@ -120,19 +131,17 @@ class SPImporter(): Return the class/type pair corresponding to the phrase. """ - #blacklisting: disallow certain class/type combinations - if ( - phrase.p_class in self.black_list.keys() and - phrase.p_type in self.black_list[phrase.p_class] - ): return None + # blacklisting: disallow certain class/type combinations + if phrase.p_class in self.black_list.keys() \ + and phrase.p_type in self.black_list[phrase.p_class]: + return None - #whitelisting: if class is in whitelist, allow only tags in the list - if ( - phrase.p_class in self.white_list.keys() and - phrase.p_type not in self.white_list[phrase.p_class] - ): return None + # whitelisting: if class is in whitelist, allow only tags in the list + if phrase.p_class in self.white_list.keys() \ + and phrase.p_type not in self.white_list[phrase.p_class]: + return None - #sanity check, in case somebody added garbage in the wiki + # sanity check, in case somebody added garbage in the wiki if not self._check_sanity(phrase): self.statistics_handler.notify_one_phrase_invalid() return None @@ -140,7 +149,7 @@ class SPImporter(): self.word_phrases.add((phrase.p_label, phrase.p_class, phrase.p_type, phrase.p_operator)) - return set({(phrase.p_class, phrase.p_type)}) + return (phrase.p_class, phrase.p_type) def _create_place_classtype_table_and_indexes(self, class_type_pairs): @@ -152,7 +161,7 @@ class SPImporter(): sql_tablespace = self.config.TABLESPACE_AUX_DATA if sql_tablespace: - sql_tablespace = ' TABLESPACE '+sql_tablespace + sql_tablespace = ' TABLESPACE ' + sql_tablespace with self.db_connection.cursor() as db_cursor: db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") @@ -161,23 +170,23 @@ class SPImporter(): phrase_class = pair[0] phrase_type = pair[1] - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) if table_name in self.table_phrases_to_delete: self.statistics_handler.notify_one_table_ignored() - #Remove this table from the ones to delete as it match a class/type - #still existing on the special phrases of the wiki. + # Remove this table from the ones to delete as it match a + # class/type still existing on the special phrases of the wiki. self.table_phrases_to_delete.remove(table_name) - #So dont need to create the table and indexes. + # So don't need to create the table and indexes. continue - #Table creation + # Table creation self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type) - #Indexes creation + # Indexes creation self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type) - #Grant access on read to the web user. + # Grant access on read to the web user. self._grant_access_to_webuser(phrase_class, phrase_type) self.statistics_handler.notify_one_table_created() @@ -190,11 +199,11 @@ class SPImporter(): """ Create table place_classtype of the given phrase_class/phrase_type if doesn't exit. """ - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" - CREATE TABLE IF NOT EXISTS {{}} {} - AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex + CREATE TABLE IF NOT EXISTS {{}} {} + AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = {{}} AND type = {{}}""".format(sql_tablespace)) .format(Identifier(table_name), Literal(phrase_class), Literal(phrase_type))) @@ -205,8 +214,8 @@ class SPImporter(): Create indexes on centroid and place_id for the place_classtype table. """ index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) - base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) - #Index on centroid + base_table = _classtype_table(phrase_class, phrase_type) + # Index on centroid if not self.db_connection.index_exists(index_prefix + 'centroid'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" @@ -214,7 +223,7 @@ class SPImporter(): .format(Identifier(index_prefix + 'centroid'), Identifier(base_table)), sql_tablespace) - #Index on place_id + # Index on place_id if not self.db_connection.index_exists(index_prefix + 'place_id'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL( @@ -227,7 +236,7 @@ class SPImporter(): """ Grant access on read to the table place_classtype for the webuser. """ - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""") .format(Identifier(table_name), @@ -239,18 +248,14 @@ class SPImporter(): Delete the place_classtype tables. """ LOG.warning('Cleaning database...') - #Array containing all queries to execute. Contain tuples of format (query, parameters) - queries_parameters = [] - - #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore - for table in self.table_phrases_to_delete: - self.statistics_handler.notify_one_table_deleted() - query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) - queries_parameters.append((query, ())) + # Delete place_classtype tables corresponding to class/type which + # are not on the wiki anymore. with self.db_connection.cursor() as db_cursor: - for query, parameters in queries_parameters: - db_cursor.execute(query, parameters) + for table in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_deleted() + db_cursor.drop_table(table) + def _convert_php_settings_if_needed(self, file_path): """ @@ -262,7 +267,7 @@ class SPImporter(): file, extension = os.path.splitext(file_path) json_file_path = Path(file + '.json').resolve() - if extension not in('.php', '.json'): + if extension not in ('.php', '.json'): raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.') if extension == '.php' and not isfile(json_file_path): @@ -271,9 +276,8 @@ class SPImporter(): (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(), file_path], check=True) LOG.warning('special_phrase configuration file has been converted to json.') - return json_file_path except subprocess.CalledProcessError: LOG.error('Error while converting %s to json.', file_path) raise - else: - return json_file_path + + return json_file_path