From 6d56cbb3e8cfe886d92737087d24e49ff7fd31e0 Mon Sep 17 00:00:00 2001 From: AntoJvlt Date: Mon, 22 Mar 2021 00:07:55 +0100 Subject: [PATCH] Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. --- CMakeLists.txt | 5 +- lib-php/admin/specialphrases.php | 163 +++++++++++++++++++++ lib-php/migration/phraseSettingsToJson.php | 11 ++ nominatim/clicmd/special_phrases.py | 2 +- nominatim/tools/special_phrases.py | 50 ++++++- settings/__init__.py | 3 - settings/phrase-settings.json | 25 ++++ settings/phrase_settings.py | 26 ---- 8 files changed, 246 insertions(+), 39 deletions(-) create mode 100644 lib-php/admin/specialphrases.php delete mode 100644 settings/__init__.py create mode 100644 settings/phrase-settings.json delete mode 100644 settings/phrase_settings.py diff --git a/CMakeLists.txt b/CMakeLists.txt index 45881a4a..1c6336a4 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -216,7 +216,7 @@ endif() include(GNUInstallDirs) set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME}) set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME}) -set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings) +set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}) if (BUILD_IMPORTER) configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin) @@ -257,9 +257,8 @@ if (BUILD_API) endif() install(FILES settings/env.defaults - settings/__init__.py settings/address-levels.json - settings/phrase_settings.py + settings/phrase-settings.json settings/import-admin.style settings/import-street.style settings/import-address.style diff --git a/lib-php/admin/specialphrases.php b/lib-php/admin/specialphrases.php new file mode 100644 index 00000000..9b90387a --- /dev/null +++ b/lib-php/admin/specialphrases.php @@ -0,0 +1,163 @@ +transliterate($sLabel)); + } else { + $sTrans = null; + } + $sClass = trim($aMatch[2]); + $sType = trim($aMatch[3]); + // hack around 
a bug where building=yes was imported with + // quotes into the wiki + $sType = preg_replace('/("|")/', '', $sType); + // sanity check, in case somebody added garbage in the wiki + if (preg_match('/^\\w+$/', $sClass) < 1 + || preg_match('/^\\w+$/', $sType) < 1 + ) { + trigger_error("Bad class/type for language $sLanguage: $sClass=$sType"); + exit; + } + // blacklisting: disallow certain class/type combinations + if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) { + // fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n"); + continue; + } + // whitelisting: if class is in whitelist, allow only tags in the list + if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) { + // fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n"); + continue; + } + $aPairs[$sClass.'|'.$sType] = array($sClass, $sType); + + switch (trim($aMatch[4])) { + case 'near': + printf( + "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n", + pg_escape_string($sLabel), + $sTrans, + $sClass, + $sType + ); + break; + case 'in': + printf( + "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n", + pg_escape_string($sLabel), + $sTrans, + $sClass, + $sType + ); + break; + default: + printf( + "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n", + pg_escape_string($sLabel), + $sTrans, + $sClass, + $sType + ); + break; + } + } + } + + echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);'; + + foreach ($aPairs as $aPair) { + $sql_tablespace = getSetting('TABLESPACE_AUX_DATA'); + if ($sql_tablespace) { + $sql_tablespace = ' TABLESPACE '.$sql_tablespace; + } + + printf( + 'CREATE TABLE place_classtype_%s_%s' + . $sql_tablespace + . ' AS' + . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex' + . " WHERE class = '%s' AND type = '%s'" + . 
";\n", + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]), + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]) + ); + + printf( + 'CREATE INDEX idx_place_classtype_%s_%s_centroid' + . ' ON place_classtype_%s_%s USING GIST (centroid)' + . $sql_tablespace + . ";\n", + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]), + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]) + ); + + printf( + 'CREATE INDEX idx_place_classtype_%s_%s_place_id' + . ' ON place_classtype_%s_%s USING btree(place_id)' + . $sql_tablespace + . ";\n", + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]), + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]) + ); + + printf( + 'GRANT SELECT ON place_classtype_%s_%s TO "%s"' + . ";\n", + pg_escape_string($aPair[0]), + pg_escape_string($aPair[1]), + getSetting('DATABASE_WEBUSER') + ); + } + + echo 'DROP INDEX idx_placex_classtype;'; +} \ No newline at end of file diff --git a/lib-php/migration/phraseSettingsToJson.php b/lib-php/migration/phraseSettingsToJson.php index 15c49f0a..187e3fc6 100644 --- a/lib-php/migration/phraseSettingsToJson.php +++ b/lib-php/migration/phraseSettingsToJson.php @@ -1,9 +1,16 @@ >>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. include $phpPhraseSettingsFile; $data = array(); @@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile) $jsonFile = fopen($jsonPhraseSettingsFile, 'w'); fwrite($jsonFile, json_encode($data)); fclose($jsonFile); +<<<<<<< HEAD +} +======= } +>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file. 
diff --git a/nominatim/clicmd/special_phrases.py b/nominatim/clicmd/special_phrases.py index b7e0f5dc..8198a4c3 100644 --- a/nominatim/clicmd/special_phrases.py +++ b/nominatim/clicmd/special_phrases.py @@ -25,5 +25,5 @@ class ImportSpecialPhrases: if args.from_wiki: LOG.warning('Special phrases importation starting') with connect(args.config.get_libpq_dsn()) as db_connection: - import_from_wiki(args.config, db_connection) + import_from_wiki(args, db_connection) return 0 diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py index a70d3047..3dead38b 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases.py @@ -2,26 +2,32 @@ Functions to import special phrases into the database. """ import logging +import os import re +import subprocess import sys +import json +from os.path import isfile from psycopg2.sql import Identifier, Literal, SQL -from settings.phrase_settings import BLACK_LIST, WHITE_LIST from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -def import_from_wiki(config, db_connection, languages=None): +def import_from_wiki(args, db_connection, languages=None): + # pylint: disable-msg=too-many-locals """ Iterate through all specified languages and extract corresponding special phrases from the wiki. """ + black_list, white_list = _load_white_and_black_lists(args) + #Compile the match regex to increase performance for the following loop. 
occurence_pattern = re.compile( r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])' ) sanity_check_pattern = re.compile(r'^\w+$') - languages = _get_languages(config) if not languages else languages + languages = _get_languages(args.config) if not languages else languages #array for pairs of class/type pairs = dict() @@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None): _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern) #blacklisting: disallow certain class/type combinations - if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]: + if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]: continue #whitelisting: if class is in whitelist, allow only tags in the list - if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]: + if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]: continue #add class/type to the pairs dict @@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None): db_connection, phrase_label, phrase_class, phrase_type, phrase_operator ) - _create_place_classtype_table_and_indexes(db_connection, config, pairs) + _create_place_classtype_table_and_indexes(db_connection, args.config, pairs) db_connection.commit() LOG.warning('Import done.') +def _load_white_and_black_lists(args): + """ + Load white and black lists from phrase-settings.json. 
+ """ + config = args.config + settings_path = str(config.config_dir)+'/phrase-settings.json' + + if config.PHRASE_CONFIG: + settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG) + + with open(settings_path, "r") as json_settings: + settings = json.load(json_settings) + return settings['blackList'], settings['whiteList'] def _get_languages(config): """ @@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type): db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""") .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'), Identifier(config.DATABASE_WEBUSER))) + +def _convert_php_settings_if_needed(args, file_path): + """ + Convert php settings file of special phrases to json file if it is still in php format. + """ + file, extension = os.path.splitext(file_path) + json_file_path = f'{file}.json' + if extension == '.php' and not isfile(json_file_path): + try: + subprocess.run(['/usr/bin/env', 'php', '-Cq', + args.phplib_dir / 'migration/phraseSettingsToJson.php', + file_path], check=True) + LOG.warning('special_phrase configuration file has been converted to json.') + return json_file_path + except subprocess.CalledProcessError: + LOG.error('Error while converting %s to json.', file_path) + raise + else: + return json_file_path diff --git a/settings/__init__.py b/settings/__init__.py deleted file mode 100644 index b49d97fe..00000000 --- a/settings/__init__.py +++ /dev/null @@ -1,3 +0,0 @@ -""" - Module for settings -""" \ No newline at end of file diff --git a/settings/phrase-settings.json b/settings/phrase-settings.json new file mode 100644 index 00000000..a097dca4 --- /dev/null +++ b/settings/phrase-settings.json @@ -0,0 +1,25 @@ +{ + "Comments": [ + "Black list correspond to class/type combinations to exclude", + "If a class is in the white list then all types will", + "be ignored except the ones given in the list.", + "Also use this list to exclude an entire class from special phrases." 
+ ], + "blackList": { + "bounday": [ + "administrative" + ], + "place": [ + "house", + "houses" + ] + }, + "whiteList": { + "highway": [ + "bus_stop", + "rest_area", + "raceway" + ], + "building": [] + } +} diff --git a/settings/phrase_settings.py b/settings/phrase_settings.py deleted file mode 100644 index 59a4e7c6..00000000 --- a/settings/phrase_settings.py +++ /dev/null @@ -1,26 +0,0 @@ -""" - These settings control the import of special phrases from the wiki. -""" -#class/type combinations to exclude -BLACK_LIST = { - 'bounday': [ - 'administrative' - ], - 'place': [ - 'house', - 'houses' - ] -} - -#If a class is in the white list then all types will -#be ignored except the ones given in the list. -#Also use this list to exclude an entire class from -#special phrases. -WHITE_LIST = { - 'highway': [ - 'bus_stop', - 'rest_area', - 'raceway' - ], - 'building': [] -} -- 2.39.5