--- /dev/null
+<?php
+/**
+ * Command-line script that prints SQL for importing special phrases.
+ *
+ * With --wiki-import, the special phrase page of every configured language
+ * is downloaded from the OSM wiki. For each table row found, a call to
+ * getorcreate_amenity() or getorcreate_amenityoperator() is printed, followed
+ * by the statements that create one place_classtype_<class>_<type> table
+ * (with its indexes and grant) per accepted class/type pair.
+ * All SQL is written to standard output with printf/echo.
+ */
+@define('CONST_LibDir', dirname(dirname(__FILE__)));
+
+require_once(CONST_LibDir.'/init-cmd.php');
+ini_set('memory_limit', '800M');
+ini_set('display_errors', 'stderr');
+
+$aCMDOptions
+= array(
+   'Import and export special phrases',
+   array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
+   array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
+   array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
+   array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
+   array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
+  );
+getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
+
+// Fall back to the current working directory when no project dir is given.
+loadSettings($aCMDResult['project-dir'] ?? getcwd());
+setupHTTPProxy();
+
+// $aTagsBlacklist and $aTagsWhitelist, consulted below, are not defined in
+// this script; presumably the included settings file provides them.
+include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
+
+if ($aCMDResult['wiki-import']) {
+    // May be null when the normalization rules cannot be compiled.
+    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
+    // Accepted class/type pairs, keyed by "class|type" to remove duplicates.
+    $aPairs = array();
+
+    $sLanguageIn = getSetting(
+        'LANGUAGES',
+        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
+        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
+    );
+
+    foreach (explode(',', $sLanguageIn) as $sLanguage) {
+        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
+        $sWikiPageXML = file_get_contents($sURL);
+
+        // A failed download has nothing to parse; PHP has already reported
+        // the failure as a warning, so move on to the next language.
+        if ($sWikiPageXML === false) {
+            continue;
+        }
+
+        // One match per wiki table row: label || class || type || operator || plural flag.
+        if (!preg_match_all(
+            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
+            $sWikiPageXML,
+            $aMatches,
+            PREG_SET_ORDER
+        )) {
+            continue;
+        }
+
+        foreach ($aMatches as $aMatch) {
+            $sLabel = trim($aMatch[1]);
+            if ($oNormalizer !== null) {
+                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+            } else {
+                // Without a normalizer an empty string ends up in the SQL.
+                $sTrans = null;
+            }
+            $sClass = trim($aMatch[2]);
+            $sType = trim($aMatch[3]);
+            // hack around a bug where building=yes was imported with
+            // quotes into the wiki. The page is read from the XML export,
+            // where a quote may show up entity-encoded, so strip both forms.
+            $sType = preg_replace('/(&quot;|")/', '', $sType);
+            // sanity check, in case somebody added garbage in the wiki
+            if (preg_match('/^\\w+$/', $sClass) < 1
+                || preg_match('/^\\w+$/', $sType) < 1
+            ) {
+                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
+                // Abort with a failure status so callers can detect the error.
+                exit(1);
+            }
+            // blacklisting: disallow certain class/type combinations
+            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass], true)) {
+                continue;
+            }
+            // whitelisting: if class is in whitelist, allow only tags in the list
+            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass], true)) {
+                continue;
+            }
+            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
+
+            // Only the operators 'near' and 'in' are passed on; any other
+            // value creates a plain amenity entry without operator.
+            $sOperator = trim($aMatch[4]);
+            if ($sOperator === 'near' || $sOperator === 'in') {
+                printf(
+                    "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', '%s');\n",
+                    pg_escape_string($sLabel),
+                    $sTrans,
+                    $sClass,
+                    $sType,
+                    $sOperator
+                );
+            } else {
+                printf(
+                    "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
+                    pg_escape_string($sLabel),
+                    $sTrans,
+                    $sClass,
+                    $sType
+                );
+            }
+        }
+    }
+
+    // Temporary index used while the per-pair tables are filled from placex;
+    // it is dropped again at the end of the generated script.
+    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
+
+    // The tablespace clause is the same for every pair, so build it once.
+    $sTablespaceSetting = getSetting('TABLESPACE_AUX_DATA');
+    $sTablespace = $sTablespaceSetting ? ' TABLESPACE '.$sTablespaceSetting : '';
+
+    foreach ($aPairs as $aPair) {
+        $sPairClass = pg_escape_string($aPair[0]);
+        $sPairType = pg_escape_string($aPair[1]);
+
+        // The tablespace clause is passed as an argument rather than spliced
+        // into the format string, so a '%' in it cannot break the output.
+        printf(
+            'CREATE TABLE place_classtype_%s_%s'
+            . '%s'
+            . ' AS'
+            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
+            . " WHERE class = '%s' AND type = '%s'"
+            . ";\n",
+            $sPairClass,
+            $sPairType,
+            $sTablespace,
+            $sPairClass,
+            $sPairType
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
+            . ' ON place_classtype_%s_%s USING GIST (centroid)'
+            . '%s'
+            . ";\n",
+            $sPairClass,
+            $sPairType,
+            $sPairClass,
+            $sPairType,
+            $sTablespace
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
+            . ' ON place_classtype_%s_%s USING btree(place_id)'
+            . '%s'
+            . ";\n",
+            $sPairClass,
+            $sPairType,
+            $sPairClass,
+            $sPairType,
+            $sTablespace
+        );
+
+        printf(
+            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
+            . ";\n",
+            $sPairClass,
+            $sPairType,
+            getSetting('DATABASE_WEBUSER')
+        );
+    }
+
+    echo 'DROP INDEX idx_placex_classtype;';
+}
\ No newline at end of file
Functions to import special phrases into the database.
"""
import logging
+import os
import re
+import subprocess
import sys
+import json
+from os.path import isfile
from psycopg2.sql import Identifier, Literal, SQL
-from settings.phrase_settings import BLACK_LIST, WHITE_LIST
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-def import_from_wiki(config, db_connection, languages=None):
+def import_from_wiki(args, db_connection, languages=None):
+ # pylint: disable-msg=too-many-locals
"""
Iterate through all specified languages and
extract corresponding special phrases from the wiki.
"""
+ black_list, white_list = _load_white_and_black_lists(args)
+
#Compile the match regex to increase performance for the following loop.
occurence_pattern = re.compile(
r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
)
sanity_check_pattern = re.compile(r'^\w+$')
- languages = _get_languages(config) if not languages else languages
+ languages = _get_languages(args.config) if not languages else languages
#array for pairs of class/type
pairs = dict()
_check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
#blacklisting: disallow certain class/type combinations
- if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
+ if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
continue
#whitelisting: if class is in whitelist, allow only tags in the list
- if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
+ if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
continue
#add class/type to the pairs dict
db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
)
- _create_place_classtype_table_and_indexes(db_connection, config, pairs)
+ _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
db_connection.commit()
LOG.warning('Import done.')
+def _load_white_and_black_lists(args):
+ """
+ Load white and black lists from phrases-settings.json.
+ """
+ config = args.config
+ settings_path = str(config.config_dir)+'/phrase-settings.json'
+
+ if config.PHRASE_CONFIG:
+ settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
+
+ with open(settings_path, "r") as json_settings:
+ settings = json.load(json_settings)
+ return settings['blackList'], settings['whiteList']
def _get_languages(config):
"""
db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
.format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
Identifier(config.DATABASE_WEBUSER)))
+
+def _convert_php_settings_if_needed(args, file_path):
+ """
+ Convert php settings file of special phrases to json file if it is still in php format.
+ """
+ file, extension = os.path.splitext(file_path)
+ json_file_path = f'{file}.json'
+ if extension == '.php' and not isfile(json_file_path):
+ try:
+ subprocess.run(['/usr/bin/env', 'php', '-Cq',
+ args.phplib_dir / 'migration/phraseSettingsToJson.php',
+ file_path], check=True)
+ LOG.warning('special_phrase configuration file has been converted to json.')
+ return json_file_path
+ except subprocess.CalledProcessError:
+ LOG.error('Error while converting %s to json.', file_path)
+ raise
+ else:
+ return json_file_path