]> git.openstreetmap.org Git - nominatim.git/commitdiff
Changed phrase_settings.py to phrase-settings.json and added migration function for...
authorAntoJvlt <antonin.jolivat@gmail.com>
Sun, 21 Mar 2021 23:07:55 +0000 (00:07 +0100)
committerAntoJvlt <antonin.jolivat@gmail.com>
Tue, 23 Mar 2021 22:30:39 +0000 (23:30 +0100)
CMakeLists.txt
lib-php/admin/specialphrases.php [new file with mode: 0644]
lib-php/migration/phraseSettingsToJson.php
nominatim/clicmd/special_phrases.py
nominatim/tools/special_phrases.py
settings/__init__.py [deleted file]
settings/phrase-settings.json [new file with mode: 0644]
settings/phrase_settings.py [deleted file]

index 45881a4ab47babdf45e7da074c1757cda0153a32..1c6336a4c8c23c303d47851361ebfe0e434158f4 100644 (file)
@@ -216,7 +216,7 @@ endif()
 include(GNUInstallDirs)
 set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
 set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
-set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
+set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
 
 if (BUILD_IMPORTER)
     configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
@@ -257,9 +257,8 @@ if (BUILD_API)
 endif()
 
 install(FILES settings/env.defaults
-              settings/__init__.py
               settings/address-levels.json
-              settings/phrase_settings.py
+              settings/phrase-settings.json
               settings/import-admin.style
               settings/import-street.style
               settings/import-address.style
diff --git a/lib-php/admin/specialphrases.php b/lib-php/admin/specialphrases.php
new file mode 100644 (file)
index 0000000..9b90387
--- /dev/null
@@ -0,0 +1,163 @@
+<?php
+@define('CONST_LibDir', dirname(dirname(__FILE__)));
+
+require_once(CONST_LibDir.'/init-cmd.php');
+ini_set('memory_limit', '800M');
+ini_set('display_errors', 'stderr');
+
+$aCMDOptions
+= array(
+   'Import and export special phrases',
+   array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
+   array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
+   array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
+   array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
+   array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
+  );
+getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
+
+loadSettings($aCMDResult['project-dir'] ?? getcwd());
+setupHTTPProxy();
+
+include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
+
+if ($aCMDResult['wiki-import']) {
+    $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
+    $aPairs = array();
+
+    $sLanguageIn = getSetting(
+        'LANGUAGES',
+        'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
+        'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
+    );
+
+    foreach (explode(',', $sLanguageIn) as $sLanguage) {
+        $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
+        $sWikiPageXML = file_get_contents($sURL);
+
+        if (!preg_match_all(
+            '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
+            $sWikiPageXML,
+            $aMatches,
+            PREG_SET_ORDER
+        )) {
+            continue;
+        }
+
+        foreach ($aMatches as $aMatch) {
+            $sLabel = trim($aMatch[1]);
+            if ($oNormalizer !== null) {
+                $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
+            } else {
+                $sTrans = null;
+            }
+            $sClass = trim($aMatch[2]);
+            $sType = trim($aMatch[3]);
+            // hack around a bug where building=yes was imported with
+            // quotes into the wiki
+            $sType = preg_replace('/(&quot;|")/', '', $sType);
+            // sanity check, in case somebody added garbage in the wiki
+            if (preg_match('/^\\w+$/', $sClass) < 1
+                || preg_match('/^\\w+$/', $sType) < 1
+            ) {
+                trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
+                exit;
+            }
+            // blacklisting: disallow certain class/type combinations
+            if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
+                // fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
+                continue;
+            }
+            // whitelisting: if class is in whitelist, allow only tags in the list
+            if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
+                // fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
+                continue;
+            }
+            $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
+
+            switch (trim($aMatch[4])) {
+                case 'near':
+                    printf(
+                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+                case 'in':
+                    printf(
+                        "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+                default:
+                    printf(
+                        "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
+                        pg_escape_string($sLabel),
+                        $sTrans,
+                        $sClass,
+                        $sType
+                    );
+                    break;
+            }
+        }
+    }
+
+    echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
+
+    foreach ($aPairs as $aPair) {
+        $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
+        if ($sql_tablespace) {
+            $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
+        }
+
+        printf(
+            'CREATE TABLE place_classtype_%s_%s'
+            . $sql_tablespace
+            . ' AS'
+            . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
+            . " WHERE class = '%s' AND type = '%s'"
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_centroid'
+            . ' ON place_classtype_%s_%s USING GIST (centroid)'
+            . $sql_tablespace
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'CREATE INDEX idx_place_classtype_%s_%s_place_id'
+            . ' ON place_classtype_%s_%s USING btree(place_id)'
+            . $sql_tablespace
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1])
+        );
+
+        printf(
+            'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
+            . ";\n",
+            pg_escape_string($aPair[0]),
+            pg_escape_string($aPair[1]),
+            getSetting('DATABASE_WEBUSER')
+        );
+    }
+
+    echo 'DROP INDEX idx_placex_classtype;';
+}
\ No newline at end of file
index 15c49f0aa0094fa87bf452eed9eb5b9463e78714..187e3fc6cc0600cd601724b1e913f457baa3fcfd 100644 (file)
@@ -1,9 +1,16 @@
 <?php
 
 $phpPhraseSettingsFile = $argv[1];
+<<<<<<< HEAD
 $jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
 
 if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
+=======
+$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile)."/".basename($phpPhraseSettingsFile, ".php").".json";
+
+if(file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile))
+{
+>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
     include $phpPhraseSettingsFile;
 
     $data = array();
@@ -16,4 +23,8 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)
     $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
     fwrite($jsonFile, json_encode($data));
     fclose($jsonFile);
+<<<<<<< HEAD
+}
+=======
 }
+>>>>>>> 3d939458... Changed phrase_settings.py to phrase-settings.json and added migration function for old php settings file.
index b7e0f5dc6ce825318ef0843669e56eb472312449..8198a4c39cf49ebde42e881c2133c0fe01fc95b5 100644 (file)
@@ -25,5 +25,5 @@ class ImportSpecialPhrases:
         if args.from_wiki:
             LOG.warning('Special phrases importation starting')
             with connect(args.config.get_libpq_dsn()) as db_connection:
-                import_from_wiki(args.config, db_connection)
+                import_from_wiki(args, db_connection)
         return 0
index a70d304770e049bedff9acc72cfca5ce06ea61cc..3dead38b9ffb5cd50c91a613f209ecd59afc0427 100644 (file)
@@ -2,26 +2,32 @@
     Functions to import special phrases into the database.
 """
 import logging
+import os
 import re
+import subprocess
 import sys
+import json
+from os.path import isfile
 from psycopg2.sql import Identifier, Literal, SQL
-from settings.phrase_settings import BLACK_LIST, WHITE_LIST
 from nominatim.tools.exec_utils import get_url
 
 LOG = logging.getLogger()
 
-def import_from_wiki(config, db_connection, languages=None):
+def import_from_wiki(args, db_connection, languages=None):
+    # pylint: disable-msg=too-many-locals
     """
         Iterate through all specified languages and
         extract corresponding special phrases from the wiki.
     """
+    black_list, white_list = _load_white_and_black_lists(args)
+
     #Compile the match regex to increase performance for the following loop.
     occurence_pattern = re.compile(
         r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
     )
     sanity_check_pattern = re.compile(r'^\w+$')
 
-    languages = _get_languages(config) if not languages else languages
+    languages = _get_languages(args.config) if not languages else languages
 
     #array for pairs of class/type
     pairs = dict()
@@ -43,10 +49,10 @@ def import_from_wiki(config, db_connection, languages=None):
             _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
 
             #blacklisting: disallow certain class/type combinations
-            if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
+            if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
                 continue
             #whitelisting: if class is in whitelist, allow only tags in the list
-            if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
+            if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
                 continue
 
             #add class/type to the pairs dict
@@ -56,10 +62,23 @@ def import_from_wiki(config, db_connection, languages=None):
                 db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
             )
 
-    _create_place_classtype_table_and_indexes(db_connection, config, pairs)
+    _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
     db_connection.commit()
     LOG.warning('Import done.')
 
+def _load_white_and_black_lists(args):
+    """
+        Load white and black lists from phrase-settings.json.
+    """
+    config = args.config
+    settings_path = str(config.config_dir)+'/phrase-settings.json'
+
+    if config.PHRASE_CONFIG:
+        settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
+
+    with open(settings_path, "r") as json_settings:
+        settings = json.load(json_settings)
+    return settings['blackList'], settings['whiteList']
 
 def _get_languages(config):
     """
@@ -199,3 +218,22 @@ def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
         db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
                           .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
                                   Identifier(config.DATABASE_WEBUSER)))
+
+def _convert_php_settings_if_needed(args, file_path):
+    """
+        Convert php settings file of special phrases to json file if it is still in php format.
+    """
+    file, extension = os.path.splitext(file_path)
+    json_file_path = f'{file}.json'
+    if extension == '.php' and not isfile(json_file_path):
+        try:
+            subprocess.run(['/usr/bin/env', 'php', '-Cq',
+                            args.phplib_dir / 'migration/phraseSettingsToJson.php',
+                            file_path], check=True)
+            LOG.warning('special_phrase configuration file has been converted to json.')
+            return json_file_path
+        except subprocess.CalledProcessError:
+            LOG.error('Error while converting %s to json.', file_path)
+            raise
+    else:
+        return json_file_path
diff --git a/settings/__init__.py b/settings/__init__.py
deleted file mode 100644 (file)
index b49d97f..0000000
+++ /dev/null
@@ -1,3 +0,0 @@
-"""
-    Module for settings
-"""
\ No newline at end of file
diff --git a/settings/phrase-settings.json b/settings/phrase-settings.json
new file mode 100644 (file)
index 0000000..a097dca
--- /dev/null
@@ -0,0 +1,25 @@
+{
+    "Comments": [
+        "The black list corresponds to class/type combinations to exclude.",
+        "If a class is in the white list then all types will",
+        "be ignored except the ones given in the list.",
+        "Also use this list to exclude an entire class from special phrases."
+    ],
+    "blackList": {
+        "boundary": [
+            "administrative"
+        ],
+        "place": [
+            "house",
+            "houses"
+        ]
+    },
+    "whiteList": {
+        "highway": [
+            "bus_stop",
+            "rest_area",
+            "raceway"
+        ],
+        "building": []
+    }
+}
diff --git a/settings/phrase_settings.py b/settings/phrase_settings.py
deleted file mode 100644 (file)
index 59a4e7c..0000000
+++ /dev/null
@@ -1,26 +0,0 @@
-"""
-    These settings control the import of special phrases from the wiki.
-"""
-#class/type combinations to exclude
-BLACK_LIST = {
-    'bounday': [
-        'administrative'
-    ],
-    'place': [
-        'house',
-        'houses'
-    ]
-}
-
-#If a class is in the white list then all types will
-#be ignored except the ones given in the list.
-#Also use this list to exclude an entire class from
-#special phrases.
-WHITE_LIST = {
-    'highway': [
-        'bus_stop',
-        'rest_area',
-        'raceway'
-    ],
-    'building': []
-}