steps:
- name: Install prerequisites
run: |
- sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2
+ sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 python3-icu
shell: bash
- name: Download dependencies
working-directory: data-env
- name: Import special phrases
- run: nominatim special-phrases --from-wiki | psql -d nominatim
+ run: nominatim special-phrases --import-from-wiki
working-directory: data-env
- name: Check import
data/osmosischange.osc
.vagrant
+data/country_osm_grid.sql.gz
[MASTER]
extension-pkg-whitelist=osmium
+ignored-modules=icu
[MESSAGES CONTROL]
export.php
query.php
setup.php
- specialphrases.php
update.php
warm.php
)
install(FILES settings/env.defaults
settings/address-levels.json
- settings/phrase_settings.php
+ settings/phrase-settings.json
settings/import-admin.style
settings/import-street.style
settings/import-address.style
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
-you also need to enable these key phrases like this:
+you also need to import these key phrases like this:
- nominatim special-phrases --from-wiki > specialphrases.sql
- psql -d nominatim -f specialphrases.sql
+ nominatim special-phrases --import-from-wiki
Note that this command downloads the phrases from the wiki link above. You
need internet access for this step.
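
For context, the command fetches one page per language from the wiki's Special:Export endpoint. A minimal sketch of that download, using plain `urllib` instead of Nominatim's internal helper (the function name below is illustrative only), could look like this:

```python
import urllib.request

def fetch_special_phrases(lang):
    # Each language has its own phrase page, exported as wikitext wrapped in XML.
    url = ('https://wiki.openstreetmap.org/wiki/'
           'Special:Export/Nominatim/Special_Phrases/' + lang.upper())
    with urllib.request.urlopen(url) as response:
        return response.read().decode('utf-8')

print(fetch_special_phrases('en')[:200])
```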
* [proj](https://proj.org/)
* [bzip2](http://www.bzip.org/)
* [zlib](https://www.zlib.net/)
+ * [ICU](http://site.icu-project.org/)
* [Boost libraries](https://www.boost.org/), including system and filesystem
* PostgreSQL client libraries
* a recent C++ compiler (gcc 5+ or Clang 3.8+)
* [Python Dotenv](https://github.com/theskumar/python-dotenv)
* [psutil](https://github.com/giampaolo/psutil)
* [Jinja2](https://palletsprojects.com/p/jinja/)
+ * [PyICU](https://pypi.org/project/PyICU/)
* [PHP](https://php.net) (7.0 or later)
* PHP-pgsql
* PHP-intl (bundled with PHP)
+
<?php
@define('CONST_LibDir', dirname(dirname(__FILE__)));
require_once(CONST_LibDir.'/init-cmd.php');
-ini_set('memory_limit', '800M');
-ini_set('display_errors', 'stderr');
-
-$aCMDOptions
-= array(
- 'Import and export special phrases',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
- array('wiki-import', '', 0, 1, 0, 0, 'bool', 'Create import script for search phrases '),
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- );
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-setupHTTPProxy();
-
-include(getSettingConfig('PHRASE_CONFIG', 'phrase_settings.php'));
-
-if ($aCMDResult['wiki-import']) {
- $oNormalizer = Transliterator::createFromRules(getSetting('TERM_NORMALIZATION'));
- $aPairs = array();
-
- $sLanguageIn = getSetting(
- 'LANGUAGES',
- 'af,ar,br,ca,cs,de,en,es,et,eu,fa,fi,fr,gl,hr,hu,'.
- 'ia,is,it,ja,mk,nl,no,pl,ps,pt,ru,sk,sl,sv,uk,vi'
- );
-
- foreach (explode(',', $sLanguageIn) as $sLanguage) {
- $sURL = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'.strtoupper($sLanguage);
- $sWikiPageXML = file_get_contents($sURL);
-
- if (!preg_match_all(
- '#\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([^|]+) \\|\\| ([\\-YN])#',
- $sWikiPageXML,
- $aMatches,
- PREG_SET_ORDER
- )) {
- continue;
- }
-
- foreach ($aMatches as $aMatch) {
- $sLabel = trim($aMatch[1]);
- if ($oNormalizer !== null) {
- $sTrans = pg_escape_string($oNormalizer->transliterate($sLabel));
- } else {
- $sTrans = null;
- }
- $sClass = trim($aMatch[2]);
- $sType = trim($aMatch[3]);
- // hack around a bug where building=yes was imported with
- // quotes into the wiki
- $sType = preg_replace('/("|&quot;)/', '', $sType);
- // sanity check, in case somebody added garbage in the wiki
- if (preg_match('/^\\w+$/', $sClass) < 1
- || preg_match('/^\\w+$/', $sType) < 1
- ) {
- trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
- exit;
- }
- // blacklisting: disallow certain class/type combinations
- if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
- // fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
- continue;
- }
- // whitelisting: if class is in whitelist, allow only tags in the list
- if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
- // fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
- continue;
- }
- $aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
-
- switch (trim($aMatch[4])) {
- case 'near':
- printf(
- "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'near');\n",
- pg_escape_string($sLabel),
- $sTrans,
- $sClass,
- $sType
- );
- break;
- case 'in':
- printf(
- "SELECT getorcreate_amenityoperator(make_standard_name('%s'), '%s', '%s', '%s', 'in');\n",
- pg_escape_string($sLabel),
- $sTrans,
- $sClass,
- $sType
- );
- break;
- default:
- printf(
- "SELECT getorcreate_amenity(make_standard_name('%s'), '%s', '%s', '%s');\n",
- pg_escape_string($sLabel),
- $sTrans,
- $sClass,
- $sType
- );
- break;
- }
- }
- }
-
- echo 'CREATE INDEX idx_placex_classtype ON placex (class, type);';
-
- foreach ($aPairs as $aPair) {
- $sql_tablespace = getSetting('TABLESPACE_AUX_DATA');
- if ($sql_tablespace) {
- $sql_tablespace = ' TABLESPACE '.$sql_tablespace;
- }
-
- printf(
- 'CREATE TABLE place_classtype_%s_%s'
- . $sql_tablespace
- . ' AS'
- . ' SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex'
- . " WHERE class = '%s' AND type = '%s'"
- . ";\n",
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1]),
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1])
- );
-
- printf(
- 'CREATE INDEX idx_place_classtype_%s_%s_centroid'
- . ' ON place_classtype_%s_%s USING GIST (centroid)'
- . $sql_tablespace
- . ";\n",
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1]),
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1])
- );
-
- printf(
- 'CREATE INDEX idx_place_classtype_%s_%s_place_id'
- . ' ON place_classtype_%s_%s USING btree(place_id)'
- . $sql_tablespace
- . ";\n",
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1]),
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1])
- );
- printf(
- 'GRANT SELECT ON place_classtype_%s_%s TO "%s"'
- . ";\n",
- pg_escape_string($aPair[0]),
- pg_escape_string($aPair[1]),
- getSetting('DATABASE_WEBUSER')
- );
- }
+loadSettings(getcwd());
- echo 'DROP INDEX idx_placex_classtype;';
-}
+(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
+ ->addParams('special-phrases', '--import-from-wiki')
+ ->run();
--- /dev/null
+<?php
+
+$phpPhraseSettingsFile = $argv[1];
+$jsonPhraseSettingsFile = dirname($phpPhraseSettingsFile).'/'.basename($phpPhraseSettingsFile, '.php').'.json';
+
+if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile)) {
+ include $phpPhraseSettingsFile;
+
+ $data = array();
+
+ if (isset($aTagsBlacklist))
+ $data['blackList'] = $aTagsBlacklist;
+ if (isset($aTagsWhitelist))
+ $data['whiteList'] = $aTagsWhitelist;
+
+ $jsonFile = fopen($jsonPhraseSettingsFile, 'w');
+ fwrite($jsonFile, json_encode($data));
+ fclose($jsonFile);
+}
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
-
-
-class SetupSpecialPhrases:
- """\
- Maintain special phrases.
- """
-
- @staticmethod
- def add_args(parser):
- group = parser.add_argument_group('Input arguments')
- group.add_argument('--from-wiki', action='store_true',
- help='Pull special phrases from the OSM wiki.')
- group = parser.add_argument_group('Output arguments')
- group.add_argument('-o', '--output', default='-',
- help="""File to write the preprocessed phrases to.
- If omitted, it will be written to stdout.""")
-
- @staticmethod
- def run(args):
- if args.output != '-':
- raise NotImplementedError('Only output to stdout is currently implemented.')
- return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)
-
-
class UpdateAddData:
"""\
Add additional data from a file or an online source.
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)
- parser.add_subcommand('special-phrases', SetupSpecialPhrases)
+ parser.add_subcommand('special-phrases', clicmd.ImportSpecialPhrases)
parser.add_subcommand('add-data', UpdateAddData)
parser.add_subcommand('index', clicmd.UpdateIndex)
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition
+from .special_phrases import ImportSpecialPhrases
--- /dev/null
+"""
+ Implementation of the 'special-phrases' command.
+"""
+import logging
+from nominatim.tools.special_phrases import SpecialPhrasesImporter
+from nominatim.db.connection import connect
+
+LOG = logging.getLogger()
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+
+class ImportSpecialPhrases:
+ """\
+ Import special phrases.
+ """
+ @staticmethod
+ def add_args(parser):
+ group = parser.add_argument_group('Input arguments')
+ group.add_argument('--import-from-wiki', action='store_true',
+ help='Import special phrases from the OSM wiki to the database.')
+
+ @staticmethod
+ def run(args):
+ if args.import_from_wiki:
+ LOG.warning('Special phrases importation starting')
+ with connect(args.config.get_libpq_dsn()) as db_connection:
+ SpecialPhrasesImporter(
+ args.config, args.phplib_dir, db_connection
+ ).import_from_wiki()
+ return 0
--- /dev/null
+"""
+ Functions to import special phrases into the database.
+"""
+import logging
+import os
+from pathlib import Path
+import re
+import subprocess
+import json
+from os.path import isfile
+from icu import Transliterator
+from psycopg2.sql import Identifier, Literal, SQL
+from nominatim.tools.exec_utils import get_url
+from nominatim.errors import UsageError
+
+LOG = logging.getLogger()
+class SpecialPhrasesImporter():
+ # pylint: disable-msg=too-many-instance-attributes
+ # pylint: disable-msg=too-few-public-methods
+ """
+ Class handling the import of special phrases.
+ """
+ def __init__(self, config, phplib_dir, db_connection) -> None:
+ self.db_connection = db_connection
+ self.config = config
+ self.phplib_dir = phplib_dir
+ self.black_list, self.white_list = self._load_white_and_black_lists()
+ #Compile the regex here to increase performance.
+ self.occurence_pattern = re.compile(
+ r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
+ )
+ self.sanity_check_pattern = re.compile(r'^\w+$')
+ self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
+ self.config.TERM_NORMALIZATION)
+
+ def import_from_wiki(self, languages=None):
+ """
+ Iterate through all specified languages and
+ extract corresponding special phrases from the wiki.
+ """
+ if languages is not None and not isinstance(languages, list):
+ raise TypeError('The \'languages\' argument should be of type list.')
+
+ #Get all languages to process.
+ languages = self._load_languages() if not languages else languages
+
+ #Store pairs of class/type for further processing
+ class_type_pairs = set()
+
+ for lang in languages:
+ LOG.warning('Import phrases for lang: %s', lang)
+ wiki_page_xml_content = SpecialPhrasesImporter._get_wiki_content(lang)
+ class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
+
+ self._create_place_classtype_table_and_indexes(class_type_pairs)
+ self.db_connection.commit()
+ LOG.warning('Import done.')
+
+ def _load_white_and_black_lists(self):
+ """
+ Load white and black lists from phrase-settings.json.
+ """
+ settings_path = (self.config.config_dir / 'phrase-settings.json').resolve()
+
+ if self.config.PHRASE_CONFIG:
+ settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
+
+ with open(settings_path, "r") as json_settings:
+ settings = json.load(json_settings)
+ return settings['blackList'], settings['whiteList']
+
+ def _load_languages(self):
+ """
+ Get the list of languages from the env config file,
+ or the default list if no languages are configured.
+ Special phrases are only extracted for these languages.
+ """
+ default_languages = [
+ 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+ 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+ 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+ 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
+ return self.config.LANGUAGES or default_languages
+
+ @staticmethod
+ def _get_wiki_content(lang):
+ """
+ Request and return the wiki page's content
+ corresponding to special phrases for a given lang.
+ Requested URL example:
+ https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+ """
+ url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
+ return get_url(url)
+
+ def _check_sanity(self, lang, phrase_class, phrase_type):
+ """
+ Check the sanity of the given inputs in case somebody added garbage to the wiki.
+ If a bad class or type is detected, a UsageError is raised.
+ """
+ type_matches = self.sanity_check_pattern.findall(phrase_type)
+ class_matches = self.sanity_check_pattern.findall(phrase_class)
+
+ if len(class_matches) < 1 or len(type_matches) < 1:
+ raise UsageError("Bad class/type for language {}: {}={}".format(
+ lang, phrase_class, phrase_type))
+
+ def _process_xml_content(self, xml_content, lang):
+ """
+ Process given xml content by extracting matching patterns.
+ Matching patterns are processed there and returned in a
+ set of class/type pairs.
+ """
+ #One match will be of format [label, class, type, operator, plural]
+ matches = self.occurence_pattern.findall(xml_content)
+ #Store pairs of class/type for further processing
+ class_type_pairs = set()
+
+ for match in matches:
+ phrase_label = match[0].strip()
+ normalized_label = self.transliterator.transliterate(phrase_label)
+ phrase_class = match[1].strip()
+ phrase_type = match[2].strip()
+ phrase_operator = match[3].strip()
+ #hack around a bug where building=yes was imported with quotes into the wiki
+ phrase_type = re.sub(r'\"|&quot;', '', phrase_type)
+
+ #sanity check, in case somebody added garbage in the wiki
+ self._check_sanity(lang, phrase_class, phrase_type)
+
+ #blacklisting: disallow certain class/type combinations
+ if (
+ phrase_class in self.black_list.keys() and
+ phrase_type in self.black_list[phrase_class]
+ ):
+ continue
+ #whitelisting: if class is in whitelist, allow only tags in the list
+ if (
+ phrase_class in self.white_list.keys() and
+ phrase_type not in self.white_list[phrase_class]
+ ):
+ continue
+
+ #add class/type to the pairs set
+ class_type_pairs.add((phrase_class, phrase_type))
+
+ self._process_amenity(
+ phrase_label, normalized_label, phrase_class,
+ phrase_type, phrase_operator
+ )
+
+ return class_type_pairs
+
+ def _process_amenity(self, phrase_label, normalized_label,
+ phrase_class, phrase_type, phrase_operator):
+ # pylint: disable-msg=too-many-arguments
+ """
+ Add phrase lookup and corresponding class and
+ type to the word table based on the operator.
+ """
+ with self.db_connection.cursor() as db_cursor:
+ if phrase_operator == 'near':
+ db_cursor.execute("""SELECT getorcreate_amenityoperator(
+ make_standard_name(%s), %s, %s, %s, 'near')""",
+ (phrase_label, normalized_label, phrase_class, phrase_type))
+ elif phrase_operator == 'in':
+ db_cursor.execute("""SELECT getorcreate_amenityoperator(
+ make_standard_name(%s), %s, %s, %s, 'in')""",
+ (phrase_label, normalized_label, phrase_class, phrase_type))
+ else:
+ db_cursor.execute("""SELECT getorcreate_amenity(
+ make_standard_name(%s), %s, %s, %s)""",
+ (phrase_label, normalized_label, phrase_class, phrase_type))
+
+
+ def _create_place_classtype_table_and_indexes(self, class_type_pairs):
+ """
+ Create table place_classtype for each given pair.
+ Also create indexes on place_id and centroid.
+ """
+ LOG.warning('Create tables and indexes...')
+
+ sql_tablespace = self.config.TABLESPACE_AUX_DATA
+ if sql_tablespace:
+ sql_tablespace = ' TABLESPACE '+sql_tablespace
+
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
+
+ for pair in class_type_pairs:
+ phrase_class = pair[0]
+ phrase_type = pair[1]
+
+ #Table creation
+ self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
+
+ #Indexes creation
+ self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type)
+
+ #Grant access on read to the web user.
+ self._grant_access_to_webuser(phrase_class, phrase_type)
+
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute("DROP INDEX idx_placex_classtype")
+
+
+ def _create_place_classtype_table(self, sql_tablespace, phrase_class, phrase_type):
+ """
+ Create the place_classtype table for the given phrase_class/phrase_type if it doesn't exist.
+ """
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL("""
+ CREATE TABLE IF NOT EXISTS {{}} {}
+ AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
+ WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
+ .format(Identifier(table_name), Literal(phrase_class),
+ Literal(phrase_type)))
+
+
+ def _create_place_classtype_indexes(self, sql_tablespace, phrase_class, phrase_type):
+ """
+ Create indexes on centroid and place_id for the place_classtype table.
+ """
+ index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+ base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+ #Index on centroid
+ if not self.db_connection.index_exists(index_prefix + 'centroid'):
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL("""
+ CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
+ .format(Identifier(index_prefix + 'centroid'),
+ Identifier(base_table)))
+
+ #Index on place_id
+ if not self.db_connection.index_exists(index_prefix + 'place_id'):
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(
+ """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
+ .format(Identifier(index_prefix + 'place_id'),
+ Identifier(base_table)))
+
+
+ def _grant_access_to_webuser(self, phrase_class, phrase_type):
+ """
+ Grant access on read to the table place_classtype for the webuser.
+ """
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
+ .format(Identifier(table_name),
+ Identifier(self.config.DATABASE_WEBUSER)))
+
+ def _convert_php_settings_if_needed(self, file_path):
+ """
+ Convert php settings file of special phrases to json file if it is still in php format.
+ """
+ if not isfile(file_path):
+ raise UsageError(str(file_path) + ' is not a valid file.')
+
+ file, extension = os.path.splitext(file_path)
+ json_file_path = Path(file + '.json').resolve()
+
+ if extension not in ('.php', '.json'):
+ raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file does not have a valid extension.')
+
+ if extension == '.php' and not isfile(json_file_path):
+ try:
+ subprocess.run(['/usr/bin/env', 'php', '-Cq',
+ (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(),
+ file_path], check=True)
+ LOG.warning('special_phrase configuration file has been converted to json.')
+ return json_file_path
+ except subprocess.CalledProcessError:
+ LOG.error('Error while converting %s to json.', file_path)
+ raise
+ else:
+ return json_file_path
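
The heart of the importer is the occurrence regex compiled in `__init__`. A small standalone sketch (the sample row is copied from the English test fixture further below) shows how one wiki table row decomposes into label, class, type, operator and plural flag:

```python
import re

# Same pattern as SpecialPhrasesImporter's occurrence regex.
ROW_PATTERN = re.compile(
    r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
)

sample_row = '| Zip Line near || aerialway || zip_line || near || N'
label, phrase_class, phrase_type, operator, plural = (
    part.strip() for part in ROW_PATTERN.findall(sample_row)[0]
)

print(label, phrase_class, phrase_type, operator, plural)
# Zip Line near aerialway zip_line near N
```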
NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import.
-# When unset, the internal default settings from 'settings/phrase_settings.php'
+# When unset, the internal default settings from 'settings/phrase-settings.json'
# are used.
NOMINATIM_PHRASE_CONFIG=
--- /dev/null
+{
+ "Comments": [
+ "Black list correspond to class/type combinations to exclude",
+ "If a class is in the white list then all types will",
+ "be ignored except the ones given in the list.",
+ "Also use this list to exclude an entire class from special phrases."
+ ],
+ "blackList": {
+ "bounday": [
+ "administrative"
+ ],
+ "place": [
+ "house",
+ "houses"
+ ]
+ },
+ "whiteList": {
+ "highway": [
+ "bus_stop",
+ "rest_area",
+ "raceway'"
+ ],
+ "building": []
+ }
+}
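
To illustrate how these lists are used, here is a small sketch that mirrors the black/white-list checks in `SpecialPhrasesImporter._process_xml_content` (the settings path below is an assumption, relative to the source tree):

```python
import json

# Load the lists from the settings file added above.
with open('settings/phrase-settings.json') as fd:
    settings = json.load(fd)

black_list = settings['blackList']
white_list = settings['whiteList']

def is_allowed(phrase_class, phrase_type):
    # Blacklisted class/type combinations are rejected outright.
    if phrase_type in black_list.get(phrase_class, []):
        return False
    # If a class is whitelisted, only the listed types pass; an empty
    # list therefore excludes the whole class.
    if phrase_class in white_list and phrase_type not in white_list[phrase_class]:
        return False
    return True

print(is_allowed('place', 'house'))       # False: blacklisted
print(is_allowed('highway', 'bus_stop'))  # True: whitelisted type
print(is_allowed('building', 'yes'))      # False: class whitelisted with an empty list
```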
import psycopg2
import psycopg2.extras
import pytest
+import tempfile
SRC_DIR = Path(__file__) / '..' / '..' / '..'
def src_dir():
return SRC_DIR.resolve()
+@pytest.fixture
+def tmp_phplib_dir():
+ with tempfile.TemporaryDirectory() as phpdir:
+ (Path(phpdir) / 'admin').mkdir()
+
+ yield Path(phpdir)
+
@pytest.fixture
def status_table(temp_db_conn):
""" Create an empty version of the status table and
@pytest.mark.parametrize("command,script", [
- (('special-phrases',), 'specialphrases'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('export',), 'export')
])
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
+def test_special_phrases_command(temp_db, mock_func_factory):
+ func = mock_func_factory(nominatim.clicmd.special_phrases.SpecialPhrasesImporter, 'import_from_wiki')
+
+ call_nominatim('special-phrases', '--import-from-wiki')
+
+ assert func.called == 1
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
import nominatim.tools.exec_utils as exec_utils
-@pytest.fixture
-def tmp_phplib_dir():
- with tempfile.TemporaryDirectory() as phpdir:
- (Path(phpdir) / 'admin').mkdir()
-
- yield Path(phpdir)
-
@pytest.fixture
def nominatim_env(tmp_phplib_dir, def_config):
class _NominatimEnv:
--- /dev/null
+"""
+ Tests for import special phrases methods
+ of the class SpecialPhrasesImporter.
+"""
+from pathlib import Path
+import tempfile
+from shutil import copyfile
+import pytest
+from nominatim.errors import UsageError
+from nominatim.tools.special_phrases import SpecialPhrasesImporter
+
+TEST_BASE_DIR = Path(__file__) / '..' / '..'
+
+def test_check_sanity_class(special_phrases_importer):
+ """
+ Check for _check_sanity() method.
+ If a wrong class or type is given, a UsageError should be raised.
+ If a good class and type are given, nothing special happens.
+ """
+ with pytest.raises(UsageError) as wrong_class:
+ special_phrases_importer._check_sanity('en', '', 'type')
+
+ with pytest.raises(UsageError) as wrong_type:
+ special_phrases_importer._check_sanity('en', 'class', '')
+
+ special_phrases_importer._check_sanity('en', 'class', 'type')
+
+ assert wrong_class and wrong_type
+
+def test_load_white_and_black_lists(special_phrases_importer):
+ """
+ Test that _load_white_and_black_lists() returns the
+ black list and the white list and that both are of dict type.
+ """
+ black_list, white_list = special_phrases_importer._load_white_and_black_lists()
+
+ assert isinstance(black_list, dict) and isinstance(white_list, dict)
+
+def test_convert_php_settings(special_phrases_importer):
+ """
+ Test that _convert_php_settings_if_needed() converts the given
+ php file to a json file.
+ """
+ php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
+
+ with tempfile.TemporaryDirectory() as temp_dir:
+ temp_settings = (Path(temp_dir) / 'phrase_settings.php').resolve()
+ copyfile(php_file, temp_settings)
+ special_phrases_importer._convert_php_settings_if_needed(temp_settings)
+
+ assert (Path(temp_dir) / 'phrase_settings.json').is_file()
+
+def test_convert_settings_wrong_file(special_phrases_importer):
+ """
+ Test that _convert_php_settings_if_needed() raises an exception
+ if the given file is not a valid file.
+ """
+
+ with pytest.raises(UsageError) as exceptioninfos:
+ special_phrases_importer._convert_php_settings_if_needed('random_file')
+
+ assert str(exceptioninfos.value) == 'random_file is not a valid file.'
+
+def test_convert_settings_json_already_exist(special_phrases_importer):
+ """
+ Test that if '_convert_php_settings_if_needed' is given a php file path
+ and the corresponding json file already exists, the json path is returned.
+ """
+ php_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.php').resolve()
+ json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase_settings.json').resolve()
+
+ returned = special_phrases_importer._convert_php_settings_if_needed(php_file)
+
+ assert returned == json_file
+
+def test_convert_settings_giving_json(special_phrases_importer):
+ """
+ Test that if '_convert_php_settings_if_needed' is given a json file path,
+ the same path is returned directly.
+ """
+ json_file = (TEST_BASE_DIR / 'testfiles' / 'phrase-settings.json').resolve()
+
+ returned = special_phrases_importer._convert_php_settings_if_needed(json_file)
+
+ assert returned == json_file
+
+def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
+ word_table, temp_db_conn):
+ """
+ Test that _process_amenity() executes the
+ getorcreate_amenityoperator() SQL function and that
+ the two different operators are handled correctly.
+ """
+ special_phrases_importer._process_amenity('', '', '', '', 'near')
+ special_phrases_importer._process_amenity('', '', '', '', 'in')
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
+ results = temp_db_cursor.fetchall()
+
+ assert len(results) == 2
+
+def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
+ temp_db_conn):
+ """
+ Test that _process_amenity() executes the
+ getorcreate_amenity() SQL function.
+ """
+ special_phrases_importer._process_amenity('', '', '', '', '')
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
+ result = temp_db_cursor.fetchone()
+
+ assert result
+
+def test_create_place_classtype_indexes(temp_db_conn, special_phrases_importer):
+ """
+ Test that _create_place_classtype_indexes() creates the
+ place_id and centroid indexes on the right place_classtype table.
+ """
+ phrase_class = 'class'
+ phrase_type = 'type'
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("CREATE EXTENSION postgis;")
+ temp_db_cursor.execute('CREATE TABLE {}(place_id BIGINT, centroid GEOMETRY)'.format(table_name))
+
+ special_phrases_importer._create_place_classtype_indexes('', phrase_class, phrase_type)
+
+ assert check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type)
+
+def test_create_place_classtype_table(temp_db_conn, placex_table, special_phrases_importer):
+ """
+ Test that _create_place_classtype_table() creates
+ the right place_classtype table.
+ """
+ phrase_class = 'class'
+ phrase_type = 'type'
+ special_phrases_importer._create_place_classtype_table('', phrase_class, phrase_type)
+
+ assert check_table_exist(temp_db_conn, phrase_class, phrase_type)
+
+def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_importer):
+ """
+ Test that _grant_access_to_webuser() grants
+ read access to the web user.
+ """
+ phrase_class = 'class'
+ phrase_type = 'type'
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute('CREATE TABLE {}()'.format(table_name))
+
+ special_phrases_importer._grant_access_to_webuser(phrase_class, phrase_type)
+
+ assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
+
+def test_create_place_classtype_table_and_indexes(
+ temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
+ getorcreate_amenityoperator_funcs, special_phrases_importer):
+ """
+ Test that _create_place_classtype_table_and_indexes()
+ creates the right place_classtype tables with place_id and
+ centroid indexes and grants access to the web user
+ for the given set of pairs.
+ """
+ pairs = set([('class1', 'type1'), ('class2', 'type2')])
+
+ special_phrases_importer._create_place_classtype_table_and_indexes(pairs)
+
+ for pair in pairs:
+ assert check_table_exist(temp_db_conn, pair[0], pair[1])
+ assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
+ assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
+
+def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
+ getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+ """
+ Test that _process_xml_content() processes the given xml content correctly
+ by executing the right SQL functions for amenities and
+ by returning the right set of pairs.
+ """
+ class_test = 'aerialway'
+ type_test = 'zip_line'
+
+ #Converted output set to a dict for easy assert further.
+ results = dict(special_phrases_importer._process_xml_content(get_test_xml_wiki_content(), 'en'))
+
+ assert check_amenities_with_op(temp_db_conn)
+ assert check_amenities_without_op(temp_db_conn)
+ assert results[class_test] and type_test in results.values()
+
+def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
+ getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+ """
+ Check that the main import_from_wiki() method executes correctly.
+ It should create the place_classtype table, the place_id and centroid indexes,
+ grant access to the web user and execute the SQL functions for amenities.
+ """
+ monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
+ special_phrases_importer.import_from_wiki(['en'])
+
+ class_test = 'aerialway'
+ type_test = 'zip_line'
+
+ assert check_table_exist(temp_db_conn, class_test, type_test)
+ assert check_placeid_and_centroid_indexes(temp_db_conn, class_test, type_test)
+ assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
+ assert check_amenities_with_op(temp_db_conn)
+ assert check_amenities_without_op(temp_db_conn)
+
+def mock_get_wiki_content(lang):
+ """
+ Mock the _get_wiki_content() method to return
+ static xml test file content.
+ """
+ return get_test_xml_wiki_content()
+
+def get_test_xml_wiki_content():
+ """
+ return the content of the static xml test file.
+ """
+ xml_test_content_path = (TEST_BASE_DIR / 'testdata' / 'special_phrases_test_content.txt').resolve()
+ with open(xml_test_content_path) as xml_content_reader:
+ return xml_content_reader.read()
+
+def check_table_exist(temp_db_conn, phrase_class, phrase_type):
+ """
+ Verify that the place_classtype table exists for the given
+ phrase_class and phrase_type.
+ """
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("""
+ SELECT *
+ FROM information_schema.tables
+ WHERE table_type='BASE TABLE'
+ AND table_name='{}'""".format(table_name))
+ return temp_db_cursor.fetchone()
+
+def check_grant_access(temp_db_conn, user, phrase_class, phrase_type):
+ """
+ Check that the web user has been granted read access to the
+ place_classtype table of the given phrase_class and phrase_type.
+ """
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("""
+ SELECT * FROM information_schema.role_table_grants
+ WHERE table_name='{}'
+ AND grantee='{}'
+ AND privilege_type='SELECT'""".format(table_name, user))
+ return temp_db_cursor.fetchone()
+
+def check_placeid_and_centroid_indexes(temp_db_conn, phrase_class, phrase_type):
+ """
+ Check that the place_id index and centroid index exist for the
+ place_classtype table of the given phrase_class and phrase_type.
+ """
+ index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
+
+ return (
+ temp_db_conn.index_exists(index_prefix + 'centroid')
+ and
+ temp_db_conn.index_exists(index_prefix + 'place_id')
+ )
+
+def check_amenities_with_op(temp_db_conn):
+ """
+ Check that the test table for the SQL function getorcreate_amenityoperator()
+ contains more than one value (so that the SQL function was called more than once).
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("SELECT * FROM temp_with_operator")
+ return len(temp_db_cursor.fetchall()) > 1
+
+def check_amenities_without_op(temp_db_conn):
+ """
+ Check that the test table for the SQL function getorcreate_amenity()
+ contains more than one value (so that the SQL function was called more than once).
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("SELECT * FROM temp_without_operator")
+ return len(temp_db_cursor.fetchall()) > 1
+
+@pytest.fixture
+def special_phrases_importer(temp_db_conn, def_config, temp_phplib_dir_with_migration):
+ """
+ Return an instance of SpecialPhrasesImporter.
+ """
+ return SpecialPhrasesImporter(def_config, temp_phplib_dir_with_migration, temp_db_conn)
+
+@pytest.fixture
+def temp_phplib_dir_with_migration():
+ """
+ Return temporary phpdir with migration subdirectory and
+ PhraseSettingsToJson.php script inside.
+ """
+ migration_file = (TEST_BASE_DIR / '..' / 'lib-php' / 'migration'
+ / 'PhraseSettingsToJson.php').resolve()
+ with tempfile.TemporaryDirectory() as phpdir:
+ (Path(phpdir) / 'migration').mkdir()
+ migration_dest_path = (Path(phpdir) / 'migration' / 'PhraseSettingsToJson.php').resolve()
+ copyfile(migration_file, migration_dest_path)
+
+ yield Path(phpdir)
+
+@pytest.fixture
+def make_standard_name_func(temp_db_cursor):
+ temp_db_cursor.execute("""
+ CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT AS $$
+ BEGIN
+ RETURN trim(name); --Basically return only the trimmed name for the tests
+ END;
+ $$ LANGUAGE plpgsql IMMUTABLE;""")
+
+@pytest.fixture
+def getorcreate_amenity_funcs(temp_db_cursor, make_standard_name_func):
+ temp_db_cursor.execute("""
+ CREATE TABLE temp_without_operator(op TEXT);
+
+ CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
+ lookup_class text, lookup_type text)
+ RETURNS void as $$
+ BEGIN
+ INSERT INTO temp_without_operator VALUES('no_operator');
+ END;
+ $$ LANGUAGE plpgsql""")
+
+@pytest.fixture
+def getorcreate_amenityoperator_funcs(temp_db_cursor, make_standard_name_func):
+ temp_db_cursor.execute("""
+ CREATE TABLE temp_with_operator(op TEXT);
+
+ CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT, normalized_word TEXT,
+ lookup_class text, lookup_type text, op text)
+ RETURNS void as $$
+ BEGIN
+ INSERT INTO temp_with_operator VALUES(op);
+ END;
+ $$ LANGUAGE plpgsql""")
\ No newline at end of file
--- /dev/null
+<mediawiki xsi:schemaLocation="http://www.mediawiki.org/xml/export-0.11/ http://www.mediawiki.org/xml/export-0.11.xsd" version="0.11" xml:lang="en">
+<siteinfo>
+<sitename>OpenStreetMap Wiki</sitename>
+<dbname>wiki</dbname>
+<base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
+<generator>MediaWiki 1.35.1</generator>
+<case>first-letter</case>
+<namespaces>
+<namespace key="-2" case="first-letter">Media</namespace>
+<namespace key="-1" case="first-letter">Special</namespace>
+<namespace key="0" case="first-letter"/>
+<namespace key="1" case="first-letter">Talk</namespace>
+<namespace key="2" case="first-letter">User</namespace>
+<namespace key="3" case="first-letter">User talk</namespace>
+<namespace key="4" case="first-letter">Wiki</namespace>
+<namespace key="5" case="first-letter">Wiki talk</namespace>
+<namespace key="6" case="first-letter">File</namespace>
+<namespace key="7" case="first-letter">File talk</namespace>
+<namespace key="8" case="first-letter">MediaWiki</namespace>
+<namespace key="9" case="first-letter">MediaWiki talk</namespace>
+<namespace key="10" case="first-letter">Template</namespace>
+<namespace key="11" case="first-letter">Template talk</namespace>
+<namespace key="12" case="first-letter">Help</namespace>
+<namespace key="13" case="first-letter">Help talk</namespace>
+<namespace key="14" case="first-letter">Category</namespace>
+<namespace key="15" case="first-letter">Category talk</namespace>
+<namespace key="120" case="first-letter">Item</namespace>
+<namespace key="121" case="first-letter">Item talk</namespace>
+<namespace key="122" case="first-letter">Property</namespace>
+<namespace key="123" case="first-letter">Property talk</namespace>
+<namespace key="200" case="first-letter">DE</namespace>
+<namespace key="201" case="first-letter">DE talk</namespace>
+<namespace key="202" case="first-letter">FR</namespace>
+<namespace key="203" case="first-letter">FR talk</namespace>
+<namespace key="204" case="first-letter">ES</namespace>
+<namespace key="205" case="first-letter">ES talk</namespace>
+<namespace key="206" case="first-letter">IT</namespace>
+<namespace key="207" case="first-letter">IT talk</namespace>
+<namespace key="208" case="first-letter">NL</namespace>
+<namespace key="209" case="first-letter">NL talk</namespace>
+<namespace key="210" case="first-letter">RU</namespace>
+<namespace key="211" case="first-letter">RU talk</namespace>
+<namespace key="212" case="first-letter">JA</namespace>
+<namespace key="213" case="first-letter">JA talk</namespace>
+<namespace key="710" case="first-letter">TimedText</namespace>
+<namespace key="711" case="first-letter">TimedText talk</namespace>
+<namespace key="828" case="first-letter">Module</namespace>
+<namespace key="829" case="first-letter">Module talk</namespace>
+<namespace key="2300" case="first-letter">Gadget</namespace>
+<namespace key="2301" case="first-letter">Gadget talk</namespace>
+<namespace key="2302" case="case-sensitive">Gadget definition</namespace>
+<namespace key="2303" case="case-sensitive">Gadget definition talk</namespace>
+</namespaces>
+</siteinfo>
+<page>
+<title>Nominatim/Special Phrases/EN</title>
+<ns>0</ns>
+<id>67365</id>
+<revision>
+<id>2100424</id>
+<parentid>2100422</parentid>
+<timestamp>2021-01-27T20:29:53Z</timestamp>
+<contributor>
+<username>Violaine Do</username>
+<id>88152</id>
+</contributor>
+<minor/>
+<comment>/* en */ add coworking amenity</comment>
+<origin>2100424</origin>
+<model>wikitext</model>
+<format>text/x-wiki</format>
+<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
+== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]]
+</text>
+<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
+</revision>
+</page>
+</mediawiki>
\ No newline at end of file
php-pgsql php php-intl libpqxx-devel \
proj-epsg bzip2-devel proj-devel boost-devel \
python3-pip python3-setuptools python3-devel \
- expat-devel zlib-devel
+ expat-devel zlib-devel libicu-devel
- pip3 install --user psycopg2 python-dotenv psutil Jinja2
+ pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#
php-pgsql php php-intl php-json libpq-devel \
bzip2-devel proj-devel boost-devel \
python3-pip python3-setuptools python3-devel \
- expat-devel zlib-devel
+ expat-devel zlib-devel libicu-devel
- pip3 install --user psycopg2 python-dotenv psutil Jinja2
+ pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU
#
libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-10 postgresql-10-postgis-2.4 \
postgresql-contrib-10 postgresql-10-postgis-scripts \
- php php-pgsql php-intl python3-pip \
- python3-psycopg2 python3-psutil python3-jinja2 git
+ php php-pgsql php-intl libicu-dev python3-pip \
+ python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
# The python-dotenv package that comes with Ubuntu 18.04 is too old, so
# install the latest version from pip:
libbz2-dev libpq-dev libproj-dev \
postgresql-server-dev-12 postgresql-12-postgis-3 \
postgresql-contrib-12 postgresql-12-postgis-3-scripts \
- php php-pgsql php-intl python3-dotenv \
- python3-psycopg2 python3-psutil python3-jinja2 git
+ php php-pgsql php-intl libicu-dev python3-dotenv \
+ python3-psycopg2 python3-psutil python3-jinja2 python3-icu git
#
# System Configuration