export.php
query.php
setup.php
- specialphrases.php
update.php
warm.php
)
include(GNUInstallDirs)
set(NOMINATIM_DATADIR ${CMAKE_INSTALL_FULL_DATADIR}/${PROJECT_NAME})
set(NOMINATIM_LIBDIR ${CMAKE_INSTALL_FULL_LIBDIR}/${PROJECT_NAME})
-set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME})
+set(NOMINATIM_CONFIGDIR ${CMAKE_INSTALL_FULL_SYSCONFDIR}/${PROJECT_NAME}/settings)
if (BUILD_IMPORTER)
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool-installed.tmpl installed.bin)
endif()
install(FILES settings/env.defaults
+ settings/__init__.py
settings/address-levels.json
- settings/phrase_settings.php
+ settings/phrase_settings.py
settings/import-admin.style
settings/import-street.style
settings/import-address.style
import sys
import os
-sys.path.insert(1, '@NOMINATIM_LIBDIR@/lib-python')
+sys.path.insert(0, '@NOMINATIM_LIBDIR@/lib-python')
+#Add the parent of the config directory to the Python path so the 'settings' package can be imported
+sys.path.insert(1, '@NOMINATIM_CONFIGDIR@/..')
os.environ['NOMINATIM_NOMINATIM_TOOL'] = os.path.abspath(__file__)
If you want to be able to search for places by their type through
[special key phrases](https://wiki.openstreetmap.org/wiki/Nominatim/Special_Phrases)
-you also need to enable these key phrases like this:
+you also need to import these key phrases like this:
- nominatim special-phrases --from-wiki > specialphrases.sql
- psql -d nominatim -f specialphrases.sql
+ nominatim import-special-phrases --from-wiki
Note that this command downloads the phrases from the wiki link above. You
need internet access for the step.
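+
+By default, phrases are downloaded for a fixed set of languages. You can
+restrict the import to selected languages by setting `NOMINATIM_LANGUAGES`
+(for example, in your project's `.env` file) before running the command:
+
+    NOMINATIM_LANGUAGES="en,de" nominatim import-special-phrases --from-wiki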
LANGUAGE plpgsql;
-CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
+CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT,
lookup_class text, lookup_type text)
RETURNS INTEGER
AS $$
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and word = normalized_word
+ WHERE word_token = lookup_token and word = lookup_word
and class = lookup_class and type = lookup_type
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
+ INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
lookup_class, lookup_type, null, 0);
END IF;
RETURN return_word_id;
CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
- normalized_word TEXT,
lookup_class text,
lookup_type text,
op text)
BEGIN
lookup_token := ' '||trim(lookup_word);
SELECT min(word_id) FROM word
- WHERE word_token = lookup_token and word = normalized_word
+ WHERE word_token = lookup_token and word = lookup_word
and class = lookup_class and type = lookup_type and operator = op
INTO return_word_id;
IF return_word_id IS NULL THEN
return_word_id := nextval('seq_word');
- INSERT INTO word VALUES (return_word_id, lookup_token, normalized_word,
+ INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
lookup_class, lookup_type, null, 0, op);
END IF;
RETURN return_word_id;
# pylint: disable=C0111
# Using non-top-level imports to make pyosmium optional for replication only.
# pylint: disable=E0012,C0415
-
-
-class SetupSpecialPhrases:
- """\
- Maintain special phrases.
- """
-
- @staticmethod
- def add_args(parser):
- group = parser.add_argument_group('Input arguments')
- group.add_argument('--from-wiki', action='store_true',
- help='Pull special phrases from the OSM wiki.')
- group = parser.add_argument_group('Output arguments')
- group.add_argument('-o', '--output', default='-',
- help="""File to write the preprocessed phrases to.
- If omitted, it will be written to stdout.""")
-
- @staticmethod
- def run(args):
- if args.output != '-':
- raise NotImplementedError('Only output to stdout is currently implemented.')
- return run_legacy_script('specialphrases.php', '--wiki-import', nominatim_env=args)
-
-
class UpdateAddData:
"""\
Add additional data from a file or an online source.
parser.add_subcommand('freeze', clicmd.SetupFreeze)
parser.add_subcommand('replication', clicmd.UpdateReplication)
- parser.add_subcommand('special-phrases', SetupSpecialPhrases)
+ parser.add_subcommand('import-special-phrases', clicmd.ImportSpecialPhrases)
parser.add_subcommand('add-data', UpdateAddData)
parser.add_subcommand('index', clicmd.UpdateIndex)
from .admin import AdminFuncs
from .freeze import SetupFreeze
from .transition import AdminTransition
+from .special_phrases import ImportSpecialPhrases
--- /dev/null
+"""
+ Implementation of the 'import-special-phrases' command.
+"""
+import logging
+from nominatim.tools.special_phrases import import_from_wiki
+from nominatim.db.connection import connect
+
+LOG = logging.getLogger()
+
+# Do not repeat documentation of subcommand classes.
+# pylint: disable=C0111
+
+class ImportSpecialPhrases:
+ """\
+ Import special phrases.
+ """
+ @staticmethod
+ def add_args(parser):
+ group = parser.add_argument_group('Input arguments')
+ group.add_argument('--from-wiki', action='store_true',
+ help='Import special phrases from the OSM wiki to the database.')
+
+ @staticmethod
+ def run(args):
+ if args.from_wiki:
+            LOG.warning('Special phrases import starting')
+ with connect(args.config.get_libpq_dsn()) as db_connection:
+ import_from_wiki(args.config, db_connection)
+ return 0
--- /dev/null
+"""
+ Functions to import special phrases into the database.
+"""
+import logging
+import re
+import sys
+from psycopg2.sql import Identifier, Literal, SQL
+from settings.phrase_settings import BLACK_LIST, WHITE_LIST
+from nominatim.tools.exec_utils import get_url
+
+LOG = logging.getLogger()
+
+def import_from_wiki(config, db_connection, languages=None):
+ """
+ Iterate through all specified languages and
+ extract corresponding special phrases from the wiki.
+ """
+ #Compile the match regex to increase performance for the following loop.
+ occurence_pattern = re.compile(
+ r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
+ )
+ sanity_check_pattern = re.compile(r'^\w+$')
+
+ languages = _get_languages(config) if not languages else languages
+
+    #dict of class/type pairs found in the wiki, used to create the tables afterwards
+    pairs = dict()
+ for lang in languages:
+ LOG.warning('Import phrases for lang: %s', lang)
+ wiki_page_xml_content = _get_wiki_content(lang)
+ #One match will be of format [label, class, type, operator, plural]
+ matches = occurence_pattern.findall(wiki_page_xml_content)
+
+ for match in matches:
+ phrase_label = match[0].strip()
+ phrase_class = match[1].strip()
+ phrase_type = match[2].strip()
+ phrase_operator = match[3].strip()
+            #hack around a bug where building=yes was imported with quotes into the wiki
+            phrase_type = re.sub(r'\"|&quot;', '', phrase_type)
+
+ #sanity check, in case somebody added garbage in the wiki
+ _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
+
+ #blacklisting: disallow certain class/type combinations
+ if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
+ continue
+ #whitelisting: if class is in whitelist, allow only tags in the list
+ if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
+ continue
+
+ #add class/type to the pairs dict
+ pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
+
+ _process_amenity(
+ db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
+ )
+
+ _create_place_classtype_table_and_indexes(db_connection, config, pairs)
+ db_connection.commit()
+ LOG.warning('Import done.')
+
+
+def _get_languages(config):
+ """
+    Get the list of languages from the config file,
+    or a default list if no languages are configured.
+    Special phrases are only extracted for the languages in this list.
+ """
+ default_languages = [
+ 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+ 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+ 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+ 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
+    return config.LANGUAGES.split(',') if config.LANGUAGES else default_languages
+
+
+def _get_wiki_content(lang):
+ """
+    Request and return the content of the wiki page
+    containing the special phrases for the given language.
+    Example of the requested URL:
+ https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+ """
+ url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
+ return get_url(url)
+
+
+def _check_sanity(lang, phrase_class, phrase_type, pattern):
+ """
+    Check the sanity of the given class and type in case somebody added garbage to the wiki.
+    If a bad class or type is detected, the import is aborted with an error.
+    """
+    if not pattern.match(phrase_class) or not pattern.match(phrase_type):
+        LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
+        sys.exit(1)
+
+
+def _process_amenity(db_connection, phrase_label, phrase_class, phrase_type, phrase_operator):
+ """
+ Add phrase lookup and corresponding class and type to the word table based on the operator.
+ """
+ with db_connection.cursor() as db_cursor:
+ if phrase_operator == 'near':
+ db_cursor.execute("""SELECT getorcreate_amenityoperator(
+ make_standard_name(%s), %s, %s, 'near')""",
+ (phrase_label, phrase_class, phrase_type))
+ elif phrase_operator == 'in':
+ db_cursor.execute("""SELECT getorcreate_amenityoperator(
+ make_standard_name(%s), %s, %s, 'in')""",
+ (phrase_label, phrase_class, phrase_type))
+ else:
+ db_cursor.execute("""SELECT getorcreate_amenity(
+ make_standard_name(%s), %s, %s)""",
+ (phrase_label, phrase_class, phrase_type))
+
+
+def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
+ """
+    Create a place_classtype table for each given class/type pair.
+    Also create indexes on place_id and centroid for each of these tables.
+ """
+ LOG.warning('Create tables and indexes...')
+
+ sql_tablespace = config.TABLESPACE_AUX_DATA
+ if sql_tablespace:
+ sql_tablespace = ' TABLESPACE '+sql_tablespace
+
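+    #Temporary index on placex (class, type) to speed up the SELECT queries
+    #that fill the place_classtype tables. It is dropped again further below.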
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
+
+ for _, pair in pairs.items():
+ phrase_class = pair[0]
+ phrase_type = pair[1]
+
+ #Table creation
+ _create_place_classtype_table(
+ db_connection, sql_tablespace, phrase_class, phrase_type
+ )
+
+ #Indexes creation
+ _create_place_classtype_indexes(
+ db_connection, sql_tablespace, phrase_class, phrase_type
+ )
+
+ #Grant access on read to the web user.
+ _grant_access_to_webuser(
+ db_connection, config, phrase_class, phrase_type
+ )
+
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute("DROP INDEX idx_placex_classtype")
+
+
+def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
+ """
+    Create the place_classtype table for the given phrase_class/phrase_type if it doesn't exist.
+ """
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(f"""
+ CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
+ AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
+ WHERE class = {{}} AND type = {{}}""")
+ .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
+ Literal(phrase_class), Literal(phrase_type)))
+
+
+def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
+ """
+ Create indexes on centroid and place_id for the place_classtype table.
+ """
+ #Index on centroid
+ if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(f"""
+ CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
+ .format(Identifier(
+ f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
+ Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+
+ #Index on place_id
+ if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(f"""
+ CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
+ .format(Identifier(
+ f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
+ Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
+
+
+def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
+ """
+    Grant read access on the given place_classtype table to the web user.
+ """
+ with db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
+ .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
+ Identifier(config.DATABASE_WEBUSER)))
--- /dev/null
+"""
+ Module for settings
+"""
\ No newline at end of file
NOMINATIM_WIKIPEDIA_DATA_PATH=
# Configuration file for special phrase import.
-# When unset, the internal default settings from 'settings/phrase_settings.php'
+# When unset, the internal default settings from 'settings/phrase_settings.py'
# are used.
NOMINATIM_PHRASE_CONFIG=
--- /dev/null
+"""
+ These settings control the import of special phrases from the wiki.
+"""
+#class/type combinations to exclude
+BLACK_LIST = {
+    'boundary': [
+ 'administrative'
+ ],
+ 'place': [
+ 'house',
+ 'houses'
+ ]
+}
+
+#If a class is in the white list then all types will
+#be ignored except the ones given in the list.
+#Also use this list to exclude an entire class from
+#special phrases.
+WHITE_LIST = {
+ 'highway': [
+ 'bus_stop',
+ 'rest_area',
+ 'raceway'
+ ],
+ 'building': []
+}
@pytest.mark.parametrize("command,script", [
- (('special-phrases',), 'specialphrases'),
(('add-data', '--file', 'foo.osm'), 'update'),
(('export',), 'export')
])
assert bnd_mock.called == do_bnds
assert rank_mock.called == do_ranks
+def test_special_phrases_command(temp_db, mock_func_factory):
+ func = mock_func_factory(nominatim.clicmd.special_phrases, 'import_from_wiki')
+
+ call_nominatim('import-special-phrases', '--from-wiki')
+
+ assert func.called == 1
@pytest.mark.parametrize("command,func", [
('postcodes', 'update_postcodes'),
--- /dev/null
+"""
+ Tests for import special phrases functions
+"""
+import pytest
+from nominatim.tools.special_phrases import (
+    _create_place_classtype_indexes, _create_place_classtype_table,
+    _get_wiki_content, _grant_access_to_webuser, _process_amenity)
+
+def test_get_wiki_content():
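+    """ The wiki page of special phrases for French should be downloadable
+        (this test needs internet access).
+    """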
+ assert _get_wiki_content('fr')
+
+def execute_and_verify_add_word(temp_db_conn, phrase_label, phrase_class, phrase_type):
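+    """ Helper: run _process_amenity() without an operator and return the
+        matching row from the word table, if any.
+    """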
+ _process_amenity(temp_db_conn, phrase_label, phrase_class, phrase_type, '')
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute(f"""
+ SELECT * FROM word
+ WHERE word_token=' {phrase_label}'
+ AND word='{phrase_label}'
+ AND class='{phrase_class}'
+ AND type='{phrase_type}'""")
+ return temp_db_cursor.fetchone()
+
+def execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, phrase_operator):
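+    """ Helper: run _process_amenity() with the given operator and return the
+        matching row from the word table, if any.
+    """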
+ _process_amenity(temp_db_conn, phrase_label, phrase_class, phrase_type, phrase_operator)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute(f"""
+ SELECT * FROM word
+ WHERE word_token=' {phrase_label}'
+ AND word='{phrase_label}'
+ AND class='{phrase_class}'
+ AND type='{phrase_type}'
+ AND operator='{phrase_operator}'""")
+ return temp_db_cursor.fetchone()
+
+def test_process_amenity(temp_db_conn, word_table, amenity_operator_funcs):
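+    """ _process_amenity() should add a word entry for a phrase without an
+        operator as well as with the 'near' and 'in' operators.
+    """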
+ phrase_label = 'label'
+ phrase_class = 'class'
+ phrase_type = 'type'
+
+ assert execute_and_verify_add_word(temp_db_conn, phrase_label, phrase_class, phrase_type)
+ assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, 'near')
+ assert execute_and_verify_add_word_with_operator(temp_db_conn, phrase_label, phrase_class, phrase_type, 'in')
+
+def index_exists(db_connect, index):
+ """ Check that an index with the given name exists in the database.
+ """
+ with db_connect.cursor() as cur:
+ cur.execute("""SELECT tablename FROM pg_indexes
+ WHERE indexname = %s and schemaname = 'public'""", (index, ))
+ if cur.rowcount == 0:
+ return False
+ return True
+
+def test_create_place_classtype_indexes(temp_db_conn):
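+    """ _create_place_classtype_indexes() should create the centroid and
+        place_id indexes on an existing place_classtype table.
+    """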
+ phrase_class = 'class'
+ phrase_type = 'type'
+ table_name = f'place_classtype_{phrase_class}_{phrase_type}'
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("CREATE EXTENSION postgis;")
+ temp_db_cursor.execute(f'CREATE TABLE {table_name}(place_id BIGINT, centroid GEOMETRY)')
+
+ _create_place_classtype_indexes(temp_db_conn, '', phrase_class, phrase_type)
+
+ centroid_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid')
+ place_id_index_exists = index_exists(temp_db_conn, f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id')
+
+ assert centroid_index_exists and place_id_index_exists
+
+def test_create_place_classtype_table(temp_db_conn, placex_table):
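+    """ _create_place_classtype_table() should create the table for the given
+        class/type pair from placex.
+    """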
+ phrase_class = 'class'
+ phrase_type = 'type'
+ _create_place_classtype_table(temp_db_conn, '', phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute(f"""
+ SELECT *
+ FROM information_schema.tables
+ WHERE table_type='BASE TABLE'
+ AND table_name='place_classtype_{phrase_class}_{phrase_type}'""")
+ result = temp_db_cursor.fetchone()
+ assert result
+
+def test_grant_access_to_web_user(temp_db_conn, def_config):
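+    """ _grant_access_to_webuser() should grant SELECT on the place_classtype
+        table to the configured web user.
+    """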
+ phrase_class = 'class'
+ phrase_type = 'type'
+ table_name = f'place_classtype_{phrase_class}_{phrase_type}'
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute(f'CREATE TABLE {table_name}()')
+
+ _grant_access_to_webuser(temp_db_conn, def_config, phrase_class, phrase_type)
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute(f"""
+ SELECT * FROM information_schema.role_table_grants
+ WHERE table_name='{table_name}'
+ AND grantee='{def_config.DATABASE_WEBUSER}'
+ AND privilege_type='SELECT'""")
+ result = temp_db_cursor.fetchone()
+ assert result
+
+@pytest.fixture
+def amenity_operator_funcs(temp_db_cursor):
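+    """ Create simplified versions of make_standard_name(),
+        getorcreate_amenity() and getorcreate_amenityoperator() as well as
+        the seq_word sequence needed by _process_amenity().
+    """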
+    temp_db_cursor.execute("""
+ CREATE OR REPLACE FUNCTION make_standard_name(name TEXT) RETURNS TEXT
+ AS $$
+ DECLARE
+ o TEXT;
+ BEGIN
+ RETURN name; --Basically return the same name for the tests
+ END;
+ $$
+ LANGUAGE plpgsql IMMUTABLE;
+
+ CREATE SEQUENCE seq_word start 1;
+
+ CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT,
+ lookup_class text, lookup_type text)
+ RETURNS INTEGER
+ AS $$
+ DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+ BEGIN
+ lookup_token := ' '||trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and word = lookup_word
+ and class = lookup_class and type = lookup_type
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
+ lookup_class, lookup_type, null, 0);
+ END IF;
+ RETURN return_word_id;
+ END;
+ $$
+ LANGUAGE plpgsql;
+
+ CREATE OR REPLACE FUNCTION getorcreate_amenityoperator(lookup_word TEXT,
+ lookup_class text,
+ lookup_type text,
+ op text)
+ RETURNS INTEGER
+ AS $$
+ DECLARE
+ lookup_token TEXT;
+ return_word_id INTEGER;
+ BEGIN
+ lookup_token := ' '||trim(lookup_word);
+ SELECT min(word_id) FROM word
+ WHERE word_token = lookup_token and word = lookup_word
+ and class = lookup_class and type = lookup_type and operator = op
+ INTO return_word_id;
+ IF return_word_id IS NULL THEN
+ return_word_id := nextval('seq_word');
+ INSERT INTO word VALUES (return_word_id, lookup_token, lookup_word,
+ lookup_class, lookup_type, null, 0, op);
+ END IF;
+ RETURN return_word_id;
+ END;
+ $$
+ LANGUAGE plpgsql;""")