2 Functions to import special phrases into the database.
9 from os.path import isfile
10 from icu import Transliterator
11 from psycopg2.sql import Identifier, Literal, SQL
12 from nominatim.tools.exec_utils import get_url
14 LOG = logging.getLogger()
16 def import_from_wiki(args, db_connection, languages=None):
17 # pylint: disable-msg=too-many-locals
19 Iterate through all specified languages and
20 extract corresponding special phrases from the wiki.
22 black_list, white_list = _load_white_and_black_lists(args)
24 #Compile the match regex to increase performance for the following loop.
25 occurence_pattern = re.compile(
26 r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
28 sanity_check_pattern = re.compile(r'^\w+$')
30 #Get all languages to process.
31 languages = _get_languages(args.config) if not languages else languages
33 #array for pairs of class/type
34 class_type_pairs = set()
36 transliterator = Transliterator.createFromRules("special-phrases normalizer",
37 args.config.TERM_NORMALIZATION)
39 for lang in languages:
40 LOG.warning('Import phrases for lang: %s', lang)
41 wiki_page_xml_content = _get_wiki_content(lang)
42 #One match will be of format [label, class, type, operator, plural]
43 matches = occurence_pattern.findall(wiki_page_xml_content)
46 phrase_label = match[0].strip()
47 normalized_label = transliterator.transliterate(phrase_label)
48 phrase_class = match[1].strip()
49 phrase_type = match[2].strip()
50 phrase_operator = match[3].strip()
51 #hack around a bug where building=yes was imported withq quotes into the wiki
52 phrase_type = re.sub(r'\"|"', '', phrase_type)
54 #sanity check, in case somebody added garbage in the wiki
55 _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
57 #blacklisting: disallow certain class/type combinations
58 if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
60 #whitelisting: if class is in whitelist, allow only tags in the list
61 if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
64 #add class/type to the pairs dict
65 class_type_pairs.add((phrase_class, phrase_type))
68 db_connection, phrase_label, normalized_label,
69 phrase_class, phrase_type, phrase_operator
72 _create_place_classtype_table_and_indexes(db_connection, args.config, class_type_pairs)
73 db_connection.commit()
74 LOG.warning('Import done.')
76 def _load_white_and_black_lists(args):
78 Load white and black lists from phrases-settings.json.
81 settings_path = str(config.config_dir)+'/phrase-settings.json'
83 if config.PHRASE_CONFIG:
84 settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
86 with open(settings_path, "r") as json_settings:
87 settings = json.load(json_settings)
88 return settings['blackList'], settings['whiteList']
90 def _get_languages(config):
92 Get list of all languages from env config file
93 or default if there is no languages configured.
94 The system will extract special phrases only from all specified languages.
97 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
98 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
99 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
100 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
101 return config.LANGUAGES or default_languages
104 def _get_wiki_content(lang):
106 Request and return the wiki page's content
107 corresponding to special phrases for a given lang.
108 Requested URL Example :
109 https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
111 url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
115 def _check_sanity(lang, phrase_class, phrase_type, pattern):
117 Check sanity of given inputs in case somebody added garbage in the wiki.
118 If a bad class/type is detected the system will exit with an error.
120 if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
121 LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
124 def _process_amenity(db_connection, phrase_label, normalized_label,
125 phrase_class, phrase_type, phrase_operator):
126 # pylint: disable-msg=too-many-arguments
128 Add phrase lookup and corresponding class and type to the word table based on the operator.
130 with db_connection.cursor() as db_cursor:
131 if phrase_operator == 'near':
132 db_cursor.execute("""SELECT getorcreate_amenityoperator(
133 make_standard_name(%s), %s, %s, %s, 'near')""",
134 (phrase_label, normalized_label, phrase_class, phrase_type))
135 elif phrase_operator == 'in':
136 db_cursor.execute("""SELECT getorcreate_amenityoperator(
137 make_standard_name(%s), %s, %s, %s, 'in')""",
138 (phrase_label, normalized_label, phrase_class, phrase_type))
140 db_cursor.execute("""SELECT getorcreate_amenity(
141 make_standard_name(%s), %s, %s, %s)""",
142 (phrase_label, normalized_label, phrase_class, phrase_type))
145 def _create_place_classtype_table_and_indexes(db_connection, config, class_type_pairs):
147 Create table place_classtype for each given pair.
148 Also create indexes on place_id and centroid.
150 LOG.warning('Create tables and indexes...')
152 sql_tablespace = config.TABLESPACE_AUX_DATA
154 sql_tablespace = ' TABLESPACE '+sql_tablespace
156 with db_connection.cursor() as db_cursor:
157 db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
159 for pair in class_type_pairs.items():
160 phrase_class = pair[0]
161 phrase_type = pair[1]
164 _create_place_classtype_table(
165 db_connection, sql_tablespace, phrase_class, phrase_type
169 _create_place_classtype_indexes(
170 db_connection, sql_tablespace, phrase_class, phrase_type
173 #Grant access on read to the web user.
174 _grant_access_to_webuser(
175 db_connection, config, phrase_class, phrase_type
178 with db_connection.cursor() as db_cursor:
179 db_cursor.execute("DROP INDEX idx_placex_classtype")
182 def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
184 Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
186 table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
187 with db_connection.cursor() as db_cursor:
188 db_cursor.execute(SQL("""
189 CREATE TABLE IF NOT EXISTS {{}} {}
190 AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
191 WHERE class = {{}} AND type = {{}}""".format(sql_tablespace))
192 .format(Identifier(table_name), Literal(phrase_class),
193 Literal(phrase_type)))
196 def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
198 Create indexes on centroid and place_id for the place_classtype table.
200 index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type)
201 base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
203 if not db_connection.index_exists(index_prefix + 'centroid'):
204 with db_connection.cursor() as db_cursor:
205 db_cursor.execute(SQL("""
206 CREATE INDEX {{}} ON {{}} USING GIST (centroid) {}""".format(sql_tablespace))
207 .format(Identifier(index_prefix + 'centroid'),
208 Identifier(base_table)), sql_tablespace)
211 if not db_connection.index_exists(index_prefix + 'place_id'):
212 with db_connection.cursor() as db_cursor:
213 db_cursor.execute(SQL(
214 """CREATE INDEX {{}} ON {{}} USING btree(place_id) {}""".format(sql_tablespace))
215 .format(Identifier(index_prefix + 'place_id'),
216 Identifier(base_table)))
219 def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
221 Grant access on read to the table place_classtype for the webuser.
223 table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
224 with db_connection.cursor() as db_cursor:
225 db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
226 .format(Identifier(table_name), Identifier(config.DATABASE_WEBUSER)))
228 def _convert_php_settings_if_needed(args, file_path):
230 Convert php settings file of special phrases to json file if it is still in php format.
232 file, extension = os.path.splitext(file_path)
233 json_file_path = file + '.json'
234 if extension == '.php' and not isfile(json_file_path):
236 subprocess.run(['/usr/bin/env', 'php', '-Cq',
237 args.phplib_dir / 'migration/phraseSettingsToJson.php',
238 file_path], check=True)
239 LOG.warning('special_phrase configuration file has been converted to json.')
240 return json_file_path
241 except subprocess.CalledProcessError:
242 LOG.error('Error while converting %s to json.', file_path)
245 return json_file_path