2 Functions to import special phrases into the database.
10 from os.path import isfile
11 from psycopg2.sql import Identifier, Literal, SQL
12 from nominatim.tools.exec_utils import get_url
14 LOG = logging.getLogger()
16 def import_from_wiki(args, db_connection, languages=None):
17 # pylint: disable-msg=too-many-locals
19 Iterate through all specified languages and
20 extract corresponding special phrases from the wiki.
22 black_list, white_list = _load_white_and_black_lists(args)
24 #Compile the match regex to increase performance for the following loop.
25 occurence_pattern = re.compile(
26 r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
28 sanity_check_pattern = re.compile(r'^\w+$')
30 languages = _get_languages(args.config) if not languages else languages
32 #array for pairs of class/type
34 for lang in languages:
35 LOG.warning('Import phrases for lang: %s', lang)
36 wiki_page_xml_content = _get_wiki_content(lang)
37 #One match will be of format [label, class, type, operator, plural]
38 matches = occurence_pattern.findall(wiki_page_xml_content)
41 phrase_label = match[0].strip()
42 phrase_class = match[1].strip()
43 phrase_type = match[2].strip()
44 phrase_operator = match[3].strip()
45 #hack around a bug where building=yes was imported withq quotes into the wiki
46 phrase_type = re.sub(r'\"|"', '', phrase_type)
48 #sanity check, in case somebody added garbage in the wiki
49 _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
51 #blacklisting: disallow certain class/type combinations
52 if phrase_class in black_list.keys() and phrase_type in black_list[phrase_class]:
54 #whitelisting: if class is in whitelist, allow only tags in the list
55 if phrase_class in white_list.keys() and phrase_type not in white_list[phrase_class]:
58 #add class/type to the pairs dict
59 pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
62 db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
65 _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
66 db_connection.commit()
67 LOG.warning('Import done.')
69 def _load_white_and_black_lists(args):
71 Load white and black lists from phrases-settings.json.
74 settings_path = str(config.config_dir)+'/phrase-settings.json'
76 if config.PHRASE_CONFIG:
77 settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
79 with open(settings_path, "r") as json_settings:
80 settings = json.load(json_settings)
81 return settings['blackList'], settings['whiteList']
83 def _get_languages(config):
85 Get list of all languages from env config file
86 or default if there is no languages configured.
87 The system will extract special phrases only from all specified languages.
90 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
91 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
92 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
93 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
94 return config.LANGUAGES or default_languages
97 def _get_wiki_content(lang):
99 Request and return the wiki page's content
100 corresponding to special phrases for a given lang.
101 Requested URL Example :
102 https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
104 url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
108 def _check_sanity(lang, phrase_class, phrase_type, pattern):
110 Check sanity of given inputs in case somebody added garbage in the wiki.
111 If a bad class/type is detected the system will exit with an error.
114 if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
117 LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
121 def _process_amenity(db_connection, phrase_label, phrase_class, phrase_type, phrase_operator):
123 Add phrase lookup and corresponding class and type to the word table based on the operator.
125 with db_connection.cursor() as db_cursor:
126 if phrase_operator == 'near':
127 db_cursor.execute("""SELECT getorcreate_amenityoperator(
128 make_standard_name(%s), %s, %s, 'near')""",
129 (phrase_label, phrase_class, phrase_type))
130 elif phrase_operator == 'in':
131 db_cursor.execute("""SELECT getorcreate_amenityoperator(
132 make_standard_name(%s), %s, %s, 'in')""",
133 (phrase_label, phrase_class, phrase_type))
135 db_cursor.execute("""SELECT getorcreate_amenity(
136 make_standard_name(%s), %s, %s)""",
137 (phrase_label, phrase_class, phrase_type))
140 def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
142 Create table place_classtype for each given pair.
143 Also create indexes on place_id and centroid.
145 LOG.warning('Create tables and indexes...')
147 sql_tablespace = config.TABLESPACE_AUX_DATA
149 sql_tablespace = ' TABLESPACE '+sql_tablespace
151 with db_connection.cursor() as db_cursor:
152 db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
154 for _, pair in pairs.items():
155 phrase_class = pair[0]
156 phrase_type = pair[1]
159 _create_place_classtype_table(
160 db_connection, sql_tablespace, phrase_class, phrase_type
164 _create_place_classtype_indexes(
165 db_connection, sql_tablespace, phrase_class, phrase_type
168 #Grant access on read to the web user.
169 _grant_access_to_webuser(
170 db_connection, config, phrase_class, phrase_type
173 with db_connection.cursor() as db_cursor:
174 db_cursor.execute("DROP INDEX idx_placex_classtype")
177 def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
179 Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
181 with db_connection.cursor() as db_cursor:
182 db_cursor.execute(SQL(f"""
183 CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
184 AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
185 WHERE class = {{}} AND type = {{}}""")
186 .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
187 Literal(phrase_class), Literal(phrase_type)))
190 def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
192 Create indexes on centroid and place_id for the place_classtype table.
195 if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
196 with db_connection.cursor() as db_cursor:
197 db_cursor.execute(SQL(f"""
198 CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
200 f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
201 Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
204 if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
205 with db_connection.cursor() as db_cursor:
206 db_cursor.execute(SQL(f"""
207 CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
209 f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
210 Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
213 def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
215 Grant access on read to the table place_classtype for the webuser.
217 with db_connection.cursor() as db_cursor:
218 db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
219 .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
220 Identifier(config.DATABASE_WEBUSER)))
222 def _convert_php_settings_if_needed(args, file_path):
224 Convert php settings file of special phrases to json file if it is still in php format.
226 file, extension = os.path.splitext(file_path)
227 json_file_path = f'{file}.json'
228 if extension == '.php' and not isfile(json_file_path):
230 subprocess.run(['/usr/bin/env', 'php', '-Cq',
231 args.phplib_dir / 'migration/phraseSettingsToJson.php',
232 file_path], check=True)
233 LOG.warning('special_phrase configuration file has been converted to json.')
234 return json_file_path
235 except subprocess.CalledProcessError:
236 LOG.error('Error while converting %s to json.', file_path)
239 return json_file_path