"""
    Functions to import special phrases into the database.
"""
import json
import logging
import os
import re
import subprocess
from os.path import isfile

from icu import Transliterator # pylint: disable-msg=no-name-in-module
from psycopg2.sql import Identifier, Literal, SQL

from nominatim.tools.exec_utils import get_url
# Logger shared by all functions of this module. Calling getLogger() without
# a name returns the root logger.
LOG = logging.getLogger()
def import_from_wiki(args, db_connection, languages=None):
    # pylint: disable-msg=too-many-locals
    """
        Iterate through all specified languages and
        extract corresponding special phrases from the wiki.

        For each language the wiki page is fetched and every table row is
        split into label, class, type and operator. Rows rejected by the
        black/white lists are skipped, all others are added to the word
        table. Once all languages are processed, the place_classtype tables
        and indexes are created for every class/type pair that was seen
        and the transaction is committed.

        `args` must provide `config` (and whatever the settings loader needs),
        `db_connection` is the database connection to write to and
        `languages` is an optional iterable of language codes. When it is
        empty or None, the configured languages are used.
    """
    black_list, white_list = _load_white_and_black_lists(args)

    # Compile the regexes once, they are applied to every row of every page.
    occurence_pattern = re.compile(
        r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
    )
    sanity_check_pattern = re.compile(r'^\w+$')
    # Quotes may show up literally or, in the XML export, as the &quot; entity.
    quote_pattern = re.compile(r'"|&quot;')

    # Get all languages to process.
    languages = languages or _get_languages(args.config)

    # Class/type pairs seen so far, keyed by 'class|type' to avoid duplicates.
    pairs = {}

    transliterator = Transliterator.createFromRules("special-phrases normalizer",
                                                    args.config.TERM_NORMALIZATION)

    for lang in languages:
        LOG.warning('Import phrases for lang: %s', lang)
        wiki_page_xml_content = _get_wiki_content(lang)

        # One match is a tuple of (label, class, type, operator, plural).
        for match in occurence_pattern.findall(wiki_page_xml_content):
            phrase_label = match[0].strip()
            normalized_label = transliterator.transliterate(phrase_label)
            phrase_class = match[1].strip()
            phrase_operator = match[3].strip()
            # Hack around a bug where building=yes was imported with quotes
            # into the wiki.
            phrase_type = quote_pattern.sub('', match[2].strip())

            # Sanity check, in case somebody added garbage in the wiki.
            _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)

            # Blacklisting: disallow certain class/type combinations.
            if phrase_type in black_list.get(phrase_class, ()):
                continue
            # Whitelisting: if class is in whitelist, allow only tags in the list.
            if phrase_class in white_list and phrase_type not in white_list[phrase_class]:
                continue

            # Remember the pair so that its table gets created afterwards.
            pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)

            _process_amenity(
                db_connection, phrase_label, normalized_label,
                phrase_class, phrase_type, phrase_operator
            )

    _create_place_classtype_table_and_indexes(db_connection, args.config, pairs)
    db_connection.commit()
    LOG.warning('Import done.')
77 def _load_white_and_black_lists(args):
79 Load white and black lists from phrases-settings.json.
82 settings_path = str(config.config_dir)+'/phrase-settings.json'
84 if config.PHRASE_CONFIG:
85 settings_path = _convert_php_settings_if_needed(args, config.PHRASE_CONFIG)
87 with open(settings_path, "r") as json_settings:
88 settings = json.load(json_settings)
89 return settings['blackList'], settings['whiteList']
91 def _get_languages(config):
93 Get list of all languages from env config file
94 or default if there is no languages configured.
95 The system will extract special phrases only from all specified languages.
98 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
99 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
100 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
101 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
102 return config.LANGUAGES or default_languages
def _get_wiki_content(lang):
    """
        Request and return the wiki page's content
        corresponding to special phrases for a given lang.
        Requested URL Example :
            https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
    """
    # The page name ends with the upper-cased language code.
    url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
    return get_url(url)
116 def _check_sanity(lang, phrase_class, phrase_type, pattern):
118 Check sanity of given inputs in case somebody added garbage in the wiki.
119 If a bad class/type is detected the system will exit with an error.
122 if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
125 LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
def _process_amenity(db_connection, phrase_label, normalized_label,
                     phrase_class, phrase_type, phrase_operator):
    # pylint: disable-msg=too-many-arguments
    """
        Add phrase lookup and corresponding class and type to the word table based on the operator.

        Phrases with the operator 'near' or 'in' are added through
        getorcreate_amenityoperator(), all other phrases through
        getorcreate_amenity().
    """
    phrase_args = (phrase_label, normalized_label, phrase_class, phrase_type)

    with db_connection.cursor() as db_cursor:
        if phrase_operator in ('near', 'in'):
            # The operator is passed as a query parameter, it is one of the
            # two known values at this point.
            db_cursor.execute("""SELECT getorcreate_amenityoperator(
                                 make_standard_name(%s), %s, %s, %s, %s)""",
                              phrase_args + (phrase_operator,))
        else:
            db_cursor.execute("""SELECT getorcreate_amenity(
                                 make_standard_name(%s), %s, %s, %s)""",
                              phrase_args)
def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
    """
        Create table place_classtype for each given pair.
        Also create indexes on place_id and centroid.

        `pairs` is a dict whose values are (class, type) tuples. The tables
        and indexes are put into the tablespace TABLESPACE_AUX_DATA when
        one is configured.
    """
    LOG.warning('Create tables and indexes...')

    # Always hand a string to the helpers: a ready-made SQL fragment when a
    # tablespace is configured, the empty string otherwise.
    tablespace = config.TABLESPACE_AUX_DATA
    sql_tablespace = ' TABLESPACE ' + tablespace if tablespace else ''

    # Temporary index on the two columns the table creation filters on.
    with db_connection.cursor() as db_cursor:
        db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")

    for phrase_class, phrase_type in pairs.values():
        # Table creation
        _create_place_classtype_table(
            db_connection, sql_tablespace, phrase_class, phrase_type
        )

        # Indexes creation
        _create_place_classtype_indexes(
            db_connection, sql_tablespace, phrase_class, phrase_type
        )

        # Grant access on read to the web user.
        _grant_access_to_webuser(
            db_connection, config, phrase_class, phrase_type
        )

    # The temporary index is only needed while the tables are created.
    with db_connection.cursor() as db_cursor:
        db_cursor.execute("DROP INDEX idx_placex_classtype")
def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
    """
        Create table place_classtype of the given phrase_class/phrase_type
        if it doesn't exist.

        The table holds place_id and centroid of all placex rows with the
        given class and type. `sql_tablespace` is inserted verbatim into the
        statement, so it must be a trusted SQL fragment or empty.
    """
    table_name = Identifier(f'place_classtype_{phrase_class}_{phrase_type}')
    # Compose the fragment as SQL instead of formatting it into the template:
    # an unset value then yields nothing rather than the text 'None'.
    tablespace = SQL(sql_tablespace or '')

    statement = SQL("""
        CREATE TABLE IF NOT EXISTS {} {}
        AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
        WHERE class = {} AND type = {}""").format(
            table_name, tablespace, Literal(phrase_class), Literal(phrase_type))

    with db_connection.cursor() as db_cursor:
        db_cursor.execute(statement)
def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
    """
        Create indexes on centroid and place_id for the place_classtype table.

        An index is only created when `db_connection.index_exists()` does
        not report it yet. `sql_tablespace` is inserted verbatim into the
        statements, so it must be a trusted SQL fragment or empty.
    """
    base_name = f'{phrase_class}_{phrase_type}'
    table_name = Identifier(f'place_classtype_{base_name}')
    tablespace = SQL(sql_tablespace or '')

    # Index name suffix and the statement creating the index, in creation order.
    index_statements = (
        ('centroid', SQL("CREATE INDEX {} ON {} USING GIST (centroid) {}")),
        ('place_id', SQL("CREATE INDEX {} ON {} USING btree(place_id) {}")),
    )

    for suffix, statement in index_statements:
        index_name = f'idx_place_classtype_{base_name}_{suffix}'
        if db_connection.index_exists(index_name):
            continue
        with db_connection.cursor() as db_cursor:
            db_cursor.execute(statement.format(Identifier(index_name),
                                               table_name, tablespace))
def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
    """
        Give the web user (DATABASE_WEBUSER) read access to the
        place_classtype table of the given phrase_class/phrase_type.
    """
    table_name = Identifier(f'place_classtype_{phrase_class}_{phrase_type}')
    web_user = Identifier(config.DATABASE_WEBUSER)
    grant_statement = SQL("""GRANT SELECT ON {} TO {}""").format(table_name, web_user)

    with db_connection.cursor() as db_cursor:
        db_cursor.execute(grant_statement)
232 def _convert_php_settings_if_needed(args, file_path):
234 Convert php settings file of special phrases to json file if it is still in php format.
236 file, extension = os.path.splitext(file_path)
237 json_file_path = f'{file}.json'
238 if extension == '.php' and not isfile(json_file_path):
240 subprocess.run(['/usr/bin/env', 'php', '-Cq',
241 args.phplib_dir / 'migration/phraseSettingsToJson.php',
242 file_path], check=True)
243 LOG.warning('special_phrase configuration file has been converted to json.')
244 return json_file_path
245 except subprocess.CalledProcessError:
246 LOG.error('Error while converting %s to json.', file_path)
249 return json_file_path