2 Functions to import special phrases into the database.
7 from psycopg2.sql import Identifier, Literal, SQL
8 from settings.phrase_settings import BLACK_LIST, WHITE_LIST
9 from nominatim.tools.exec_utils import get_url
11 LOG = logging.getLogger()
13 def import_from_wiki(config, db_connection, languages=None):
15 Iterate through all specified languages and
16 extract corresponding special phrases from the wiki.
18 #Compile the match regex to increase performance for the following loop.
19 occurence_pattern = re.compile(
20 r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
22 sanity_check_pattern = re.compile(r'^\w+$')
24 languages = _get_languages(config) if not languages else languages
26 #array for pairs of class/type
28 for lang in languages:
29 LOG.warning('Import phrases for lang: %s', lang)
30 wiki_page_xml_content = _get_wiki_content(lang)
31 #One match will be of format [label, class, type, operator, plural]
32 matches = occurence_pattern.findall(wiki_page_xml_content)
35 phrase_label = match[0].strip()
36 phrase_class = match[1].strip()
37 phrase_type = match[2].strip()
38 phrase_operator = match[3].strip()
39 #hack around a bug where building=yes was imported withq quotes into the wiki
40 phrase_type = re.sub(r'\"|"', '', phrase_type)
42 #sanity check, in case somebody added garbage in the wiki
43 _check_sanity(lang, phrase_class, phrase_type, sanity_check_pattern)
45 #blacklisting: disallow certain class/type combinations
46 if phrase_class in BLACK_LIST.keys() and phrase_type in BLACK_LIST[phrase_class]:
48 #whitelisting: if class is in whitelist, allow only tags in the list
49 if phrase_class in WHITE_LIST.keys() and phrase_type not in WHITE_LIST[phrase_class]:
52 #add class/type to the pairs dict
53 pairs[f'{phrase_class}|{phrase_type}'] = (phrase_class, phrase_type)
56 db_connection, phrase_label, phrase_class, phrase_type, phrase_operator
59 _create_place_classtype_table_and_indexes(db_connection, config, pairs)
60 db_connection.commit()
61 LOG.warning('Import done.')
64 def _get_languages(config):
66 Get list of all languages from env config file
67 or default if there is no languages configured.
68 The system will extract special phrases only from all specified languages.
71 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
72 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
73 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
74 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
75 return config.LANGUAGES or default_languages
78 def _get_wiki_content(lang):
80 Request and return the wiki page's content
81 corresponding to special phrases for a given lang.
82 Requested URL Example :
83 https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
85 url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
89 def _check_sanity(lang, phrase_class, phrase_type, pattern):
91 Check sanity of given inputs in case somebody added garbage in the wiki.
92 If a bad class/type is detected the system will exit with an error.
95 if len(pattern.findall(phrase_class)) < 1 or len(pattern.findall(phrase_type)) < 1:
98 LOG.error("Bad class/type for language %s: %s=%s", lang, phrase_class, phrase_type)
102 def _process_amenity(db_connection, phrase_label, phrase_class, phrase_type, phrase_operator):
104 Add phrase lookup and corresponding class and type to the word table based on the operator.
106 with db_connection.cursor() as db_cursor:
107 if phrase_operator == 'near':
108 db_cursor.execute("""SELECT getorcreate_amenityoperator(
109 make_standard_name(%s), %s, %s, 'near')""",
110 (phrase_label, phrase_class, phrase_type))
111 elif phrase_operator == 'in':
112 db_cursor.execute("""SELECT getorcreate_amenityoperator(
113 make_standard_name(%s), %s, %s, 'in')""",
114 (phrase_label, phrase_class, phrase_type))
116 db_cursor.execute("""SELECT getorcreate_amenity(
117 make_standard_name(%s), %s, %s)""",
118 (phrase_label, phrase_class, phrase_type))
121 def _create_place_classtype_table_and_indexes(db_connection, config, pairs):
123 Create table place_classtype for each given pair.
124 Also create indexes on place_id and centroid.
126 LOG.warning('Create tables and indexes...')
128 sql_tablespace = config.TABLESPACE_AUX_DATA
130 sql_tablespace = ' TABLESPACE '+sql_tablespace
132 with db_connection.cursor() as db_cursor:
133 db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)")
135 for _, pair in pairs.items():
136 phrase_class = pair[0]
137 phrase_type = pair[1]
140 _create_place_classtype_table(
141 db_connection, sql_tablespace, phrase_class, phrase_type
145 _create_place_classtype_indexes(
146 db_connection, sql_tablespace, phrase_class, phrase_type
149 #Grant access on read to the web user.
150 _grant_access_to_webuser(
151 db_connection, config, phrase_class, phrase_type
154 with db_connection.cursor() as db_cursor:
155 db_cursor.execute("DROP INDEX idx_placex_classtype")
158 def _create_place_classtype_table(db_connection, sql_tablespace, phrase_class, phrase_type):
160 Create table place_classtype of the given phrase_class/phrase_type if doesn't exit.
162 with db_connection.cursor() as db_cursor:
163 db_cursor.execute(SQL(f"""
164 CREATE TABLE IF NOT EXISTS {{}} {sql_tablespace}
165 AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex
166 WHERE class = {{}} AND type = {{}}""")
167 .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
168 Literal(phrase_class), Literal(phrase_type)))
171 def _create_place_classtype_indexes(db_connection, sql_tablespace, phrase_class, phrase_type):
173 Create indexes on centroid and place_id for the place_classtype table.
176 if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_centroid'):
177 with db_connection.cursor() as db_cursor:
178 db_cursor.execute(SQL(f"""
179 CREATE INDEX {{}} ON {{}} USING GIST (centroid) {sql_tablespace}""")
181 f"""idx_place_classtype_{phrase_class}_{phrase_type}_centroid"""),
182 Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
185 if not db_connection.index_exists(f'idx_place_classtype_{phrase_class}_{phrase_type}_place_id'):
186 with db_connection.cursor() as db_cursor:
187 db_cursor.execute(SQL(f"""
188 CREATE INDEX {{}} ON {{}} USING btree(place_id) {sql_tablespace}""")
190 f"""idx_place_classtype_{phrase_class}_{phrase_type}_place_id"""),
191 Identifier(f'place_classtype_{phrase_class}_{phrase_type}')))
194 def _grant_access_to_webuser(db_connection, config, phrase_class, phrase_type):
196 Grant access on read to the table place_classtype for the webuser.
198 with db_connection.cursor() as db_cursor:
199 db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""")
200 .format(Identifier(f'place_classtype_{phrase_class}_{phrase_type}'),
201 Identifier(config.DATABASE_WEBUSER)))