+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Module containing the SPWikiLoader class.
"""
"""
Handles loading of special phrases from the wiki.
"""
- def __init__(self, config, languages=None):
+ def __init__(self, config):
super().__init__()
self.config = config
- #Compile the regex here to increase performances.
+ # Compile the regex once here to improve performance.
self.occurence_pattern = re.compile(
r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
- self.languages = self._load_languages() if not languages else list(languages)
+ self._load_languages()
def __next__(self):
if not self.languages:
Parses XML content and extracts special phrases from it.
Return a list of SpecialPhrase.
"""
- #One match will be of format [label, class, type, operator, plural]
+ # One match will be of format [label, class, type, operator, plural]
matches = self.occurence_pattern.findall(xml)
returned_phrases = set()
for match in matches:
or the default list if no languages are configured.
The system will extract special phrases only for the specified languages.
"""
- default_languages = [
+ if self.config.LANGUAGES:
+ self.languages = self.config.get_str_list('LANGUAGES')
+ else:
+ self.languages = [
'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
@staticmethod
def _get_wiki_content(lang):
Example of a requested URL:
https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
"""
- url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
+ url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+ + lang.upper()
return get_url(url)