+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
"""
Module containing the SPWikiLoader class.
"""
+from typing import Iterable
import re
import logging
+
+from nominatim.config import Configuration
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
-from nominatim.tools.special_phrases.sp_loader import SPLoader
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-class SPWikiLoader(SPLoader):
+
+def _get_wiki_content(lang: str) -> str:
+    """ Fetch the special phrases wiki page for the language `lang`
+        and hand back what get_url() returns for it.
+
+        The language code is upper-cased to build the page name, e.g.
+        https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+    """
+    page_root = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/'
+    return get_url(page_root + lang.upper())
+
+
+class SPWikiLoader:
     """
     Handles loading of special phrases from the wiki.
     """
-    def __init__(self, config, languages=None):
-        if languages is not None and not isinstance(languages, list):
-            raise TypeError('The \'languages\' parameter should be of type list.')
-        super().__init__()
+    def __init__(self, config: Configuration) -> None:
+        """ Store the configuration, compile the regular expressions
+            used for parsing and determine the languages to import.
+        """
         self.config = config
-        #Compile the regex here to increase performances.
+        # Compile the regex once here; it is reused for every wiki page.
         self.occurence_pattern = re.compile(
             r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
         )
-        self.languages = self._load_languages() if not languages else languages
+        # Hack around a bug where building=yes was imported with quotes into the wiki.
+        # The page content comes from an XML export, where a quote may
+        # presumably also show up as the entity `&quot;` (verify against
+        # real export output), so both spellings are removed.
+        self.type_fix_pattern = re.compile(r'\"|&quot;')
-    def __next__(self):
-        if not self.languages:
-            raise StopIteration
+        # Languages come from the LANGUAGES setting; when that yields
+        # nothing, fall back to the built-in default list.
+        self.languages = self.config.get_str_list('LANGUAGES') or \
+            ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
+             'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
+             'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
+             'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
-        lang = self.languages.pop(0)
-        loaded_xml = SPWikiLoader._get_wiki_content(lang)
-        LOG.warning('Importing phrases for lang: %s...', lang)
-        return self.parse_xml(loaded_xml)
-    def parse_xml(self, xml):
+    def generate_phrases(self) -> Iterable[SpecialPhrase]:
+        """ Download the wiki pages for the configured languages
+            and extract the phrases from the page.
+
+            Yields one SpecialPhrase per table row matched on a page,
+            going through `self.languages` in order. Quotes are removed
+            from the type column before the phrase is created.
         """
-        Parses XML content and extracts special phrases from it.
-        Return a list of SpecialPhrase.
-        """
-        #One match will be of format [label, class, type, operator, plural]
-        matches = self.occurence_pattern.findall(xml)
-        returned_phrases = set()
-        for match in matches:
-            returned_phrases.add(
-                SpecialPhrase(match[0], match[1], match[2], match[3])
-            )
-        return returned_phrases
+        for lang in self.languages:
+            LOG.warning('Importing phrases for lang: %s...', lang)
+            loaded_xml = _get_wiki_content(lang)
-    def _load_languages(self):
-        """
-        Get list of all languages from env config file
-        or default if there is no languages configured.
-        The system will extract special phrases only from all specified languages.
-        """
-        default_languages = [
-            'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es',
-            'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
-            'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
-            'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
-        return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
+            # One match will be of format [label, class, type, operator, plural]
+            # The plural flag (match[4]) is captured but not passed on.
+            matches = self.occurence_pattern.findall(loaded_xml)
-    @staticmethod
-    def _get_wiki_content(lang):
-        """
-        Request and return the wiki page's content
-        corresponding to special phrases for a given lang.
-        Requested URL Example :
-        https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
-        """
-        url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long
-        return get_url(url)
+            for match in matches:
+                yield SpecialPhrase(match[0],
+                                    match[1],
+                                    self.type_fix_pattern.sub('', match[2]),
+                                    match[3])