X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/cce0e5ea38fe3466e157651e789554d99fbdc8fe..4be6970bd437db93e90bc46c2b5a0ddf14be1e5b:/nominatim/tools/special_phrases/sp_wiki_loader.py diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 6093fa45..cbeaeabf 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -7,14 +7,17 @@ """ Module containing the SPWikiLoader class. """ +from typing import Iterable import re import logging + +from nominatim.config import Configuration from nominatim.tools.special_phrases.special_phrase import SpecialPhrase from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -def _get_wiki_content(lang): +def _get_wiki_content(lang: str) -> str: """ Request and return the wiki page's content corresponding to special phrases for a given lang. @@ -30,17 +33,24 @@ class SPWikiLoader: """ Handles loading of special phrases from the wiki. """ - def __init__(self, config): - super().__init__() + def __init__(self, config: Configuration) -> None: self.config = config # Compile the regex here to increase performances. self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) - self._load_languages() + # Hack around a bug where building=yes was imported with quotes into the wiki + self.type_fix_pattern = re.compile(r'\"|"') + + self.languages = self.config.get_str_list('LANGUAGES') or \ + ['af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es', + 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu', + 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl', + 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi', + 'lv', 'tr'] - def generate_phrases(self): + def generate_phrases(self) -> Iterable[SpecialPhrase]: """ Download the wiki pages for the configured languages and extract the phrases from the page. """ @@ -52,20 +62,7 @@ class SPWikiLoader: matches = self.occurence_pattern.findall(loaded_xml) for match in matches: - yield SpecialPhrase(match[0], match[1], match[2], match[3]) - - - def _load_languages(self): - """ - Get list of all languages from env config file - or default if there is no languages configured. - The system will extract special phrases only from all specified languages. - """ - if self.config.LANGUAGES: - self.languages = self.config.get_str_list('LANGUAGES') - else: - self.languages = [ - 'af', 'ar', 'br', 'ca', 'cs', 'de', 'en', 'es', - 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu', - 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl', - 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi'] + yield SpecialPhrase(match[0], + match[1], + self.type_fix_pattern.sub('', match[2]), + match[3])