X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/00959fac57d6c1b35d009e68ad857989ae71de6c..e0a7b94e49cb4c47d0a42f0d8108f826e9de59f3:/nominatim/tools/special_phrases/sp_wiki_loader.py diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 11e59b45..2f698092 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -1,34 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Module containing the SPWikiLoader class. """ -import logging import re +import logging +from collections.abc import Iterator from nominatim.tools.special_phrases.special_phrase import SpecialPhrase -from nominatim.tools.special_phrases.sp_loader import SPLoader from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -class SPWikiLoader(SPLoader): +class SPWikiLoader(Iterator): """ Handles loading of special phrases from the wiki. """ def __init__(self, config, languages=None): - if languages is not None and not isinstance(languages, list): - raise TypeError('The \'languages\' parameter should be of type list.') super().__init__() self.config = config - #Compile the regex here to increase performances. + # Compile the regex here to increase performances. self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) - self.languages = self._load_languages() if not languages else languages + self.languages = self._load_languages() if not languages else list(languages) def __next__(self): if not self.languages: raise StopIteration lang = self.languages.pop(0) - loaded_xml = SPWikiLoader._get_wiki_content(lang) + loaded_xml = self._get_wiki_content(lang) LOG.warning('Importing phrases for lang: %s...', lang) return self.parse_xml(loaded_xml) @@ -37,7 +41,7 @@ class SPWikiLoader(SPLoader): Parses XML content and extracts special phrases from it. Return a list of SpecialPhrase. """ - #One match will be of format [label, class, type, operator, plural] + # One match will be of format [label, class, type, operator, plural] matches = self.occurence_pattern.findall(xml) returned_phrases = set() for match in matches: @@ -67,5 +71,6 @@ class SPWikiLoader(SPLoader): Requested URL Example : https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() return get_url(url)