X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/9d83da830f349b746c72b1df2a7526d517552b72..c3788d765ed4e7ddf00794085af757714bc102cf:/nominatim/tools/special_phrases/sp_wiki_loader.py diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 4990eef2..2f698092 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -1,34 +1,38 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Module containing the SPWikiLoader class. """ import re import logging +from collections.abc import Iterator from nominatim.tools.special_phrases.special_phrase import SpecialPhrase -from nominatim.tools.special_phrases.sp_loader import SPLoader from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -class SPWikiLoader(SPLoader): +class SPWikiLoader(Iterator): """ Handles loading of special phrases from the wiki. """ def __init__(self, config, languages=None): - if languages is not None and not isinstance(languages, list): - raise TypeError('The \'languages\' parameter should be of type list.') super().__init__() self.config = config - #Compile the regex here to increase performances. + # Compile the regex here to increase performances. self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) - self.languages = self._load_languages() if not languages else languages + self.languages = self._load_languages() if not languages else list(languages) def __next__(self): if not self.languages: raise StopIteration lang = self.languages.pop(0) - loaded_xml = SPWikiLoader._get_wiki_content(lang) + loaded_xml = self._get_wiki_content(lang) LOG.warning('Importing phrases for lang: %s...', lang) return self.parse_xml(loaded_xml) @@ -37,7 +41,7 @@ class SPWikiLoader(SPLoader): Parses XML content and extracts special phrases from it. Return a list of SpecialPhrase. """ - #One match will be of format [label, class, type, operator, plural] + # One match will be of format [label, class, type, operator, plural] matches = self.occurence_pattern.findall(xml) returned_phrases = set() for match in matches: @@ -67,5 +71,6 @@ class SPWikiLoader(SPLoader): Requested URL Example : https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() return get_url(url)