X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/042e31458917e83713cc60f73a0dc4c171db78b1..cce0e5ea38fe3466e157651e789554d99fbdc8fe:/nominatim/tools/special_phrases/sp_wiki_loader.py diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index b5f8db83..6093fa45 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -9,12 +9,24 @@ """ import re import logging -from collections.abc import Iterator from nominatim.tools.special_phrases.special_phrase import SpecialPhrase from nominatim.tools.exec_utils import get_url LOG = logging.getLogger() -class SPWikiLoader(Iterator): + +def _get_wiki_content(lang): + """ + Request and return the wiki page's content + corresponding to special phrases for a given lang. + Requested URL Example : + https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN + """ + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() + return get_url(url) + + +class SPWikiLoader: """ Handles loading of special phrases from the wiki. """ @@ -27,28 +39,21 @@ class SPWikiLoader(Iterator): ) self._load_languages() - def __next__(self): - if not self.languages: - raise StopIteration - - lang = self.languages.pop(0) - loaded_xml = self._get_wiki_content(lang) - LOG.warning('Importing phrases for lang: %s...', lang) - return self.parse_xml(loaded_xml) - def parse_xml(self, xml): - """ - Parses XML content and extracts special phrases from it. - Return a list of SpecialPhrase. + def generate_phrases(self): + """ Download the wiki pages for the configured languages + and extract the phrases from the page. """ - # One match will be of format [label, class, type, operator, plural] - matches = self.occurence_pattern.findall(xml) - returned_phrases = set() - for match in matches: - returned_phrases.add( - SpecialPhrase(match[0], match[1], match[2], match[3]) - ) - return returned_phrases + for lang in self.languages: + LOG.warning('Importing phrases for lang: %s...', lang) + loaded_xml = _get_wiki_content(lang) + + # One match will be of format [label, class, type, operator, plural] + matches = self.occurence_pattern.findall(loaded_xml) + + for match in matches: + yield SpecialPhrase(match[0], match[1], match[2], match[3]) + def _load_languages(self): """ @@ -64,15 +69,3 @@ class SPWikiLoader(Iterator): 'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu', 'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl', 'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi'] - - @staticmethod - def _get_wiki_content(lang): - """ - Request and return the wiki page's content - corresponding to special phrases for a given lang. - Requested URL Example : - https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN - """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ - + lang.upper() - return get_url(url)