"""
import csv
import os
-from collections.abc import Iterator
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.errors import UsageError
-class SPCsvLoader(Iterator):
+class SPCsvLoader:
"""
Handles loading of special phrases from external csv file.
"""
def __init__(self, csv_path):
super().__init__()
self.csv_path = csv_path
- self.has_been_read = False
- def __next__(self):
- if self.has_been_read:
- raise StopIteration()
- self.has_been_read = True
- self.check_csv_validity()
- return self.parse_csv()
-
- def parse_csv(self):
- """
- Open and parse the given csv file.
+ def generate_phrases(self):
+ """ Open and parse the given csv file.
Create the corresponding SpecialPhrases.
"""
- phrases = set()
+ self._check_csv_validity()
with open(self.csv_path, encoding='utf-8') as fd:
reader = csv.DictReader(fd, delimiter=',')
for row in reader:
- phrases.add(
- SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
- )
- return phrases
+ yield SpecialPhrase(row['phrase'], row['class'], row['type'], row['operator'])
+
- def check_csv_validity(self):
+ def _check_csv_validity(self):
"""
Check that the csv file has the right extension.
"""
# Store pairs of class/type for further processing
class_type_pairs = set()
- for loaded_phrases in self.sp_loader:
- for phrase in loaded_phrases:
- result = self._process_phrase(phrase)
- if result:
- class_type_pairs.add(result)
+ for phrase in self.sp_loader.generate_phrases():
+ result = self._process_phrase(phrase)
+ if result:
+ class_type_pairs.add(result)
self._create_place_classtype_table_and_indexes(class_type_pairs)
if should_replace:
"""
import re
import logging
-from collections.abc import Iterator
from nominatim.tools.special_phrases.special_phrase import SpecialPhrase
from nominatim.tools.exec_utils import get_url
LOG = logging.getLogger()
-class SPWikiLoader(Iterator):
+
+def _get_wiki_content(lang):
+ """
+ Request and return the wiki page's content
+ corresponding to special phrases for a given lang.
+ Requested URL Example :
+ https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
+ """
+ url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
+ + lang.upper()
+ return get_url(url)
+
+
+class SPWikiLoader:
"""
Handles loading of special phrases from the wiki.
"""
)
self._load_languages()
- def __next__(self):
- if not self.languages:
- raise StopIteration
-
- lang = self.languages.pop(0)
- loaded_xml = self._get_wiki_content(lang)
- LOG.warning('Importing phrases for lang: %s...', lang)
- return self.parse_xml(loaded_xml)
- def parse_xml(self, xml):
- """
- Parses XML content and extracts special phrases from it.
- Return a list of SpecialPhrase.
+ def generate_phrases(self):
+ """ Download the wiki pages for the configured languages
+ and extract the phrases from the page.
"""
- # One match will be of format [label, class, type, operator, plural]
- matches = self.occurence_pattern.findall(xml)
- returned_phrases = set()
- for match in matches:
- returned_phrases.add(
- SpecialPhrase(match[0], match[1], match[2], match[3])
- )
- return returned_phrases
+ for lang in self.languages:
+ LOG.warning('Importing phrases for lang: %s...', lang)
+ loaded_xml = _get_wiki_content(lang)
+
+ # One match will be of format [label, class, type, operator, plural]
+ matches = self.occurence_pattern.findall(loaded_xml)
+
+ for match in matches:
+ yield SpecialPhrase(match[0], match[1], match[2], match[3])
+
def _load_languages(self):
"""
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
-
- @staticmethod
- def _get_wiki_content(lang):
- """
- Request and return the wiki page's content
- corresponding to special phrases for a given lang.
- Requested URL Example :
- https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN
- """
- url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \
- + lang.upper()
- return get_url(url)
table_factory('place_classtype_amenity_animal_shelter')
table_factory('place_classtype_wrongclass_wrongtype')
- monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
- lambda self, lang: xml_wiki_content)
+ monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+ lambda lang: xml_wiki_content)
tokenizer = tokenizer_mock()
sp_importer.import_phrases(tokenizer, should_replace)
from nominatim.errors import UsageError
from nominatim.tools.special_phrases.sp_csv_loader import SPCsvLoader
-def test_parse_csv(sp_csv_loader):
+@pytest.fixture
+def sp_csv_loader(src_dir):
"""
- Test method parse_csv()
- Should return the right SpecialPhrase objects.
+ Return an instance of SPCsvLoader.
"""
- phrases = sp_csv_loader.parse_csv()
- assert check_phrases_content(phrases)
+ csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
+ loader = SPCsvLoader(csv_path)
+ return loader
+
-def test_next(sp_csv_loader):
+def test_generate_phrases(sp_csv_loader):
"""
- Test objects returned from the next() method.
- It should return all SpecialPhrases objects of
- the sp_csv_test.csv special phrases.
+ Test method generate_phrases()
+ Should return the right SpecialPhrase objects.
"""
- phrases = next(sp_csv_loader)
- assert check_phrases_content(phrases)
+ phrases = list(sp_csv_loader.generate_phrases())
+
+ assert len(phrases) == 41
+ assert len(set(phrases)) == 41
+
+ assert any(p.p_label == 'Billboard'
+ and p.p_class == 'advertising'
+ and p.p_type == 'billboard'
+ and p.p_operator == '-' for p in phrases)
+ assert any(p.p_label == 'Zip Lines'
+ and p.p_class == 'aerialway'
+ and p.p_type == 'zip_line'
+ and p.p_operator == '-' for p in phrases)
-def test_check_csv_validity(sp_csv_loader):
+
+def test_invalid_cvs_file():
"""
Test method generate_phrases() with an invalid file.
It should raise an exception when a file with a
different extension than .csv is given.
"""
- sp_csv_loader.csv_path = 'test.csv'
- sp_csv_loader.check_csv_validity()
- sp_csv_loader.csv_path = 'test.wrong'
- with pytest.raises(UsageError):
- assert sp_csv_loader.check_csv_validity()
-
-def check_phrases_content(phrases):
- """
- Asserts that the given phrases list contains
- the right phrases of the sp_csv_test.csv special phrases.
- """
- return len(phrases) > 1 \
- and any(p.p_label == 'Billboard'
- and p.p_class == 'advertising'
- and p.p_type == 'billboard'
- and p.p_operator == '-' for p in phrases) \
- and any(p.p_label == 'Zip Lines'
- and p.p_class == 'aerialway'
- and p.p_type == 'zip_line'
- and p.p_operator == '-' for p in phrases)
+ loader = SPCsvLoader('test.wrong')
-@pytest.fixture
-def sp_csv_loader(src_dir):
- """
- Return an instance of SPCsvLoader.
- """
- csv_path = (src_dir / 'test' / 'testdata' / 'sp_csv_test.csv').resolve()
- loader = SPCsvLoader(csv_path)
- return loader
+ with pytest.raises(UsageError, match='not a csv file'):
+ next(loader.generate_phrases())
"""
monkeypatch.setenv('NOMINATIM_LANGUAGES', 'en')
loader = SPWikiLoader(def_config)
- monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader.SPWikiLoader._get_wiki_content',
- lambda self, lang: xml_wiki_content)
+ monkeypatch.setattr('nominatim.tools.special_phrases.sp_wiki_loader._get_wiki_content',
+ lambda lang: xml_wiki_content)
return loader
-def test_parse_xml(sp_wiki_loader, xml_wiki_content):
- """
- Test method parse_xml()
- Should return the right SpecialPhrase objects.
- """
- phrases = sp_wiki_loader.parse_xml(xml_wiki_content)
- check_phrases_content(phrases)
-
-
-def test_next(sp_wiki_loader):
+def test_generate_phrases(sp_wiki_loader):
"""
Test objects returned from the generate_phrases() method.
It should return all SpecialPhrases objects of
the 'en' special phrases.
"""
- phrases = next(sp_wiki_loader)
+ phrases = list(sp_wiki_loader.generate_phrases())
check_phrases_content(phrases)
def check_phrases_content(phrases):