"""
import logging
import os
+from os.path import isfile
from pathlib import Path
import re
import subprocess
import json
-from os.path import isfile
+
from icu import Transliterator
from psycopg2.sql import Identifier, Literal, SQL
+
from nominatim.tools.exec_utils import get_url
from nominatim.errors import UsageError
LOG = logging.getLogger()
class SpecialPhrasesImporter():
# pylint: disable-msg=too-many-instance-attributes
- # pylint: disable-msg=too-few-public-methods
"""
Class handling the process of special phrases importations.
"""
self.black_list, self.white_list = self._load_white_and_black_lists()
#Compile the regex here to increase performances.
self.occurence_pattern = re.compile(
- r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
+ r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
self.sanity_check_pattern = re.compile(r'^\w+$')
self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
if self.config.PHRASE_CONFIG:
settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG)
- with open(settings_path, "r") as json_settings:
+ with settings_path.open("r") as json_settings:
settings = json.load(json_settings)
return settings['blackList'], settings['whiteList']
type_matchs = self.sanity_check_pattern.findall(phrase_type)
class_matchs = self.sanity_check_pattern.findall(phrase_class)
- if len(class_matchs) < 1 or len(type_matchs) < 1:
- raise UsageError("Bad class/type for language {}: {}={}".format(
- lang, phrase_class, phrase_type))
+ if not class_matchs or not type_matchs:
+ LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported",
+ lang, phrase_class, phrase_type)
+ return False
+ return True
def _process_xml_content(self, xml_content, lang):
"""
continue
#sanity check, in case somebody added garbage in the wiki
- self._check_sanity(lang, phrase_class, phrase_type)
+ if not self._check_sanity(lang, phrase_class, phrase_type):
+ continue
class_type_pairs.add((phrase_class, phrase_type))