X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/c64193f8391bd8649a03a4935ace31fbbd550726..c8fb25201a304f504c0749384740405f172b2fb6:/nominatim/tools/special_phrases.py diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py index 0c1258fe..f4eec260 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases.py @@ -3,20 +3,21 @@ """ import logging import os +from os.path import isfile from pathlib import Path import re import subprocess import json -from os.path import isfile + from icu import Transliterator from psycopg2.sql import Identifier, Literal, SQL + from nominatim.tools.exec_utils import get_url from nominatim.errors import UsageError LOG = logging.getLogger() class SpecialPhrasesImporter(): # pylint: disable-msg=too-many-instance-attributes - # pylint: disable-msg=too-few-public-methods """ Class handling the process of special phrases importations. """ @@ -27,7 +28,7 @@ class SpecialPhrasesImporter(): self.black_list, self.white_list = self._load_white_and_black_lists() #Compile the regex here to increase performances. self.occurence_pattern = re.compile( - r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])' + r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) self.sanity_check_pattern = re.compile(r'^\w+$') self.transliterator = Transliterator.createFromRules("special-phrases normalizer", @@ -116,7 +117,7 @@ class SpecialPhrasesImporter(): if self.config.PHRASE_CONFIG: settings_path = self._convert_php_settings_if_needed(self.config.PHRASE_CONFIG) - with open(settings_path, "r") as json_settings: + with settings_path.open("r") as json_settings: settings = json.load(json_settings) return settings['blackList'], settings['whiteList'] @@ -152,9 +153,11 @@ class SpecialPhrasesImporter(): type_matchs = self.sanity_check_pattern.findall(phrase_type) class_matchs = self.sanity_check_pattern.findall(phrase_class) - if len(class_matchs) < 1 or len(type_matchs) < 1: - raise UsageError("Bad class/type for language {}: {}={}".format( - lang, phrase_class, phrase_type)) + if not class_matchs or not type_matchs: + LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported", + lang, phrase_class, phrase_type) + return False + return True def _process_xml_content(self, xml_content, lang): """ @@ -205,7 +208,8 @@ class SpecialPhrasesImporter(): continue #sanity check, in case somebody added garbage in the wiki - self._check_sanity(lang, phrase_class, phrase_type) + if not self._check_sanity(lang, phrase_class, phrase_type): + continue class_type_pairs.add((phrase_class, phrase_type))