From: Sarah Hoffmann Date: Sun, 19 Sep 2021 13:59:44 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~155 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/c2bdda8895963cece639a9e93ceff31465b518b5?hp=754846d9dc125e3caa8a97b8381918b565a55289 Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index bac20d98..b70ea80f 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -185,7 +185,7 @@ jobs: - name: Prepare container (CentOS) run: | dnf update -y - dnf install -y sudo + dnf install -y sudo glibc-langpack-en shell: bash if: matrix.flavour == 'centos' diff --git a/README.md b/README.md index b643088b..d5041d46 100644 --- a/README.md +++ b/README.md @@ -20,14 +20,6 @@ https://nominatim.org/release-docs/develop/ . Installation ============ -**Nominatim is a complex piece of software and runs in a complex environment. -Installing and running Nominatim is something for experienced system -administrators only who can do some trouble-shooting themselves. We are sorry, -but we can not provide installation support. We are all doing this in our free -time and there is just so much of that time to go around. Do not open issues in -our bug tracker if you need help. Use the discussions forum -or ask for help on [help.openstreetmap.org](https://help.openstreetmap.org/).** - The latest stable release can be downloaded from https://nominatim.org. There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/). 
diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql index b6c552c4..3ea72cb1 100644 --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@ -223,11 +223,13 @@ BEGIN OR placex.country_code = place.country_code) ORDER BY rank_address desc, (place_addressline.place_id = in_place_id) desc, - (fromarea and place.centroid is not null and not isaddress - and (place.address is null or avals(name) && avals(place.address)) - and ST_Contains(geometry, place.centroid)) desc, - isaddress desc, fromarea desc, - distance asc, rank_search desc + (CASE WHEN coalesce((avals(name) && avals(place.address)), False) THEN 2 + WHEN isaddress THEN 0 + WHEN fromarea + and place.centroid is not null + and ST_Contains(geometry, place.centroid) THEN 1 + ELSE -1 END) desc, + fromarea desc, distance asc, rank_search desc LOOP -- RAISE WARNING '%',location; location_isaddress := location.rank_address != current_rank_address; diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py index 7e2f6fc3..a3fbb846 100644 --- a/nominatim/clicmd/setup.py +++ b/nominatim/clicmd/setup.py @@ -55,7 +55,7 @@ class SetupAll: from ..tools import database_import, refresh, postcodes, freeze, country_info from ..indexer.indexer import Indexer - country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml') + country_info.setup_country_config(args.config) if args.continue_at is None: files = args.get_osm_file_list() diff --git a/nominatim/config.py b/nominatim/config.py index a8436440..64614bf1 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -4,6 +4,7 @@ Nominatim configuration accessor. 
import logging import os from pathlib import Path +import yaml from dotenv import dotenv_values @@ -53,7 +54,10 @@ class Configuration: def __getattr__(self, name): name = 'NOMINATIM_' + name - return self.environ.get(name) or self._config[name] + if name in self.environ: + return self.environ[name] + + return self._config[name] def get_bool(self, name): """ Return the given configuration parameter as a boolean. @@ -114,3 +118,93 @@ class Configuration: env.update(self.environ) return env + + + def load_sub_configuration(self, filename, config=None): + """ Load additional configuration from a file. `filename` is the name + of the configuration file. The file is first searched in the + project directory and then in the global settings directory. + + If `config` is set, then the name of the configuration file can + be additionally given through a .env configuration option. When + the option is set, then the file will be exclusively loaded as set: + if the name is an absolute path, the file name is taken as is, + if the name is relative, it is taken to be relative to the + project directory. + + The format of the file is determined from the filename suffix. + Currently only files with extension '.yaml' are supported. + + YAML files support a special '!include' construct. When the + directive is given, the value is taken to be a filename, the file + is loaded using this function and added at the position in the + configuration tree. + """ + assert Path(filename).suffix == '.yaml' + + configfile = self._find_config_file(filename, config) + + return self._load_from_yaml(configfile) + + + def _find_config_file(self, filename, config=None): + """ Resolve the location of a configuration file given a filename and + an optional configuration option with the file name. + Raises a UsageError when the file cannot be found or is not + a regular file. 
+ """ + if config is not None: + cfg_filename = self.__getattr__(config) + if cfg_filename: + cfg_filename = Path(cfg_filename) + + if cfg_filename.is_absolute(): + cfg_filename = cfg_filename.resolve() + + if not cfg_filename.is_file(): + LOG.fatal("Cannot find config file '%s'.", cfg_filename) + raise UsageError("Config file not found.") + + return cfg_filename + + filename = cfg_filename + + + search_paths = [self.project_dir, self.config_dir] + for path in search_paths: + if path is not None and (path / filename).is_file(): + return path / filename + + LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s", + filename, search_paths) + raise UsageError("Config file not found.") + + + def _load_from_yaml(self, cfgfile): + """ Load a YAML configuration file. This installs a special handler that + allows including other YAML files using the '!include' operator. + """ + yaml.add_constructor('!include', self._yaml_include_representer, + Loader=yaml.SafeLoader) + return yaml.safe_load(cfgfile.read_text(encoding='utf-8')) + + + def _yaml_include_representer(self, loader, node): + """ Handler for the '!include' operator in YAML files. + + When the filename is relative, then the file is first searched in the + project directory and then in the global settings directory. 
+ """ + fname = loader.construct_scalar(node) + + if Path(fname).is_absolute(): + configfile = Path(fname) + else: + configfile = self._find_config_file(loader.construct_scalar(node)) + + if configfile.suffix != '.yaml': + LOG.fatal("Format error while reading '%s': only YAML format supported.", + configfile) + raise UsageError("Cannot handle config file format.") + + return yaml.safe_load(configfile.read_text(encoding='utf-8')) diff --git a/nominatim/tokenizer/icu_rule_loader.py b/nominatim/tokenizer/icu_rule_loader.py index b408f1c3..0e6e40b4 100644 --- a/nominatim/tokenizer/icu_rule_loader.py +++ b/nominatim/tokenizer/icu_rule_loader.py @@ -4,10 +4,8 @@ Helper class to create ICU rules from a configuration file. import io import logging import itertools -from pathlib import Path import re -import yaml from icu import Transliterator from nominatim.errors import UsageError @@ -15,17 +13,17 @@ import nominatim.tokenizer.icu_variants as variants LOG = logging.getLogger() -def _flatten_yaml_list(content): +def _flatten_config_list(content): if not content: return [] if not isinstance(content, list): - raise UsageError("List expected in ICU yaml configuration.") + raise UsageError("List expected in ICU configuration.") output = [] for ele in content: if isinstance(ele, list): - output.extend(_flatten_yaml_list(ele)) + output.extend(_flatten_config_list(ele)) else: output.append(ele) @@ -48,14 +46,12 @@ class ICURuleLoader: """ Compiler for ICU rules from a tokenizer configuration file. 
""" - def __init__(self, configfile): - self.configfile = configfile + def __init__(self, rules): self.variants = set() - if configfile.suffix == '.yaml': - self._load_from_yaml() - else: - raise UsageError("Unknown format of tokenizer configuration.") + self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization') + self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration') + self._parse_variant_list(self._get_section(rules, 'variants')) def get_search_rules(self): @@ -88,34 +84,14 @@ class ICURuleLoader: """ return self.variants - def _yaml_include_representer(self, loader, node): - value = loader.construct_scalar(node) - - if Path(value).is_absolute(): - content = Path(value) - else: - content = (self.configfile.parent / value) - - return yaml.safe_load(content.read_text(encoding='utf-8')) - - - def _load_from_yaml(self): - yaml.add_constructor('!include', self._yaml_include_representer, - Loader=yaml.SafeLoader) - rules = yaml.safe_load(self.configfile.read_text(encoding='utf-8')) - - self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization') - self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration') - self._parse_variant_list(self._get_section(rules, 'variants')) - - def _get_section(self, rules, section): + @staticmethod + def _get_section(rules, section): """ Get the section named 'section' from the rules. If the section does not exist, raise a usage error with a meaningful message. 
""" if section not in rules: - LOG.fatal("Section '%s' not found in tokenizer config '%s'.", - section, str(self.configfile)) + LOG.fatal("Section '%s' not found in tokenizer config.", section) raise UsageError("Syntax error in tokenizer configuration file.") return rules[section] @@ -133,7 +109,7 @@ class ICURuleLoader: if content is None: return '' - return ';'.join(_flatten_yaml_list(content)) + ';' + return ';'.join(_flatten_config_list(content)) + ';' def _parse_variant_list(self, rules): @@ -142,7 +118,7 @@ class ICURuleLoader: if not rules: return - rules = _flatten_yaml_list(rules) + rules = _flatten_config_list(rules) vmaker = _VariantMaker(self.normalization_rules) diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index cb411204..61263678 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -8,7 +8,6 @@ import json import logging import re from textwrap import dedent -from pathlib import Path from nominatim.db.connection import connect from nominatim.db.properties import set_property, get_property @@ -49,12 +48,8 @@ class LegacyICUTokenizer(AbstractTokenizer): This copies all necessary data in the project directory to make sure the tokenizer remains stable even over updates. """ - if config.TOKENIZER_CONFIG: - cfgfile = Path(config.TOKENIZER_CONFIG) - else: - cfgfile = config.config_dir / 'icu_tokenizer.yaml' - - loader = ICURuleLoader(cfgfile) + loader = ICURuleLoader(config.load_sub_configuration('icu_tokenizer.yaml', + config='TOKENIZER_CONFIG')) self.naming_rules = ICUNameProcessorRules(loader=loader) self.term_normalization = config.TERM_NORMALIZATION self.max_word_frequency = config.MAX_WORD_FREQUENCY diff --git a/nominatim/tools/country_info.py b/nominatim/tools/country_info.py index 897ac220..e04a8693 100644 --- a/nominatim/tools/country_info.py +++ b/nominatim/tools/country_info.py @@ -2,7 +2,6 @@ Functions for importing and managing static country information. 
""" import psycopg2.extras -import yaml from nominatim.db import utils as db_utils from nominatim.db.connection import connect @@ -14,12 +13,12 @@ class _CountryInfo: def __init__(self): self._info = {} - def load(self, configfile): + def load(self, config): """ Load the country properties from the configuration files, if they are not loaded yet. """ if not self._info: - self._info = yaml.safe_load(configfile.read_text(encoding='utf-8')) + self._info = config.load_sub_configuration('country_settings.yaml') def items(self): """ Return tuples of (country_code, property dict) as iterable. @@ -29,12 +28,12 @@ class _CountryInfo: _COUNTRY_INFO = _CountryInfo() -def setup_country_config(configfile): +def setup_country_config(config): """ Load country properties from the configuration file. Needs to be called before using any other functions in this file. """ - _COUNTRY_INFO.load(configfile) + _COUNTRY_INFO.load(config) def setup_country_tables(dsn, sql_dir, ignore_partitions=False): diff --git a/test/bdd/db/import/addressing.feature b/test/bdd/db/import/addressing.feature index b6345baf..b2437d71 100644 --- a/test/bdd/db/import/addressing.feature +++ b/test/bdd/db/import/addressing.feature @@ -433,3 +433,76 @@ Feature: Address computation Then results contain | osm | display_name | | N2 | Leftside, Wonderway, Right | + + + Scenario: POIs can correct address parts on the fly (with partial unmatching address) + Given the grid + | 1 | | | | 2 | | 5 | + | | | | 9 | | 8 | | + | | 10| 11| | | 12| | + | 4 | | | | 3 | | 6 | + And the places + | osm | class | type | admin | name | geometry | + | R1 | boundary | administrative | 8 | Left | (1,2,3,4,1) | + | R2 | boundary | administrative | 8 | Right | (2,3,6,5,2) | + And the places + | osm | class | type | name | geometry | + | W1 | highway | primary | Wonderway | 10,11,12 | + And the places + | osm | class | type | name | addr+suburb | geometry | + | N1 | amenity | cafe | Bolder | Boring | 9 | + | N2 | amenity | cafe | Leftside | 
Boring | 8 | + When importing + Then place_addressline contains + | object | address | isaddress | + | W1 | R1 | True | + | W1 | R2 | False | + And place_addressline doesn't contain + | object | address | + | N1 | R1 | + | N2 | R2 | + When sending search query "Bolder" + Then results contain + | osm | display_name | + | N1 | Bolder, Wonderway, Left | + When sending search query "Leftside" + Then results contain + | osm | display_name | + | N2 | Leftside, Wonderway, Right | + + + + Scenario: POIs can correct address parts on the fly (with partial matching address) + Given the grid + | 1 | | | | 2 | | 5 | + | | | | 9 | | 8 | | + | | 10| 11| | | 12| | + | 4 | | | | 3 | | 6 | + And the places + | osm | class | type | admin | name | geometry | + | R1 | boundary | administrative | 8 | Left | (1,2,3,4,1) | + | R2 | boundary | administrative | 8 | Right | (2,3,6,5,2) | + And the places + | osm | class | type | name | geometry | + | W1 | highway | primary | Wonderway | 10,11,12 | + And the places + | osm | class | type | name | addr+state | geometry | + | N1 | amenity | cafe | Bolder | Left | 9 | + | N2 | amenity | cafe | Leftside | Left | 8 | + When importing + Then place_addressline contains + | object | address | isaddress | + | W1 | R1 | True | + | W1 | R2 | False | + And place_addressline doesn't contain + | object | address | + | N1 | R1 | + | N2 | R2 | + When sending search query "Bolder" + Then results contain + | osm | display_name | + | N1 | Bolder, Wonderway, Left | + When sending search query "Leftside" + Then results contain + | osm | display_name | + | N2 | Leftside, Wonderway, Left | diff --git a/test/python/test_config.py b/test/python/test_config.py index 6729f954..8b5cb11b 100644 --- a/test/python/test_config.py +++ b/test/python/test_config.py @@ -15,6 +15,20 @@ def make_config(src_dir): return _mk_config +@pytest.fixture +def make_config_path(src_dir, tmp_path): + """ Create a configuration object with project and config directories + in a temporary 
directory. + """ + def _mk_config(): + (tmp_path / 'project').mkdir() + (tmp_path / 'config').mkdir() + conf = Configuration(tmp_path / 'project', src_dir / 'settings') + conf.config_dir = tmp_path / 'config' + return conf + + return _mk_config + def test_no_project_dir(make_config): config = make_config() @@ -43,6 +57,17 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa assert config.DATABASE_WEBUSER == 'nobody' +def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path): + envfile = tmp_path / '.env' + envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') + + monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '') + + config = make_config(tmp_path) + + assert config.DATABASE_WEBUSER == '' + + def test_get_os_env_add_defaults(make_config, monkeypatch): config = make_config() @@ -158,3 +183,151 @@ def test_get_import_style_extern(make_config, monkeypatch, value): monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value) assert str(config.get_import_style_file()) == value + + +def test_load_subconf_from_project_dir(make_config_path): + config = make_config_path() + + testfile = config.project_dir / 'test.yaml' + testfile.write_text('cow: muh\ncat: miau\n') + + testfile = config.config_dir / 'test.yaml' + testfile.write_text('cow: miau\ncat: muh\n') + + rules = config.load_sub_configuration('test.yaml') + + assert rules == dict(cow='muh', cat='miau') + + +def test_load_subconf_from_settings_dir(make_config_path): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text('cow: muh\ncat: miau\n') + + rules = config.load_sub_configuration('test.yaml') + + assert rules == dict(cow='muh', cat='miau') + + +def test_load_subconf_empty_env_conf(make_config_path, monkeypatch): + monkeypatch.setenv('NOMINATIM_MY_CONFIG', '') + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text('cow: muh\ncat: miau\n') + + rules = 
config.load_sub_configuration('test.yaml', config='MY_CONFIG') + + assert rules == dict(cow='muh', cat='miau') + + +def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path): + monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) + config = make_config_path() + + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n') + + rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') + + assert rules == dict(dog='muh', frog='miau') + + +def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_path): + monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml')) + config = make_config_path() + + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + + with pytest.raises(UsageError, match='Config file not found.'): + rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') + + +@pytest.mark.parametrize("location", ['project_dir', 'config_dir']) +def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location): + monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') + config = make_config_path() + + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + (getattr(config, location) / 'other.yaml').write_text('dog: bark\n') + + rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') + + assert rules == dict(dog='bark') + + +def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch): + monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml') + config = make_config_path() + + (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n') + + with pytest.raises(UsageError, match='Config file not found.'): + rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG') + + +def test_load_subconf_not_found(make_config_path): + config = make_config_path() + + with pytest.raises(UsageError, match='Config 
file not found.'): + rules = config.load_sub_configuration('test.yaml') + + +def test_load_subconf_include_absolute(make_config_path, tmp_path): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n') + (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n') + + rules = config.load_sub_configuration('test.yaml') + + assert rules == dict(base=dict(first=1, second=2)) + + +@pytest.mark.parametrize("location", ['project_dir', 'config_dir']) +def test_load_subconf_include_relative(make_config_path, tmp_path, location): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text(f'base: !include inc.yaml\n') + (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n') + + rules = config.load_sub_configuration('test.yaml') + + assert rules == dict(base=dict(first=1, second=2)) + + +def test_load_subconf_include_bad_format(make_config_path): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text(f'base: !include inc.txt\n') + (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n') + + with pytest.raises(UsageError, match='Cannot handle config file format.'): + rules = config.load_sub_configuration('test.yaml') + + +def test_load_subconf_include_not_found(make_config_path): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text(f'base: !include inc.txt\n') + + with pytest.raises(UsageError, match='Config file not found.'): + rules = config.load_sub_configuration('test.yaml') + + +def test_load_subconf_include_recursive(make_config_path): + config = make_config_path() + + testfile = config.config_dir / 'test.yaml' + testfile.write_text(f'base: !include inc.yaml\n') + (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n') + (config.config_dir / 'more.yaml').write_text('- the end\n') + + rules = 
config.load_sub_configuration('test.yaml') + + assert rules == dict(base=[['the end'], 'upper']) diff --git a/test/python/test_tokenizer_icu.py b/test/python/test_tokenizer_icu.py index 5ec434b6..b7101c3f 100644 --- a/test/python/test_tokenizer_icu.py +++ b/test/python/test_tokenizer_icu.py @@ -67,13 +67,10 @@ def analyzer(tokenizer_factory, test_config, monkeypatch, def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',), variants=('~gasse -> gasse', 'street => st', )): - cfgfile = tmp_path / 'analyser_test_config.yaml' - with cfgfile.open('w') as stream: - cfgstr = {'normalization' : list(norm), - 'transliteration' : list(trans), - 'variants' : [ {'words': list(variants)}]} - yaml.dump(cfgstr, stream) - tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgfile)) + cfgstr = {'normalization' : list(norm), + 'transliteration' : list(trans), + 'variants' : [ {'words': list(variants)}]} + tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgstr)) return tok.name_analyzer() diff --git a/test/python/test_tokenizer_icu_name_processor.py b/test/python/test_tokenizer_icu_name_processor.py index cc103116..ae05988a 100644 --- a/test/python/test_tokenizer_icu_name_processor.py +++ b/test/python/test_tokenizer_icu_name_processor.py @@ -4,6 +4,7 @@ Tests for import name normalisation and variant generation. 
from textwrap import dedent import pytest +import yaml from nominatim.tokenizer.icu_rule_loader import ICURuleLoader from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules @@ -11,7 +12,7 @@ from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProc from nominatim.errors import UsageError @pytest.fixture -def cfgfile(tmp_path, suffix='.yaml'): +def cfgfile(): def _create_config(*variants, **kwargs): content = dedent("""\ normalization: @@ -29,9 +30,7 @@ def cfgfile(tmp_path, suffix='.yaml'): content += '\n'.join((" - " + s for s in variants)) + '\n' for k, v in kwargs: content += " {}: {}\n".format(k, v) - fpath = tmp_path / ('test_config' + suffix) - fpath.write_text(dedent(content)) - return fpath + return yaml.safe_load(content) return _create_config diff --git a/test/python/test_tokenizer_icu_rule_loader.py b/test/python/test_tokenizer_icu_rule_loader.py index bb30dc6e..c3480de8 100644 --- a/test/python/test_tokenizer_icu_rule_loader.py +++ b/test/python/test_tokenizer_icu_rule_loader.py @@ -1,16 +1,18 @@ """ Tests for converting a config file to ICU rules. 
""" -import pytest from textwrap import dedent +import pytest +import yaml + from nominatim.tokenizer.icu_rule_loader import ICURuleLoader from nominatim.errors import UsageError from icu import Transliterator @pytest.fixture -def cfgfile(tmp_path, suffix='.yaml'): +def cfgrules(): def _create_config(*variants, **kwargs): content = dedent("""\ normalization: @@ -27,22 +29,19 @@ def cfgfile(tmp_path, suffix='.yaml'): content += '\n'.join((" - " + s for s in variants)) + '\n' for k, v in kwargs: content += " {}: {}\n".format(k, v) - fpath = tmp_path / ('test_config' + suffix) - fpath.write_text(dedent(content)) - return fpath + return yaml.safe_load(content) return _create_config -def test_empty_rule_file(tmp_path): - fpath = tmp_path / ('test_config.yaml') - fpath.write_text(dedent("""\ +def test_empty_rule_set(): + rule_cfg = yaml.safe_load(dedent("""\ normalization: transliteration: variants: """)) - rules = ICURuleLoader(fpath) + rules = ICURuleLoader(rule_cfg) assert rules.get_search_rules() == '' assert rules.get_normalization_rules() == '' assert rules.get_transliteration_rules() == '' @@ -51,19 +50,15 @@ def test_empty_rule_file(tmp_path): CONFIG_SECTIONS = ('normalization', 'transliteration', 'variants') @pytest.mark.parametrize("section", CONFIG_SECTIONS) -def test_missing_normalization(tmp_path, section): - fpath = tmp_path / ('test_config.yaml') - with fpath.open('w') as fd: - for name in CONFIG_SECTIONS: - if name != section: - fd.write(name + ':\n') +def test_missing_section(section): + rule_cfg = { s: {} for s in CONFIG_SECTIONS if s != section} with pytest.raises(UsageError): - ICURuleLoader(fpath) + ICURuleLoader(rule_cfg) -def test_get_search_rules(cfgfile): - loader = ICURuleLoader(cfgfile()) +def test_get_search_rules(cfgrules): + loader = ICURuleLoader(cfgrules()) rules = loader.get_search_rules() trans = Transliterator.createFromRules("test", rules) @@ -77,23 +72,24 @@ def test_get_search_rules(cfgfile): assert trans.transliterate(" проспект ") 
== " prospekt " -def test_get_normalization_rules(cfgfile): - loader = ICURuleLoader(cfgfile()) +def test_get_normalization_rules(cfgrules): + loader = ICURuleLoader(cfgrules()) rules = loader.get_normalization_rules() trans = Transliterator.createFromRules("test", rules) assert trans.transliterate(" проспект-Prospekt ") == " проспект prospekt " -def test_get_transliteration_rules(cfgfile): - loader = ICURuleLoader(cfgfile()) +def test_get_transliteration_rules(cfgrules): + loader = ICURuleLoader(cfgrules()) rules = loader.get_transliteration_rules() trans = Transliterator.createFromRules("test", rules) assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt " -def test_transliteration_rules_from_file(tmp_path): +def test_transliteration_rules_from_file(def_config, tmp_path): + def_config.project_dir = tmp_path cfgpath = tmp_path / ('test_config.yaml') cfgpath.write_text(dedent("""\ normalization: @@ -105,7 +101,7 @@ def test_transliteration_rules_from_file(tmp_path): transpath = tmp_path / ('transliteration.yaml') transpath.write_text('- "x > y"') - loader = ICURuleLoader(cfgpath) + loader = ICURuleLoader(def_config.load_sub_configuration('test_config.yaml')) rules = loader.get_transliteration_rules() trans = Transliterator.createFromRules("test", rules) @@ -115,11 +111,11 @@ def test_transliteration_rules_from_file(tmp_path): class TestGetReplacements: @pytest.fixture(autouse=True) - def setup_cfg(self, cfgfile): - self.cfgfile = cfgfile + def setup_cfg(self, cfgrules): + self.cfgrules = cfgrules def get_replacements(self, *variants): - loader = ICURuleLoader(self.cfgfile(*variants)) + loader = ICURuleLoader(self.cfgrules(*variants)) rules = loader.get_replacement_pairs() return set((v.source, v.replacement) for v in rules) @@ -129,7 +125,7 @@ class TestGetReplacements: '~foo~ -> bar', 'fo~ o -> bar']) def test_invalid_variant_description(self, variant): with pytest.raises(UsageError): - ICURuleLoader(self.cfgfile(variant)) + 
ICURuleLoader(self.cfgrules(variant)) def test_add_full(self): repl = self.get_replacements("foo -> bar") diff --git a/test/python/test_tools_country_info.py b/test/python/test_tools_country_info.py index 59737769..66f785c2 100644 --- a/test/python/test_tools_country_info.py +++ b/test/python/test_tools_country_info.py @@ -8,7 +8,7 @@ from nominatim.tools import country_info @pytest.fixture(autouse=True) def read_config(def_config): - country_info.setup_country_config(def_config.config_dir / 'country_settings.yaml') + country_info.setup_country_config(def_config) @pytest.mark.parametrize("no_partitions", (True, False)) def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cursor,