]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Sun, 19 Sep 2021 13:59:44 +0000 (15:59 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Sun, 19 Sep 2021 13:59:44 +0000 (15:59 +0200)
14 files changed:
.github/workflows/ci-tests.yml
README.md
lib-sql/functions/address_lookup.sql
nominatim/clicmd/setup.py
nominatim/config.py
nominatim/tokenizer/icu_rule_loader.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tools/country_info.py
test/bdd/db/import/addressing.feature
test/python/test_config.py
test/python/test_tokenizer_icu.py
test/python/test_tokenizer_icu_name_processor.py
test/python/test_tokenizer_icu_rule_loader.py
test/python/test_tools_country_info.py

index bac20d98094104501a744b78111bf6a6c528641e..b70ea80f1bf102127c9787eb712859a407cb32c7 100644 (file)
@@ -185,7 +185,7 @@ jobs:
             - name: Prepare container (CentOS)
               run: |
                   dnf update -y
-                  dnf install -y sudo
+                  dnf install -y sudo glibc-langpack-en
               shell: bash
               if: matrix.flavour == 'centos'
 
index b643088b3636db3e777c6a3d4bce681f17f02043..d5041d46b87480f6bad486551e92cb8059bd43a1 100644 (file)
--- a/README.md
+++ b/README.md
@@ -20,14 +20,6 @@ https://nominatim.org/release-docs/develop/ .
 Installation
 ============
 
-**Nominatim is a complex piece of software and runs in a complex environment.
-Installing and running Nominatim is something for experienced system
-administrators only who can do some trouble-shooting themselves. We are sorry,
-but we can not provide installation support. We are all doing this in our free
-time and there is just so much of that time to go around. Do not open issues in
-our bug tracker if you need help. Use the discussions forum
-or ask for help on [help.openstreetmap.org](https://help.openstreetmap.org/).**
-
 The latest stable release can be downloaded from https://nominatim.org.
 There you can also find [installation instructions for the release](https://nominatim.org/release-docs/latest/admin/Installation), as well as an extensive [Troubleshooting/FAQ section](https://nominatim.org/release-docs/latest/admin/Faq/).
 
index b6c552c492421a66d5254f5d1970ea72831ed1f4..3ea72cb190b5053d3e485c01ae27db645b2b3d38 100644 (file)
@@ -223,11 +223,13 @@ BEGIN
                  OR placex.country_code = place.country_code)
       ORDER BY rank_address desc,
                (place_addressline.place_id = in_place_id) desc,
-               (fromarea and place.centroid is not null and not isaddress
-                and (place.address is null or avals(name) && avals(place.address))
-                and ST_Contains(geometry, place.centroid)) desc,
-               isaddress desc, fromarea desc,
-               distance asc, rank_search desc
+               (CASE WHEN coalesce((avals(name) && avals(place.address)), False) THEN 2
+                     WHEN isaddress THEN 0
+                     WHEN fromarea
+                          and place.centroid is not null
+                          and ST_Contains(geometry, place.centroid) THEN 1
+                     ELSE -1 END) desc,
+               fromarea desc, distance asc, rank_search desc
   LOOP
     -- RAISE WARNING '%',location;
     location_isaddress := location.rank_address != current_rank_address;
index 7e2f6fc35f9cf0d6093b3860a0032044906c9d7c..a3fbb84610172815a5befca3818c51b5749814e8 100644 (file)
@@ -55,7 +55,7 @@ class SetupAll:
         from ..tools import database_import, refresh, postcodes, freeze, country_info
         from ..indexer.indexer import Indexer
 
-        country_info.setup_country_config(args.config.config_dir / 'country_settings.yaml')
+        country_info.setup_country_config(args.config)
 
         if args.continue_at is None:
             files = args.get_osm_file_list()
index a8436440b9f5ca78670ba9fe9e1cc8e3979ece96..64614bf14d7bd55f4a4c2a71f25cadc682ea5d65 100644 (file)
@@ -4,6 +4,7 @@ Nominatim configuration accessor.
 import logging
 import os
 from pathlib import Path
+import yaml
 
 from dotenv import dotenv_values
 
@@ -53,7 +54,10 @@ class Configuration:
     def __getattr__(self, name):
         name = 'NOMINATIM_' + name
 
-        return self.environ.get(name) or self._config[name]
+        if name in self.environ:
+            return self.environ[name]
+
+        return self._config[name]
 
     def get_bool(self, name):
         """ Return the given configuration parameter as a boolean.
@@ -114,3 +118,93 @@ class Configuration:
         env.update(self.environ)
 
         return env
+
+
+    def load_sub_configuration(self, filename, config=None):
+        """ Load additional configuration from a file. `filename` is the name
+            of the configuration file. The file is first searched in the
+            project directory and then in the global settings directory.
+
+            If `config` is set, then the name of the configuration file can
+            be additionally given through a .env configuration option. When
+            the option is set, then the file will be exclusively loaded as set:
+            if the name is an absolute path, the file name is taken as is,
+            if the name is relative, it is taken to be relative to the
+            project directory.
+
+            The format of the file is determined from the filename suffix.
+            Currently only files with extension '.yaml' are supported.
+
+            YAML files support a special '!include' construct. When the
+            directive is given, the value is taken to be a filename, the file
+            is loaded using this function and added at the position in the
+            configuration tree.
+        """
+        assert Path(filename).suffix == '.yaml'
+
+        configfile = self._find_config_file(filename, config)
+
+        return self._load_from_yaml(configfile)
+
+
+    def _find_config_file(self, filename, config=None):
+        """ Resolve the location of a configuration file given a filename and
+            an optional configuration option with the file name.
+            Raises a UsageError when the file cannot be found or is not
+            a regular file.
+        """
+        if config is not None:
+            cfg_filename = self.__getattr__(config)
+            if cfg_filename:
+                cfg_filename = Path(cfg_filename)
+
+                if cfg_filename.is_absolute():
+                    cfg_filename = cfg_filename.resolve()
+
+                    if not cfg_filename.is_file():
+                        LOG.fatal("Cannot find config file '%s'.", cfg_filename)
+                        raise UsageError("Config file not found.")
+
+                    return cfg_filename
+
+                filename = cfg_filename
+
+
+        search_paths = [self.project_dir, self.config_dir]
+        for path in search_paths:
+            if path is not None and (path / filename).is_file():
+                return path / filename
+
+        LOG.fatal("Configuration file '%s' not found.\nDirectories searched: %s",
+                  filename, search_paths)
+        raise UsageError("Config file not found.")
+
+
+    def _load_from_yaml(self, cfgfile):
+        """ Load a YAML configuration file. This installs a special handler that
+            allows including other YAML files using the '!include' operator.
+        """
+        yaml.add_constructor('!include', self._yaml_include_representer,
+                             Loader=yaml.SafeLoader)
+        return yaml.safe_load(cfgfile.read_text(encoding='utf-8'))
+
+
+    def _yaml_include_representer(self, loader, node):
+        """ Handler for the '!include' operator in YAML files.
+
+            When the filename is relative, then the file is first searched in the
+            project directory and then in the global settings directory.
+        """
+        fname = loader.construct_scalar(node)
+
+        if Path(fname).is_absolute():
+            configfile = Path(fname)
+        else:
+            configfile = self._find_config_file(loader.construct_scalar(node))
+
+        if configfile.suffix != '.yaml':
+            LOG.fatal("Format error while reading '%s': only YAML format supported.",
+                      configfile)
+            raise UsageError("Cannot handle config file format.")
+
+        return yaml.safe_load(configfile.read_text(encoding='utf-8'))
index b408f1c3f98a7fc965146d1a7eaef47f9186a37b..0e6e40b4c88dc3109e5aa9fa60cb27925458454b 100644 (file)
@@ -4,10 +4,8 @@ Helper class to create ICU rules from a configuration file.
 import io
 import logging
 import itertools
-from pathlib import Path
 import re
 
-import yaml
 from icu import Transliterator
 
 from nominatim.errors import UsageError
@@ -15,17 +13,17 @@ import nominatim.tokenizer.icu_variants as variants
 
 LOG = logging.getLogger()
 
-def _flatten_yaml_list(content):
+def _flatten_config_list(content):
     if not content:
         return []
 
     if not isinstance(content, list):
-        raise UsageError("List expected in ICU yaml configuration.")
+        raise UsageError("List expected in ICU configuration.")
 
     output = []
     for ele in content:
         if isinstance(ele, list):
-            output.extend(_flatten_yaml_list(ele))
+            output.extend(_flatten_config_list(ele))
         else:
             output.append(ele)
 
@@ -48,14 +46,12 @@ class ICURuleLoader:
     """ Compiler for ICU rules from a tokenizer configuration file.
     """
 
-    def __init__(self, configfile):
-        self.configfile = configfile
+    def __init__(self, rules):
         self.variants = set()
 
-        if configfile.suffix == '.yaml':
-            self._load_from_yaml()
-        else:
-            raise UsageError("Unknown format of tokenizer configuration.")
+        self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
+        self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
+        self._parse_variant_list(self._get_section(rules, 'variants'))
 
 
     def get_search_rules(self):
@@ -88,34 +84,14 @@ class ICURuleLoader:
         """
         return self.variants
 
-    def _yaml_include_representer(self, loader, node):
-        value = loader.construct_scalar(node)
-
-        if Path(value).is_absolute():
-            content = Path(value)
-        else:
-            content = (self.configfile.parent / value)
-
-        return yaml.safe_load(content.read_text(encoding='utf-8'))
-
-
-    def _load_from_yaml(self):
-        yaml.add_constructor('!include', self._yaml_include_representer,
-                             Loader=yaml.SafeLoader)
-        rules = yaml.safe_load(self.configfile.read_text(encoding='utf-8'))
-
-        self.normalization_rules = self._cfg_to_icu_rules(rules, 'normalization')
-        self.transliteration_rules = self._cfg_to_icu_rules(rules, 'transliteration')
-        self._parse_variant_list(self._get_section(rules, 'variants'))
-
 
-    def _get_section(self, rules, section):
+    @staticmethod
+    def _get_section(rules, section):
         """ Get the section named 'section' from the rules. If the section does
             not exist, raise a usage error with a meaningful message.
         """
         if section not in rules:
-            LOG.fatal("Section '%s' not found in tokenizer config '%s'.",
-                      section, str(self.configfile))
+            LOG.fatal("Section '%s' not found in tokenizer config.", section)
             raise UsageError("Syntax error in tokenizer configuration file.")
 
         return rules[section]
@@ -133,7 +109,7 @@ class ICURuleLoader:
         if content is None:
             return ''
 
-        return ';'.join(_flatten_yaml_list(content)) + ';'
+        return ';'.join(_flatten_config_list(content)) + ';'
 
 
     def _parse_variant_list(self, rules):
@@ -142,7 +118,7 @@ class ICURuleLoader:
         if not rules:
             return
 
-        rules = _flatten_yaml_list(rules)
+        rules = _flatten_config_list(rules)
 
         vmaker = _VariantMaker(self.normalization_rules)
 
index cb4112049fb7e8173b835fa1638db0f6ee3a7cc4..61263678d811db87e90cc0ab8ed55b885d24a57c 100644 (file)
@@ -8,7 +8,6 @@ import json
 import logging
 import re
 from textwrap import dedent
-from pathlib import Path
 
 from nominatim.db.connection import connect
 from nominatim.db.properties import set_property, get_property
@@ -49,12 +48,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
             This copies all necessary data in the project directory to make
             sure the tokenizer remains stable even over updates.
         """
-        if config.TOKENIZER_CONFIG:
-            cfgfile = Path(config.TOKENIZER_CONFIG)
-        else:
-            cfgfile = config.config_dir / 'icu_tokenizer.yaml'
-
-        loader = ICURuleLoader(cfgfile)
+        loader = ICURuleLoader(config.load_sub_configuration('icu_tokenizer.yaml',
+                                                             config='TOKENIZER_CONFIG'))
         self.naming_rules = ICUNameProcessorRules(loader=loader)
         self.term_normalization = config.TERM_NORMALIZATION
         self.max_word_frequency = config.MAX_WORD_FREQUENCY
index 897ac22031c32267a1321da3b54efb23348d503e..e04a8693f116bccd6d7e609de0c463b74170e46a 100644 (file)
@@ -2,7 +2,6 @@
 Functions for importing and managing static country information.
 """
 import psycopg2.extras
-import yaml
 
 from nominatim.db import utils as db_utils
 from nominatim.db.connection import connect
@@ -14,12 +13,12 @@ class _CountryInfo:
     def __init__(self):
         self._info = {}
 
-    def load(self, configfile):
+    def load(self, config):
         """ Load the country properties from the configuration files,
             if they are not loaded yet.
         """
         if not self._info:
-            self._info = yaml.safe_load(configfile.read_text(encoding='utf-8'))
+            self._info = config.load_sub_configuration('country_settings.yaml')
 
     def items(self):
         """ Return tuples of (country_code, property dict) as iterable.
@@ -29,12 +28,12 @@ class _CountryInfo:
 
 _COUNTRY_INFO = _CountryInfo()
 
-def setup_country_config(configfile):
+def setup_country_config(config):
     """ Load country properties from the configuration file.
         Needs to be called before using any other functions in this
         file.
     """
-    _COUNTRY_INFO.load(configfile)
+    _COUNTRY_INFO.load(config)
 
 
 def setup_country_tables(dsn, sql_dir, ignore_partitions=False):
index b6345baff2df1ed051b3777229185cf4467797de..b2437d71cf60282feeca558c8191ad73179134d5 100644 (file)
@@ -433,3 +433,76 @@ Feature: Address computation
         Then results contain
            | osm | display_name               |
            | N2  | Leftside, Wonderway, Right |
+
+
+    Scenario: POIs can correct address parts on the fly (with partial unmatching address)
+        Given the grid
+            | 1 |   |   |   |  2 |   | 5 |
+            |   |   |   | 9 |    | 8 |   |
+            |   | 10| 11|   |    | 12|   |
+            | 4 |   |   |   |  3 |   | 6 |
+        And the places
+            | osm | class    | type           | admin | name  | geometry    |
+            | R1  | boundary | administrative | 8     | Left  | (1,2,3,4,1) |
+            | R2  | boundary | administrative | 8     | Right | (2,3,6,5,2) |
+        And the places
+            | osm | class   | type    | name      | geometry |
+            | W1  | highway | primary | Wonderway | 10,11,12 |
+        And the places
+            | osm | class   | type    | name      | addr+suburb | geometry |
+            | N1  | amenity | cafe    | Bolder    | Boring      | 9        |
+            | N2  | amenity | cafe    | Leftside  | Boring      | 8        |
+        When importing
+        Then place_addressline contains
+           | object | address | isaddress |
+           | W1     | R1      | True      |
+           | W1     | R2      | False     |
+        And place_addressline doesn't contain
+           | object | address |
+           | N1     | R1      |
+           | N2     | R2      |
+        When sending search query "Bolder"
+        Then results contain
+           | osm | display_name            |
+           | N1  | Bolder, Wonderway, Left |
+        When sending search query "Leftside"
+        Then results contain
+           | osm | display_name               |
+           | N2  | Leftside, Wonderway, Right |
+
+
+
+    Scenario: POIs can correct address parts on the fly (with partial matching address)
+        Given the grid
+            | 1 |   |   |   |  2 |   | 5 |
+            |   |   |   | 9 |    | 8 |   |
+            |   | 10| 11|   |    | 12|   |
+            | 4 |   |   |   |  3 |   | 6 |
+        And the places
+            | osm | class    | type           | admin | name  | geometry    |
+            | R1  | boundary | administrative | 8     | Left  | (1,2,3,4,1) |
+            | R2  | boundary | administrative | 8     | Right | (2,3,6,5,2) |
+        And the places
+            | osm | class   | type    | name      | geometry |
+            | W1  | highway | primary | Wonderway | 10,11,12 |
+        And the places
+            | osm | class   | type    | name      | addr+state | geometry |
+            | N1  | amenity | cafe    | Bolder    | Left       | 9        |
+            | N2  | amenity | cafe    | Leftside  | Left       | 8        |
+        When importing
+        Then place_addressline contains
+           | object | address | isaddress |
+           | W1     | R1      | True      |
+           | W1     | R2      | False     |
+        And place_addressline doesn't contain
+           | object | address |
+           | N1     | R1      |
+           | N2     | R2      |
+        When sending search query "Bolder"
+        Then results contain
+           | osm | display_name            |
+           | N1  | Bolder, Wonderway, Left |
+        When sending search query "Leftside"
+        Then results contain
+           | osm | display_name               |
+           | N2  | Leftside, Wonderway, Left |
index 6729f95411e5d01f813a4165402b7df06df801ac..8b5cb11bdb71f360094b59633ca364a588b6ab29 100644 (file)
@@ -15,6 +15,20 @@ def make_config(src_dir):
 
     return _mk_config
 
+@pytest.fixture
+def make_config_path(src_dir, tmp_path):
+    """ Create a configuration object with project and config directories
+        in a temporary directory.
+    """
+    def _mk_config():
+        (tmp_path / 'project').mkdir()
+        (tmp_path / 'config').mkdir()
+        conf = Configuration(tmp_path / 'project', src_dir / 'settings')
+        conf.config_dir = tmp_path / 'config'
+        return conf
+
+    return _mk_config
+
 
 def test_no_project_dir(make_config):
     config = make_config()
@@ -43,6 +57,17 @@ def test_prefer_os_environ_over_project_setting(make_config, monkeypatch, tmp_pa
     assert config.DATABASE_WEBUSER == 'nobody'
 
 
+def test_prefer_os_environ_can_unset_project_setting(make_config, monkeypatch, tmp_path):
+    envfile = tmp_path / '.env'
+    envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n')
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', '')
+
+    config = make_config(tmp_path)
+
+    assert config.DATABASE_WEBUSER == ''
+
+
 def test_get_os_env_add_defaults(make_config, monkeypatch):
     config = make_config()
 
@@ -158,3 +183,151 @@ def test_get_import_style_extern(make_config, monkeypatch, value):
     monkeypatch.setenv('NOMINATIM_IMPORT_STYLE', value)
 
     assert str(config.get_import_style_file()) == value
+
+
+def test_load_subconf_from_project_dir(make_config_path):
+    config = make_config_path()
+
+    testfile = config.project_dir / 'test.yaml'
+    testfile.write_text('cow: muh\ncat: miau\n')
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text('cow: miau\ncat: muh\n')
+
+    rules = config.load_sub_configuration('test.yaml')
+
+    assert rules == dict(cow='muh', cat='miau')
+
+
+def test_load_subconf_from_settings_dir(make_config_path):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text('cow: muh\ncat: miau\n')
+
+    rules = config.load_sub_configuration('test.yaml')
+
+    assert rules == dict(cow='muh', cat='miau')
+
+
+def test_load_subconf_empty_env_conf(make_config_path, monkeypatch):
+    monkeypatch.setenv('NOMINATIM_MY_CONFIG', '')
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text('cow: muh\ncat: miau\n')
+
+    rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+    assert rules == dict(cow='muh', cat='miau')
+
+
+def test_load_subconf_env_absolute_found(make_config_path, monkeypatch, tmp_path):
+    monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
+    config = make_config_path()
+
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+    (tmp_path / 'other.yaml').write_text('dog: muh\nfrog: miau\n')
+
+    rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+    assert rules == dict(dog='muh', frog='miau')
+
+
+def test_load_subconf_env_absolute_not_found(make_config_path, monkeypatch, tmp_path):
+    monkeypatch.setenv('NOMINATIM_MY_CONFIG', str(tmp_path / 'other.yaml'))
+    config = make_config_path()
+
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+
+    with pytest.raises(UsageError, match='Config file not found.'):
+        rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+
+@pytest.mark.parametrize("location", ['project_dir', 'config_dir'])
+def test_load_subconf_env_relative_found(make_config_path, monkeypatch, location):
+    monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
+    config = make_config_path()
+
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+    (getattr(config, location) / 'other.yaml').write_text('dog: bark\n')
+
+    rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+    assert rules == dict(dog='bark')
+
+
+def test_load_subconf_env_relative_not_found(make_config_path, monkeypatch):
+    monkeypatch.setenv('NOMINATIM_MY_CONFIG', 'other.yaml')
+    config = make_config_path()
+
+    (config.config_dir / 'test.yaml').write_text('cow: muh\ncat: miau\n')
+
+    with pytest.raises(UsageError, match='Config file not found.'):
+        rules = config.load_sub_configuration('test.yaml', config='MY_CONFIG')
+
+
+def test_load_subconf_not_found(make_config_path):
+    config = make_config_path()
+
+    with pytest.raises(UsageError, match='Config file not found.'):
+        rules = config.load_sub_configuration('test.yaml')
+
+
+def test_load_subconf_include_absolute(make_config_path, tmp_path):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text(f'base: !include {tmp_path}/inc.yaml\n')
+    (tmp_path / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
+
+    rules = config.load_sub_configuration('test.yaml')
+
+    assert rules == dict(base=dict(first=1, second=2))
+
+
+@pytest.mark.parametrize("location", ['project_dir', 'config_dir'])
+def test_load_subconf_include_relative(make_config_path, tmp_path, location):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text(f'base: !include inc.yaml\n')
+    (getattr(config, location) / 'inc.yaml').write_text('first: 1\nsecond: 2\n')
+
+    rules = config.load_sub_configuration('test.yaml')
+
+    assert rules == dict(base=dict(first=1, second=2))
+
+
+def test_load_subconf_include_bad_format(make_config_path):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text(f'base: !include inc.txt\n')
+    (config.config_dir / 'inc.txt').write_text('first: 1\nsecond: 2\n')
+
+    with pytest.raises(UsageError, match='Cannot handle config file format.'):
+        rules = config.load_sub_configuration('test.yaml')
+
+
+def test_load_subconf_include_not_found(make_config_path):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text(f'base: !include inc.txt\n')
+
+    with pytest.raises(UsageError, match='Config file not found.'):
+        rules = config.load_sub_configuration('test.yaml')
+
+
+def test_load_subconf_include_recursive(make_config_path):
+    config = make_config_path()
+
+    testfile = config.config_dir / 'test.yaml'
+    testfile.write_text(f'base: !include inc.yaml\n')
+    (config.config_dir / 'inc.yaml').write_text('- !include more.yaml\n- upper\n')
+    (config.config_dir / 'more.yaml').write_text('- the end\n')
+
+    rules = config.load_sub_configuration('test.yaml')
+
+    assert rules == dict(base=[['the end'], 'upper'])
index 5ec434b6f4b349902ca743106a9199f1382979bc..b7101c3f67ef62229e5205d226e4c50b4c6ccad8 100644 (file)
@@ -67,13 +67,10 @@ def analyzer(tokenizer_factory, test_config, monkeypatch,
 
     def _mk_analyser(norm=("[[:Punctuation:][:Space:]]+ > ' '",), trans=(':: upper()',),
                      variants=('~gasse -> gasse', 'street => st', )):
-        cfgfile = tmp_path / 'analyser_test_config.yaml'
-        with cfgfile.open('w') as stream:
-            cfgstr = {'normalization' : list(norm),
-                       'transliteration' : list(trans),
-                       'variants' : [ {'words': list(variants)}]}
-            yaml.dump(cfgstr, stream)
-        tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgfile))
+        cfgstr = {'normalization' : list(norm),
+                   'transliteration' : list(trans),
+                   'variants' : [ {'words': list(variants)}]}
+        tok.naming_rules = ICUNameProcessorRules(loader=ICURuleLoader(cfgstr))
 
         return tok.name_analyzer()
 
index cc1031164c2872b77ed7dcf0fb3600df05895376..ae05988ae42ce4a69ab9942ef8ca39305b151924 100644 (file)
@@ -4,6 +4,7 @@ Tests for import name normalisation and variant generation.
 from textwrap import dedent
 
 import pytest
+import yaml
 
 from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
 from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProcessorRules
@@ -11,7 +12,7 @@ from nominatim.tokenizer.icu_name_processor import ICUNameProcessor, ICUNameProc
 from nominatim.errors import UsageError
 
 @pytest.fixture
-def cfgfile(tmp_path, suffix='.yaml'):
+def cfgfile():
     def _create_config(*variants, **kwargs):
         content = dedent("""\
         normalization:
@@ -29,9 +30,7 @@ def cfgfile(tmp_path, suffix='.yaml'):
         content += '\n'.join(("      - " + s for s in variants)) + '\n'
         for k, v in kwargs:
             content += "    {}: {}\n".format(k, v)
-        fpath = tmp_path / ('test_config' + suffix)
-        fpath.write_text(dedent(content))
-        return fpath
+        return yaml.safe_load(content)
 
     return _create_config
 
index bb30dc6eae2133b9c3fdb42cf95dab5620657aa6..c3480de87ac08a1b251666c0b61fb31f6405cfba 100644 (file)
@@ -1,16 +1,18 @@
 """
 Tests for converting a config file to ICU rules.
 """
-import pytest
 from textwrap import dedent
 
+import pytest
+import yaml
+
 from nominatim.tokenizer.icu_rule_loader import ICURuleLoader
 from nominatim.errors import UsageError
 
 from icu import Transliterator
 
 @pytest.fixture
-def cfgfile(tmp_path, suffix='.yaml'):
+def cfgrules():
     def _create_config(*variants, **kwargs):
         content = dedent("""\
         normalization:
@@ -27,22 +29,19 @@ def cfgfile(tmp_path, suffix='.yaml'):
         content += '\n'.join(("      - " + s for s in variants)) + '\n'
         for k, v in kwargs:
             content += "    {}: {}\n".format(k, v)
-        fpath = tmp_path / ('test_config' + suffix)
-        fpath.write_text(dedent(content))
-        return fpath
+        return yaml.safe_load(content)
 
     return _create_config
 
 
-def test_empty_rule_file(tmp_path):
-    fpath = tmp_path / ('test_config.yaml')
-    fpath.write_text(dedent("""\
+def test_empty_rule_set():
+    rule_cfg = yaml.safe_load(dedent("""\
         normalization:
         transliteration:
         variants:
         """))
 
-    rules = ICURuleLoader(fpath)
+    rules = ICURuleLoader(rule_cfg)
     assert rules.get_search_rules() == ''
     assert rules.get_normalization_rules() == ''
     assert rules.get_transliteration_rules() == ''
@@ -51,19 +50,15 @@ def test_empty_rule_file(tmp_path):
 CONFIG_SECTIONS = ('normalization', 'transliteration', 'variants')
 
 @pytest.mark.parametrize("section", CONFIG_SECTIONS)
-def test_missing_normalization(tmp_path, section):
-    fpath = tmp_path / ('test_config.yaml')
-    with fpath.open('w') as fd:
-        for name in CONFIG_SECTIONS:
-            if name != section:
-                fd.write(name + ':\n')
+def test_missing_section(section):
+    rule_cfg = { s: {} for s in CONFIG_SECTIONS if s != section}
 
     with pytest.raises(UsageError):
-        ICURuleLoader(fpath)
+        ICURuleLoader(rule_cfg)
 
 
-def test_get_search_rules(cfgfile):
-    loader = ICURuleLoader(cfgfile())
+def test_get_search_rules(cfgrules):
+    loader = ICURuleLoader(cfgrules())
 
     rules = loader.get_search_rules()
     trans = Transliterator.createFromRules("test", rules)
@@ -77,23 +72,24 @@ def test_get_search_rules(cfgfile):
     assert trans.transliterate(" проспект ") == " prospekt "
 
 
-def test_get_normalization_rules(cfgfile):
-    loader = ICURuleLoader(cfgfile())
+def test_get_normalization_rules(cfgrules):
+    loader = ICURuleLoader(cfgrules())
     rules = loader.get_normalization_rules()
     trans = Transliterator.createFromRules("test", rules)
 
     assert trans.transliterate(" проспект-Prospekt ") == " проспект prospekt "
 
 
-def test_get_transliteration_rules(cfgfile):
-    loader = ICURuleLoader(cfgfile())
+def test_get_transliteration_rules(cfgrules):
+    loader = ICURuleLoader(cfgrules())
     rules = loader.get_transliteration_rules()
     trans = Transliterator.createFromRules("test", rules)
 
     assert trans.transliterate(" проспект-Prospekt ") == " prospekt Prospekt "
 
 
-def test_transliteration_rules_from_file(tmp_path):
+def test_transliteration_rules_from_file(def_config, tmp_path):
+    def_config.project_dir = tmp_path
     cfgpath = tmp_path / ('test_config.yaml')
     cfgpath.write_text(dedent("""\
         normalization:
@@ -105,7 +101,7 @@ def test_transliteration_rules_from_file(tmp_path):
     transpath = tmp_path / ('transliteration.yaml')
     transpath.write_text('- "x > y"')
 
-    loader = ICURuleLoader(cfgpath)
+    loader = ICURuleLoader(def_config.load_sub_configuration('test_config.yaml'))
     rules = loader.get_transliteration_rules()
     trans = Transliterator.createFromRules("test", rules)
 
@@ -115,11 +111,11 @@ def test_transliteration_rules_from_file(tmp_path):
 class TestGetReplacements:
 
     @pytest.fixture(autouse=True)
-    def setup_cfg(self, cfgfile):
-        self.cfgfile = cfgfile
+    def setup_cfg(self, cfgrules):
+        self.cfgrules = cfgrules
 
     def get_replacements(self, *variants):
-        loader = ICURuleLoader(self.cfgfile(*variants))
+        loader = ICURuleLoader(self.cfgrules(*variants))
         rules = loader.get_replacement_pairs()
 
         return set((v.source, v.replacement) for v in rules)
@@ -129,7 +125,7 @@ class TestGetReplacements:
                                          '~foo~ -> bar', 'fo~ o -> bar'])
     def test_invalid_variant_description(self, variant):
         with pytest.raises(UsageError):
-            ICURuleLoader(self.cfgfile(variant))
+            ICURuleLoader(self.cfgrules(variant))
 
     def test_add_full(self):
         repl = self.get_replacements("foo -> bar")
index 597377691395175e01e1febe75d72a99456e0080..66f785c2622a6d9fe9c5967af18368681e7e1850 100644 (file)
@@ -8,7 +8,7 @@ from nominatim.tools import country_info
 
 @pytest.fixture(autouse=True)
 def read_config(def_config):
-    country_info.setup_country_config(def_config.config_dir / 'country_settings.yaml')
+    country_info.setup_country_config(def_config)
 
 @pytest.mark.parametrize("no_partitions", (True, False))
 def test_setup_country_tables(src_dir, temp_db_with_extensions, dsn, temp_db_cursor,