]> git.openstreetmap.org Git - nominatim.git/commitdiff
add support for external sanitizer modules
authorSarah Hoffmann <lonvia@denofr.de>
Mon, 25 Jul 2022 14:10:19 +0000 (16:10 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Mon, 25 Jul 2022 14:10:19 +0000 (16:10 +0200)
nominatim/tokenizer/icu_rule_loader.py
nominatim/tokenizer/place_sanitizer.py
test/python/config/test_config_load_module.py
test/python/tokenizer/sanitizers/test_clean_housenumbers.py
test/python/tokenizer/sanitizers/test_clean_postcodes.py
test/python/tokenizer/sanitizers/test_split_name_list.py
test/python/tokenizer/sanitizers/test_strip_brace_terms.py
test/python/tokenizer/sanitizers/test_tag_analyzer_by_language.py
test/python/tokenizer/test_place_sanitizer.py

index 84040ddc36f86a1ad66722abfbcd4f444fff652a..cf9fdb88ac16fa96ea2e6a5cda2a4e1afb43d5f5 100644 (file)
@@ -45,6 +45,7 @@ class ICURuleLoader:
     """
 
     def __init__(self, config: Configuration) -> None:
     """
 
     def __init__(self, config: Configuration) -> None:
+        self.config = config
         rules = config.load_sub_configuration('icu_tokenizer.yaml',
                                               config='TOKENIZER_CONFIG')
 
         rules = config.load_sub_configuration('icu_tokenizer.yaml',
                                               config='TOKENIZER_CONFIG')
 
@@ -92,7 +93,7 @@ class ICURuleLoader:
     def make_sanitizer(self) -> PlaceSanitizer:
         """ Create a place sanitizer from the configured rules.
         """
     def make_sanitizer(self) -> PlaceSanitizer:
         """ Create a place sanitizer from the configured rules.
         """
-        return PlaceSanitizer(self.sanitizer_rules)
+        return PlaceSanitizer(self.sanitizer_rules, self.config)
 
 
     def make_token_analysis(self) -> ICUTokenAnalysis:
 
 
     def make_token_analysis(self) -> ICUTokenAnalysis:
index 3f548e061e4fdc9dc45e5f4711c35b061d66a1cd..c7dfd1ba5d34f4f55fe946bae421237c5e74cbe8 100644 (file)
@@ -9,9 +9,9 @@ Handler for cleaning name and address tags in place information before it
 is handed to the token analysis.
 """
 from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
 is handed to the token analysis.
 """
 from typing import Optional, List, Mapping, Sequence, Callable, Any, Tuple
-import importlib
 
 from nominatim.errors import UsageError
 
 from nominatim.errors import UsageError
+from nominatim.config import Configuration
 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
 from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName
 from nominatim.data.place_info import PlaceInfo
 from nominatim.tokenizer.sanitizers.config import SanitizerConfig
 from nominatim.tokenizer.sanitizers.base import SanitizerHandler, ProcessInfo, PlaceName
 from nominatim.data.place_info import PlaceInfo
@@ -22,16 +22,21 @@ class PlaceSanitizer:
         names and address before they are used by the token analysers.
     """
 
         names and address before they are used by the token analysers.
     """
 
-    def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]]) -> None:
+    def __init__(self, rules: Optional[Sequence[Mapping[str, Any]]],
+                 config: Configuration) -> None:
         self.handlers: List[Callable[[ProcessInfo], None]] = []
 
         if rules:
             for func in rules:
                 if 'step' not in func:
                     raise UsageError("Sanitizer rule is missing the 'step' attribute.")
         self.handlers: List[Callable[[ProcessInfo], None]] = []
 
         if rules:
             for func in rules:
                 if 'step' not in func:
                     raise UsageError("Sanitizer rule is missing the 'step' attribute.")
-                module_name = 'nominatim.tokenizer.sanitizers.' + func['step'].replace('-', '_')
-                handler_module: SanitizerHandler = importlib.import_module(module_name)
-                self.handlers.append(handler_module.create(SanitizerConfig(func)))
+                if not isinstance(func['step'], str):
+                    raise UsageError("'step' attribute must be a simple string.")
+
+                module: SanitizerHandler = \
+                    config.load_plugin_module(func['step'], 'nominatim.tokenizer.sanitizers')
+
+                self.handlers.append(module.create(SanitizerConfig(func)))
 
 
     def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
 
 
     def process_names(self, place: PlaceInfo) -> Tuple[List[PlaceName], List[PlaceName]]:
index cee88c825241f86694ebfe11ee7106ca8078061a..df6c4794178465db5f79558662ee99703a6bbcac 100644 (file)
@@ -31,6 +31,11 @@ def test_load_default_module(test_config):
 
     assert isinstance(module.NOMINATIM_VERSION, tuple)
 
 
     assert isinstance(module.NOMINATIM_VERSION, tuple)
 
+def test_load_default_module_with_hyphen(test_config):
+    module = test_config.load_plugin_module('place-info', 'nominatim.data')
+
+    assert isinstance(module.PlaceInfo, object)
+
 
 def test_load_plugin_module(test_config, tmp_path):
     (tmp_path / 'project' / 'testpath').mkdir()
 
 def test_load_plugin_module(test_config, tmp_path):
     (tmp_path / 'project' / 'testpath').mkdir()
index 128e1201ed1c4b3cdfa9714375e632ba75e0c374..11a71a5fb3b992ff70b1c981aea22bf17ad96f3c 100644 (file)
@@ -13,14 +13,14 @@ from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.data.place_info import PlaceInfo
 
 @pytest.fixture
 from nominatim.data.place_info import PlaceInfo
 
 @pytest.fixture
-def sanitize(request):
+def sanitize(request, def_config):
     sanitizer_args = {'step': 'clean-housenumbers'}
     for mark in request.node.iter_markers(name="sanitizer_params"):
         sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
 
     def _run(**kwargs):
         place = PlaceInfo({'address': kwargs})
     sanitizer_args = {'step': 'clean-housenumbers'}
     for mark in request.node.iter_markers(name="sanitizer_params"):
         sanitizer_args.update({k.replace('_', '-') : v for k,v in mark.kwargs.items()})
 
     def _run(**kwargs):
         place = PlaceInfo({'address': kwargs})
-        _, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+        _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
 
         return sorted([(p.kind, p.name) for p in address])
 
 
         return sorted([(p.kind, p.name) for p in address])
 
@@ -45,24 +45,24 @@ def test_filter_kind(sanitize):
 
 
 @pytest.mark.parametrize('number', ('6523', 'n/a', '4'))
 
 
 @pytest.mark.parametrize('number', ('6523', 'n/a', '4'))
-def test_convert_to_name_converted(number):
+def test_convert_to_name_converted(def_config, number):
     sanitizer_args = {'step': 'clean-housenumbers',
                       'convert-to-name': (r'\d+', 'n/a')}
 
     place = PlaceInfo({'address': {'housenumber': number}})
     sanitizer_args = {'step': 'clean-housenumbers',
                       'convert-to-name': (r'\d+', 'n/a')}
 
     place = PlaceInfo({'address': {'housenumber': number}})
-    names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+    names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
 
     assert ('housenumber', number) in set((p.kind, p.name) for p in names)
     assert 'housenumber' not in set(p.kind for p in address)
 
 
 @pytest.mark.parametrize('number', ('a54', 'n.a', 'bow'))
 
     assert ('housenumber', number) in set((p.kind, p.name) for p in names)
     assert 'housenumber' not in set(p.kind for p in address)
 
 
 @pytest.mark.parametrize('number', ('a54', 'n.a', 'bow'))
-def test_convert_to_name_unconverted(number):
+def test_convert_to_name_unconverted(def_config, number):
     sanitizer_args = {'step': 'clean-housenumbers',
                       'convert-to-name': (r'\d+', 'n/a')}
 
     place = PlaceInfo({'address': {'housenumber': number}})
     sanitizer_args = {'step': 'clean-housenumbers',
                       'convert-to-name': (r'\d+', 'n/a')}
 
     place = PlaceInfo({'address': {'housenumber': number}})
-    names, address = PlaceSanitizer([sanitizer_args]).process_names(place)
+    names, address = PlaceSanitizer([sanitizer_args], def_config).process_names(place)
 
     assert 'housenumber' not in set(p.kind for p in names)
     assert ('housenumber', number) in set((p.kind, p.name) for p in address)
 
     assert 'housenumber' not in set(p.kind for p in names)
     assert ('housenumber', number) in set((p.kind, p.name) for p in address)
index 237527f1e44c7064cf8be6e2c3025353a1774ebd..f2c965ad9b1db0017864b5bbaec1677023b1d838 100644 (file)
@@ -25,7 +25,7 @@ def sanitize(def_config, request):
         if country is not None:
             pi['country_code'] = country
 
         if country is not None:
             pi['country_code'] = country
 
-        _, address = PlaceSanitizer([sanitizer_args]).process_names(PlaceInfo(pi))
+        _, address = PlaceSanitizer([sanitizer_args], def_config).process_names(PlaceInfo(pi))
 
         return sorted([(p.kind, p.name) for p in address])
 
 
         return sorted([(p.kind, p.name) for p in address])
 
index 67157fba2148a9e806dce756c68e797e82719cdb..9ca539d57cb529cac93283c813c3f0a7075900a8 100644 (file)
@@ -14,58 +14,66 @@ from nominatim.data.place_info import PlaceInfo
 
 from nominatim.errors import UsageError
 
 
 from nominatim.errors import UsageError
 
-def run_sanitizer_on(**kwargs):
-    place = PlaceInfo({'name': kwargs})
-    name, _ = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
+class TestSplitName:
 
 
-    return sorted([(p.name, p.kind, p.suffix) for p in name])
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
 
 
 
 
-def sanitize_with_delimiter(delimiter, name):
-    place = PlaceInfo({'name': {'name': name}})
-    san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}])
-    name, _ = san.process_names(place)
+    def run_sanitizer_on(self, **kwargs):
+        place = PlaceInfo({'name': kwargs})
+        name, _ = PlaceSanitizer([{'step': 'split-name-list'}], self.config).process_names(place)
 
 
-    return sorted([p.name for p in name])
+        return sorted([(p.name, p.kind, p.suffix) for p in name])
 
 
 
 
-def test_simple():
-    assert run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
-    assert run_sanitizer_on(name='') == [('', 'name', None)]
+    def sanitize_with_delimiter(self, delimiter, name):
+        place = PlaceInfo({'name': {'name': name}})
+        san = PlaceSanitizer([{'step': 'split-name-list', 'delimiters': delimiter}],
+                             self.config)
+        name, _ = san.process_names(place)
 
 
+        return sorted([p.name for p in name])
 
 
-def test_splits():
-    assert run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
-                                              ('B', 'name', None),
-                                              ('C', 'name', None)]
-    assert run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
-                                                            ('boat', 'short_name', None)]
 
 
+    def test_simple(self):
+        assert self.run_sanitizer_on(name='ABC') == [('ABC', 'name', None)]
+        assert self.run_sanitizer_on(name='') == [('', 'name', None)]
 
 
-def test_empty_fields():
-    assert run_sanitizer_on(name='A;;B') == [('A', 'name', None),
-                                             ('B', 'name', None)]
-    assert run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
-                                              ('B', 'name', None)]
-    assert run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
-    assert run_sanitizer_on(name='B,') == [('B', 'name', None)]
 
 
+    def test_splits(self):
+        assert self.run_sanitizer_on(name='A;B;C') == [('A', 'name', None),
+                                                       ('B', 'name', None),
+                                                       ('C', 'name', None)]
+        assert self.run_sanitizer_on(short_name=' House, boat ') == [('House', 'short_name', None),
+                                                                     ('boat', 'short_name', None)]
 
 
-def test_custom_delimiters():
-    assert sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
-    assert sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
-    assert sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
-    assert sanitize_with_delimiter(' ', 'morning  sun') == ['morning', 'sun']
 
 
+    def test_empty_fields(self):
+        assert self.run_sanitizer_on(name='A;;B') == [('A', 'name', None),
+                                                      ('B', 'name', None)]
+        assert self.run_sanitizer_on(name='A; ,B') == [('A', 'name', None),
+                                                       ('B', 'name', None)]
+        assert self.run_sanitizer_on(name=' ;B') == [('B', 'name', None)]
+        assert self.run_sanitizer_on(name='B,') == [('B', 'name', None)]
 
 
-def test_empty_delimiter_set():
-    with pytest.raises(UsageError):
-        sanitize_with_delimiter('', 'abc')
 
 
+    def test_custom_delimiters(self):
+        assert self.sanitize_with_delimiter(':', '12:45,3') == ['12', '45,3']
+        assert self.sanitize_with_delimiter('\\', 'a;\\b!#@ \\') == ['a;', 'b!#@']
+        assert self.sanitize_with_delimiter('[]', 'foo[to]be') == ['be', 'foo', 'to']
+        assert self.sanitize_with_delimiter(' ', 'morning  sun') == ['morning', 'sun']
 
 
-def test_no_name_list():
+
+    def test_empty_delimiter_set(self):
+        with pytest.raises(UsageError):
+            self.sanitize_with_delimiter('', 'abc')
+
+
+def test_no_name_list(def_config):
     place = PlaceInfo({'address': {'housenumber': '3'}})
     place = PlaceInfo({'address': {'housenumber': '3'}})
-    name, address = PlaceSanitizer([{'step': 'split-name-list'}]).process_names(place)
+    name, address = PlaceSanitizer([{'step': 'split-name-list'}], def_config).process_names(place)
 
     assert not name
     assert len(address) == 1
 
     assert not name
     assert len(address) == 1
index eb5543646595a96aa7183e7c813f54ef61813b6e..7fa0a018d7603bf6a80d163e8ec818fc1208bb2e 100644 (file)
@@ -12,39 +12,45 @@ import pytest
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.data.place_info import PlaceInfo
 
 from nominatim.tokenizer.place_sanitizer import PlaceSanitizer
 from nominatim.data.place_info import PlaceInfo
 
-def run_sanitizer_on(**kwargs):
-    place = PlaceInfo({'name': kwargs})
-    name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place)
+class TestStripBrace:
 
 
-    return sorted([(p.name, p.kind, p.suffix) for p in name])
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
 
 
+    def run_sanitizer_on(self, **kwargs):
+        place = PlaceInfo({'name': kwargs})
+        name, _ = PlaceSanitizer([{'step': 'strip-brace-terms'}], self.config).process_names(place)
 
 
-def test_no_braces():
-    assert run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
-                                                      ('foo', 'name', None)]
+        return sorted([(p.name, p.kind, p.suffix) for p in name])
 
 
 
 
-def test_simple_braces():
-    assert run_sanitizer_on(name='Halle (Saale)', ref='3')\
-      == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
-    assert run_sanitizer_on(name='ack ( bar')\
-      == [('ack', 'name', None), ('ack ( bar', 'name', None)]
+    def test_no_braces(self):
+        assert self.run_sanitizer_on(name='foo', ref='23') == [('23', 'ref', None),
+                                                               ('foo', 'name', None)]
 
 
 
 
-def test_only_braces():
-    assert run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
+    def test_simple_braces(self):
+        assert self.run_sanitizer_on(name='Halle (Saale)', ref='3')\
+          == [('3', 'ref', None), ('Halle', 'name', None), ('Halle (Saale)', 'name', None)]
+        assert self.run_sanitizer_on(name='ack ( bar')\
+          == [('ack', 'name', None), ('ack ( bar', 'name', None)]
 
 
 
 
-def test_double_braces():
-    assert run_sanitizer_on(name='a((b))') == [('a', 'name', None),
-                                               ('a((b))', 'name', None)]
-    assert run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None),
-                                                  ('a (b) (c)', 'name', None)]
+    def test_only_braces(self):
+        assert self.run_sanitizer_on(name='(maybe)') == [('(maybe)', 'name', None)]
 
 
 
 
-def test_no_names():
+    def test_double_braces(self):
+        assert self.run_sanitizer_on(name='a((b))') == [('a', 'name', None),
+                                                        ('a((b))', 'name', None)]
+        assert self.run_sanitizer_on(name='a (b) (c)') == [('a', 'name', None),
+                                                           ('a (b) (c)', 'name', None)]
+
+
+def test_no_names(def_config):
     place = PlaceInfo({'address': {'housenumber': '3'}})
     place = PlaceInfo({'address': {'housenumber': '3'}})
-    name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}]).process_names(place)
+    name, address = PlaceSanitizer([{'step': 'strip-brace-terms'}], def_config).process_names(place)
 
     assert not name
     assert len(address) == 1
 
     assert not name
     assert len(address) == 1
index 306b80273f7876b1babc0f4efef022fa8e258aaa..1feecf3f6341b32728ef03ea91ab1a672962bd47 100644 (file)
@@ -15,11 +15,16 @@ from nominatim.data.country_info import setup_country_config
 
 class TestWithDefaults:
 
 
 class TestWithDefaults:
 
-    @staticmethod
-    def run_sanitizer_on(country, **kwargs):
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
+
+
+    def run_sanitizer_on(self, country, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
-        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}]).process_names(place)
+        name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language'}],
+                                 self.config).process_names(place)
 
         return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
 
 
         return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
 
@@ -44,12 +49,17 @@ class TestWithDefaults:
 
 class TestFilterKind:
 
 
 class TestFilterKind:
 
-    @staticmethod
-    def run_sanitizer_on(filt, **kwargs):
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
+
+
+    def run_sanitizer_on(self, filt, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': 'de'})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': 'de'})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
-                                   'filter-kind': filt}]).process_names(place)
+                                   'filter-kind': filt}],
+                                 self.config).process_names(place)
 
         return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
 
 
         return sorted([(p.name, p.kind, p.suffix, p.attr) for p in name])
 
@@ -94,14 +104,16 @@ class TestDefaultCountry:
     @pytest.fixture(autouse=True)
     def setup_country(self, def_config):
         setup_country_config(def_config)
     @pytest.fixture(autouse=True)
     def setup_country(self, def_config):
         setup_country_config(def_config)
+        self.config = def_config
+
 
 
-    @staticmethod
-    def run_sanitizer_append(mode,  country, **kwargs):
+    def run_sanitizer_append(self, mode,  country, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
-                                   'mode': 'append'}]).process_names(place)
+                                   'mode': 'append'}],
+                                 self.config).process_names(place)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
@@ -111,13 +123,13 @@ class TestDefaultCountry:
         return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
 
 
         return sorted([(p.name, p.attr.get('analyzer', '')) for p in name])
 
 
-    @staticmethod
-    def run_sanitizer_replace(mode,  country, **kwargs):
+    def run_sanitizer_replace(self, mode,  country, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
-                                   'mode': 'replace'}]).process_names(place)
+                                   'mode': 'replace'}],
+                                 self.config).process_names(place)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
@@ -131,7 +143,8 @@ class TestDefaultCountry:
         place = PlaceInfo({'name': {'name': 'something'}})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': 'all',
         place = PlaceInfo({'name': {'name': 'something'}})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': 'all',
-                                   'mode': 'replace'}]).process_names(place)
+                                   'mode': 'replace'}],
+                                 self.config).process_names(place)
 
         assert len(name) == 1
         assert name[0].name == 'something'
 
         assert len(name) == 1
         assert name[0].name == 'something'
@@ -199,14 +212,19 @@ class TestDefaultCountry:
 
 class TestCountryWithWhitelist:
 
 
 class TestCountryWithWhitelist:
 
-    @staticmethod
-    def run_sanitizer_on(mode,  country, **kwargs):
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
+
+
+    def run_sanitizer_on(self, mode,  country, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
                                    'mode': 'replace',
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()},
                            'country_code': country})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'use-defaults': mode,
                                    'mode': 'replace',
-                                   'whitelist': ['de', 'fr', 'ru']}]).process_names(place)
+                                   'whitelist': ['de', 'fr', 'ru']}],
+                                 self.config).process_names(place)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
@@ -238,12 +256,17 @@ class TestCountryWithWhitelist:
 
 class TestWhiteList:
 
 
 class TestWhiteList:
 
-    @staticmethod
-    def run_sanitizer_on(whitelist, **kwargs):
+    @pytest.fixture(autouse=True)
+    def setup_country(self, def_config):
+        self.config = def_config
+
+
+    def run_sanitizer_on(self, whitelist, **kwargs):
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'mode': 'replace',
         place = PlaceInfo({'name': {k.replace('_', ':'): v for k, v in kwargs.items()}})
         name, _ = PlaceSanitizer([{'step': 'tag-analyzer-by-language',
                                    'mode': 'replace',
-                                   'whitelist': whitelist}]).process_names(place)
+                                   'whitelist': whitelist}],
+                                 self.config).process_names(place)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
 
         assert all(isinstance(p.attr, dict) for p in name)
         assert all(len(p.attr) <= 1 for p in name)
index 31401bd19aa0eb73c161823023e537aef31d1726..3dd3033ca6f5031dae5f58276a5bada03e7cbec3 100644 (file)
@@ -47,8 +47,8 @@ def test_placeinfo_has_attr():
     assert not place.has_attr('whatever')
 
 
     assert not place.has_attr('whatever')
 
 
-def test_sanitizer_default():
-    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}])
+def test_sanitizer_default(def_config):
+    san = sanitizer.PlaceSanitizer([{'step': 'split-name-list'}], def_config)
 
     name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'},
                                                   'address': {'street': 'Bald'}}))
 
     name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'},
                                                   'address': {'street': 'Bald'}}))
@@ -63,8 +63,8 @@ def test_sanitizer_default():
 
 
 @pytest.mark.parametrize('rules', [None, []])
 
 
 @pytest.mark.parametrize('rules', [None, []])
-def test_sanitizer_empty_list(rules):
-    san = sanitizer.PlaceSanitizer(rules)
+def test_sanitizer_empty_list(def_config, rules):
+    san = sanitizer.PlaceSanitizer(rules, def_config)
 
     name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}}))
 
 
     name, address =  san.process_names(PlaceInfo({'name': {'name:de:de': '1;2;3'}}))
 
@@ -72,6 +72,6 @@ def test_sanitizer_empty_list(rules):
     assert all(isinstance(n, sanitizer.PlaceName) for n in name)
 
 
     assert all(isinstance(n, sanitizer.PlaceName) for n in name)
 
 
-def test_sanitizer_missing_step_definition():
+def test_sanitizer_missing_step_definition(def_config):
     with pytest.raises(UsageError):
     with pytest.raises(UsageError):
-        san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}])
+        san = sanitizer.PlaceSanitizer([{'id': 'split-name-list'}], def_config)