+# SPDX-License-Identifier: GPL-2.0-only\r
+#\r
+# This file is part of Nominatim. (https://nominatim.org)\r
+#\r
+# Copyright (C) 2023 by the Nominatim developer community.\r
+# For a full list of authors see the git log.\r
+"""\r
+Tests for the sanitizer that deletes tags.\r
+"""\r
+import pytest\r
+\r
+\r
+from nominatim.data.place_info import PlaceInfo\r
+from nominatim.tokenizer.place_sanitizer import PlaceSanitizer\r
+\r
+\r
class TestWithDefault:
    """Run the 'delete-tags' step without any further arguments."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, type, **kwargs):
        """Sanitize a place whose `type` section carries the given tags.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. The result is a dict with the sorted
        (name, kind, suffix) tuples of the returned names and address parts.
        """
        tags = {}
        for key, value in kwargs.items():
            tags[key.replace('_', ':')] = value

        place = PlaceInfo({type: tags, 'country_code': 'de', 'rank_address': 30})

        sanitizer = PlaceSanitizer([{'step': 'delete-tags'}], self.config)
        name, address = sanitizer.process_names(place)

        def as_tuples(items):
            return sorted((p.name, p.kind, p.suffix or '') for p in items)

        return {'name': as_tuples(name), 'address': as_tuples(address)}

    def test_on_name(self):
        # With no arguments every name tag is expected to be removed.
        result = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')

        assert result.get('name') == []

    def test_on_address(self):
        # Address tags are expected to pass through untouched.
        result = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')

        expected = [('bar', 'ref', ''), ('baz', 'ref', 'abc'), ('foo', 'name', '')]
        assert result.get('address') == expected
+\r
+\r
class TestTypeField:
    """Exercise the 'type' argument of the 'delete-tags' step."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, type, **kwargs):
        """Sanitize a place with the given name tags using `type` as the
        sanitizer's 'type' argument.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. Returns the sorted (name, kind, suffix) tuples of the
        names left after sanitizing.
        """
        name_tags = dict((key.replace('_', ':'), value)
                         for key, value in kwargs.items())
        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {'step': 'delete-tags', 'type': type}
        names, _ = PlaceSanitizer([step], self.config).process_names(place)

        return sorted((item.name, item.kind, item.suffix or '') for item in names)

    def test_name_type(self):
        # Targeting names is expected to remove all name tags.
        remaining = self.run_sanitizer_on('name', name='foo', ref='bar', ref_abc='baz')

        assert remaining == []

    def test_address_type(self):
        # Targeting addresses is expected to leave the name tags alone.
        remaining = self.run_sanitizer_on('address', name='foo', ref='bar', ref_abc='baz')

        expected = [('bar', 'ref', ''), ('baz', 'ref', 'abc'), ('foo', 'name', '')]
        assert remaining == expected
+\r
class TestFilterKind:
    """Exercise the 'filter-kind' argument of the 'delete-tags' step."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, filt, **kwargs):
        """Sanitize a place with the given name tags using `filt` as the
        sanitizer's 'filter-kind' argument.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. Returns the sorted (name, kind, suffix) tuples of the
        names left after sanitizing.
        """
        name_tags = {}
        for key, value in kwargs.items():
            name_tags[key.replace('_', ':')] = value

        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {'step': 'delete-tags', 'filter-kind': filt}
        sanitizer = PlaceSanitizer([step], self.config)
        names, _ = sanitizer.process_names(place)

        return sorted((item.name, item.kind, item.suffix or '') for item in names)

    def test_single_exact_name(self):
        # Only tags of kind 'name' are expected to go; 'ref' tags stay.
        remaining = self.run_sanitizer_on(['name'], ref='foo', name='foo',
                                          name_abc='bar', ref_abc='bar')

        assert remaining == [('bar', 'ref', 'abc'), ('foo', 'ref', '')]

    def test_single_pattern(self):
        # 'namexx' is expected to survive the '.*name' pattern.
        remaining = self.run_sanitizer_on(['.*name'],
                                          name_fr='foo', ref_fr='foo', namexx_fr='bar',
                                          shortname_fr='bar', name='bar')

        assert remaining == [('bar', 'namexx', 'fr'), ('foo', 'ref', 'fr')]

    def test_multiple_patterns(self):
        # Kinds matching neither pattern ('namexx', 'oldref') are expected to stay.
        remaining = self.run_sanitizer_on(['.*name', 'ref'],
                                          name_fr='foo', ref_fr='foo', oldref_fr='foo',
                                          namexx_fr='bar', shortname_fr='baz', name='baz')

        assert remaining == [('bar', 'namexx', 'fr'), ('foo', 'oldref', 'fr')]
+\r
+\r
class TestRankAddress:
    """Exercise the 'rank_address' argument of the 'delete-tags' step.

    The place under test always has an address rank of 30.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, rank_addr, **kwargs):
        """Sanitize a place with the given name tags using `rank_addr` as
        the sanitizer's 'rank_address' argument.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. Returns the sorted (name, kind, suffix) tuples of the
        names left after sanitizing.
        """
        name_tags = dict((key.replace('_', ':'), value)
                         for key, value in kwargs.items())
        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {'step': 'delete-tags', 'rank_address': rank_addr}
        sanitizer = PlaceSanitizer([step], self.config)
        names, _ = sanitizer.process_names(place)

        return sorted((item.name, item.kind, item.suffix or '') for item in names)

    def test_single_rank(self):
        remaining = self.run_sanitizer_on('30', name='foo', ref='bar')

        assert remaining == []

    def test_single_rank_fail(self):
        remaining = self.run_sanitizer_on('28', name='foo', ref='bar')

        assert remaining == [('bar', 'ref', ''), ('foo', 'name', '')]

    def test_ranged_rank_pass(self):
        remaining = self.run_sanitizer_on('26-30', name='foo', ref='bar')

        assert remaining == []

    def test_ranged_rank_fail(self):
        remaining = self.run_sanitizer_on('26-29', name='foo', ref='bar')

        assert remaining == [('bar', 'ref', ''), ('foo', 'name', '')]

    def test_mixed_rank_pass(self):
        ranks = ['4', '20-28', '30', '10-12']
        remaining = self.run_sanitizer_on(ranks, name='foo', ref='bar')

        assert remaining == []

    def test_mixed_rank_fail(self):
        ranks = ['4-8', '10', '26-29', '18']
        remaining = self.run_sanitizer_on(ranks, name='foo', ref='bar')

        assert remaining == [('bar', 'ref', ''), ('foo', 'name', '')]
+\r
+\r
class TestSuffix:
    """Exercise the 'suffix' argument of the 'delete-tags' step."""

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, suffix, **kwargs):
        """Sanitize a place with the given name tags using `suffix` as the
        sanitizer's 'suffix' argument.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. Returns the sorted (name, kind, suffix) tuples of the
        names left after sanitizing.
        """
        name_tags = {}
        for key, value in kwargs.items():
            name_tags[key.replace('_', ':')] = value

        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {'step': 'delete-tags', 'suffix': suffix}
        names, _ = PlaceSanitizer([step], self.config).process_names(place)

        return sorted((item.name, item.kind, item.suffix or '') for item in names)

    def test_single_suffix(self):
        # Only the tags carrying the 'abc' suffix are expected to be removed.
        remaining = self.run_sanitizer_on('abc', name='foo', name_abc='foo',
                                          name_pqr='bar', ref='bar', ref_abc='baz')

        expected = [('bar', 'name', 'pqr'), ('bar', 'ref', ''), ('foo', 'name', '')]
        assert remaining == expected

    def test_multiple_suffix(self):
        # 'pqrxx' is expected to survive because 'pqr' is not a full match.
        remaining = self.run_sanitizer_on(['abc.*', 'pqr'], name='foo', name_abcxx='foo',
                                          ref_pqr='bar', name_pqrxx='baz')

        assert remaining == [('baz', 'name', 'pqrxx'), ('foo', 'name', '')]
+\r
+\r
+\r
class TestCountryCodes:
    """Exercise the 'country_code' argument of the 'delete-tags' step.

    The place under test always has the country code 'de'.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, country_code, **kwargs):
        """Sanitize a place with the given name tags using `country_code`
        as the sanitizer's 'country_code' argument.

        Underscores in the keyword names are replaced with colons to form
        the tag keys. Returns the sorted (name, kind) tuples of the names
        left after sanitizing.
        """
        name_tags = dict((key.replace('_', ':'), value)
                         for key, value in kwargs.items())
        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {'step': 'delete-tags', 'country_code': country_code}
        sanitizer = PlaceSanitizer([step], self.config)
        names, _ = sanitizer.process_names(place)

        return sorted((item.name, item.kind) for item in names)

    def test_single_country_code_pass(self):
        remaining = self.run_sanitizer_on('de', name='foo', ref='bar')

        assert remaining == []

    def test_single_country_code_fail(self):
        remaining = self.run_sanitizer_on('in', name='foo', ref='bar')

        assert remaining == [('bar', 'ref'), ('foo', 'name')]

    def test_empty_country_code_list(self):
        remaining = self.run_sanitizer_on([], name='foo', ref='bar')

        assert remaining == [('bar', 'ref'), ('foo', 'name')]

    def test_multiple_country_code_pass(self):
        codes = ['in', 'de', 'fr']
        remaining = self.run_sanitizer_on(codes, name='foo', ref='bar')

        assert remaining == []

    def test_multiple_country_code_fail(self):
        codes = ['in', 'au', 'fr']
        remaining = self.run_sanitizer_on(codes, name='foo', ref='bar')

        assert remaining == [('bar', 'ref'), ('foo', 'name')]
+\r
class TestAllParameters:
    """Exercise the 'delete-tags' step with all arguments set at once.

    The place under test always has the country code 'de' and an address
    rank of 30.
    """

    @pytest.fixture(autouse=True)
    def setup_country(self, def_config):
        # Keep the default configuration around for building the sanitizer.
        self.config = def_config

    def run_sanitizer_on(self, country_code, rank_addr, suffix, **kwargs):
        """Sanitize a place with the given name tags.

        `country_code`, `rank_addr` and `suffix` are handed to the sanitizer
        as its 'country_code', 'rank_address' and 'suffix' arguments; the
        remaining arguments are fixed. Underscores in the keyword names are
        replaced with colons to form the tag keys. Returns the sorted
        (name, kind, suffix) tuples of the names left after sanitizing.
        """
        name_tags = {}
        for key, value in kwargs.items():
            name_tags[key.replace('_', ':')] = value

        place = PlaceInfo({'name': name_tags, 'country_code': 'de', 'rank_address': 30})

        step = {
            'step': 'delete-tags',
            'type': 'name',
            'filter-kind': ['name', 'ref'],
            'country_code': country_code,
            'rank_address': rank_addr,
            'suffix': suffix,
            'name': r'[\s\S]*',
        }
        sanitizer = PlaceSanitizer([step], self.config)
        names, _ = sanitizer.process_names(place)

        return sorted((item.name, item.kind, item.suffix or '') for item in names)

    def test_string_arguments_pass(self):
        remaining = self.run_sanitizer_on('de', '25-30', r'[\s\S]*',
                                          name='foo', ref='foo',
                                          name_abc='bar', ref_abc='baz')

        assert remaining == []

    def test_string_arguments_fail(self):
        # The country code does not match the place, so nothing should go.
        remaining = self.run_sanitizer_on('in', '25-30', r'[\s\S]*',
                                          name='foo', ref='foo',
                                          name_abc='bar', ref_abc='baz')

        expected = [('bar', 'name', 'abc'), ('baz', 'ref', 'abc'),
                    ('foo', 'name', ''), ('foo', 'ref', '')]
        assert remaining == expected

    def test_list_arguments_pass(self):
        remaining = self.run_sanitizer_on(['de', 'in'], ['20-28', '30'],
                                          [r'abc.*', r'[\s\S]*'],
                                          name='foo', ref_abc='foo',
                                          name_abcxx='bar', ref_pqr='baz')

        assert remaining == []

    def test_list_arguments_fail(self):
        # None of the listed ranks covers the place's rank of 30.
        remaining = self.run_sanitizer_on(['de', 'in'], ['14', '20-29'],
                                          [r'abc.*', r'pqr'],
                                          name='foo', ref_abc='foo',
                                          name_abcxx='bar', ref_pqr='baz')

        expected = [('bar', 'name', 'abcxx'), ('baz', 'ref', 'pqr'),
                    ('foo', 'name', ''), ('foo', 'ref', 'abc')]
        assert remaining == expected

    def test_mix_arguments_pass(self):
        remaining = self.run_sanitizer_on('de', ['10', '20-28', '30'], r'[\s\S]*',
                                          name='foo', ref_abc='foo',
                                          name_abcxx='bar', ref_pqr='baz')

        assert remaining == []

    def test_mix_arguments_fail(self):
        # No tag has a suffix matching 'abc.*', so all are expected to stay.
        remaining = self.run_sanitizer_on(['de', 'in'], ['10', '20-28', '30'], r'abc.*',
                                          name='foo', ref='foo',
                                          name_pqr='bar', ref_pqr='baz')

        expected = [('bar', 'name', 'pqr'), ('baz', 'ref', 'pqr'),
                    ('foo', 'name', ''), ('foo', 'ref', '')]
        assert remaining == expected
\ No newline at end of file