]> git.openstreetmap.org Git - nominatim.git/blobdiff - test/python/tokenizer/test_icu.py
Merge pull request #2588 from lonvia/housenumber-sanitizer
[nominatim.git] / test / python / tokenizer / test_icu.py
index 642aaceb82e0f55ac59d03327b252fa53e0d250c..a3839365a750baa9c39ad8555acea1416fee9c79 100644 (file)
@@ -1,3 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Tests for ICU tokenizer.
 """
@@ -394,7 +400,9 @@ class TestPlaceAddress:
 
     @pytest.fixture(autouse=True)
     def setup(self, analyzer, sql_functions):
-        with analyzer(trans=(":: upper()", "'🜵' > ' '")) as anl:
+        hnr = {'step': 'clean-housenumbers',
+               'filter-kind': ['housenumber', 'conscriptionnumber', 'streetnumber']}
+        with analyzer(trans=(":: upper()", "'🜵' > ' '"), sanitizers=[hnr]) as anl:
             self.analyzer = anl
             yield anl
 
@@ -440,13 +448,6 @@ class TestPlaceAddress:
         assert info['hnr_tokens'] == "{-1}"
 
 
-    def test_process_place_housenumbers_lists(self, getorcreate_hnr_id):
-        info = self.process_address(conscriptionnumber='1; 2;3')
-
-        assert set(info['hnr'].split(';')) == set(('1', '2', '3'))
-        assert info['hnr_tokens'] == "{-1,-2,-3}"
-
-
     def test_process_place_housenumbers_duplicates(self, getorcreate_hnr_id):
         info = self.process_address(housenumber='134',
                                     conscriptionnumber='134',
@@ -471,9 +472,25 @@ class TestPlaceAddress:
 
 
     def test_process_place_street(self):
+        self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
         info = self.process_address(street='Grand Road')
 
-        assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD')
+        assert eval(info['street']) == self.name_token_set('#Grand Road')
+
+
+    def test_process_place_nonexisting_street(self):
+        info = self.process_address(street='Grand Road')
+
+        assert 'street' not in info
+
+
+    def test_process_place_multiple_street_tags(self):
+        self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road',
+                                                        'ref': '05989'}}))
+        info = self.process_address(**{'street': 'Grand Road',
+                                      'street:sym_ul': '05989'})
+
+        assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989')
 
 
     def test_process_place_street_empty(self):
@@ -482,12 +499,28 @@ class TestPlaceAddress:
         assert 'street' not in info
 
 
+    def test_process_place_street_from_cache(self):
+        self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}}))
+        self.process_address(street='Grand Road')
+
+        # request address again
+        info = self.process_address(street='Grand Road')
+
+        assert eval(info['street']) == self.name_token_set('#Grand Road')
+
+
     def test_process_place_place(self):
         info = self.process_address(place='Honu Lulu')
 
         assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
 
 
+    def test_process_place_place_extra(self):
+        info = self.process_address(**{'place:en': 'Honu Lulu'})
+
+        assert 'place' not in info
+
+
     def test_process_place_place_empty(self):
         info = self.process_address(place='🜵')
 
@@ -507,6 +540,14 @@ class TestPlaceAddress:
         assert result == {'city': city, 'suburb': city, 'state': state}
 
 
+    def test_process_place_multiple_address_terms(self):
+        info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
+
+        result = {k: eval(v) for k,v in info['addr'].items()}
+
+        assert result == {'city': self.name_token_set('Bruxelles')}
+
+
     def test_process_place_address_terms_empty(self):
         info = self.process_address(country='de', city=' ', street='Hauptstr',
                                     full='right behind the church')