git.openstreetmap.org Git - nominatim.git/commitdiff
skip most addr: tags with suffixes
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 6 Dec 2021 13:46:40 +0000 (14:46 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 6 Dec 2021 13:55:10 +0000 (14:55 +0100)
Only one addr: tag of each kind can currently be processed, so
make sure it is the one without a suffix to avoid odd data.
addr:street is the exception because it uses a different
matching mechanism.

nominatim/tokenizer/icu_tokenizer.py
test/python/tokenizer/test_icu.py

index f8f6af2ea04ad25381c8399b5a6ee48e5d9cdae3..33f05cc4b21d463d22dfc675bab665c804fbc276 100644 (file)
@@ -420,8 +420,9 @@ class LegacyICUNameAnalyzer(AbstractAnalyzer):
                 if token:
                     streets.append(token)
             elif item.kind == 'place':
-                token_info.add_place(self._compute_partial_tokens(item.name))
-            elif not item.kind.startswith('_') and \
+                if not item.suffix:
+                    token_info.add_place(self._compute_partial_tokens(item.name))
+            elif not item.kind.startswith('_') and not item.suffix and \
                  item.kind not in ('country', 'full'):
                 addr_terms.append((item.kind, self._compute_partial_tokens(item.name)))
 
index 22112220ad9a03a01600c2d39bfee6068912332f..83668b3936e443f11e7b0928a0eee28a588f55ae 100644 (file)
@@ -514,6 +514,12 @@ class TestPlaceAddress:
         assert eval(info['place']) == self.name_token_set('HONU', 'LULU')
 
 
+    def test_process_place_place_extra(self):
+        info = self.process_address(**{'place:en': 'Honu Lulu'})
+
+        assert 'place' not in info
+
+
     def test_process_place_place_empty(self):
         info = self.process_address(place='🜵')
 
@@ -533,6 +539,14 @@ class TestPlaceAddress:
         assert result == {'city': city, 'suburb': city, 'state': state}
 
 
+    def test_process_place_multiple_address_terms(self):
+        info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'})
+
+        result = {k: eval(v) for k,v in info['addr'].items()}
+
+        assert result == {'city': self.name_token_set('Bruxelles')}
+
+
     def test_process_place_address_terms_empty(self):
         info = self.process_address(country='de', city=' ', street='Hauptstr',
                                     full='right behind the church')