X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/bb175cc95898de420781867973b38d033c187e81..86588419fb1c3fffe131c0e8d99ecea3c77d67c5:/test/python/tokenizer/test_icu.py diff --git a/test/python/tokenizer/test_icu.py b/test/python/tokenizer/test_icu.py index 642aaceb..a3839365 100644 --- a/test/python/tokenizer/test_icu.py +++ b/test/python/tokenizer/test_icu.py @@ -1,3 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Tests for ICU tokenizer. """ @@ -394,7 +400,9 @@ class TestPlaceAddress: @pytest.fixture(autouse=True) def setup(self, analyzer, sql_functions): - with analyzer(trans=(":: upper()", "'🜵' > ' '")) as anl: + hnr = {'step': 'clean-housenumbers', + 'filter-kind': ['housenumber', 'conscriptionnumber', 'streetnumber']} + with analyzer(trans=(":: upper()", "'🜵' > ' '"), sanitizers=[hnr]) as anl: self.analyzer = anl yield anl @@ -440,13 +448,6 @@ class TestPlaceAddress: assert info['hnr_tokens'] == "{-1}" - def test_process_place_housenumbers_lists(self, getorcreate_hnr_id): - info = self.process_address(conscriptionnumber='1; 2;3') - - assert set(info['hnr'].split(';')) == set(('1', '2', '3')) - assert info['hnr_tokens'] == "{-1,-2,-3}" - - def test_process_place_housenumbers_duplicates(self, getorcreate_hnr_id): info = self.process_address(housenumber='134', conscriptionnumber='134', @@ -471,9 +472,25 @@ class TestPlaceAddress: def test_process_place_street(self): + self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}})) info = self.process_address(street='Grand Road') - assert eval(info['street']) == self.name_token_set('GRAND', 'ROAD') + assert eval(info['street']) == self.name_token_set('#Grand Road') + + + def test_process_place_nonexisting_street(self): + info = self.process_address(street='Grand Road') + + assert 'street' not in info + + + def test_process_place_multiple_street_tags(self): + self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road', + 'ref': '05989'}})) + info = self.process_address(**{'street': 'Grand Road', + 'street:sym_ul': '05989'}) + + assert eval(info['street']) == self.name_token_set('#Grand Road', '#05989') def test_process_place_street_empty(self): @@ -482,12 +499,28 @@ class TestPlaceAddress: assert 'street' not in info + def test_process_place_street_from_cache(self): + self.analyzer.process_place(PlaceInfo({'name': {'name' : 'Grand Road'}})) + self.process_address(street='Grand Road') + + # request address again + info = self.process_address(street='Grand Road') + + assert eval(info['street']) == self.name_token_set('#Grand Road') + + def test_process_place_place(self): info = self.process_address(place='Honu Lulu') assert eval(info['place']) == self.name_token_set('HONU', 'LULU') + def test_process_place_place_extra(self): + info = self.process_address(**{'place:en': 'Honu Lulu'}) + + assert 'place' not in info + + def test_process_place_place_empty(self): info = self.process_address(place='🜵') @@ -507,6 +540,14 @@ class TestPlaceAddress: assert result == {'city': city, 'suburb': city, 'state': state} + def test_process_place_multiple_address_terms(self): + info = self.process_address(**{'city': 'Bruxelles', 'city:de': 'Brüssel'}) + + result = {k: eval(v) for k,v in info['addr'].items()} + + assert result == {'city': self.name_token_set('Bruxelles')} + + def test_process_place_address_terms_empty(self): info = self.process_address(country='de', city=' ', street='Hauptstr', full='right behind the church')