From: Sarah Hoffmann Date: Fri, 30 Jun 2023 13:28:00 +0000 (+0200) Subject: also switch legacy tokenizer to new street/place choice behaviour X-Git-Tag: v4.3.0~59^2 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/d7a3039c2a4bd26d05f08ee3140b8dfaecd68f02 also switch legacy tokenizer to new street/place choice behaviour --- diff --git a/lib-sql/tokenizer/legacy_tokenizer.sql b/lib-sql/tokenizer/legacy_tokenizer.sql index 3b82619f..8c8f56e1 100644 --- a/lib-sql/tokenizer/legacy_tokenizer.sql +++ b/lib-sql/tokenizer/legacy_tokenizer.sql @@ -44,14 +44,14 @@ $$ LANGUAGE SQL IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION token_is_street_address(info JSONB) RETURNS BOOLEAN AS $$ - SELECT info->>'street' is not null or info->>'place' is null; + SELECT info->>'street' is not null or info->>'place_search' is null; $$ LANGUAGE SQL IMMUTABLE; CREATE OR REPLACE FUNCTION token_has_addr_street(info JSONB) RETURNS BOOLEAN AS $$ - SELECT info->>'street' is not null; + SELECT info->>'street' is not null and info->>'street' != '{}'; $$ LANGUAGE SQL IMMUTABLE; diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index a50dedb2..e09700d9 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -564,14 +564,13 @@ class _TokenInfo: def add_street(self, conn: Connection, street: str) -> None: """ Add addr:street match terms. """ - def _get_street(name: str) -> List[int]: + def _get_street(name: str) -> Optional[str]: with conn.cursor() as cur: - return cast(List[int], + return cast(Optional[str], cur.scalar("SELECT word_ids_from_name(%s)::text", (name, ))) tokens = self.cache.streets.get(street, _get_street) - if tokens: - self.data['street'] = tokens + self.data['street'] = tokens or '{}' def add_place(self, conn: Connection, place: str) -> None: diff --git a/test/python/tokenizer/test_legacy.py b/test/python/tokenizer/test_legacy.py index 57a82b8a..d63ee8e1 100644 --- a/test/python/tokenizer/test_legacy.py +++ b/test/python/tokenizer/test_legacy.py @@ -549,7 +549,7 @@ class TestPlaceAddress: def test_process_place_street_empty(self): info = self.process_address(street='🜵') - assert 'street' not in info + assert info['street'] == '{}' def test_process_place_place(self):