From c7472662a69eb6074a58917dbde71cf2b27e8d6e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 9 Nov 2020 12:03:37 +0100 Subject: [PATCH] lookup places for address tags for rank < 30 While previously the content of addr:* tags was only added to the list of address search keywords, we now really look up the matching place. This has the advantage that we pull in all potential translations from the place, just like all the other address terms that are looked up by neighbourhood search. If no place can be found for a given name, the content of the addr:* tag is still added to the search keywords as before. --- sql/functions/placex_triggers.sql | 62 +++++++++++++------------ sql/functions/ranking.sql | 47 +++++++++++++++++++ sql/partition-functions.src.sql | 63 +++++++++++++++++++++++--- test/bdd/db/import/search_name.feature | 7 ++- 4 files changed, 141 insertions(+), 38 deletions(-) diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index 6848140a..5e43b27a 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -254,6 +254,7 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT, maxrank SMALLINT, address HSTORE, geometry GEOMETRY, + country TEXT, OUT parent_place_id BIGINT, OUT postcode TEXT, OUT nameaddress_vector INT[]) @@ -265,45 +266,49 @@ DECLARE current_boundary GEOMETRY := NULL; current_node_area GEOMETRY := NULL; - location RECORD; - addr_item RECORD; + parent_place_rank INT := 0; + addr_place_ids BIGINT[]; - isin_tokens INT[]; + location RECORD; BEGIN parent_place_id := 0; nameaddress_vector := '{}'::int[]; - isin_tokens := '{}'::int[]; - ---- convert address store to array of tokenids - IF address IS NOT NULL THEN - FOR addr_item IN SELECT * FROM each(address) - LOOP - IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', - 'district', 'region', 'county', 'municipality', - 'hamlet', 'village', 'subdistrict', 'town', - 'neighbourhood', 'quarter', 
'parish') - THEN - isin_tokens := array_merge(isin_tokens, - word_ids_from_name(addr_item.value)); - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_ids_from_name(addr_item.value)); + address_havelevel := array_fill(false, ARRAY[maxrank]); + + FOR location IN + SELECT * FROM get_places_for_addr_tags(partition, geometry, + address, country) + ORDER BY rank_address, distance, isguess desc + LOOP + IF NOT %REVERSE-ONLY% THEN + nameaddress_vector := array_merge(nameaddress_vector, + location.keywords::int[]); + END IF; + + IF location.place_id is not null THEN + location_isaddress := not address_havelevel[location.rank_address]; + IF not address_havelevel[location.rank_address] THEN + address_havelevel[location.rank_address] := true; + IF parent_place_rank < location.rank_address THEN + parent_place_id := location.place_id; + parent_place_rank := location.rank_address; END IF; END IF; - END LOOP; - END IF; - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, isin_tokens); - END IF; - ---- now compute the address terms - FOR i IN 1..maxrank LOOP - address_havelevel[i] := false; + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, location.place_id, not location.isguess, + true, location.distance, location.rank_address); + + addr_place_ids := array_append(addr_place_ids, location.place_id); + END IF; END LOOP; FOR location IN SELECT * FROM getNearFeatures(partition, geometry, maxrank) - ORDER BY rank_address, isin_tokens && keywords desc, isguess asc, + WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id] + ORDER BY rank_address, isguess asc, distance * CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2 WHEN rank_address = 16 AND rank_search = 16 THEN 0.25 @@ -920,7 +925,8 @@ BEGIN NEW.address, CASE WHEN (NEW.rank_address = 0 or NEW.rank_search between 26 and 29) - THEN NEW.geometry ELSE 
NEW.centroid END) + THEN NEW.geometry ELSE NEW.centroid END, + NEW.country_code) INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector; --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector; diff --git a/sql/functions/ranking.sql b/sql/functions/ranking.sql index a84269fe..51dcd0d0 100644 --- a/sql/functions/ranking.sql +++ b/sql/functions/ranking.sql @@ -233,3 +233,50 @@ BEGIN END; $$ LANGUAGE plpgsql IMMUTABLE; + +CREATE OR REPLACE FUNCTION get_addr_tag_rank(key TEXT, country TEXT, + OUT from_rank SMALLINT, + OUT to_rank SMALLINT, + OUT extent FLOAT) + AS $$ +DECLARE + ranks RECORD; +BEGIN + from_rank := null; + + FOR ranks IN + SELECT * FROM + (SELECT l.rank_search, l.rank_address FROM address_levels l + WHERE (l.country_code = country or l.country_code is NULL) + AND l.class = 'place' AND l.type = key + ORDER BY l.country_code LIMIT 1) r + WHERE rank_address > 0 + LOOP + extent := reverse_place_diameter(ranks.rank_search); + + IF ranks.rank_address <= 4 THEN + from_rank := 4; + to_rank := 4; + ELSEIF ranks.rank_address <= 9 THEN + from_rank := 5; + to_rank := 9; + ELSEIF ranks.rank_address <= 12 THEN + from_rank := 10; + to_rank := 12; + ELSEIF ranks.rank_address <= 16 THEN + from_rank := 13; + to_rank := 16; + ELSEIF ranks.rank_address <= 21 THEN + from_rank := 17; + to_rank := 21; + ELSEIF ranks.rank_address <= 24 THEN + from_rank := 22; + to_rank := 24; + ELSE + from_rank := 25; + to_rank := 25; + END IF; + END LOOP; +END; +$$ +LANGUAGE plpgsql IMMUTABLE; diff --git a/sql/partition-functions.src.sql b/sql/partition-functions.src.sql index 97520f99..8e54868b 100644 --- a/sql/partition-functions.src.sql +++ b/sql/partition-functions.src.sql @@ -10,8 +10,8 @@ CREATE TYPE nearfeaturecentr AS ( centroid GEOMETRY ); - -- feature intersects geoemtry - -- for areas and linestrings they must touch at least along a line +-- feature intersects geometry +-- for areas and linestrings they must touch at
least along a line CREATE OR REPLACE FUNCTION is_relevant_geometry(de9im TEXT, geom_type TEXT) RETURNS BOOLEAN AS $$ @@ -39,8 +39,10 @@ BEGIN -- start IF in_partition = -partition- THEN - FOR r IN - SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid + FOR r IN + SELECT place_id, keywords, rank_address, rank_search, + min(ST_Distance(feature, centroid)) as distance, + isguess, postcode, centroid FROM location_area_large_-partition- WHERE geometry && feature AND is_relevant_geometry(ST_Relate(geometry, feature), ST_GeometryType(feature)) @@ -58,6 +60,56 @@ END $$ LANGUAGE plpgsql STABLE; +CREATE OR REPLACE FUNCTION get_places_for_addr_tags(in_partition SMALLINT, + feature GEOMETRY, + address HSTORE, country TEXT) + RETURNS SETOF nearfeaturecentr + AS $$ +DECLARE + r nearfeaturecentr%rowtype; + item RECORD; +BEGIN + FOR item IN + SELECT (get_addr_tag_rank(key, country)).*, key, name FROM + (SELECT skeys(address) as key, svals(address) as name) x + WHERE key not in ('country', 'postcode', 'housenumber', + 'conscriptionnumber', 'streetnumber') + LOOP + IF item.from_rank is null THEN + CONTINUE; + END IF; + +-- start + IF in_partition = -partition- THEN + SELECT place_id, keywords, rank_address, rank_search, + min(ST_Distance(feature, centroid)) as distance, + isguess, postcode, centroid INTO r + FROM location_area_large_-partition- + WHERE geometry && ST_Expand(feature, item.extent) + AND rank_address between item.from_rank and item.to_rank + AND word_ids_from_name(item.name) && keywords + GROUP BY place_id, keywords, rank_address, rank_search, isguess, postcode, centroid + ORDER BY ST_Intersects(ST_Collect(geometry), feature), distance LIMIT 1; + IF r.place_id is null THEN + -- If we cannot find a place for the term, just return the + -- search term for the given name. That ensures that the address + -- element can still be searched for, even though it will not be + -- displayed. 
+ RETURN NEXT ROW(null, addr_ids_from_name(item.name), null, null, + null, null, null, null)::nearfeaturecentr; + ELSE + RETURN NEXT r; + END IF; + CONTINUE; + END IF; +-- end + + RAISE EXCEPTION 'Unknown partition %', in_partition; + END LOOP; +END; +$$ +LANGUAGE plpgsql STABLE; + create or replace function deleteLocationArea(in_partition INTEGER, in_place_id BIGINT, in_rank_search INTEGER) RETURNS BOOLEAN AS $$ DECLARE BEGIN @@ -153,7 +205,7 @@ BEGIN FROM search_name_-partition- WHERE name_vector && isin_token AND centroid && ST_Expand(point, 0.04) - AND search_rank between 16 and 25 + AND address_rank between 16 and 25 ORDER BY ST_Distance(centroid, point) ASC limit 1; RETURN parent; END IF; @@ -164,7 +216,6 @@ END $$ LANGUAGE plpgsql STABLE; - create or replace function insertSearchName( in_partition INTEGER, in_place_id BIGINT, in_name_vector INTEGER[], in_rank_search INTEGER, in_rank_address INTEGER, in_geometry GEOMETRY) diff --git a/test/bdd/db/import/search_name.feature b/test/bdd/db/import/search_name.feature index a0a53911..866a597d 100644 --- a/test/bdd/db/import/search_name.feature +++ b/test/bdd/db/import/search_name.feature @@ -185,16 +185,15 @@ Feature: Creation of search terms | object | name_vector | nameaddress_vector | | N1 | foo | the road | - Scenario: Some addr: tags are added to address when the name exists + Scenario: Some addr: tags are added to address Given the scene roads-with-pois And the places | osm | class | type | name | geometry | - | N1 | place | state | new york | 80 80 | | N2 | place | city | bonn | 81 81 | | N3 | place | suburb | smalltown| 80 81 | And the named places - | osm | class | type | addr+city | addr+state | addr+suburb | geometry | - | W1 | highway | service | bonn | New York | Smalltown | :w-north | + | osm | class | type | addr+city | addr+municipality | addr+suburb | geometry | + | W1 | highway | service | bonn | New York | Smalltown | :w-north | When importing Then search_name contains | object | 
nameaddress_vector | -- 2.39.5