X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/fae02fab00779b0f4c9aa3d6d541ba2c7eba165f..e871fabd5ffccb9b6db16a813c3addc89683d4e6:/sql/functions/placex_triggers.sql diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index dc96efd4..7d6352b4 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -92,7 +92,18 @@ BEGIN END IF; IF fallback THEN - IF ST_Area(bbox) < 0.01 THEN + IF addr_street is null and addr_place is not null THEN + -- The address is attached to a place we don't know. + -- Instead simply use the containing area with the largest rank. + FOR location IN + SELECT place_id FROM placex + WHERE bbox @ geometry AND _ST_Covers(geometry, ST_Centroid(bbox)) + AND rank_address between 5 and 25 + ORDER BY rank_address desc + LOOP + RETURN location.place_id; + END LOOP; + ELSEIF ST_Area(bbox) < 0.005 THEN -- for smaller features get the nearest road SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id; --DEBUG: RAISE WARNING 'Checked for nearest way (%)', parent_place_id; @@ -127,6 +138,7 @@ DECLARE BEGIN IF bnd.rank_search >= 26 or bnd.rank_address = 0 or ST_GeometryType(bnd.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon') + or bnd.type IN ('postcode', 'postal_code') THEN RETURN NULL; END IF; @@ -200,7 +212,11 @@ BEGIN FOR linked_placex IN SELECT placex.* from placex WHERE make_standard_name(name->'name') = bnd_name - AND ((bnd.rank_address > 0 and placex.rank_address = bnd.rank_address) + AND ((bnd.rank_address > 0 + and bnd.rank_address = (compute_place_rank(placex.country_code, + 'N', placex.class, + placex.type, 15::SMALLINT, + false, placex.postcode)).address_rank) OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search)) AND placex.osm_type = 'N' AND placex.rank_search < 26 -- needed to select the right index @@ -224,7 +240,7 @@ LANGUAGE plpgsql STABLE; -- \param maxrank Rank of the place. All address features must have -- a search rank lower than the given rank. -- \param address Address terms for the place. --- \param geoemtry Geometry to which the address objects should be close. +-- \param geometry Geometry to which the address objects should be close. -- -- \retval parent_place_id Place_id of the address object that is the direct -- ancestor. @@ -239,142 +255,119 @@ CREATE OR REPLACE FUNCTION insert_addresslines(obj_place_id BIGINT, maxrank SMALLINT, address HSTORE, geometry GEOMETRY, + country TEXT, OUT parent_place_id BIGINT, OUT postcode TEXT, OUT nameaddress_vector INT[]) AS $$ DECLARE - current_rank_address INTEGER := 0; - location_distance FLOAT := 0; - location_parent GEOMETRY := NULL; - parent_place_id_rank SMALLINT := 0; + address_havelevel BOOLEAN[]; location_isaddress BOOLEAN; + current_boundary GEOMETRY := NULL; + current_node_area GEOMETRY := NULL; - address_havelevel BOOLEAN[]; - location_keywords INT[]; + parent_place_rank INT := 0; + addr_place_ids BIGINT[]; location RECORD; - addr_item RECORD; - - isin_tokens INT[]; - isin TEXT[]; BEGIN parent_place_id := 0; nameaddress_vector := '{}'::int[]; - isin_tokens := '{}'::int[]; - ---- convert address store to array of tokenids - IF address IS NOT NULL THEN - FOR addr_item IN SELECT * FROM each(address) - LOOP - IF addr_item.key IN ('city', 'tiger:county', 'state', 'suburb', 'province', - 'district', 'region', 'county', 'municipality', - 'hamlet', 'village', 'subdistrict', 'town', - 'neighbourhood', 'quarter', 'parish') - THEN - isin_tokens := array_merge(isin_tokens, - word_ids_from_name(addr_item.value)); - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_ids_from_name(addr_item.value)); + address_havelevel := array_fill(false, ARRAY[maxrank]); + + FOR location IN + SELECT * FROM get_places_for_addr_tags(partition, geometry, + address, country) + ORDER BY rank_address, distance, isguess desc + LOOP + IF NOT %REVERSE-ONLY% THEN + nameaddress_vector := array_merge(nameaddress_vector, + location.keywords::int[]); + END IF; + + IF location.place_id is not null THEN + location_isaddress := not address_havelevel[location.rank_address]; + IF not address_havelevel[location.rank_address] THEN + address_havelevel[location.rank_address] := true; + IF parent_place_rank < location.rank_address THEN + parent_place_id := location.place_id; + parent_place_rank := location.rank_address; END IF; END IF; - END LOOP; - IF address ? 'is_in' THEN - -- is_in items need splitting - isin := regexp_split_to_array(address->'is_in', E'[;,]'); - IF array_upper(isin, 1) IS NOT NULL THEN - FOR i IN 1..array_upper(isin, 1) LOOP - isin_tokens := array_merge(isin_tokens, - word_ids_from_name(isin[i])); - - -- merge word into address vector - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, - addr_ids_from_name(isin[i])); - END IF; - END LOOP; - END IF; - END IF; - END IF; - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, isin_tokens); - END IF; + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, location.place_id, not location.isguess, + true, location.distance, location.rank_address); - ---- now compute the address terms - FOR i IN 1..28 LOOP - address_havelevel[i] := false; + addr_place_ids := array_append(addr_place_ids, location.place_id); + END IF; END LOOP; FOR location IN - SELECT * FROM getNearFeatures(partition, geometry, maxrank, isin_tokens) + SELECT * FROM getNearFeatures(partition, geometry, maxrank) + WHERE addr_place_ids is null or not addr_place_ids @> ARRAY[place_id] + ORDER BY rank_address, isguess asc, + distance * + CASE WHEN rank_address = 16 AND rank_search = 15 THEN 0.2 + WHEN rank_address = 16 AND rank_search = 16 THEN 0.25 + WHEN rank_address = 16 AND rank_search = 18 THEN 0.5 + ELSE 1 END ASC LOOP - IF location.rank_address != current_rank_address THEN - current_rank_address := location.rank_address; - IF location.isguess THEN - location_distance := location.distance * 1.5; - ELSE - IF location.rank_address <= 12 THEN - -- for county and above, if we have an area consider that exact - -- (It would be nice to relax the constraint for places close to - -- the boundary but we'd need the exact geometry for that. Too - -- expensive.) - location_distance = 0; - ELSE - -- Below county level remain slightly fuzzy. - location_distance := location.distance * 0.5; - END IF; + -- Ignore all place nodes that do not fit in a lower level boundary. + CONTINUE WHEN location.isguess + and current_boundary is not NULL + and not ST_Contains(current_boundary, location.centroid); + + -- If this is the first item in the rank, then assume it is the address. + location_isaddress := not address_havelevel[location.rank_address]; + + -- Further sanity checks to ensure that the address forms a sane hierarchy. + IF location_isaddress THEN + IF location.isguess and current_node_area is not NULL THEN + location_isaddress := ST_Contains(current_node_area, location.centroid); + END IF; + IF not location.isguess and current_boundary is not NULL + and location.rank_address != 11 AND location.rank_address != 5 THEN + location_isaddress := ST_Contains(current_boundary, location.centroid); END IF; - ELSE - CONTINUE WHEN location.keywords <@ location_keywords; END IF; - IF location.distance < location_distance OR NOT location.isguess THEN - location_keywords := location.keywords; + IF location_isaddress THEN + address_havelevel[location.rank_address] := true; + parent_place_id := location.place_id; - location_isaddress := NOT address_havelevel[location.rank_address]; - --DEBUG: RAISE WARNING 'should be address: %, is guess: %, rank: %', location_isaddress, location.isguess, location.rank_address; - IF location_isaddress AND location.isguess AND location_parent IS NOT NULL THEN - location_isaddress := ST_Contains(location_parent, location.centroid); + -- Set postcode if we have one. + -- (Returned will be the highest ranking one.) + IF location.postcode is not NULL THEN + postcode = location.postcode; END IF; - --DEBUG: RAISE WARNING '% isaddress: %', location.place_id, location_isaddress; - -- Add it to the list of search terms - IF NOT %REVERSE-ONLY% THEN - nameaddress_vector := array_merge(nameaddress_vector, - location.keywords::integer[]); - END IF; - - INSERT INTO place_addressline (place_id, address_place_id, fromarea, - isaddress, distance, cached_rank_address) - VALUES (obj_place_id, location.place_id, true, - location_isaddress, location.distance, location.rank_address); - - IF location_isaddress THEN - -- add postcode if we have one - -- (If multiple postcodes are available, we end up with the highest ranking one.) - IF location.postcode is not null THEN - postcode = location.postcode; - END IF; - - address_havelevel[location.rank_address] := true; - -- add a hack against postcode ranks - IF NOT location.isguess - AND location.rank_address != 11 AND location.rank_address != 5 - THEN + -- Recompute the areas we need for hierarchy sanity checks. + IF location.rank_address != 11 AND location.rank_address != 5 THEN + IF location.isguess THEN + current_node_area := place_node_fuzzy_area(location.centroid, + location.rank_search); + ELSE + current_node_area := NULL; SELECT p.geometry FROM placex p - WHERE p.place_id = location.place_id INTO location_parent; - END IF; - - IF location.rank_address > parent_place_id_rank THEN - parent_place_id = location.place_id; - parent_place_id_rank = location.rank_address; + WHERE p.place_id = location.place_id INTO current_boundary; END IF; END IF; END IF; + -- Add it to the list of search terms + IF NOT %REVERSE-ONLY% THEN + nameaddress_vector := array_merge(nameaddress_vector, + location.keywords::integer[]); + END IF; + + INSERT INTO place_addressline (place_id, address_place_id, fromarea, + isaddress, distance, cached_rank_address) + VALUES (obj_place_id, location.place_id, not location.isguess, + location_isaddress, location.distance, location.rank_address); END LOOP; END; $$ @@ -417,7 +410,12 @@ BEGIN NEW.name := hstore('ref', NEW.address->'postcode'); - ELSEIF NEW.class = 'boundary' AND NOT is_area THEN + ELSEIF NEW.class = 'highway' AND is_area AND NEW.name is null + AND NEW.extratags ? 'area' AND NEW.extratags->'area' = 'yes' + THEN + RETURN NULL; + ELSEIF NEW.class = 'boundary' AND NOT is_area + THEN RETURN NULL; ELSEIF NEW.class = 'boundary' AND NEW.type = 'administrative' AND NEW.admin_level <= 4 AND NEW.osm_type = 'W' @@ -453,11 +451,16 @@ BEGIN -- mark items within the geometry for re-indexing -- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; - -- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547) - update placex set indexed_status = 2 where (st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) - AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null or (NEW.rank_search >= 16 and address ? 'place')); - update placex set indexed_status = 2 where (st_covers(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) - AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null or (NEW.rank_search >= 16 and address ? 'place')); + UPDATE placex SET indexed_status = 2 + WHERE ST_Intersects(NEW.geometry, placex.geometry) + and indexed_status = 0 + and ((rank_address = 0 and rank_search > NEW.rank_address) + or rank_address > NEW.rank_address + or (class = 'place' and osm_type = 'N') + ) + and (rank_search < 28 + or name is not null + or (NEW.rank_address >= 16 and address ? 'place')); END IF; ELSE -- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :( @@ -497,33 +500,6 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT) - RETURNS SMALLINT - AS $$ -DECLARE - address_rank SMALLINT; -BEGIN - IF in_level <= 3 or in_level > 15 THEN - address_rank := 3; - ELSE - SELECT rank_address INTO address_rank - FROM placex - WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' - and admin_level < in_level - and geometry && geom and ST_Covers(geometry, geom) - ORDER BY admin_level desc LIMIT 1; - END IF; - - IF address_rank is NULL or address_rank <= 3 THEN - RETURN 3; - END IF; - - RETURN address_rank; -END; -$$ -LANGUAGE plpgsql; - - CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER AS $$ @@ -539,8 +515,13 @@ DECLARE addr_street TEXT; addr_place TEXT; + max_rank SMALLINT; + name_vector INTEGER[]; nameaddress_vector INTEGER[]; + addr_nameaddress_vector INTEGER[]; + + inherited_address HSTORE; linked_node_id BIGINT; linked_importance FLOAT; @@ -575,12 +556,22 @@ BEGIN -- update not necessary for osmline, cause linked_place_id does not exist NEW.extratags := NEW.extratags - 'linked_place'::TEXT; + NEW.address := NEW.address - '_unlisted_place'::TEXT; IF NEW.linked_place_id is not null THEN --DEBUG: RAISE WARNING 'place already linked to %', NEW.linked_place_id; RETURN NEW; END IF; + -- Postcodes are just here to compute the centroids. They are not searchable + -- unless they are a boundary=postal_code. + -- There was an error in the style so that boundary=postal_code used to be + -- imported as place=postcode. That's why relations are allowed to pass here. + -- This can go away in a couple of versions. + IF NEW.class = 'place' and NEW.type = 'postcode' and NEW.osm_type != 'R' THEN + RETURN NEW; + END IF; + -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate NEW.centroid := ST_PointOnSurface(NEW.geometry); @@ -599,14 +590,67 @@ BEGIN IF NEW.class = 'boundary' and NEW.type = 'administrative' and NEW.osm_type = 'R' and NEW.rank_address > 0 THEN - parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level); - IF parent_address_level >= NEW.rank_address THEN - IF parent_address_level >= 24 THEN - NEW.rank_address := 25; + -- First, check that admin boundaries do not overtake each other rank-wise. + parent_address_level := 3; + FOR location IN + SELECT rank_address, + (CASE WHEN extratags ? 'wikidata' and NEW.extratags ? 'wikidata' + and extratags->'wikidata' = NEW.extratags->'wikidata' + THEN ST_Equals(geometry, NEW.geometry) + ELSE false END) as is_same + FROM placex + WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' + and admin_level < NEW.admin_level and admin_level > 3 + and rank_address > 0 + and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid) + ORDER BY admin_level desc LIMIT 1 + LOOP + IF location.is_same THEN + -- Looks like the same boundary is replicated on multiple admin_levels. + -- Usual tagging in Poland. Remove our boundary from addresses. + NEW.rank_address := 0; ELSE - NEW.rank_address := parent_address_level + 2; + parent_address_level := location.rank_address; + IF location.rank_address >= NEW.rank_address THEN + IF location.rank_address >= 24 THEN + NEW.rank_address := 25; + ELSE + NEW.rank_address := location.rank_address + 2; + END IF; + END IF; END IF; + END LOOP; + + IF NEW.rank_address > 9 THEN + -- Second check that the boundary is not completely contained in a + -- place area with a higher address rank + FOR location IN + SELECT rank_address FROM placex + WHERE class = 'place' and rank_address < 24 + and rank_address > NEW.rank_address + and geometry && NEW.geometry + and geometry ~ NEW.geometry -- needed because ST_Relate does not do bbox cover test + and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal + ORDER BY rank_address desc LIMIT 1 + LOOP + NEW.rank_address := location.rank_address + 2; + END LOOP; END IF; + ELSEIF NEW.class = 'place' and NEW.osm_type = 'N' + and NEW.rank_address between 16 and 23 + THEN + -- If a place node is contained in a admin boundary with the same address level + -- and has not been linked, then make the node a subpart by increasing the + -- address rank (city level and above). + FOR location IN + SELECT rank_address FROM placex + WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' + and rank_address = NEW.rank_address + and geometry && NEW.centroid and _ST_Covers(geometry, NEW.centroid) + LIMIT 1 + LOOP + NEW.rank_address = NEW.rank_address + 2; + END LOOP; ELSE parent_address_level := 3; END IF; @@ -634,7 +678,7 @@ BEGIN addr_street := NEW.address->'street'; addr_place := NEW.address->'place'; - IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(,|;)%' THEN + IF NEW.address ? 'postcode' and NEW.address->'postcode' not similar to '%(:|,|;)%' THEN i := getorcreate_postcode_id(NEW.address->'postcode'); END IF; END IF; @@ -694,13 +738,14 @@ BEGIN -- --------------------------------------------------------------------------- -- For low level elements we inherit from our parent road - IF (NEW.rank_search > 27 OR (NEW.type = 'postcode' AND NEW.rank_search = 25)) THEN + IF NEW.rank_search > 27 THEN --DEBUG: RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id; NEW.parent_place_id := null; -- if we have a POI and there is no address information, -- see if we can get it from a surrounding building + inherited_address := ''::HSTORE; IF NEW.osm_type = 'N' AND addr_street IS NULL AND addr_place IS NULL AND NEW.housenumber IS NULL THEN FOR location IN @@ -715,6 +760,7 @@ BEGIN NEW.housenumber := location.address->'housenumber'; addr_street := location.address->'street'; addr_place := location.address->'place'; + inherited_address := location.address; END LOOP; END IF; @@ -729,9 +775,18 @@ BEGIN IF NEW.parent_place_id is not null THEN -- Get the details of the parent road - SELECT p.country_code, p.postcode FROM placex p + SELECT p.country_code, p.postcode, p.name FROM placex p WHERE p.place_id = NEW.parent_place_id INTO location; + IF addr_street is null and addr_place is not null THEN + -- Check if the addr:place tag is part of the parent name + SELECT count(*) INTO i + FROM svals(location.name) AS pname WHERE pname = addr_place; + IF i = 0 THEN + NEW.address = NEW.address || hstore('_unlisted_place', addr_place); + END IF; + END IF; + NEW.country_code := location.country_code; --DEBUG: RAISE WARNING 'Got parent details from search name'; @@ -745,33 +800,31 @@ BEGIN NEW.postcode := get_nearest_postcode(NEW.country_code, NEW.geometry); END IF; - -- If there is no name it isn't searchable, don't bother to create a search record - IF NEW.name is NULL THEN - --DEBUG: RAISE WARNING 'Not a searchable place % %', NEW.osm_type, NEW.osm_id; - return NEW; - END IF; + IF NEW.name is not NULL THEN + NEW.name := add_default_place_name(NEW.country_code, NEW.name); + name_vector := make_keywords(NEW.name); - NEW.name := add_default_place_name(NEW.country_code, NEW.name); - name_vector := make_keywords(NEW.name); + IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, + name_vector, NEW.rank_search, NEW.rank_address, + upper(trim(NEW.address->'postcode')), NEW.geometry, + NEW.centroid); + --DEBUG: RAISE WARNING 'Place added to location table'; + END IF; - -- Performance, it would be more acurate to do all the rest of the import - -- process but it takes too long - -- Just be happy with inheriting from parent road only - IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); - --DEBUG: RAISE WARNING 'Place added to location table'; END IF; - result := insertSearchName(NEW.partition, NEW.place_id, name_vector, - NEW.rank_search, NEW.rank_address, NEW.geometry); - - IF NOT %REVERSE-ONLY% THEN - -- Merge address from parent - SELECT array_merge(s.name_vector, s.nameaddress_vector) - INTO nameaddress_vector - FROM search_name s - WHERE s.place_id = NEW.parent_place_id; - + IF array_length(name_vector, 1) is not NULL + OR inherited_address is not NULL OR NEW.address is not NULL + THEN + SELECT * INTO name_vector, nameaddress_vector + FROM create_poi_search_terms(NEW.place_id, + NEW.partition, NEW.parent_place_id, + inherited_address || NEW.address, + NEW.country_code, NEW.housenumber, + name_vector, NEW.centroid); + + IF not %REVERSE-ONLY% AND array_length(name_vector, 1) is not NULL THEN INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) @@ -780,8 +833,9 @@ BEGIN nameaddress_vector, NEW.centroid); --DEBUG: RAISE WARNING 'Place added to search table'; END IF; + END IF; - return NEW; + RETURN NEW; END IF; END IF; @@ -864,13 +918,20 @@ BEGIN --DEBUG: RAISE WARNING 'Country names updated'; END IF; - SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, - CASE WHEN NEW.rank_address = 0 - THEN NEW.rank_search ELSE NEW.rank_address END, + IF NEW.rank_address = 0 THEN + max_rank := geometry_to_rank(NEW.rank_search, NEW.geometry, NEW.country_code); + ELSEIF NEW.rank_address > 25 THEN + max_rank := 25; + ELSE + max_rank = NEW.rank_address; + END IF; + + SELECT * FROM insert_addresslines(NEW.place_id, NEW.partition, max_rank, NEW.address, - CASE WHEN NEW.rank_search >= 26 - AND NEW.rank_search < 30 - THEN NEW.geometry ELSE NEW.centroid END) + CASE WHEN (NEW.rank_address = 0 or + NEW.rank_search between 26 and 29) + THEN NEW.geometry ELSE NEW.centroid END, + NEW.country_code) INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector; --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector; @@ -888,7 +949,7 @@ BEGIN IF NEW.name IS NOT NULL THEN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN - result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry); + result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid); --DEBUG: RAISE WARNING 'added to location (full)'; END IF;