From: Sarah Hoffmann Date: Tue, 20 Oct 2020 21:26:44 +0000 (+0200) Subject: detect and remove admin boundary duplicates X-Git-Tag: v3.6.0~42^2~3 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/7a1690921959ea8fc24574a6453598439a73085d detect and remove admin boundary duplicates The Polish community maps admin boundaries that span multiple levels by duplicating the boundary relations. Detect this situation by looking out for matching wikidata tags. The higher ranked duplicates are then thrown out from the address pool by setting their address rank to 0. --- diff --git a/sql/functions/placex_triggers.sql b/sql/functions/placex_triggers.sql index 696a7c34..837f8fd3 100644 --- a/sql/functions/placex_triggers.sql +++ b/sql/functions/placex_triggers.sql @@ -494,33 +494,6 @@ END; $$ LANGUAGE plpgsql; -CREATE OR REPLACE FUNCTION get_parent_address_level(geom GEOMETRY, in_level SMALLINT) - RETURNS SMALLINT - AS $$ -DECLARE - address_rank SMALLINT; -BEGIN - IF in_level <= 3 or in_level > 15 THEN - address_rank := 3; - ELSE - SELECT rank_address INTO address_rank - FROM placex - WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' - and admin_level < in_level - and geometry ~ geom and _ST_Covers(geometry, geom) - ORDER BY admin_level desc LIMIT 1; - END IF; - - IF address_rank is NULL or address_rank <= 3 THEN - RETURN 3; - END IF; - - RETURN address_rank; -END; -$$ -LANGUAGE plpgsql; - - CREATE OR REPLACE FUNCTION placex_update() RETURNS TRIGGER AS $$ @@ -610,14 +583,33 @@ BEGIN and NEW.osm_type = 'R' and NEW.rank_address > 0 THEN -- First, check that admin boundaries do not overtake each other rank-wise. - parent_address_level := get_parent_address_level(NEW.centroid, NEW.admin_level); - IF parent_address_level >= NEW.rank_address THEN - IF parent_address_level >= 24 THEN - NEW.rank_address := 25; + parent_address_level := 3; + FOR location IN + SELECT rank_address, extratags FROM placex + WHERE osm_type = 'R' and class = 'boundary' and type = 'administrative' + and admin_level < NEW.admin_level and admin_level > 3 + and rank_address > 0 + and ST_Covers(geometry, NEW.geometry) + ORDER BY admin_level desc LIMIT 1 + LOOP + IF location.extratags ? 'wikidata' and NEW.extratags ? 'wikidata' + and location.extratags->'wikidata' = NEW.extratags->'wikidata' + THEN + -- Looks like the same boundary is replicated on multiple admin_levels. + -- Usual tagging in Poland. Remove our boundary from addresses. + NEW.rank_address := 0; ELSE - NEW.rank_address := parent_address_level + 2; + parent_address_level := location.rank_address; + IF location.rank_address >= NEW.rank_address THEN + IF location.rank_address >= 24 THEN + NEW.rank_address := 25; + ELSE + NEW.rank_address := location.rank_address + 2; + END IF; + END IF; END IF; - END IF; + END LOOP; + IF NEW.rank_address > 9 THEN -- Second check that the boundary is not completely contained in a -- place area with a higher address rank @@ -630,7 +622,7 @@ BEGIN and ST_Relate(geometry, NEW.geometry, 'T*T***FF*') -- contains but not equal ORDER BY rank_address desc LIMIT 1 LOOP - NEW.rank_address := location.rank_address + 2; + NEW.rank_address := location.rank_address + 2; END LOOP; END IF; ELSEIF NEW.class = 'place' and NEW.osm_type = 'N' diff --git a/test/bdd/db/import/rank_computation.feature b/test/bdd/db/import/rank_computation.feature index cea4d973..beecb366 100644 --- a/test/bdd/db/import/rank_computation.feature +++ b/test/bdd/db/import/rank_computation.feature @@ -74,15 +74,15 @@ Feature: Rank assignment | R21 | boundary | administrative | 9 | municipality | (0 0, 0 1, 1 1, 1 0, 0 0) | | R22 | boundary | administrative | 9 | suburb | (0 0, 0 1, 1 1, 1 0, 0 0) | When importing - Then place_addressline contains - | object | address | cached_rank_address | - | R21 | R20 | 16 | - | R22 | R20 | 16 | Then placex contains | object | rank_search | rank_address | | R20 | 16 | 16 | | R21 | 18 | 18 | | R22 | 18 | 20 | + Then place_addressline contains + | object | address | cached_rank_address | + | R21 | R20 | 16 | + | R22 | R20 | 16 | Scenario: Admin levels cannot overtake each other due to place address ranks Given the named places @@ -146,3 +146,42 @@ Feature: Rank assignment | object | rank_search | rank_address | | R10 | 16 | 16 | | R20 | 12 | 12 | + + + Scenario: adjacent admin_levels are considered the same object when they have the same wikidata + Given the named places + | osm | class | type | admin | extra+wikidata | geometry | + | N20 | place | square | 15 | Q123 | 0.1 0.1 | + | R23 | boundary | administrative | 10 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R21 | boundary | administrative | 9 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R22 | boundary | administrative | 8 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R23 | 20 | 0 | + | R21 | 18 | 0 | + | R22 | 16 | 16 | + Then place_addressline contains + | object | address | cached_rank_address | + | N20 | R22 | 16 | + Then place_addressline doesn't contain + | object | address | + | N20 | R21 | + | N20 | R23 | + + Scenario: adjacent admin_levels are considered different objects when they have different wikidata + Given the named places + | osm | class | type | admin | extra+wikidata | geometry | + | N20 | place | square | 15 | Q123 | 0.1 0.1 | + | R21 | boundary | administrative | 9 | Q4441 | (0 0, 0 1, 1 1, 1 0, 0 0) | + | R22 | boundary | administrative | 8 | Q444 | (0 0, 0 1, 1 1, 1 0, 0 0) | + When importing + Then placex contains + | object | rank_search | rank_address | + | R21 | 18 | 18 | + | R22 | 16 | 16 | + Then place_addressline contains + | object | address | cached_rank_address | + | N20 | R22 | 16 | + | N20 | R21 | 18 | +