]> git.openstreetmap.org Git - nominatim.git/commitdiff
ensure consistent country assignments
authorSarah Hoffmann <lonvia@denofr.de>
Thu, 15 Aug 2024 12:26:09 +0000 (14:26 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Thu, 15 Aug 2024 12:26:09 +0000 (14:26 +0200)
When OSM data has areas with overlapping countries, use the country
assignments from the pre-defined country grid for tie-breaking.
If that fails, fall back to the country with the smaller partition
number.

lib-sql/functions/utils.sql
test/bdd/db/import/country.feature

index f8b365c582cce8cae569564bc9655335b8b69e1d..50116566b99c738c37f391cf476da6481ceec491 100644 (file)
@@ -164,18 +164,44 @@ CREATE OR REPLACE FUNCTION get_country_code(place geometry)
 DECLARE
   place_centre GEOMETRY;
   nearcountry RECORD;
+  countries TEXT[];
 BEGIN
   place_centre := ST_PointOnSurface(place);
 
 -- RAISE WARNING 'get_country_code, start: %', ST_AsText(place_centre);
 
   -- Try for a OSM polygon
-  FOR nearcountry IN
-    SELECT country_code from location_area_country
-    WHERE country_code is not null and st_covers(geometry, place_centre) limit 1
-  LOOP
-    RETURN nearcountry.country_code;
-  END LOOP;
+  SELECT array_agg(country_code) FROM location_area_country
+    WHERE country_code is not null and st_covers(geometry, place_centre)
+    INTO countries;
+
+  IF array_length(countries, 1) = 1 THEN
+    RETURN countries[1];
+  END IF;
+
+  IF array_length(countries, 1) > 1 THEN
+    -- more than one country found, confirm against the fallback data what to choose
+    FOR nearcountry IN
+        SELECT country_code FROM country_osm_grid
+          WHERE ST_Covers(geometry, place_centre) AND country_code = ANY(countries)
+          ORDER BY area ASC
+    LOOP
+        RETURN nearcountry.country_code;
+    END LOOP;
+    -- Still nothing? Choose the country code with the smallest partition number.
+    -- And failing that, just go by the alphabet.
+    FOR nearcountry IN
+        SELECT cc,
+               (SELECT partition FROM country_name WHERE country_code = cc) as partition
+        FROM unnest(countries) cc
+        ORDER BY partition, cc
+    LOOP
+        RETURN nearcountry.cc;
+    END LOOP;
+
+    -- Should never be reached.
+    RETURN countries[1];
+  END IF;
 
 -- RAISE WARNING 'osm fallback: %', ST_AsText(place_centre);
 
index 070baeef17fe1f7354bb66563f0242ccfc8c9843..bfa46969854831c22e45d80d58dad1f5f2e445f8 100644 (file)
@@ -20,7 +20,8 @@ Feature: Country handling
         Then results contain
             | osm | display_name |
             | N1  | Wenig, Loudou |
-    Scenario: OSM country relations outside expected boundaries are ignored
+
+    Scenario: OSM country relations outside expected boundaries are ignored for naming
         Given the grid
             | 1 |  | 2 |
             | 4 |  | 3 |
@@ -37,6 +38,7 @@ Feature: Country handling
         Then results contain
             | osm | display_name |
             | N1  | Wenig, Germany |
+
     Scenario: Pre-defined country names are used
         Given the grid with origin CH
             | 1 |
@@ -50,3 +52,41 @@ Feature: Country handling
         Then results contain
             | osm | display_name |
             | N1  | Ingb, Switzerland |
+
+    Scenario: For overlapping countries, pre-defined countries are tie-breakers
+        Given the grid with origin US
+            | 1 |   | 2 |   | 5 |
+            |   | 9 |   | 8 |   |
+            | 4 |   | 3 |   | 6 |
+        Given the named places
+            | osm  | class    | type           | admin | country | geometry |
+            | R1   | boundary | administrative | 2     | de      | (1,5,6,4,1) |
+            | R2   | boundary | administrative | 2     | us      | (1,2,3,4,1) |
+        And the named places
+            | osm  | class    | type  | geometry   |
+            | N1   | place    | town  | 9 |
+            | N2   | place    | town  | 8 |
+        When importing
+        Then placex contains
+            | object | country_code |
+            | N1     | us           |
+            | N2     | de           |
+
+    Scenario: For overlapping countries outside pre-define countries prefer smaller partition
+        Given the grid with origin US
+            | 1 |   | 2 |   | 5 |
+            |   | 9 |   | 8 |   |
+            | 4 |   | 3 |   | 6 |
+        Given the named places
+            | osm  | class    | type           | admin | country | geometry |
+            | R1   | boundary | administrative | 2     | ch      | (1,5,6,4,1) |
+            | R2   | boundary | administrative | 2     | de      | (1,2,3,4,1) |
+        And the named places
+            | osm  | class    | type  | geometry   |
+            | N1   | place    | town  | 9 |
+            | N2   | place    | town  | 8 |
+        When importing
+        Then placex contains
+            | object | country_code |
+            | N1     | de           |
+            | N2     | ch           |