git.openstreetmap.org Git - nominatim.git/commitdiff
Make rank assignments configurable
author Sarah Hoffmann <lonvia@denofr.de>
Fri, 23 Nov 2018 22:02:32 +0000 (23:02 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Sat, 24 Nov 2018 15:21:16 +0000 (16:21 +0100)
The initial search and address rank is saved in a table
that is set up from a json configuration file. Ranks may
be assigned on a country level according to class and
type of the object. Special handling that depends on the
geometry or OSM type is still hard-coded in placex insert.

The new default config file mimics the current assignment
as closely as possible. A couple of exceptions have been
removed, most notably the exception for Irish townlands.

data/address-levels.json [new file with mode: 0644]
lib/setup/AddressLevelParser.php [new file with mode: 0644]
lib/setup/SetupClass.php
settings/defaults.php
sql/functions.sql
test/bdd/db/import/placex.feature
test/bdd/db/update/simple.feature

diff --git a/data/address-levels.json b/data/address-levels.json
new file mode 100644 (file)
index 0000000..b06c67e
--- /dev/null
@@ -0,0 +1,86 @@
+[
+{ "tags" : {
+      "place" : {
+          "sea" : [2, 0],
+          "continent" : [2, 0],
+          "country" : [4, 4],
+          "state" : [8, 8],
+          "region" : [18, 0],
+          "county" : 12,
+          "city" : 16,
+          "island" : [17, 0],
+          "town" : [18, 16],
+          "village" : [19, 16],
+          "hamlet" : [19, 16],
+          "municipality" : [19, 16],
+          "district" : [19, 16],
+          "unincorporated_area" : [19, 16],
+          "borough" : [19, 16],
+          "suburb" : 20,
+          "croft" : 20,
+          "subdivision" : 20,
+          "isolated_dwelling" : 20,
+          "farm" : [20, 0],
+          "locality" : [20, 0],
+          "islet" : [20, 0],
+          "mountain_pass" : [20, 0],
+          "neighbourhood" : 22,
+          "houses" : [28, 0]
+      },
+      "boundary" : {
+          "administrative2" : 4,
+          "administrative3" : 6,
+          "administrative4" : 8,
+          "administrative5" : 10,
+          "administrative6" : 12,
+          "administrative7" : 14,
+          "administrative8" : 16,
+          "administrative9" : 18,
+          "administrative10" : 20,
+          "administrative11" : 22,
+          "administrative12" : 24
+      },
+      "landuse" : {
+          "residential" : 22,
+          "farm" : 22,
+          "farmyard" : 22,
+          "industrial" : 22,
+          "commercial" : 22,
+          "allotments" : 22,
+          "retail" : 22,
+          "" : [22, 0]
+      },
+      "leisure" : {
+          "park" : [24, 0]
+      },
+      "natural" : {
+          "peak" : [18, 0],
+          "volcano" : [18, 0],
+          "mountain_range" : [18, 0],
+          "sea" : [4, 0],
+          "" : [22, 0]
+      },
+      "waterway" : {
+          "" : [17, 0]
+      },
+      "highway" : {
+          "" : 26,
+          "service" : 27,
+          "cycleway" : 27,
+          "path" : 27,
+          "footway" : 27,
+          "steps" : 27,
+          "bridleway" : 27,
+          "motorway_link" : 27,
+          "primary_link" : 27,
+          "trunk_link" : 27,
+          "secondary_link" : 27,
+          "tertiary_link" : 27
+      },
+      "mountain_pass" : {
+          "" : [20, 0]
+      }
+  }
+}
+]
+
diff --git a/lib/setup/AddressLevelParser.php b/lib/setup/AddressLevelParser.php
new file mode 100644 (file)
index 0000000..5bc17d6
--- /dev/null
@@ -0,0 +1,98 @@
+<?php
+
+namespace Nominatim\Setup;
+
+/**
+ * Parses an address level description.
+ *
+ * The description is a JSON file containing a list of entries. Each entry
+ * has a 'tags' object mapping class => type => rank(s) and optionally a
+ * 'countries' list of country codes the entry is restricted to.
+ */
+class AddressLevelParser
+{
+    private $aLevels;
+
+    /**
+     * Load and decode the description file.
+     *
+     * @param string $sDescriptionFile Path of the JSON description file.
+     *
+     * fail() is called when the file cannot be read, contains invalid
+     * JSON or does not decode to an array.
+     */
+    public function __construct(string $sDescriptionFile)
+    {
+        // Keep the property iterable even if fail() should return.
+        // NOTE(review): fail() presumably aborts the script - confirm.
+        $this->aLevels = array();
+
+        $sJson = file_get_contents($sDescriptionFile);
+        if ($sJson === false) {
+            // Report the real cause instead of a misleading JSON syntax error.
+            fail('Cannot read address level description: '.$sDescriptionFile);
+            return;
+        }
+
+        $mDecoded = json_decode($sJson, true);
+        if ($mDecoded === null && json_last_error() !== JSON_ERROR_NONE) {
+            fail('JSON error - '.json_last_error_msg());
+            return;
+        }
+
+        // Valid JSON that is not a list/object (null, number, string)
+        // cannot be iterated by createTable().
+        if (!is_array($mDecoded)) {
+            fail('JSON error - address level description must be a list');
+            return;
+        }
+
+        $this->aLevels = $mDecoded;
+    }
+
+    /**
+     * Dump the description into a database table.
+     *
+     * @param object $oDB    Database connection to use.
+     * @param string $sTable Name of table to create.
+     *
+     * @return null
+     *
+     * A new table is created. Any previously existing table is dropped.
+     * The table has the following columns:
+     * country_code, class, type, rank_search, rank_address.
+     *
+     * Entries without a 'countries' list get a NULL country_code, an
+     * empty type ('') is stored as NULL type. A single rank number is
+     * used for both rank columns, a list is read as
+     * [rank_search, rank_address]. When the description yields no rows,
+     * the table is created but left empty.
+     */
+    public function createTable($oDB, $sTable)
+    {
+        chksql($oDB->query('DROP TABLE IF EXISTS '.$sTable));
+        $sSql = 'CREATE TABLE '.$sTable;
+        $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
+        $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
+        chksql($oDB->query($sSql));
+
+        $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.'(country_code, class, type)';
+        chksql($oDB->query($sSql));
+
+        $aRows = array();
+        foreach ($this->aLevels as $aLevel) {
+            $aCountries = array();
+            if (isset($aLevel['countries'])) {
+                foreach ($aLevel['countries'] as $sCountry) {
+                    // Keyed by country so a repeated code yields one row only.
+                    $aCountries[$sCountry] = getDBQuoted($sCountry);
+                }
+            } else {
+                $aCountries['NULL'] = 'NULL';
+            }
+            foreach ($aLevel['tags'] as $sKey => $aValues) {
+                foreach ($aValues as $sValue => $mRanks) {
+                    // PHP turns numeric JSON keys into integers; compare as
+                    // string so that only the empty type maps to NULL.
+                    $sType = (string) $sValue;
+                    $aFields = array(
+                        getDBQuoted($sKey),
+                        $sType !== '' ? getDBQuoted($sType) : 'NULL'
+                    );
+                    if (is_array($mRanks)) {
+                        $aFields[] = (string) $mRanks[0];
+                        $aFields[] = (string) $mRanks[1];
+                    } else {
+                        $aFields[] = (string) $mRanks;
+                        $aFields[] = (string) $mRanks;
+                    }
+                    $sLine = ','.join(',', $aFields).')';
+
+                    foreach ($aCountries as $sQuotedCountry) {
+                        $aRows[] = '('.$sQuotedCountry.$sLine;
+                    }
+                }
+            }
+        }
+
+        // 'INSERT ... VALUES' without any row is a syntax error, so an
+        // empty description simply leaves the table empty.
+        if (!empty($aRows)) {
+            $sSql = 'INSERT INTO '.$sTable.' VALUES '.join(',', $aRows);
+            chksql($oDB->query($sSql));
+        }
+    }
+}
index 5c6d69e43bcf52160b943008b05c7a506a27d642..0f96bc8bdccba6d46ae10d2cffc49e73f92a33e4 100755 (executable)
@@ -2,6 +2,8 @@
 
 namespace Nominatim\Setup;
 
+require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
+
 class SetupFunctions
 {
     protected $iCacheMemory;
@@ -272,6 +274,9 @@ class SetupFunctions
         if ($bReverseOnly) {
             $this->pgExec('DROP TABLE search_name');
         }
+
+        $oAlParser = new AddressLevelParser(CONST_Address_Level_Config);
+        $oAlParser->createTable($this->oDB, 'address_levels');
     }
 
     public function createPartitionTables()
index 8cdbcb5aa2344688554682dcfc5bb92a97d48cf0..8a286b11eb4234d273bd1699e4e07fb33cc88437 100644 (file)
@@ -49,6 +49,7 @@ if (isset($_GET['debug']) && $_GET['debug']) @define('CONST_Debug', true);
 @define('CONST_Pyosmium_Binary', '@PYOSMIUM_PATH@');
 @define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger');
 @define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath);
+@define('CONST_Address_Level_Config', CONST_ExtraDataPath.'/address-levels.json');
 
 // osm2pgsql settings
 @define('CONST_Osm2pgsql_Flatnode_File', null);
index f17976adfc703a3e5ddca23f26625252d4307421..8ce36c5408cf7531c5f5229691c761f10ca8da67 100644 (file)
@@ -817,11 +817,12 @@ DECLARE
   i INTEGER;
   postcode TEXT;
   result BOOLEAN;
+  is_area BOOLEAN;
   country_code VARCHAR(2);
   default_language VARCHAR(10);
   diameter FLOAT;
   classtable TEXT;
-  line RECORD;
+  classtype TEXT;
 BEGIN
   --DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
 
@@ -848,148 +849,71 @@ BEGIN
   IF NEW.osm_type = 'X' THEN
     -- E'X'ternal records should already be in the right format so do nothing
   ELSE
-    NEW.rank_search := 30;
-    NEW.rank_address := NEW.rank_search;
+    is_area := ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon');
 
-    -- By doing in postgres we have the country available to us - currently only used for postcode
-    IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+    IF NEW.class in ('place','boundary')
+       AND NEW.type in ('postcode','postal_code') THEN
 
-        IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN
-            -- most likely just a part of a multipolygon postcode boundary, throw it away
-            RETURN NULL;
-        END IF;
-
-        NEW.name := hstore('ref', NEW.address->'postcode');
+      IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN
+          -- most likely just a part of a multipolygon postcode boundary, throw it away
+          RETURN NULL;
+      END IF;
 
-        SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode')
-          INTO NEW.rank_search, NEW.rank_address;
+      NEW.name := hstore('ref', NEW.address->'postcode');
 
-        IF NOT ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
-            NEW.rank_address := 0;
-        END IF;
+      SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode')
+        INTO NEW.rank_search, NEW.rank_address;
 
-    ELSEIF NEW.class = 'place' THEN
-      IF NEW.type in ('continent', 'sea') THEN
-        NEW.rank_search := 2;
-        NEW.rank_address := 0;
-        NEW.country_code := NULL;
-      ELSEIF NEW.type in ('country') THEN
-        NEW.rank_search := 4;
-        IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
-            NEW.rank_address := NEW.rank_search;
-        ELSE
-            NEW.rank_address := 0;
-        END IF;
-      ELSEIF NEW.type in ('state') THEN
-        NEW.rank_search := 8;
-        IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
-            NEW.rank_address := NEW.rank_search;
-        ELSE
-            NEW.rank_address := 0;
-        END IF;
-      ELSEIF NEW.type in ('region') THEN
-        NEW.rank_search := 18; -- dropped from previous value of 10
-        NEW.rank_address := 0; -- So badly miss-used that better to just drop it!
-      ELSEIF NEW.type in ('county') THEN
-        NEW.rank_search := 12;
-        NEW.rank_address := NEW.rank_search;
-      ELSEIF NEW.type in ('city') THEN
-        NEW.rank_search := 16;
-        NEW.rank_address := NEW.rank_search;
-      ELSEIF NEW.type in ('island') THEN
-        NEW.rank_search := 17;
-        NEW.rank_address := 0;
-      ELSEIF NEW.type in ('town') THEN
-        NEW.rank_search := 18;
-        NEW.rank_address := 16;
-      ELSEIF NEW.type in ('village','hamlet','municipality','district','unincorporated_area','borough') THEN
-        NEW.rank_search := 19;
-        NEW.rank_address := 16;
-      ELSEIF NEW.type in ('suburb','croft','subdivision','isolated_dwelling') THEN
-        NEW.rank_search := 20;
-        NEW.rank_address := NEW.rank_search;
-      ELSEIF NEW.type in ('farm','locality','islet','mountain_pass') THEN
-        NEW.rank_search := 20;
-        NEW.rank_address := 0;
-        -- Irish townlands, tagged as place=locality and locality=townland
-        IF (NEW.extratags -> 'locality') = 'townland' THEN
-          NEW.rank_address := 20;
-        END IF;
-      ELSEIF NEW.type in ('neighbourhood') THEN
-        NEW.rank_search := 22;
-        NEW.rank_address := 22;
-      ELSEIF NEW.type in ('house','building') THEN
-        NEW.rank_search := 30;
-        NEW.rank_address := NEW.rank_search;
-      ELSEIF NEW.type in ('houses') THEN
-        -- can't guarantee all required nodes loaded yet due to caching in osm2pgsql
-        NEW.rank_search := 28;
-        NEW.rank_address := 0;
+      IF NOT is_area THEN
+          NEW.rank_address := 0;
       END IF;
-
-    ELSEIF NEW.class = 'boundary' THEN
-      IF ST_GeometryType(NEW.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon') THEN
---        RAISE WARNING 'invalid boundary %',NEW.osm_id;
+    ELSEIF NEW.class = 'boundary' AND NOT is_area THEN
         return NULL;
-      END IF;
-      NEW.rank_search := NEW.admin_level * 2;
-      IF NEW.type = 'administrative' THEN
-        NEW.rank_address := NEW.rank_search;
+    ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN
+        return NULL;
+    ELSEIF NEW.osm_type = 'N' AND NEW.class = 'highway' THEN
+        NEW.rank_search = 30;
+        NEW.rank_address = 0;
+    ELSEIF NEW.class = 'landuse' AND NOT is_area THEN
+        NEW.rank_search = 30;
+        NEW.rank_address = 0;
+    ELSE
+      -- do table lookup stuff
+      IF NEW.class = 'boundary' and NEW.type = 'administrative' THEN
+        classtype = NEW.type || NEW.admin_level::TEXT;
       ELSE
-        NEW.rank_address := 0;
+        classtype = NEW.type;
       END IF;
-    ELSEIF NEW.class = 'landuse' AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') THEN
-      NEW.rank_search := 22;
-      IF NEW.type in ('residential', 'farm', 'farmyard', 'industrial', 'commercial', 'allotments', 'retail') THEN
-        NEW.rank_address := NEW.rank_search;
-      ELSE
-        NEW.rank_address := 0;
+      SELECT l.rank_search, l.rank_address FROM address_levels l
+       WHERE (l.country_code = NEW.country_code or l.country_code is NULL)
+             AND l.class = NEW.class AND (l.type = classtype or l.type is NULL)
+       ORDER BY l.country_code, l.class, l.type LIMIT 1
+        INTO NEW.rank_search, NEW.rank_address;
+
+      IF NEW.rank_search is NULL THEN
+        NEW.rank_search := 30;
       END IF;
-    ELSEIF NEW.class = 'leisure' and NEW.type in ('park') THEN
-      NEW.rank_search := 24;
-      NEW.rank_address := 0;
-    ELSEIF NEW.class = 'natural' and NEW.type in ('peak','volcano','mountain_range') THEN
-      NEW.rank_search := 18;
-      NEW.rank_address := 0;
-    ELSEIF NEW.class = 'natural' and NEW.type = 'sea' THEN
-      NEW.rank_search := 4;
-      NEW.rank_address := NEW.rank_search;
-    -- any feature more than 5 square miles is probably worth indexing
-    ELSEIF ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_Area(NEW.geometry) > 0.1 THEN
-      NEW.rank_search := 22;
-      NEW.rank_address := 0;
-    ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN
-      RETURN NULL;
-    ELSEIF NEW.class = 'waterway' THEN
-      IF NEW.osm_type = 'R' THEN
-        NEW.rank_search := 16;
-      ELSE
-        NEW.rank_search := 17;
+
+      IF NEW.rank_address is NULL THEN
+        NEW.rank_address := 30;
       END IF;
-      NEW.rank_address := 0;
-    ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' AND NEW.type in ('service','cycleway','path','footway','steps','bridleway','motorway_link','primary_link','trunk_link','secondary_link','tertiary_link') THEN
-      NEW.rank_search := 27;
-      NEW.rank_address := NEW.rank_search;
-    ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' THEN
-      NEW.rank_search := 26;
-      NEW.rank_address := NEW.rank_search;
-    ELSEIF NEW.class = 'mountain_pass' THEN
-        NEW.rank_search := 20;
-        NEW.rank_address := 0;
     END IF;
 
-  END IF;
-
-  IF NEW.rank_search > 30 THEN
-    NEW.rank_search := 30;
-  END IF;
+    -- some postcorrections
+    IF NEW.class = 'place' THEN
+      IF NEW.type in ('continent', 'sea', 'country', 'state') AND NEW.osm_type = 'N' THEN
+        NEW.rank_address := 0;
+      END IF;
+    ELSEIF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN
+        -- Slightly promote waterway relations so that they are processed
+        -- before their members.
+        NEW.rank_search := NEW.rank_search - 1;
+    END IF;
 
-  IF NEW.rank_address > 30 THEN
-    NEW.rank_address := 30;
-  END IF;
+    IF (NEW.extratags -> 'capital') = 'yes' THEN
+      NEW.rank_search := NEW.rank_search - 1;
+    END IF;
 
-  IF (NEW.extratags -> 'capital') = 'yes' THEN
-    NEW.rank_search := NEW.rank_search - 1;
   END IF;
 
   -- a country code make no sense below rank 4 (country)
index 08ba9cbd8e46283106b0abcdac7cf793dab3dfe4..46827c4376876e66b8fb48b95b7b9e345eeee9da 100644 (file)
@@ -26,8 +26,8 @@ Feature: Import into placex
           | R1  | boundary | administrative  | 2     | de      | (-100 40, -101 40, -101 41, -100 41, -100 40) |
         When importing
         Then placex contains
-          | object | addr+country | country_code |
-          | R1     | de           | de           |
+          | object | rank_search| addr+country | country_code |
+          | R1     | 4          | de           | de           |
 
     Scenario: Illegal country code tag for countries is ignored
         Given the named places
@@ -157,9 +157,6 @@ Feature: Import into placex
           | N36  | place     | house               |
           | N37  | place     | building            |
           | N38  | place     | houses              |
-        And the named places
-          | osm  | class     | type      | extra+locality |
-          | N100 | place     | locality  | townland |
         And the named places
           | osm  | class     | type      | extra+capital |
           | N101 | place     | city      | yes |
@@ -191,7 +188,6 @@ Feature: Import into placex
           | N32    | 20          | 0 |
           | N33    | 20          | 0 |
           | N34    | 20          | 0 |
-          | N100   | 20          | 20 |
           | N101   | 15          | 16 |
           | N35    | 22          | 22 |
           | N36    | 30          | 30 |
@@ -222,8 +218,8 @@ Feature: Import into placex
           | object | rank_search | rank_address |
           | R20    | 4           | 4 |
           | R21    | 30          | 30 |
-          | R22    | 12          | 0 |
-          | R23    | 20          | 0 |
+          | R22    | 30          | 30 |
+          | R23    | 30          | 30 |
           | R40    | 4           | 4 |
           | R41    | 8           | 8 |
 
@@ -243,7 +239,7 @@ Feature: Import into placex
         When importing
         Then placex contains
           | object | rank_search | rank_address |
-          | N1     | 30          | 30 |
+          | N1     | 30          |  0 |
           | W1     | 26          | 26 |
           | W2     | 26          | 26 |
           | W3     | 26          | 26 |
@@ -264,11 +260,11 @@ Feature: Import into placex
         When importing
         Then placex contains
           | object | rank_search | rank_address |
-          | N2     | 30          | 30 |
-          | W2     | 30          | 30 |
+          | N2     | 30          |  0 |
+          | W2     | 30          |  0 |
           | W4     | 22          | 22 |
           | R2     | 22          | 22 |
-          | R3     | 22          | 0 |
+          | R3     | 22          |  0 |
 
     Scenario: rank and inclusion of naturals
        Given the named places
@@ -289,11 +285,11 @@ Feature: Import into placex
           | object | rank_search | rank_address |
           | N2     | 18          | 0 |
           | N4     | 18          | 0 |
-          | N5     | 30          | 30 |
+          | N5     | 22          | 0 |
           | W2     | 18          | 0 |
           | R3     | 18          | 0 |
           | R4     | 22          | 0 |
-          | R5     | 4           | 4 |
-          | R6     | 4           | 4 |
-          | W3     | 30          | 30 |
+          | R5     | 4           | 0 |
+          | R6     | 4           | 0 |
+          | W3     | 22          | 0 |
 
index d48e97fa881ff85cff2e9d5331cd7a08f5dfc7a5..195d101e1485b56dda71f1d8788c1705cdeafcef 100644 (file)
@@ -34,7 +34,7 @@ Feature: Update of simple objects
         When importing
         Then placex contains
           | object | rank_address |
-          | R1     | 0 |
+          | R1     | 30 |
           | W1     | 30 |
         When marking for delete R1,W1
         Then placex has no entry for W1
@@ -103,4 +103,4 @@ Feature: Update of simple objects
           | W1  | boundary | historic       | Haha | 5     | (1, 2, 4, 3, 1) |
         Then placex contains
           | object | rank_address |
-          | W1     | 0            |
+          | W1     | 30            |