- if [[ $TEST_SUITE == "tests" ]]; then phpunit ./ ; fi
- cd $TRAVIS_BUILD_DIR/test/bdd
- # behave --format=progress3 api
- - if [[ $TEST_SUITE == "tests" ]]; then behave --format=progress3 db ; fi
+ - if [[ $TEST_SUITE == "tests" ]]; then behave -DREMOVE_TEMPLATE=1 --format=progress3 db ; fi
- if [[ $TEST_SUITE == "tests" ]]; then behave --format=progress3 osm2pgsql ; fi
- cd $TRAVIS_BUILD_DIR/build
- if [[ $TEST_SUITE == "monaco" ]]; then wget --no-verbose --output-document=../data/monaco.osm.pbf http://download.geofabrik.de/europe/monaco-latest.osm.pbf; fi
--- /dev/null
+# Place Ranking in Nominatim
+
+Nominatim uses two metrics to rank a place: search rank and address rank.
+Both can be assigned a value between 0 and 30. They serve slightly
+different purposes, which are explained in this chapter.
+
+## Search rank
+
+The search rank describes the extent and importance of a place. It is used
+when ranking search results. Simply put, if there are two results for a
+search query which are otherwise equal, then the result with the _lower_
+search rank will appear higher in the result list.
+
+Search ranks are not so important these days because many well-known
+places use the Wikipedia importance ranking instead.
+
+## Address rank
+
+The address rank describes where a place shows up in an address hierarchy.
+Usually only administrative boundaries and place nodes and areas are
+eligible to be part of an address. All other objects have an address rank
+of 0.
+
+Note that the search rank of a place plays a role in the address computation
+as well. When collecting the places that should make up the address parts,
+only places that have a lower address rank than the search rank of the base
+object are taken into account.
+
+## Rank configuration
+
+Search and address ranks are assigned to a place when it is first imported
+into the database. There are a few hard-coded rules for the assignment:
+
+ * postcodes follow special rules according to their length
+ * boundaries that are not areas and railway=rail are dropped completely
+ * the following are always search rank 30 and address rank 0:
+    * highway nodes
+    * landuse that is not an area
+
+Other than that, the ranks can be freely assigned via the json file
+defined with `CONST_Address_Level_Config` according to their type and
+the country they are in.
+
+The address level configuration must consist of an array of configuration
+entries, each containing a tag definition and an optional country array:
+
+```
+[ {
+ "tags" : {
+ "place" : {
+ "county" : 12,
+ "city" : 16
+ },
+ "landuse" : {
+ "residential" : 22,
+ "" : 30
+ }
+ }
+ },
+ {
+ "countries" : [ "ca", "us" ],
+ "tags" : {
+ "boundary" : {
+ "administrative8" : 18,
+ "administrative9" : 20
+ },
+ "landuse" : {
+ "residential" : [22, 0]
+ }
+ }
+ }
+]
+```
+
+The `countries` field contains a list of countries (as ISO 3166-1 alpha-2 codes)
+for which the definition applies. When the field is omitted, the
+definition is used as a fallback when nothing more specific exists for a
+given country.
+
+`tags` contains the ranks for key/value pairs. A rank can be either a
+single number, in which case it is used as both search and address rank, or
+a tuple of search and address rank (in that order). The value may be left
+empty. In that case the rank is used when no more specific value is found
+for the given key.
+
+Countries and key/value combinations may appear in multiple definitions. Just
+make sure that each combination of country/key/value appears only once per
+file. Otherwise the import will fail with a UNIQUE INDEX constraint
+violation.
- 'Troubleshooting' : 'admin/Faq.md'
- 'Developers Guide':
- 'Overview' : 'develop/overview.md'
+ - 'Place Ranking' : 'develop/Ranking.md'
- 'External Data Sources':
- 'Overview' : 'data-sources/overview.md'
- 'US Census (Tiger)': 'data-sources/US-Tiger.md'
--- /dev/null
+<?php
+
+namespace Nominatim\Setup;
+
+/**
+ * Parses an address level description.
+ */
+class AddressLevelParser
+{
+    /**
+     * Decoded address level configuration: a list of entries, each with a
+     * 'tags' map and an optional 'countries' list.
+     */
+    private $aLevels;
+
+    /**
+     * Read and decode the address level description.
+     *
+     * @param string $sDescriptionFile Path of the JSON configuration file.
+     *
+     * Problems are reported through fail(), which is defined elsewhere and
+     * is expected to abort the script (TODO confirm).
+     */
+    public function __construct($sDescriptionFile)
+    {
+        $sJson = file_get_contents($sDescriptionFile);
+        if ($sJson === false) {
+            // Report the real cause instead of a misleading JSON syntax error.
+            fail('Cannot read address level configuration from '.$sDescriptionFile);
+        }
+
+        $this->aLevels = json_decode($sJson, true);
+
+        $iError = json_last_error();
+        if ($iError !== JSON_ERROR_NONE) {
+            fail(self::describeJsonError($iError));
+        } elseif (!is_array($this->aLevels)) {
+            // Valid JSON such as `null` or `42` cannot be iterated later on.
+            fail('JSON error - Address level configuration must be an array');
+        }
+    }
+
+    /**
+     * Translate a json_last_error() code into the message passed to fail().
+     *
+     * @param integer $iError Error code as returned by json_last_error().
+     *
+     * @return string Human-readable error message.
+     */
+    private static function describeJsonError($iError)
+    {
+        $aMessages = array(
+                      JSON_ERROR_DEPTH => 'Maximum stack depth exceeded',
+                      JSON_ERROR_STATE_MISMATCH => 'Underflow or the modes mismatch',
+                      JSON_ERROR_CTRL_CHAR => 'Unexpected control character found',
+                      JSON_ERROR_SYNTAX => 'Syntax error, malformed JSON',
+                      JSON_ERROR_UTF8 => 'Malformed UTF-8 characters, possibly incorrectly encoded'
+                     );
+
+        $sDetail = isset($aMessages[$iError]) ? $aMessages[$iError] : 'Unknown error';
+
+        return 'JSON error - '.$sDetail;
+    }
+
+    /**
+     * Dump the description into a database table.
+     *
+     * @param object $oDB    Database connection to use.
+     * @param string $sTable Name of table to create.
+     *
+     * @return null
+     *
+     * A new table is created. Any previously existing table is dropped.
+     * The table has the following columns:
+     * country_code, class, type, rank_search, rank_address.
+     *
+     * Entries without a 'countries' list get a NULL country_code, an empty
+     * tag value is stored as a NULL type. If the configuration yields no
+     * rows at all, the table is created but left empty.
+     */
+    public function createTable($oDB, $sTable)
+    {
+        chksql($oDB->query('DROP TABLE IF EXISTS '.$sTable));
+        $sSql = 'CREATE TABLE '.$sTable;
+        $sSql .= '(country_code varchar(2), class TEXT, type TEXT,';
+        $sSql .= ' rank_search SMALLINT, rank_address SMALLINT)';
+        chksql($oDB->query($sSql));
+
+        $sSql = 'CREATE UNIQUE INDEX ON '.$sTable.'(country_code, class, type)';
+        chksql($oDB->query($sSql));
+
+        $aRows = array();
+        foreach ($this->aLevels as $aLevel) {
+            // Keyed by country so that a country listed twice yields one row.
+            $aCountries = array();
+            if (isset($aLevel['countries'])) {
+                foreach ($aLevel['countries'] as $sCountry) {
+                    $aCountries[$sCountry] = getDBQuoted($sCountry);
+                }
+            } else {
+                $aCountries['NULL'] = 'NULL';
+            }
+
+            foreach ($aLevel['tags'] as $sKey => $aValues) {
+                foreach ($aValues as $sValue => $mRanks) {
+                    // PHP turns numeric-looking array keys into integers, so
+                    // normalise to string and compare against '' explicitly;
+                    // a plain truthiness test would treat "0" as empty.
+                    $sValue = (string) $sValue;
+                    $aFields = array(
+                                getDBQuoted((string) $sKey),
+                                $sValue !== '' ? getDBQuoted($sValue) : 'NULL'
+                               );
+                    if (is_array($mRanks)) {
+                        // Tuple of (search rank, address rank).
+                        $aFields[] = (string) $mRanks[0];
+                        $aFields[] = (string) $mRanks[1];
+                    } else {
+                        // A single number is used for both ranks.
+                        $aFields[] = (string) $mRanks;
+                        $aFields[] = (string) $mRanks;
+                    }
+                    $sLine = ','.join(',', $aFields).')';
+
+                    foreach ($aCountries as $sCountrySql) {
+                        $aRows[] = '('.$sCountrySql.$sLine;
+                    }
+                }
+            }
+        }
+
+        // An INSERT without any value tuples is a syntax error, so skip it.
+        if (!empty($aRows)) {
+            chksql($oDB->query('INSERT INTO '.$sTable.' VALUES '.join(',', $aRows)));
+        }
+    }
+}
namespace Nominatim\Setup;
+require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
+
class SetupFunctions
{
protected $iCacheMemory;
if ($bReverseOnly) {
$this->pgExec('DROP TABLE search_name');
}
+
+ $oAlParser = new AddressLevelParser(CONST_Address_Level_Config);
+ $oAlParser->createTable($this->oDB, 'address_levels');
}
public function createPartitionTables()
--- /dev/null
+[
+{ "tags" : {
+ "place" : {
+ "sea" : [2, 0],
+ "continent" : [2, 0],
+ "country" : [4, 4],
+ "state" : [8, 8],
+ "region" : [18, 0],
+ "county" : 12,
+ "city" : 16,
+ "island" : [17, 0],
+ "town" : [18, 16],
+ "village" : [19, 16],
+ "hamlet" : [19, 16],
+ "municipality" : [19, 16],
+ "district" : [19, 16],
+ "unincorporated_area" : [19, 16],
+ "borough" : [19, 16],
+ "suburb" : 20,
+ "croft" : 20,
+ "subdivision" : 20,
+ "isolated_dwelling" : 20,
+ "farm" : [20, 0],
+ "locality" : [20, 0],
+ "islet" : [20, 0],
+ "mountain_pass" : [20, 0],
+ "neighbourhood" : 22,
+ "houses" : [28, 0]
+ },
+ "boundary" : {
+ "administrative2" : 4,
+ "administrative3" : 6,
+ "administrative4" : 8,
+ "administrative5" : 10,
+ "administrative6" : 12,
+ "administrative7" : 14,
+ "administrative8" : 16,
+ "administrative9" : 18,
+ "administrative10" : 20,
+ "administrative11" : 22,
+ "administrative12" : 24
+ },
+ "landuse" : {
+ "residential" : 22,
+ "farm" : 22,
+ "farmyard" : 22,
+ "industrial" : 22,
+ "commercial" : 22,
+ "allotments" : 22,
+ "retail" : 22,
+ "" : [22, 0]
+ },
+ "leisure" : {
+ "park" : [24, 0]
+ },
+ "natural" : {
+ "peak" : [18, 0],
+ "volcano" : [18, 0],
+ "mountain_range" : [18, 0],
+ "sea" : [4, 0]
+ },
+ "waterway" : {
+ "" : [17, 0]
+ },
+ "highway" : {
+ "" : 26,
+ "service" : 27,
+ "cycleway" : 27,
+ "path" : 27,
+ "footway" : 27,
+ "steps" : 27,
+ "bridleway" : 27,
+ "motorway_link" : 27,
+ "primary_link" : 27,
+ "trunk_link" : 27,
+ "secondary_link" : 27,
+ "tertiary_link" : 27
+ },
+ "mountain_pass" : {
+ "" : [20, 0]
+ }
+ }
+}
+]
+
@define('CONST_Pyosmium_Binary', '@PYOSMIUM_PATH@');
@define('CONST_Tiger_Data_Path', CONST_ExtraDataPath.'/tiger');
@define('CONST_Wikipedia_Data_Path', CONST_ExtraDataPath);
+@define('CONST_Address_Level_Config', CONST_BasePath.'/settings/address-levels.json');
// osm2pgsql settings
@define('CONST_Osm2pgsql_Flatnode_File', null);
i INTEGER;
postcode TEXT;
result BOOLEAN;
+ is_area BOOLEAN;
country_code VARCHAR(2);
default_language VARCHAR(10);
diameter FLOAT;
classtable TEXT;
- line RECORD;
+ classtype TEXT;
BEGIN
--DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
IF NEW.osm_type = 'X' THEN
-- E'X'ternal records should already be in the right format so do nothing
ELSE
- NEW.rank_search := 30;
- NEW.rank_address := NEW.rank_search;
+ is_area := ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon');
- -- By doing in postgres we have the country available to us - currently only used for postcode
- IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+ IF NEW.class in ('place','boundary')
+ AND NEW.type in ('postcode','postal_code') THEN
- IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN
- -- most likely just a part of a multipolygon postcode boundary, throw it away
- RETURN NULL;
- END IF;
-
- NEW.name := hstore('ref', NEW.address->'postcode');
+ IF NEW.address IS NULL OR NOT NEW.address ? 'postcode' THEN
+ -- most likely just a part of a multipolygon postcode boundary, throw it away
+ RETURN NULL;
+ END IF;
- SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode')
- INTO NEW.rank_search, NEW.rank_address;
+ NEW.name := hstore('ref', NEW.address->'postcode');
- IF NOT ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
- NEW.rank_address := 0;
- END IF;
+ SELECT * FROM get_postcode_rank(NEW.country_code, NEW.address->'postcode')
+ INTO NEW.rank_search, NEW.rank_address;
- ELSEIF NEW.class = 'place' THEN
- IF NEW.type in ('continent', 'sea') THEN
- NEW.rank_search := 2;
- NEW.rank_address := 0;
- NEW.country_code := NULL;
- ELSEIF NEW.type in ('country') THEN
- NEW.rank_search := 4;
- IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
- NEW.rank_address := NEW.rank_search;
- ELSE
- NEW.rank_address := 0;
- END IF;
- ELSEIF NEW.type in ('state') THEN
- NEW.rank_search := 8;
- IF ST_GeometryType(NEW.geometry) IN ('ST_Polygon','ST_MultiPolygon') THEN
- NEW.rank_address := NEW.rank_search;
- ELSE
- NEW.rank_address := 0;
- END IF;
- ELSEIF NEW.type in ('region') THEN
- NEW.rank_search := 18; -- dropped from previous value of 10
- NEW.rank_address := 0; -- So badly miss-used that better to just drop it!
- ELSEIF NEW.type in ('county') THEN
- NEW.rank_search := 12;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.type in ('city') THEN
- NEW.rank_search := 16;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.type in ('island') THEN
- NEW.rank_search := 17;
- NEW.rank_address := 0;
- ELSEIF NEW.type in ('town') THEN
- NEW.rank_search := 18;
- NEW.rank_address := 16;
- ELSEIF NEW.type in ('village','hamlet','municipality','district','unincorporated_area','borough') THEN
- NEW.rank_search := 19;
- NEW.rank_address := 16;
- ELSEIF NEW.type in ('suburb','croft','subdivision','isolated_dwelling') THEN
- NEW.rank_search := 20;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.type in ('farm','locality','islet','mountain_pass') THEN
- NEW.rank_search := 20;
- NEW.rank_address := 0;
- -- Irish townlands, tagged as place=locality and locality=townland
- IF (NEW.extratags -> 'locality') = 'townland' THEN
- NEW.rank_address := 20;
- END IF;
- ELSEIF NEW.type in ('neighbourhood') THEN
- NEW.rank_search := 22;
- NEW.rank_address := 22;
- ELSEIF NEW.type in ('house','building') THEN
- NEW.rank_search := 30;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.type in ('houses') THEN
- -- can't guarantee all required nodes loaded yet due to caching in osm2pgsql
- NEW.rank_search := 28;
- NEW.rank_address := 0;
+ IF NOT is_area THEN
+ NEW.rank_address := 0;
END IF;
-
- ELSEIF NEW.class = 'boundary' THEN
- IF ST_GeometryType(NEW.geometry) NOT IN ('ST_Polygon','ST_MultiPolygon') THEN
--- RAISE WARNING 'invalid boundary %',NEW.osm_id;
+ ELSEIF NEW.class = 'boundary' AND NOT is_area THEN
return NULL;
- END IF;
- NEW.rank_search := NEW.admin_level * 2;
- IF NEW.type = 'administrative' THEN
- NEW.rank_address := NEW.rank_search;
+ ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN
+ return NULL;
+ ELSEIF NEW.osm_type = 'N' AND NEW.class = 'highway' THEN
+ NEW.rank_search = 30;
+ NEW.rank_address = 0;
+ ELSEIF NEW.class = 'landuse' AND NOT is_area THEN
+ NEW.rank_search = 30;
+ NEW.rank_address = 0;
+ ELSE
+ -- do table lookup stuff
+ IF NEW.class = 'boundary' and NEW.type = 'administrative' THEN
+ classtype = NEW.type || NEW.admin_level::TEXT;
ELSE
- NEW.rank_address := 0;
+ classtype = NEW.type;
END IF;
- ELSEIF NEW.class = 'landuse' AND ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') THEN
- NEW.rank_search := 22;
- IF NEW.type in ('residential', 'farm', 'farmyard', 'industrial', 'commercial', 'allotments', 'retail') THEN
- NEW.rank_address := NEW.rank_search;
- ELSE
- NEW.rank_address := 0;
+ SELECT l.rank_search, l.rank_address FROM address_levels l
+ WHERE (l.country_code = NEW.country_code or l.country_code is NULL)
+ AND l.class = NEW.class AND (l.type = classtype or l.type is NULL)
+ ORDER BY l.country_code, l.class, l.type LIMIT 1
+ INTO NEW.rank_search, NEW.rank_address;
+
+ IF NEW.rank_search is NULL THEN
+ NEW.rank_search := 30;
END IF;
- ELSEIF NEW.class = 'leisure' and NEW.type in ('park') THEN
- NEW.rank_search := 24;
- NEW.rank_address := 0;
- ELSEIF NEW.class = 'natural' and NEW.type in ('peak','volcano','mountain_range') THEN
- NEW.rank_search := 18;
- NEW.rank_address := 0;
- ELSEIF NEW.class = 'natural' and NEW.type = 'sea' THEN
- NEW.rank_search := 4;
- NEW.rank_address := NEW.rank_search;
- -- any feature more than 5 square miles is probably worth indexing
- ELSEIF ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_Area(NEW.geometry) > 0.1 THEN
- NEW.rank_search := 22;
- NEW.rank_address := 0;
- ELSEIF NEW.class = 'railway' AND NEW.type in ('rail') THEN
- RETURN NULL;
- ELSEIF NEW.class = 'waterway' THEN
- IF NEW.osm_type = 'R' THEN
- NEW.rank_search := 16;
- ELSE
- NEW.rank_search := 17;
+
+ IF NEW.rank_address is NULL THEN
+ NEW.rank_address := 30;
END IF;
- NEW.rank_address := 0;
- ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' AND NEW.type in ('service','cycleway','path','footway','steps','bridleway','motorway_link','primary_link','trunk_link','secondary_link','tertiary_link') THEN
- NEW.rank_search := 27;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.class = 'highway' AND NEW.osm_type != 'N' THEN
- NEW.rank_search := 26;
- NEW.rank_address := NEW.rank_search;
- ELSEIF NEW.class = 'mountain_pass' THEN
- NEW.rank_search := 20;
- NEW.rank_address := 0;
END IF;
- END IF;
-
- IF NEW.rank_search > 30 THEN
- NEW.rank_search := 30;
- END IF;
+ -- some postcorrections
+ IF NEW.class = 'place' THEN
+ IF NEW.type in ('continent', 'sea', 'country', 'state') AND NEW.osm_type = 'N' THEN
+ NEW.rank_address := 0;
+ END IF;
+ ELSEIF NEW.class = 'waterway' AND NEW.osm_type = 'R' THEN
+ -- Slightly promote waterway relations so that they are processed
+ -- before their members.
+ NEW.rank_search := NEW.rank_search - 1;
+ END IF;
- IF NEW.rank_address > 30 THEN
- NEW.rank_address := 30;
- END IF;
+ IF (NEW.extratags -> 'capital') = 'yes' THEN
+ NEW.rank_search := NEW.rank_search - 1;
+ END IF;
- IF (NEW.extratags -> 'capital') = 'yes' THEN
- NEW.rank_search := NEW.rank_search - 1;
END IF;
-- a country code makes no sense below rank 4 (country)
| R1 | boundary | administrative | 2 | de | (-100 40, -101 40, -101 41, -100 41, -100 40) |
When importing
Then placex contains
- | object | addr+country | country_code |
- | R1 | de | de |
+ | object | rank_search| addr+country | country_code |
+ | R1 | 4 | de | de |
Scenario: Illegal country code tag for countries is ignored
Given the named places
| N36 | place | house |
| N37 | place | building |
| N38 | place | houses |
- And the named places
- | osm | class | type | extra+locality |
- | N100 | place | locality | townland |
And the named places
| osm | class | type | extra+capital |
| N101 | place | city | yes |
| N32 | 20 | 0 |
| N33 | 20 | 0 |
| N34 | 20 | 0 |
- | N100 | 20 | 20 |
| N101 | 15 | 16 |
| N35 | 22 | 22 |
| N36 | 30 | 30 |
| object | rank_search | rank_address |
| R20 | 4 | 4 |
| R21 | 30 | 30 |
- | R22 | 12 | 0 |
- | R23 | 20 | 0 |
+ | R22 | 30 | 30 |
+ | R23 | 30 | 30 |
| R40 | 4 | 4 |
| R41 | 8 | 8 |
When importing
Then placex contains
| object | rank_search | rank_address |
- | N1 | 30 | 30 |
+ | N1 | 30 | 0 |
| W1 | 26 | 26 |
| W2 | 26 | 26 |
| W3 | 26 | 26 |
When importing
Then placex contains
| object | rank_search | rank_address |
- | N2 | 30 | 30 |
- | W2 | 30 | 30 |
+ | N2 | 30 | 0 |
+ | W2 | 30 | 0 |
| W4 | 22 | 22 |
| R2 | 22 | 22 |
- | R3 | 22 | 0 |
+ | R3 | 22 | 0 |
Scenario: rank and inclusion of naturals
Given the named places
| N5 | 30 | 30 |
| W2 | 18 | 0 |
| R3 | 18 | 0 |
- | R4 | 22 | 0 |
- | R5 | 4 | 4 |
- | R6 | 4 | 4 |
+ | R4 | 30 | 30 |
+ | R5 | 4 | 0 |
+ | R6 | 4 | 0 |
| W3 | 30 | 30 |
When importing
Then placex contains
| object | rank_address |
- | R1 | 0 |
+ | R1 | 30 |
| W1 | 30 |
When marking for delete R1,W1
Then placex has no entry for W1
| W1 | boundary | historic | Haha | 5 | (1, 2, 4, 3, 1) |
Then placex contains
| object | rank_address |
- | W1 | 0 |
+ | W1 | 30 |
require_once(CONST_BasePath.'/lib/init-cmd.php');
require_once(CONST_BasePath.'/lib/setup_functions.php');
require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
+require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
ini_set('memory_limit', '800M');
array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
+ array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
);
runWithEnv($sCmd, $aProcEnv);
}
+if ($aResult['update-address-levels']) {
+ echo 'Updating address levels from '.CONST_Address_Level_Config.".\n";
+ $oAlParser = new \Nominatim\Setup\AddressLevelParser(CONST_Address_Level_Config);
+ $oAlParser->createTable($oDB, 'address_levels');
+}
+
if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
//
if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {