From a86563cfb1843cd186184dec4028b8d442925323 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Thu, 7 Apr 2016 21:47:51 +0200 Subject: [PATCH] make tables for external data (Tiger and aux) configurable Introduces two new settings CONST_Use_US_Tiger_Data and CONST_Use_Aux_Location_data, which are disabled by default. When false the corresponding tables are ignored in queries and updates. Aux and tiger tables are no longer created by default. This has to be done by the corresponding import scripts. The former aux table creation can be found in sql/aux_tables.sql for reference. --- lib/Geocode.php | 115 ++++++++++++++++++++++------------------- lib/PlaceLookup.php | 4 +- lib/ReverseGeocode.php | 2 +- settings/settings.php | 10 ++++ sql/aux_tables.sql | 6 +++ sql/functions.sql | 10 +++- sql/tables.sql | 9 ---- utils/setup.php | 23 +++++++-- website/details.php | 15 ++++-- website/hierarchy.php | 10 ++-- 10 files changed, 125 insertions(+), 79 deletions(-) create mode 100644 sql/aux_tables.sql diff --git a/lib/Geocode.php b/lib/Geocode.php index f2ca5e50..611ca6de 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -431,56 +431,62 @@ if (30 >= $this->iMinAddressRank && 30 <= $this->iMaxAddressRank) { - //query also location_property_tiger and location_property_aux - //Tiger search only if a housenumber was searched and if it was found (i.e. aPlaceIDs[placeID] = housenumber != -1) (realized through a join) - //only Tiger housenumbers need to be interpolated, because they are saved as lines with start- and endnumber, the common osm housenumbers are usually saved as points - $sHousenumbers = ""; - $i = 0; - $length = count($aPlaceIDs); - foreach($aPlaceIDs as $placeID => $housenumber) - { - $i++; - $sHousenumbers .= "(".$placeID.", ".$housenumber.")"; - if($i<$length) - $sHousenumbers .= ", "; + if (CONST_Use_US_Tiger_Data) + { + //query also location_property_tiger and location_property_aux + //Tiger search only if a housenumber was searched and if it was found (i.e. aPlaceIDs[placeID] = housenumber != -1) (realized through a join) + //only Tiger housenumbers need to be interpolated, because they are saved as lines with start- and endnumber, the common osm housenumbers are usually saved as points + $sHousenumbers = ""; + $i = 0; + $length = count($aPlaceIDs); + foreach($aPlaceIDs as $placeID => $housenumber) + { + $i++; + $sHousenumbers .= "(".$placeID.", ".$housenumber.")"; + if($i<$length) + $sHousenumbers .= ", "; + } + + $sSQL .= "union "; + $sSQL .= "select 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 30 as rank_search, 30 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code"; + $sSQL .= ", get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) as langaddress "; + $sSQL .= ", null as placename"; + $sSQL .= ", null as ref"; + if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; + if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; + $sSQL .= ", avg(st_x(centroid)) as lon, avg(st_y(centroid)) as lat,"; + $sSQL .= $sImportanceSQL."-1.15 as importance "; + $sSQL .= ", (select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(blub.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance "; + $sSQL .= ", null as extra_place "; + $sSQL .= " from (select place_id"; + //interpolate the Tiger housenumbers here + $sSQL .= ", ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) as centroid, parent_place_id, housenumber_for_place "; + $sSQL .= "from (location_property_tiger "; + $sSQL .= " join (values ".$sHousenumbers.") as housenumbers(place_id, housenumber_for_place) using(place_id)) "; + $sSQL .= " where housenumber_for_place>=0 and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here + $sSQL .= " group by place_id, housenumber_for_place"; //is this group by really needed?, place_id + housenumber (in combination) are unique + if (!$this->bDeDupe) $sSQL .= ", place_id "; } - $sSQL .= "union "; - $sSQL .= "select 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 30 as rank_search, 30 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code"; - $sSQL .= ", get_address_by_language(place_id, housenumber_for_place, $sLanguagePrefArraySQL) as langaddress "; - $sSQL .= ", null as placename"; - $sSQL .= ", null as ref"; - if ($this->bIncludeExtraTags) $sSQL .= ", null as extra"; - if ($this->bIncludeNameDetails) $sSQL .= ", null as names"; - $sSQL .= ", avg(st_x(centroid)) as lon, avg(st_y(centroid)) as lat,"; - $sSQL .= $sImportanceSQL."-1.15 as importance "; - $sSQL .= ", (select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(blub.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance "; - $sSQL .= ", null as extra_place "; - $sSQL .= " from (select place_id"; - //interpolate the Tiger housenumbers here - $sSQL .= ", ST_LineInterpolatePoint(linegeo, (housenumber_for_place-startnumber::float)/(endnumber-startnumber)::float) as centroid, parent_place_id, housenumber_for_place "; - $sSQL .= "from (location_property_tiger "; - $sSQL .= " join (values ".$sHousenumbers.") as housenumbers(place_id, housenumber_for_place) using(place_id)) "; - $sSQL .= " where housenumber_for_place>=0 and 30 between $this->iMinAddressRank and $this->iMaxAddressRank) as blub"; //postgres wants an alias here - $sSQL .= " group by place_id, housenumber_for_place"; //is this group by really needed?, place_id + housenumber (in combination) are unique - if (!$this->bDeDupe) $sSQL .= ", place_id "; - - $sSQL .= " union "; - $sSQL .= "select 'L' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 0 as rank_search, 0 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code, "; - $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress, "; - $sSQL .= "null as placename, "; - $sSQL .= "null as ref, "; - if ($this->bIncludeExtraTags) $sSQL .= "null as extra, "; - if ($this->bIncludeNameDetails) $sSQL .= "null as names, "; - $sSQL .= "avg(ST_X(centroid)) as lon, avg(ST_Y(centroid)) as lat, "; - $sSQL .= $sImportanceSQL."-1.10 as importance, "; - $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; - $sSQL .= "null as extra_place "; - $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; - $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - $sSQL .= "group by place_id"; - if (!$this->bDeDupe) $sSQL .= ", place_id"; - $sSQL .= ", get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) "; + if (CONST_Use_Aux_Location_data) + { + $sSQL .= " union "; + $sSQL .= "select 'L' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, 0 as rank_search, 0 as rank_address, min(place_id) as place_id, min(parent_place_id) as parent_place_id, 'us' as country_code, "; + $sSQL .= "get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) as langaddress, "; + $sSQL .= "null as placename, "; + $sSQL .= "null as ref, "; + if ($this->bIncludeExtraTags) $sSQL .= "null as extra, "; + if ($this->bIncludeNameDetails) $sSQL .= "null as names, "; + $sSQL .= "avg(ST_X(centroid)) as lon, avg(ST_Y(centroid)) as lat, "; + $sSQL .= $sImportanceSQL."-1.10 as importance, "; + $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; + $sSQL .= "null as extra_place "; + $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; + $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; + $sSQL .= "group by place_id"; + if (!$this->bDeDupe) $sSQL .= ", place_id"; + $sSQL .= ", get_address_by_language(place_id, -1, $sLanguagePrefArraySQL) "; + } } $sSQL .= " order by importance desc"; @@ -1431,7 +1437,7 @@ $aPlaceIDs = $this->oDB->getCol($sSQL); // If nothing found try the aux fallback table - if (!sizeof($aPlaceIDs)) + if (CONST_Use_Aux_Location_data && !sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; if (sizeof($this->aExcludePlaceIDs)) @@ -1444,7 +1450,7 @@ } //if nothing was found in placex or location_property_aux, then search in Tiger data for this housenumber(location_property_tiger) $searchedHousenumber = intval($aSearch['sHouseNumber']); - if (!sizeof($aPlaceIDs)) + if (CONST_Use_US_Tiger_Data && !sizeof($aPlaceIDs)) { //new query for lines, not housenumbers anymore if($searchedHousenumber%2 == 0){ @@ -1616,9 +1622,12 @@ $sSQL .= "and (placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank "; if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) $sSQL .= " OR (extratags->'place') = 'city'"; if ($this->aAddressRankList) $sSQL .= " OR placex.rank_address in (".join(',',$this->aAddressRankList).")"; - $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; - $sSQL .= "and (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; - if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',',$this->aAddressRankList).")"; + if (CONST_Use_US_Tiger_Data) + { + $sSQL .= ") UNION select place_id from location_property_tiger where place_id in (".join(',',array_keys($aResultPlaceIDs)).") "; + $sSQL .= "and (30 between $this->iMinAddressRank and $this->iMaxAddressRank "; + if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',',$this->aAddressRankList).")"; + } $sSQL .= ")"; if (CONST_Debug) var_dump($sSQL); $aFilteredPlaceIDs = $this->oDB->getCol($sSQL); diff --git a/lib/PlaceLookup.php b/lib/PlaceLookup.php index 94e414dc..c1fec649 100644 --- a/lib/PlaceLookup.php +++ b/lib/PlaceLookup.php @@ -122,7 +122,7 @@ $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted", $this->aLangPrefOrder))."]"; - if ($this->sType == 'tiger') + if (CONST_Use_US_Tiger_Data && $this->sType == 'tiger') { $sSQL = "select place_id,partition, 'T' as osm_type, place_id as osm_id, 'place' as class, 'house' as type, null as admin_level, housenumber, null as street, null as isin, postcode,"; $sSQL .= " 'us' as country_code, parent_place_id, null as linked_place_id, 30 as rank_address, 30 as rank_search,"; @@ -166,7 +166,7 @@ if ($this->bAddressDetails) { - if($this->sType == 'tiger') // to get addressdetails for tiger data, the housenumber is needed + if(CONST_Use_US_Tiger_Data && $this->sType == 'tiger') // to get addressdetails for tiger data, the housenumber is needed $aAddress = $this->getAddressNames($aPlace['housenumber']); else $aAddress = $this->getAddressNames(); diff --git a/lib/ReverseGeocode.php b/lib/ReverseGeocode.php index 3dff161e..75a9b71c 100644 --- a/lib/ReverseGeocode.php +++ b/lib/ReverseGeocode.php @@ -158,7 +158,7 @@ } // Only street found? If it's in the US we can check TIGER data for nearest housenumber - if ($bIsInUnitedStates && $iMaxRank_orig >= 28 && $iPlaceID && ($aPlace['rank_search'] == 26 || $aPlace['rank_search'] == 27 )) + if (CONST_Use_US_Tiger_Data && $bIsInUnitedStates && $iMaxRank_orig >= 28 && $iPlaceID && ($aPlace['rank_search'] == 26 || $aPlace['rank_search'] == 27 )) { $fSearchDiam = 0.001; $sSQL = 'SELECT place_id,parent_place_id,30 as rank_search, ST_line_locate_point(linegeo,'.$sPointSQL.') as fraction'; diff --git a/settings/settings.php b/settings/settings.php index e1dd625f..266ff356 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -13,6 +13,16 @@ @define('CONST_Limit_Reindexing', true); // Set to false to avoid importing extra postcodes for the US. @define('CONST_Use_Extra_US_Postcodes', true); + // Set to true after importing Tiger house number data for the US. + // Note: The tables must already exist or queries will throw errors. + // After changing this setting run ./utils/setup --create-functions + // again. + @define('CONST_Use_US_Tiger_Data', false); + // Set to true after importing other external house number data. + // Note: the aux tables must already exist or queries will throw errors. + // After changing this setting run ./utils/setup --create-functions + // again. + @define('CONST_Use_Aux_Location_data', false); // Proxy settings @define('CONST_HTTP_Proxy', false); diff --git a/sql/aux_tables.sql b/sql/aux_tables.sql new file mode 100644 index 00000000..81054731 --- /dev/null +++ b/sql/aux_tables.sql @@ -0,0 +1,6 @@ +CREATE TABLE location_property_aux () INHERITS (location_property); +CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); +CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); +CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); +GRANT SELECT ON location_property_aux TO "{www-user}"; + diff --git a/sql/functions.sql b/sql/functions.sql index 5b068dfe..4256490e 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1024,7 +1024,7 @@ BEGIN --DEBUG: RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; - RETURN NEW; -- @DIFFUPDATES@ The following is not needed until doing diff updates, and slows the main index process down + RETURN NEW; -- %DIFFUPDATES% The following is not needed until doing diff updates, and slows the main index process down IF NEW.rank_address > 0 THEN IF (ST_GeometryType(NEW.geometry) in ('ST_Polygon','ST_MultiPolygon') AND ST_IsValid(NEW.geometry)) THEN @@ -1650,6 +1650,7 @@ BEGIN END IF; END IF; + -- %NOTIGERDATA% IF 0 THEN -- for the USA we have an additional address table. Merge in zip codes from there too IF NEW.rank_search = 26 AND NEW.calculated_country_code = 'us' THEN FOR location IN SELECT distinct postcode from location_property_tiger where parent_place_id = NEW.place_id LOOP @@ -1662,6 +1663,7 @@ BEGIN nameaddress_vector := array_merge(nameaddress_vector, ARRAY[address_street_word_id]); END LOOP; END IF; + -- %NOTIGERDATA% END IF; -- RAISE WARNING 'ISIN: %', isin_tokens; @@ -2257,18 +2259,22 @@ DECLARE hadcountry BOOLEAN; BEGIN --first query tiger data + -- %NOTIGERDATA% IF 0 THEN select parent_place_id,'us', 30, postcode, null, 'place', 'house' from location_property_tiger WHERE place_id = in_place_id AND in_housenumber>=startnumber AND in_housenumber <= endnumber INTO for_place_id,searchcountrycode, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; IF for_place_id IS NOT NULL THEN searchhousenumber = in_housenumber::text; END IF; - + -- %NOTIGERDATA% END IF; + + -- %NOAUXDATA% IF 0 THEN IF for_place_id IS NULL THEN select parent_place_id,'us', housenumber, 30, postcode, null, 'place', 'house' from location_property_aux WHERE place_id = in_place_id INTO for_place_id,searchcountrycode, searchhousenumber, searchrankaddress, searchpostcode, searchhousename, searchclass, searchtype; END IF; + -- %NOAUXDATA% END IF; IF for_place_id IS NULL THEN select parent_place_id, calculated_country_code, housenumber, rank_search, postcode, name, class, type from placex diff --git a/sql/tables.sql b/sql/tables.sql index 0434e13a..49338c4a 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -100,15 +100,6 @@ CREATE TABLE location_property ( ); SELECT AddGeometryColumn('location_property', 'centroid', 4326, 'POINT', 2); -CREATE TABLE location_property_aux () INHERITS (location_property); -CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); -CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); -CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); -GRANT SELECT ON location_property_aux TO "{www-user}"; - -CREATE TABLE location_property_tiger (linegeo GEOMETRY, place_id BIGINT, partition INTEGER, parent_place_id BIGINT, startnumber INTEGER, endnumber INTEGER, interpolationtype TEXT, postcode TEXT); -GRANT SELECT ON location_property_tiger TO "{www-user}"; - drop table IF EXISTS search_name; CREATE TABLE search_name ( place_id BIGINT, diff --git a/utils/setup.php b/utils/setup.php index 0026a813..cfe7f560 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -219,9 +219,26 @@ if (!file_exists(CONST_InstallPath.'/module/nominatim.so')) fail("nominatim module not built"); $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql'); $sTemplate = str_replace('{modulepath}', CONST_InstallPath.'/module', $sTemplate); - if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate); - if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate); - if (CONST_Limit_Reindexing) $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); + if ($aCMDResult['enable-diff-updates']) + { + $sTemplate = str_replace('RETURN NEW; -- %DIFFUPDATES%', '--', $sTemplate); + } + if ($aCMDResult['enable-debug-statements']) + { + $sTemplate = str_replace('--DEBUG:', '', $sTemplate); + } + if (CONST_Limit_Reindexing) + { + $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate); + } + if (!CONST_Use_US_Tiger_Data) + { + $sTemplate = str_replace('-- %NOTIGERDATA% ', '', $sTemplate); + } + if (!CONST_Use_Aux_Location_data) + { + $sTemplate = str_replace('-- %NOAUXDATA% ', '', $sTemplate); + } pgsqlRunScript($sTemplate); } diff --git a/website/details.php b/website/details.php index c7a56755..5edef6f5 100755 --- a/website/details.php +++ b/website/details.php @@ -56,10 +56,17 @@ $iPlaceID = (int)$_GET['place_id']; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + if (CONST_Use_US_Tiger_Data) + { + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + } + + if (CONST_Use_Aux_Location_data) + { + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + } $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder); diff --git a/website/hierarchy.php b/website/hierarchy.php index 5e3d7299..6a281aa8 100755 --- a/website/hierarchy.php +++ b/website/hierarchy.php @@ -47,13 +47,13 @@ $iPlaceID = (int)$_GET['place_id']; - $sAuxHouseNumber = false; - $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); - if ($iParentPlaceID) + if (CONST_Use_US_Tiger_Data) { - $iPlaceID = $iParentPlaceID; + $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_tiger where place_id = '.$iPlaceID); + if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; } - else + + if (CONST_Use_Aux_Location_data) { $iParentPlaceID = $oDB->getOne('select parent_place_id from location_property_aux where place_id = '.$iPlaceID); if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; -- 2.39.5