From: Sarah Hoffmann Date: Tue, 3 Oct 2017 14:28:22 +0000 (+0200) Subject: Merge pull request #803 from lonvia/update-postcodes X-Git-Tag: v3.1.0~60 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/e276ec2e94cdb16b99062bf0ec44c958925c9766?hp=218b70fd967a9f4567955dfaefeecef0069ac862 Merge pull request #803 from lonvia/update-postcodes Add script to update table with artifical postcode centroids --- diff --git a/lib/cmd.php b/lib/cmd.php index d7ba285d..898fbe95 100644 --- a/lib/cmd.php +++ b/lib/cmd.php @@ -136,15 +136,53 @@ function info($sMsg) $aWarnings = []; + function warn($sMsg) { $GLOBALS['aWarnings'][] = $sMsg; echo date('Y-m-d H:i:s == ').'WARNING: '.$sMsg."\n"; } + function repeatWarnings() { foreach ($GLOBALS['aWarnings'] as $sMsg) { echo ' * ',$sMsg."\n"; } } + + +function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false) +{ + // Convert database DSN to psql parameters + $aDSNInfo = DB::parseDSN(CONST_Database_DSN); + if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; + $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database']; + if (!$bVerbose) { + $sCMD .= ' -q'; + } + if ($bfatal && !$bIgnoreErrors) { + $sCMD .= ' -v ON_ERROR_STOP=1'; + } + $aDescriptors = array( + 0 => array('pipe', 'r'), + 1 => STDOUT, + 2 => STDERR + ); + $ahPipes = null; + $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes); + if (!is_resource($hProcess)) { + fail('unable to start pgsql'); + } + + while (strlen($sScript)) { + $written = fwrite($ahPipes[0], $sScript); + if ($written <= 0) break; + $sScript = substr($sScript, $written); + } + fclose($ahPipes[0]); + $iReturn = proc_close($hProcess); + if ($bfatal && $iReturn > 0) { + fail("pgsql returned with error code ($iReturn)"); + } +} diff --git a/sql/update-postcodes.sql b/sql/update-postcodes.sql new file mode 100644 index 00000000..a90abe25 --- /dev/null +++ b/sql/update-postcodes.sql @@ -0,0 +1,52 @@ +-- Create a temporary table with postcodes from placex. + +CREATE TEMP TABLE tmp_new_postcode_locations AS +SELECT country_code, + upper(trim (both ' ' from address->'postcode')) as pc, + ST_Centroid(ST_Collect(ST_Centroid(geometry))) as centroid + FROM placex + WHERE address ? 'postcode' + AND address->'postcode' NOT SIMILAR TO '%(,|;)%' + AND geometry IS NOT null +GROUP BY country_code, pc; + +CREATE INDEX idx_tmp_new_postcode_locations + ON tmp_new_postcode_locations (pc, country_code); + +-- add extra US postcodes +INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid) + SELECT 'us', postcode, ST_SetSRID(ST_Point(x,y),4326) + FROM us_postcode u + WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new + WHERE new.country_code = 'us' AND new.pc = u.postcode); +-- add extra UK postcodes +INSERT INTO tmp_new_postcode_locations (country_code, pc, centroid) + SELECT 'gb', postcode, geometry FROM gb_postcode g + WHERE NOT EXISTS (SELECT 0 FROM tmp_new_postcode_locations new + WHERE new.country_code = 'gb' and new.pc = g.postcode); + +-- Remove all postcodes that are no longer valid +DELETE FROM location_postcode old + WHERE NOT EXISTS(SELECT 0 FROM tmp_new_postcode_locations new + WHERE old.postcode = new.pc + AND old.country_code = new.country_code); + +-- Update geometries where necessary +UPDATE location_postcode old SET geometry = new.centroid, indexed_status = 1 + FROM tmp_new_postcode_locations new + WHERE old.postcode = new.pc AND old.country_code = new.country_code + AND ST_AsText(old.geometry) != ST_AsText(new.centroid); + +-- Remove all postcodes that already exist from the temporary table +DELETE FROM tmp_new_postcode_locations new + WHERE EXISTS(SELECT 0 FROM location_postcode old + WHERE old.postcode = new.pc AND old.country_code = new.country_code); + +-- Add newly added postcode +INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, country_code, pc, centroid + FROM tmp_new_postcode_locations new; + +-- Finally index the newly inserted postcodes +UPDATE location_postcode SET indexed_status = 0 WHERE indexed_status > 0; diff --git a/utils/setup.php b/utils/setup.php index 2369b183..86630196 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -541,8 +541,10 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { $sSQL .= " (SELECT postcode FROM location_postcode"; $sSQL .= " WHERE country_code = 'us')"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + } else { + $sSQL .= "TRUNCATE TABLE us_postcode"; } + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); // add missing postcodes for GB (if available) $sSQL = "INSERT INTO location_postcode"; @@ -765,34 +767,12 @@ function pgsqlRunScriptFile($sFilename) function pgsqlRunScript($sScript, $bfatal = true) { global $aCMDResult; - // Convert database DSN to psql parameters - $aDSNInfo = DB::parseDSN(CONST_Database_DSN); - if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; - $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database']; - if (!$aCMDResult['verbose']) { - $sCMD .= ' -q'; - } - if ($bfatal && !$aCMDResult['ignore-errors']) - $sCMD .= ' -v ON_ERROR_STOP=1'; - $aDescriptors = array( - 0 => array('pipe', 'r'), - 1 => STDOUT, - 2 => STDERR - ); - $ahPipes = null; - $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes); - if (!is_resource($hProcess)) fail('unable to start pgsql'); - - while (strlen($sScript)) { - $written = fwrite($ahPipes[0], $sScript); - if ($written <= 0) break; - $sScript = substr($sScript, $written); - } - fclose($ahPipes[0]); - $iReturn = proc_close($hProcess); - if ($bfatal && $iReturn > 0) { - fail("pgsql returned with error code ($iReturn)"); - } + runSQLScript( + $sScript, + $bfatal, + $aCMDResult['verbose'], + $aCMDResult['ignore-errors'] + ); } function pgsqlRunPartitionScript($sTemplate) diff --git a/utils/update.php b/utils/update.php index 3cc9cdd6..ca700f0b 100755 --- a/utils/update.php +++ b/utils/update.php @@ -15,10 +15,9 @@ $aCMDOptions array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), - array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'), array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), - array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'), + array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'), array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'), array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'), @@ -34,6 +33,7 @@ $aCMDOptions array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'), array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'), + array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'), ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); @@ -101,6 +101,7 @@ if ($aResult['init-updates']) { if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // import diffs and files directly (e.g. from osmosis --rri) $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; + if (!file_exists($sNextFile)) { fail("Cannot open $sNextFile\n"); } @@ -117,6 +118,12 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // Don't update the import status - we don't know what this file contains } +if ($aResult['calculate-postcodes']) { + info("Update postcodes centroids"); + $sTemplate = file_get_contents(CONST_BasePath.'/sql/update-postcodes.sql'); + runSQLScript($sTemplate, true, true); +} + $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc'; $bHaveDiff = false; $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];