// Load lib/init-cmd.php (presumably where getCmdOpt() is defined — confirm) and
// raise PHP's memory limit for this script to 800M.
require_once(CONST_BasePath.'/lib/init-cmd.php');
ini_set('memory_limit', '800M');
// Command-line option table handed to getCmdOpt() below. The first entry is the
// tool description; every following entry describes one option as: long name,
// short name, four numeric fields, value type ('bool', 'int', 'realpath' or
// false) and help text.
// NOTE(review): the meaning of the four numeric fields is defined by getCmdOpt(),
// which is not visible here — confirm before changing them.
-$aCMDOptions = array(
- "Import / update / index osm data",
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
- array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'),
- array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'),
- array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'),
- array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
-
- array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'),
-
- array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
- array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
- array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
-
- array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
- array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
- array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
- array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
-
- array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
- array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
- array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
-
- array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
-);
+$aCMDOptions
+= array(
+ "Import / update / index osm data",
+ array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
+ array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
+ array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
+
+ array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'),
+ array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'),
+ array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'),
+ array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
+
+ array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'),
+
+ array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
+ array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
+ array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
+
+ array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
+ array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
+ array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
+ array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
+
+ array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
+ array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
+ array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
+
+ array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
+ );
// Hand the raw argument vector and the option table to getCmdOpt(); the rest of
// the script reads the parsed options from $aResult.
// NOTE(review): the two trailing `true` flags are interpreted by getCmdOpt() — confirm their meaning there.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Default to a single indexing instance when 'index-instances' was not set.
if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
$bHaveDiff = true;
// Descriptor layout for proc_open(): the child's stdin, stdout and stderr are
// each connected to a pipe.
$aSpec = array(
- 0 => array("pipe", "r"), // stdin
- 1 => array("pipe", "w"), // stdout
- 2 => array("pipe", "w") // stderr
- );
+ 0 => array("pipe", "r"), // stdin
+ 1 => array("pipe", "w"), // stdout
+ 2 => array("pipe", "w") // stderr
+ );
// Osmosis command line: read OSM XML from stdin ("-"), pair it with an empty
// source, derive the change between the two and write it to $sTemporaryFile.
$sCMD = CONST_Osmosis_Binary.' --read-xml - --read-empty --derive-change --write-xml-change '.$sTemporaryFile;
// Print the command before running it.
echo $sCMD."\n";
// Start the process; proc_open() stores the pipe handles in $aPipes.
$hProc = proc_open($sCMD, $aSpec, $aPipes);
}
// Deduplication mode: only runs when the 'deduplicate' option was given.
if ($aResult['deduplicate']) {
- //
- if (getPostgresVersion() < 9.3) {
+ $oDB =& getDB();
+
+ if (getPostgresVersion($oDB) < 9.3) {
fail("ERROR: deduplicate is only currently supported in postgresql 9.3");
}
- $oDB =& getDB();
// Collect the partition numbers listed in country_name and append partition 0.
$sSQL = 'select partition from country_name order by country_code';
$aPartitions = chksql($oDB->getCol($sSQL));
$aPartitions[] = 0;
- $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' ' and class is null and type is null and country_code is null group by word_token having count(*) > 1 order by word_token";
+ // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
+ foreach ($aPartitions as $i => $sPartition) {
+ $sSQL = "select count(*) from search_name_".$sPartition;
+ $nEntries = chksql($oDB->getOne($sSQL));
+ if ($nEntries == 0) {
+ unset($aPartitions[$i]);
+ }
+ }
+
// Find word tokens that start with a space and occur more than once among the
// word rows that have no class, type or country_code.
+ $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
+ $sSQL .= " and class is null and type is null and country_code is null";
+ $sSQL .= " group by word_token having count(*) > 1 order by word_token";
$aDuplicateTokens = chksql($oDB->getAll($sSQL));
foreach ($aDuplicateTokens as $aToken) {
// Skip tokens that are empty or just '-' once trimmed.
if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
echo "Deduping ".$aToken['word_token']."\n";
// List the word ids sharing this token, ordered by how many search_name rows
// contain the id in nameaddress_vector (most referenced first).
// NOTE(review): word_token is concatenated into the statement without quoting;
// a token containing a single quote would break the query — confirm whether
// such tokens can occur and quote via the DB layer if so.
- $sSQL = "select word_id,(select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num from word where word_token = '".$aToken['word_token']."' and class is null and type is null and country_code is null order by num desc";
+ $sSQL = "select word_id,";
+ $sSQL .= " (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num";
+ $sSQL .= " from word where word_token = '".$aToken['word_token'];
+ $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
$aTokenSet = chksql($oDB->getAll($sSQL));
// Take the first (most referenced) row off the list as the one to keep; the
// remaining rows stay in $aTokenSet.
$aKeep = array_shift($aTokenSet);
// First check if there are new updates published (except for minutelies - there's always new diffs to process)
if (CONST_Replication_Update_Interval > 60) {
// exec() appends to an existing output array, so clear it before every call.
unset($aReplicationLag);
- exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
+ exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
// Keep polling while the check command fails or its first output line is below 1.
while ($iErrorLevel > 0 || $aReplicationLag[0] < 1) {
if ($iErrorLevel) {
echo "Error: $iErrorLevel. ";
}
// Wait for the configured recheck interval before asking again.
sleep(CONST_Replication_Recheck_Interval);
unset($aReplicationLag);
- exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
+ exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
}
// There are new replication files - use osmosis to download the file
echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n";
}
$iFileSize = filesize($sImportFile);
$sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
- $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')";
+ $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')";
var_Dump($sSQL);
$oDB->query($sSQL);
- echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
+ echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
}
$iFileSize = filesize($sImportFile);
echo "Error: $iErrorLevel\n";
exit($iErrorLevel);
}
- $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')";
+ $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')";
var_Dump($sSQL);
$oDB->query($sSQL);
- echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
+ echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Archive for debug?
unlink($sImportFile);
}
}
// Record the index step in import_osmosis_log: batch end, file size, step start
// time, current time and the step label 'index'.
- $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
+ $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s', $fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
// Dump the statement to the output before executing it.
var_Dump($sSQL);
$oDB->query($sSQL);
- echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
+ echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Store the batch end timestamp as the last import date.
$sSQL = "update import_status set lastimportdate = '$sBatchEnd'";
$oDB->query($sSQL);
// Seconds elapsed since $fStartTime (set outside this excerpt).
$fDuration = time() - $fStartTime;
- echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60,2)." minutes\n";
+ echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
// Stop after this pass unless 'import-osmosis-all' was requested.
if (!$aResult['import-osmosis-all']) exit(0);
// Intervals above 60 seconds: sleep until one interval after the batch end
// timestamp. Otherwise: sleep for what is left of the interval after this
// pass. Never sleep a negative amount.
if (CONST_Replication_Update_Interval > 60) {
- $iSleep = max(0,(strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time()));
+ $iSleep = max(0, (strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time()));
} else {
- $iSleep = max(0,CONST_Replication_Update_Interval-$fDuration);
+ $iSleep = max(0, CONST_Replication_Update_Interval-$fDuration);
}
echo date('Y-m-d H:i:s')." Sleeping $iSleep seconds\n";
sleep($iSleep);
}
}
+
/**
 * Read the timestamp entry from the state.txt file in an osmosis directory.
 *
 * Looks for `timestamp=<value>` in `<directory>/state.txt` and returns the
 * value with the backslash-escaped colons (`\:`) turned back into plain
 * colons. Trailing whitespace (for example the `\r` of a CRLF-terminated
 * file) is removed so it cannot leak into SQL statements or strtotime().
 *
 * @param string $sOsmosisConfigDirectory Directory that contains state.txt.
 *
 * @return string The timestamp value, or an empty string when the file cannot
 *                be read or contains no timestamp entry.
 */
function getosmosistimestamp($sOsmosisConfigDirectory)
{
    $sStatePath = $sOsmosisConfigDirectory.'/state.txt';

    $sStateFile = file_get_contents($sStatePath);
    if ($sStateFile === false) {
        // file_get_contents() has already raised a warning naming the path.
        return '';
    }

    if (!preg_match('#timestamp=(.+)#', $sStateFile, $aResult)) {
        // Name the real problem instead of failing on an undefined index below.
        trigger_error('No timestamp entry found in '.$sStatePath, E_USER_WARNING);
        return '';
    }

    // "." also matches "\r" and trailing blanks, so strip them from the capture.
    return str_replace('\:', ':', rtrim($aResult[1]));
}