// Bootstrap: load the site settings, then the command-line initialisation code located via CONST_BasePath,
// and raise PHP's memory limit to 800M for this run.
4 require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
5 require_once(CONST_BasePath.'/lib/init-cmd.php');
6 ini_set('memory_limit', '800M');
// Command-line option table (presumably $aCMDOptions, which is passed to getCmdOpt() below).
// The first entry is the tool description; every following row describes one option and appears to hold:
// long name, short name, four numeric settings, value type, help text.
// NOTE(review): the statement opening this array and the meaning of the numeric columns are not visible
// in this view — confirm against getCmdOpt().
9 "Import / update / index osm data",
10 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
11 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
12 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Continuous updates from replication diffs.
14 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import using osmosis'),
15 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import using osmosis forever'),
16 array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolate)'),
17 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
19 array('import-all', '', 0, 1, 0, 0, 'bool', 'Import all available files'),
// Imports from local files.
21 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
22 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
23 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Re-import of single objects downloaded by id.
25 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
26 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
27 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
28 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing and maintenance.
30 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
31 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
32 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
34 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
// Parse the script's arguments against the option table; the parsed values end up in $aResult.
36 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults when the options were not given: one indexing instance, indexing starts at rank 0.
38 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
39 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// Make every date()/strtotime() call in this script work in UTC.
41 date_default_timezone_set('Etc/UTC');
// Split the configured database DSN into its parts; use port 5432 when the DSN has no (or an empty) port.
45 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
46 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
48 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Taken from --osm2pgsql-cache, otherwise 2000.
49 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
// When the requested cache plus a margin of 500 exceeds what getTotalMemoryMB() reports,
// fall back to the value from getCacheMemoryMB() and tell the user.
50 if ($iCacheMemory + 500 > getTotalMemoryMB())
52 $iCacheMemory = getCacheMemoryMB();
53 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line reused by every import in this script: fixed flags, a single process,
// the cache size, the gazetteer output, database name and port. The input file is appended by each caller.
// NOTE(review): none of the interpolated parts is passed through escapeshellarg().
55 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// Add the flat-node file option only when such a file is configured.
56 if (!is_null(CONST_Osm2pgsql_Flatnode_File))
58 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// --import-diff: hand a local change file directly to osm2pgsql.
62 if (isset($aResult['import-diff']))
64 // import diff directly (e.g. from osmosis --rri)
65 $sNextFile = $aResult['import-diff'];
// Abort when the given file does not exist.
66 if (!file_exists($sNextFile))
68 fail("Cannot open $sNextFile\n");
// Append the file to the base osm2pgsql command and run it; the exit status lands in $iErrorLevel.
// NOTE(review): $sNextFile is appended without escapeshellarg(), so a path containing spaces or shell
// metacharacters breaks the command.
72 $sCMD = $sOsm2pgsqlCmd.' '.$sNextFile;
74 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): the condition guarding this fail() is not visible in this view; presumably it tests
// $iErrorLevel — confirm.
78 fail("Error from osm2pgsql, $iErrorLevel\n");
81 // Don't update the import status - we don't know what this file contains
// Change file produced by the osmosis conversions in this section and later imported with osm2pgsql.
84 $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
// --import-file: let osmosis turn a full OSM file into a change file by deriving the change
// against an empty input (--read-empty --derive-change).
86 if (isset($aResult['import-file']) && $aResult['import-file'])
// NOTE(review): the path is wrapped in literal single quotes instead of escapeshellarg(); a file name
// containing ' breaks the command.
89 $sCMD = CONST_Osmosis_Binary.' --read-xml \''.$aResult['import-file'].'\' --read-empty --derive-change --write-xml-change '.$sTemporaryFile;
91 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): the condition guarding this fail() is not visible in this view; presumably it tests
// $iErrorLevel — confirm.
94 fail("Error converting osm to osc, osmosis returned: $iErrorLevel\n");
// True when --import-from-main-api was given.
98 $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
// --import-node: download URL for the node. Two candidates are assigned, first the OSM API URL, then the
// Overpass URL.
// NOTE(review): the branch choosing between them is not visible in this view; presumably it is an
// if/else on $bUseOSMApi — confirm.
100 if (isset($aResult['import-node']) && $aResult['import-node'])
104 $sContentURL = 'http://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
108 $sContentURL = 'http://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
// --import-way: same pattern; both URLs also request the nodes of the way ("/full" and "node(w)").
111 if (isset($aResult['import-way']) && $aResult['import-way'])
115 $sContentURL = 'http://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
119 $sContentURL = 'http://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
// --import-relation: choose the download URL for the relation together with its member ways and nodes.
if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        // OSM main API; "/full" also returns the members of the relation.
        // The result must go into $sContentURL (not a differently named variable), because that is
        // the variable the download step reads.
        $sContentURL = 'http://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        // Overpass: the relation, its ways with their nodes, and its direct node members.
        $sContentURL = 'http://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;';
    }
}
// Download the OSM XML for the requested object from the URL selected above.
// NOTE(review): no check of the file_get_contents() result is visible here; it returns false on failure.
135 $sModifyXMLstr = file_get_contents($sContentURL);
// Descriptor entries for the child process: stdin, stdout and stderr are all pipes.
// NOTE(review): the statement opening this array is not visible in this view; presumably it is the
// $aSpec passed to proc_open() below.
139 0 => array("pipe", "r"), // stdin
140 1 => array("pipe", "w"), // stdout
141 2 => array("pipe", "w") // stderr
// osmosis reads the OSM XML from stdin ("-"), derives a change against an empty input and writes it
// to $sTemporaryFile.
143 $sCMD = CONST_Osmosis_Binary.' --read-xml - --read-empty --derive-change --write-xml-change '.$sTemporaryFile;
145 $hProc = proc_open($sCMD, $aSpec, $aPipes);
146 if (!is_resource($hProc))
148 fail("Error converting osm to osc, osmosis failed\n");
// Feed the downloaded XML to osmosis.
150 fwrite($aPipes[0], $sModifyXMLstr);
// Read everything osmosis wrote to stdout and stderr; both are echoed only in verbose mode.
152 $sOut = stream_get_contents($aPipes[1]);
153 if ($aResult['verbose']) echo $sOut;
155 $sErrors = stream_get_contents($aPipes[2]);
156 if ($aResult['verbose']) echo $sErrors;
// A non-zero value from proc_close() is treated as a failed conversion.
158 if ($iError = proc_close($hProc))
162 fail("Error converting osm to osc, osmosis returned: $iError\n");
168 // import generated change file
169 $sCMD = $sOsm2pgsqlCmd.' '.$sTemporaryFile;
171 exec($sCMD, $sJunk, $iErrorLevel);
// NOTE(review): the condition guarding this fail() is not visible in this view; presumably it tests
// $iErrorLevel — confirm.
174 fail("osm2pgsql exited with error level $iErrorLevel\n");
// --deduplicate: merge word rows that share the same token into a single word_id and rewrite every
// reference to the removed ids.
178 if ($aResult['deduplicate'])
// Abort when the PostgreSQL version is below 9.3.
181 if (getPostgresVersion() < 9.3)
183 fail("ERROR: deduplicate is only currently supported in postgresql 9.3");
// Partition values from country_name; they name the search_name_<partition> tables updated below.
187 $sSQL = 'select partition from country_name order by country_code';
188 $aPartitions = $oDB->getCol($sSQL);
189 if (PEAR::isError($aPartitions))
191 fail($aPartitions->getMessage());
// Tokens that start with a space and occur more than once among word rows that have no class,
// no type and no country_code.
195 $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' ' and class is null and type is null and country_code is null group by word_token having count(*) > 1 order by word_token";
196 $aDuplicateTokens = $oDB->getAll($sSQL);
197 foreach($aDuplicateTokens as $aToken)
// Skip tokens that are empty or just "-" once trimmed.
199 if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
200 echo "Deduping ".$aToken['word_token']."\n";
// All word ids carrying this token, ordered by the number of search_name rows whose
// nameaddress_vector contains them (most referenced first).
// NOTE(review): the token is interpolated into the SQL text unescaped; a token containing ' breaks
// the statement.
201 $sSQL = "select word_id,(select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num from word where word_token = '".$aToken['word_token']."' and class is null and type is null and country_code is null order by num desc";
202 $aTokenSet = $oDB->getAll($sSQL);
203 if (PEAR::isError($aTokenSet))
// NOTE(review): what follows this dump (abort or continue) is not visible in this view.
205 var_dump($aTokenSet, $sSQL);
// The first (most referenced) id is kept; every remaining id is replaced by it and then deleted.
209 $aKeep = array_shift($aTokenSet);
210 $iKeepID = $aKeep['word_id'];
212 foreach($aTokenSet as $aRemove)
// search_name rows whose name_vector contains the id: replace it in both vectors.
// NOTE(review): the bodies of the PEAR::isError() checks in this loop are not visible in this view.
214 $sSQL = "update search_name set";
215 $sSQL .= " name_vector = array_replace(name_vector,".$aRemove['word_id'].",".$iKeepID."),";
216 $sSQL .= " nameaddress_vector = array_replace(nameaddress_vector,".$aRemove['word_id'].",".$iKeepID.")";
217 $sSQL .= " where name_vector @> ARRAY[".$aRemove['word_id']."]";
218 $x = $oDB->query($sSQL);
219 if (PEAR::isError($x))
// search_name rows whose nameaddress_vector still contains the id.
225 $sSQL = "update search_name set";
226 $sSQL .= " nameaddress_vector = array_replace(nameaddress_vector,".$aRemove['word_id'].",".$iKeepID.")";
227 $sSQL .= " where nameaddress_vector @> ARRAY[".$aRemove['word_id']."]";
228 $x = $oDB->query($sSQL);
229 if (PEAR::isError($x))
// Keywords of location_area_country.
235 $sSQL = "update location_area_country set";
236 $sSQL .= " keywords = array_replace(keywords,".$aRemove['word_id'].",".$iKeepID.")";
237 $sSQL .= " where keywords @> ARRAY[".$aRemove['word_id']."]";
238 $x = $oDB->query($sSQL);
239 if (PEAR::isError($x))
// Same replacement in each partitioned search_name table.
245 foreach ($aPartitions as $sPartition)
247 $sSQL = "update search_name_".$sPartition." set";
248 $sSQL .= " name_vector = array_replace(name_vector,".$aRemove['word_id'].",".$iKeepID.")";
249 $sSQL .= " where name_vector @> ARRAY[".$aRemove['word_id']."]";
250 $x = $oDB->query($sSQL);
251 if (PEAR::isError($x))
// NOTE(review): this is the same location_area_country statement as the one issued before the
// partition loop and does not use $sPartition; it looks redundant per partition (or a partitioned
// table name was intended) — confirm.
257 $sSQL = "update location_area_country set";
258 $sSQL .= " keywords = array_replace(keywords,".$aRemove['word_id'].",".$iKeepID.")";
259 $sSQL .= " where keywords @> ARRAY[".$aRemove['word_id']."]";
260 $x = $oDB->query($sSQL);
261 if (PEAR::isError($x))
// Finally delete the word row whose id has been replaced.
268 $sSQL = "delete from word where word_id = ".$aRemove['word_id'];
269 $x = $oDB->query($sSQL);
270 if (PEAR::isError($x))
// --index: run the nominatim indexer on the database, passing the number of instances (-t) and the
// starting rank (-r); the indexer's output is passed straight through to the terminal.
279 if ($aResult['index'])
281 passthru(CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
// --import-osmosis / --import-osmosis-all: update the database from replication diffs.
284 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all'])
// Refuse to run against download.geofabrik.de with an update interval shorter than 86400 seconds (one day).
287 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
288 fail("Error: Update interval too low for download.geofabrik.de. Please check install documentation (http://wiki.openstreetmap.org/wiki/Nominatim/Installation#Updates)\n");
// Change file written by osmosis and then imported by osm2pgsql.
291 $sImportFile = CONST_BasePath.'/data/osmosischange.osc';
// Directory handed to osmosis as workingDirectory; getosmosistimestamp() reads state.txt from it.
292 $sOsmosisConfigDirectory = CONST_InstallPath.'/settings';
// Command lines for the steps of one update pass: download the next diffs, query the replication lag,
// import the change file with osm2pgsql, and run the indexer.
293 $sCMDDownload = CONST_Osmosis_Binary.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
294 $sCMDCheckReplicationLag = CONST_Osmosis_Binary.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
295 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
296 $sCMDIndex = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
// Start time of the current update pass.
// NOTE(review): the loop statement enclosing the pass is not visible in this view.
300 $fStartTime = time();
// Download only when no change file is present yet.
303 if (!file_exists($sImportFile))
305 // First check if there are new updates published (except for minutelies - there's always new diffs to process)
306 if ( CONST_Replication_Update_Interval > 60 )
// Ask osmosis for the replication lag; the first output line is used as the lag value.
309 unset($aReplicationLag);
310 exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
// Keep re-checking, sleeping CONST_Replication_Recheck_Interval seconds between attempts, for as long
// as the check fails or reports a lag below 1.
311 while ($iErrorLevel > 0 || $aReplicationLag[0] < 1)
// NOTE(review): the condition deciding when the error text is printed is not visible in this view.
315 echo "Error: $iErrorLevel. ";
316 echo "Re-trying: ".$sCMDCheckReplicationLag." in ".CONST_Replication_Recheck_Interval." secs\n";
322 sleep(CONST_Replication_Recheck_Interval);
323 unset($aReplicationLag);
324 exec($sCMDCheckReplicationLag, $aReplicationLag, $iErrorLevel);
326 // There are new replication files - use osmosis to download the file
327 echo "\n".date('Y-m-d H:i:s')." Replication Delay is ".$aReplicationLag[0]."\n";
// Reset the pass start time (time spent before this point is thereby excluded from the reported
// duration) and time the download step on its own.
329 $fStartTime = time();
330 $fCMDStartTime = time();
331 echo $sCMDDownload."\n";
332 exec($sCMDDownload, $sJunk, $iErrorLevel);
// Repeat the download until osmosis exits with status 0.
333 while ($iErrorLevel > 0)
335 echo "Error: $iErrorLevel\n";
337 echo 'Re-trying: '.$sCMDDownload."\n";
338 exec($sCMDDownload, $sJunk, $iErrorLevel);
// Log row for the osmosis step: batch timestamp, change-file size, start time, end time, step name.
340 $iFileSize = filesize($sImportFile);
341 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// NOTE(review): the call that executes $sSQL is not visible in this view; the values are interpolated
// into the SQL text directly.
342 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osmosis')";
345 echo date('Y-m-d H:i:s')." Completed osmosis step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
// Size of the change file and the timestamp from osmosis' state file; both are written into the log rows.
348 $iFileSize = filesize($sImportFile);
349 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// osm2pgsql step: import the change file and time it.
352 $fCMDStartTime = time();
353 echo $sCMDImport."\n";
354 exec($sCMDImport, $sJunk, $iErrorLevel);
// NOTE(review): the check around this error output is not visible in this view; presumably it tests
// $iErrorLevel — confirm whether the pass stops here.
357 echo "Error: $iErrorLevel\n";
// Log row for the osm2pgsql step.
// NOTE(review): the call that executes $sSQL is not visible in this view.
360 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','osm2pgsql')";
363 echo date('Y-m-d H:i:s')." Completed osm2pgsql step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
365 // Archive for debug?
// Delete the imported change file; the download step only runs when this file does not exist.
366 unlink($sImportFile);
// Re-read the batch timestamp; it is used in the index log row and the import status below.
368 $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// Index step: run the indexer command and time it.
371 $sThisIndexCmd = $sCMDIndex;
372 $fCMDStartTime = time();
// The indexer is not run when --no-index was given.
374 if (!$aResult['no-index'])
376 echo "$sThisIndexCmd\n";
377 exec($sThisIndexCmd, $sJunk, $iErrorLevel);
// NOTE(review): the check around this error output is not visible in this view; presumably it tests
// $iErrorLevel — confirm.
380 echo "Error: $iErrorLevel\n";
// Log row for the index step.
// NOTE(review): the calls that execute the two $sSQL statements below are not visible in this view.
385 $sSQL = "INSERT INTO import_osmosis_log values ('$sBatchEnd',$iFileSize,'".date('Y-m-d H:i:s',$fCMDStartTime)."','".date('Y-m-d H:i:s')."','index')";
388 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60,2)." minutes\n";
// Record the batch timestamp as the last import date.
390 $sSQL = "update import_status set lastimportdate = '$sBatchEnd'";
// Total duration of this pass, measured from $fStartTime.
393 $fDuration = time() - $fStartTime;
394 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60,2)." minutes\n";
// Without --import-osmosis-all only a single pass is run.
395 if (!$aResult['import-osmosis-all']) exit(0);
// Seconds to wait before the next pass, never negative. For intervals above 60 seconds: the time left
// until batch end plus one update interval.
397 if ( CONST_Replication_Update_Interval > 60 )
399 $iSleep = max(0,(strtotime($sBatchEnd)+CONST_Replication_Update_Interval-time()));
// NOTE(review): presumably the else-branch (the else itself is not visible in this view): the update
// interval minus the time this pass took.
403 $iSleep = max(0,CONST_Replication_Update_Interval-$fDuration);
405 echo date('Y-m-d H:i:s')." Sleeping $iSleep seconds\n";
/**
 * Read the replication timestamp from the state.txt file in the osmosis working directory.
 *
 * The file stores the value on a "timestamp=" line with its colons escaped as "\:"; the escaping
 * is removed and surrounding whitespace (e.g. a trailing carriage return) is trimmed.
 *
 * Aborts through fail() when the file cannot be read or has no timestamp line, instead of
 * returning an empty value that callers would write into their SQL statements.
 *
 * @param string $sOsmosisConfigDirectory Directory that contains state.txt.
 * @return string The timestamp with "\:" replaced by ":".
 */
function getosmosistimestamp($sOsmosisConfigDirectory)
{
    $sStateFilePath = $sOsmosisConfigDirectory.'/state.txt';

    // Check readability first so a missing file produces one clear message rather than a PHP warning.
    if (!is_file($sStateFilePath) || !is_readable($sStateFilePath)) {
        fail("Cannot read osmosis state file $sStateFilePath\n");
    }

    $sStateFile = file_get_contents($sStateFilePath);
    if ($sStateFile === false) {
        fail("Cannot read osmosis state file $sStateFilePath\n");
    }

    // Same pattern as before: everything after "timestamp=" up to the end of that line.
    if (preg_match('#timestamp=(.+)#', $sStateFile, $aMatch) !== 1) {
        fail("No timestamp found in osmosis state file $sStateFilePath\n");
    }

    return str_replace('\:', ':', trim($aMatch[1]));
}