3 require_once(CONST_LibDir.'/init-cmd.php');
4 require_once(CONST_LibDir.'/setup_functions.php');
5 require_once(CONST_LibDir.'/setup/SetupClass.php');
6 require_once(CONST_LibDir.'/setup/AddressLevelParser.php');
// Raise PHP's memory limit to 800M for the rest of this script.
8 ini_set('memory_limit', '800M');
10 use Nominatim\Setup\SetupFunctions as SetupFunctions;
// Option table for the command-line parser. The leading string is the tool
// description; each following array describes one option in the field order
// listed in the next comment line.
// NOTE(review): the two consecutive "num-arguments" fields are presumably the
// minimum and maximum number of arguments - confirm against getCmdOpt().
12 // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
15 'Import / update / index osm data',
// Help and output control.
16 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
17 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
18 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Replication updates.
20 array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
21 array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
22 array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
23 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
24 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
25 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
// Postcodes.
27 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
// Import from local files, and the osm2pgsql cache size.
29 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
30 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
31 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Re-import of single OSM objects.
33 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
34 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
35 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
36 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing.
38 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
39 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
40 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
// Maintenance tasks.
42 array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
43 array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
44 array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances')
// Parse the script arguments against the option table; the parsed values are
// read from $aResult throughout the rest of the script.
47 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults when the options were not given: one indexing instance, start rank 0.
49 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
50 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// All date()/strtotime() calls below work in UTC.
52 date_default_timezone_set('Etc/UTC');
// Database handle used by the sections below.
54 $oDB = new Nominatim\DB();
// Server version; compared against 11.0 when the osm2pgsql command is built.
56 $fPostgresVersion = $oDB->getPostgresVersion();
// Split the configured DSN into its parts and fall back to port 5432 when the
// DSN carries no port.
58 $aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
59 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
61 // cache memory to be used by osm2pgsql, should not be more than the available memory
// The value comes from --osm2pgsql-cache and defaults to 2000. When the value
// plus 500 exceeds getTotalMemoryMB(), it is replaced by getCacheMemoryMB()
// and a warning is printed.
62 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
63 if ($iCacheMemory + 500 > getTotalMemoryMB()) {
64 $iCacheMemory = getCacheMemoryMB();
65 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line. The import sections further down clone this
// object and append the file to import.
68 $oOsm2pgsqlCmd = (new \Nominatim\Shell(CONST_Osm2pgsql_Binary))
69 ->addParams('--hstore')
70 ->addParams('--latlong')
71 ->addParams('--append')
73 ->addParams('--with-forward-dependencies', 'false')
74 ->addParams('--log-progress', 'true')
75 ->addParams('--number-processes', 1)
76 ->addParams('--cache', $iCacheMemory)
77 ->addParams('--output', 'gazetteer')
78 ->addParams('--style', CONST_Import_Style)
79 ->addParams('--database', $aDSNInfo['database'])
80 ->addParams('--port', $aDSNInfo['port']);
// Host and user are only passed when the DSN provides them.
82 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
83 $oOsm2pgsqlCmd->addParams('--host', $aDSNInfo['hostspec']);
85 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
86 $oOsm2pgsqlCmd->addParams('--user', $aDSNInfo['username']);
// The password is handed over through the PGPASSWORD environment variable
// instead of a command-line parameter.
88 if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
89 $oOsm2pgsqlCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
// Use a flat-node file when one is configured.
91 if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
92 $oOsm2pgsqlCmd->addParams('--flat-nodes', CONST_Osm2pgsql_Flatnode_File);
// For server version 11.0 and newer, pass options through the environment that
// switch off JIT and set max_parallel_workers_per_gather to 0.
// NOTE(review): the environment variable name is not visible in this excerpt -
// presumably PGOPTIONS; confirm.
94 if ($fPostgresVersion >= 11.0) {
95 $oOsm2pgsqlCmd->addEnvPair(
97 '-c jit=off -c max_parallel_workers_per_gather=0'
// Base command line for the indexer script nominatim.py. The indexing sections
// further down clone this object. --threads is taken from --index-instances.
102 $oIndexCmd = (new \Nominatim\Shell(CONST_DataDir.'/nominatim/nominatim.py'))
103 ->addParams('--database', $aDSNInfo['database'])
104 ->addParams('--port', $aDSNInfo['port'])
105 ->addParams('--threads', $aResult['index-instances']);
// --verbose is added once unless --quiet was given and once more when
// --verbose was given, so it can appear twice on the indexer command line.
106 if (!$aResult['quiet']) {
107 $oIndexCmd->addParams('--verbose');
109 if ($aResult['verbose']) {
110 $oIndexCmd->addParams('--verbose');
// Host and user are only passed when the DSN provides them; the password goes
// through the PGPASSWORD environment variable.
112 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
113 $oIndexCmd->addParams('--host', $aDSNInfo['hostspec']);
115 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
116 $oIndexCmd->addParams('--username', $aDSNInfo['username']);
118 if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
119 $oIndexCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
// --init-updates: check the replication setup, optionally recreate the
// database functions with diff updates enabled, and store the replication
// sequence to start from in the import_status table.
123 if ($aResult['init-updates']) {
124 // sanity check that the replication URL is correct
125 $sBaseState = file_get_contents(CONST_Replication_Url.'/state.txt');
126 if ($sBaseState === false) {
127 echo "\nCannot find state.txt file at the configured replication URL.\n";
128 echo "Does the URL point to a directory containing OSM update data?\n\n";
129 fail('replication URL not reachable.');
131 // sanity check for pyosmium-get-changes
132 if (!CONST_Pyosmium_Binary) {
133 echo "\nCONST_Pyosmium_Binary not configured.\n";
134 echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
135 echo "in your local settings file.\n\n";
136 fail('CONST_Pyosmium_Binary not configured');
// Run the configured binary with --help; the messages below report that it
// could not be executed.
140 $oCMD = new \Nominatim\Shell(CONST_Pyosmium_Binary, '--help');
141 exec($oCMD->escapedCmd(), $aOutput, $iRet);
144 echo "Cannot execute pyosmium-get-changes.\n";
145 echo "Make sure you have pyosmium installed correctly\n";
146 echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
147 fail('pyosmium-get-changes not found or not usable');
// Unless --no-update-functions was given, recreate the database functions with
// 'enable-diff-updates' switched on.
150 if (!$aResult['no-update-functions']) {
151 // instantiate setupClass to use the function therein
152 $cSetup = new SetupFunctions(array(
153 'enable-diff-updates' => true,
154 'verbose' => $aResult['verbose']
156 $cSetup->createFunctions();
159 $sDatabaseDate = getDatabaseDate($oDB);
160 if (!$sDatabaseDate) {
161 fail('Cannot determine date of database.');
// Start three hours (3*60*60 seconds) before the database date.
// NOTE(review): strftime() is deprecated as of PHP 8.1; with the timezone set
// to UTC above, gmdate()/date() with an equivalent format could replace it.
163 $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
165 // get the appropriate state id
167 $oCMD = (new \Nominatim\Shell(CONST_Pyosmium_Binary))
168 ->addParams('--start-date', $sWindBack)
169 ->addParams('--server', CONST_Replication_Url);
// Give up when the command fails or its first output line is 'None'.
171 exec($oCMD->escapedCmd(), $aOutput, $iRet);
172 if ($iRet != 0 || $aOutput[0] == 'None') {
173 fail('Error running pyosmium tools');
// Replace the content of import_status with one row: database date, the
// sequence id reported by the command above, and indexed = true.
// NOTE(review): the statement is built by string concatenation from
// $sDatabaseDate and the command output - consider a parameterized query.
176 $oDB->exec('TRUNCATE import_status');
177 $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
178 $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
182 } catch (\Nominatim\DatabaseError $e) {
183 fail('Could not enter sequence into database.');
186 echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
// --check-for-updates: run check_server_for_updates.py with the replication
// URL and the sequence id currently stored in import_status.
189 if ($aResult['check-for-updates']) {
190 $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
// No stored sequence id means --init-updates has not been run yet.
192 if (!$aLastState['sequence_id']) {
193 fail('Updates not set up. Please run ./utils/update.php --init-updates.');
196 $oCmd = (new \Nominatim\Shell(CONST_BinDir.'/check_server_for_updates.py'))
197 ->addParams(CONST_Replication_Url)
198 ->addParams($aLastState['sequence_id']);
// NOTE(review): how $iRet is used afterwards is not visible in this excerpt.
199 $iRet = $oCmd->run();
// --import-diff / --import-file: feed a local file into osm2pgsql using the
// base command built above. --import-diff wins when both options are given.
204 if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
205 // import diffs and files directly (e.g. from osmosis --rri)
206 $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
208 if (!file_exists($sNextFile)) {
209 fail("Cannot open $sNextFile\n");
// Print the command line, then run it.
213 $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sNextFile);
214 echo $oCMD->escapedCmd()."\n";
215 $iRet = $oCMD->run();
// NOTE(review): the condition guarding this failure is not visible in this
// excerpt - presumably a non-zero $iRet.
218 fail("Error from osm2pgsql, $iRet\n");
221 // Don't update the import status - we don't know what this file contains
// --calculate-postcodes: load sql/update-postcodes.sql from the data directory
// and hand it to runSQLScript().
224 if ($aResult['calculate-postcodes']) {
225 info('Update postcodes centroids');
226 $sTemplate = file_get_contents(CONST_DataDir.'/sql/update-postcodes.sql');
227 runSQLScript($sTemplate, true, true);
// --import-node / --import-way / --import-relation: download one OSM object
// into a temporary change file and import that file with osm2pgsql.
230 $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
// True when --import-from-main-api was given.
232 $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
// Each object type has two candidate URLs: the openstreetmap.org API and an
// Overpass query.
// NOTE(review): the lines choosing between them are not visible in this
// excerpt - presumably selected by $bUseOSMApi; confirm.
234 if (isset($aResult['import-node']) && $aResult['import-node']) {
236 $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
238 $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
// Ways use the API's /full variant or an Overpass query with a recurse-down
// step (%3E is the URL-encoded '>').
242 if (isset($aResult['import-way']) && $aResult['import-way']) {
244 $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
246 $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');%3E;);out%20meta;';
// Relations follow the same pattern as ways.
250 if (isset($aResult['import-relation']) && $aResult['import-relation']) {
252 $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
254 $sContentURL = 'https://overpass-api.de/api/interpreter?data=(rel(id:'.$aResult['import-relation'].');%3E;);out%20meta;';
// Download the selected URL into the temporary file.
// NOTE(review): neither the download nor the write result is checked here.
259 file_put_contents($sTemporaryFile, file_get_contents($sContentURL));
264 // import generated change file
266 $oCMD = (clone $oOsm2pgsqlCmd)->addParams($sTemporaryFile);
267 echo $oCMD->escapedCmd()."\n";
269 $iRet = $oCMD->run();
// NOTE(review): the condition guarding this failure is not visible in this
// excerpt - presumably a non-zero $iRet.
271 fail("osm2pgsql exited with error level $iRet\n");
// --recompute-word-counts: load sql/words_from_search_name.sql from the data
// directory and hand it to runSQLScript().
275 if ($aResult['recompute-word-counts']) {
276 info('Recompute frequency of full-word search terms');
277 $sTemplate = file_get_contents(CONST_DataDir.'/sql/words_from_search_name.sql');
278 runSQLScript($sTemplate, true, true);
// --index: build two indexer command lines from the base command, both
// starting at --index-rank: first with -b appended, then without it.
// NOTE(review): the lines that execute the two commands are not visible in
// this excerpt.
281 if ($aResult['index']) {
282 $oCmd = (clone $oIndexCmd)
283 ->addParams('--minrank', $aResult['index-rank'], '-b');
286 $oCmd = (clone $oIndexCmd)
287 ->addParams('--minrank', $aResult['index-rank']);
// Record in import_status that the data is indexed.
290 $oDB->exec('update import_status set indexed = true');
// --update-address-levels: parse the configured address level file and write
// it into the address_levels table.
293 if ($aResult['update-address-levels']) {
294 echo 'Updating address levels from '.CONST_Address_Level_Config.".\n";
295 $oAlParser = new \Nominatim\Setup\AddressLevelParser(CONST_Address_Level_Config);
296 $oAlParser->createTable($oDB, 'address_levels');
// --recompute-importance: rebuild the wikipedia and importance columns of
// placex.
299 if ($aResult['recompute-importance']) {
300 echo "Updating importance values for database.\n";
// NOTE(review): this replaces the $oDB handle created near the top of the
// script with a new one.
301 $oDB = new Nominatim\DB();
// The SQL batch does four things in order:
//  1. disable all triggers on placex,
//  2. set (wikipedia, importance) of every row from compute_importance(),
//  3. let a row take over wikipedia/importance from a row linked to it
//     (d.linked_place_id = s.place_id) when the linked row has a wikipedia
//     entry and the row itself has none or a lower importance,
//  4. enable the triggers again.
// NOTE(review): the line that executes $sSQL is not visible in this excerpt.
304 $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
305 $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
306 $sSQL .= ' (SELECT wikipedia, importance';
307 $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
308 $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
309 $sSQL .= ' FROM placex d';
310 $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
311 $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
312 $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
// --import-osmosis / --import-osmosis-all: download replication diffs, import
// them with osm2pgsql, index the result and log each step.
// NOTE(review): the final statement exits unless --import-osmosis-all is set,
// so the steps below presumably sit in a loop whose header is not visible in
// this excerpt.
316 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Refuse to run against download.geofabrik.de with an update interval below
// 86400 seconds.
318 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
319 fail('Error: Update interval too low for download.geofabrik.de. ' .
320 "Please check install documentation (https://nominatim.org/release-docs/latest/admin/Import-and-Update#setting-up-the-update-process)\n");
// File that receives each downloaded diff.
323 $sImportFile = CONST_InstallDir.'/osmosischange.osc';
// Download command template; --start-id is appended for every batch.
325 $oCMDDownload = (new \Nominatim\Shell(CONST_Pyosmium_Binary))
326 ->addParams('--server', CONST_Replication_Url)
327 ->addParams('--outfile', $sImportFile)
328 ->addParams('--size', CONST_Replication_Max_Diff_size);
// Import command: the osm2pgsql base command plus the diff file.
330 $oCMDImport = (clone $oOsm2pgsqlCmd)->addParams($sImportFile);
// Read the current replication state, including the last import date as a
// Unix timestamp (unix_ts).
333 $fStartTime = time();
334 $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
336 if (!$aLastState['sequence_id']) {
337 echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
341 echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
// Start from the stored state; both values are replaced after a successful
// download.
343 $sBatchEnd = $aLastState['lastimportdate'];
344 $iEndSequence = $aLastState['sequence_id'];
// The download step is only entered when the previous batch is marked indexed.
346 if ($aLastState['indexed']) {
347 // Sleep if the update interval has not yet been reached.
348 $fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
349 if ($fNextUpdate > $fStartTime) {
350 $iSleepTime = $fNextUpdate - $fStartTime;
351 echo "Waiting for next update for $iSleepTime sec.";
355 // Download the next batch of changes.
357 $fCMDStartTime = time();
358 $iNextSeq = (int) $aLastState['sequence_id'];
// Remove a diff file left over from an earlier run before downloading.
361 $oCMD = (clone $oCMDDownload)->addParams('--start-id', $iNextSeq);
362 echo $oCMD->escapedCmd()."\n";
363 if (file_exists($sImportFile)) {
364 unlink($sImportFile);
366 exec($oCMD->escapedCmd(), $aOutput, $iResult);
// NOTE(review): the condition that detects "no new updates" is not visible in
// this excerpt; any other non-zero exit code is reported as an error.
369 echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n";
370 sleep(CONST_Replication_Recheck_Interval);
371 } elseif ($iResult != 0) {
372 echo 'ERROR: updates failed.';
// The first output line of the download command is the new end sequence.
375 $iEndSequence = (int)$aOutput[0];
379 // get the newest object from the diff file
382 $oCMD = new \Nominatim\Shell(CONST_BinDir.'/osm_file_date.py', $sImportFile);
383 exec($oCMD->escapedCmd(), $sBatchEnd, $iRet);
// NOTE(review): the $iRet checks that separate the "empty diff" case from a
// real failure are not visible in this excerpt.
385 echo "Diff file is empty. skipping import.\n";
386 if (!$aResult['import-osmosis-all']) {
393 fail('Error getting date from diff file.');
// exec() filled $sBatchEnd with the output lines; keep only the first one.
395 $sBatchEnd = $sBatchEnd[0];
// Restart the timer so the log entry covers only the osm2pgsql import.
398 $fCMDStartTime = time();
401 echo $oCMDImport->escapedCmd()."\n";
403 $iErrorLevel = $oCMDImport->run();
405 echo "Error executing osm2pgsql: $iErrorLevel\n";
409 // write the update logs
410 $iFileSize = filesize($sImportFile);
411 $sSQL = 'INSERT INTO import_osmosis_log';
412 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
413 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
414 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
415 $sSQL .= date('Y-m-d H:i:s')."','import')";
// Store the new batch end and sequence id and mark the data as not yet
// indexed.
420 $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
423 echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Index step, skipped with --no-index: the indexer is run once with -b
// appended and once without.
427 if (!$aResult['no-index']) {
428 $fCMDStartTime = time();
430 $oThisIndexCmd = clone($oIndexCmd);
431 $oThisIndexCmd->addParams('-b');
432 echo $oThisIndexCmd->escapedCmd()."\n";
433 $iErrorLevel = $oThisIndexCmd->run();
435 echo "Error: $iErrorLevel\n";
439 $oThisIndexCmd = clone($oIndexCmd);
440 echo $oThisIndexCmd->escapedCmd()."\n";
441 $iErrorLevel = $oThisIndexCmd->run();
443 echo "Error: $iErrorLevel\n";
// Log the index step; batchsize is NULL for this entry.
447 $sSQL = 'INSERT INTO import_osmosis_log';
448 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
449 $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
450 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
451 $sSQL .= date('Y-m-d H:i:s')."','index')";
454 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
456 $sSQL = 'update import_status set indexed = true';
// NOTE(review): presumably the branch taken when --no-index is set - the
// branch line is not visible in this excerpt.
459 if ($aResult['import-osmosis-all']) {
460 echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
// Report the total time for this batch and stop unless running continuously.
465 $fDuration = time() - $fStartTime;
466 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
467 if (!$aResult['import-osmosis-all']) exit(0);