3 require_once(CONST_BasePath.'/lib/init-cmd.php');
4 require_once(CONST_BasePath.'/lib/setup_functions.php');
5 require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
6 require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
8 ini_set('memory_limit', '800M');
10 use Nominatim\Setup\SetupFunctions as SetupFunctions;
// Command-line option table for the update script, followed by argument
// parsing. Every array entry describes one option in the field order given
// by the legend on the next line.
// NOTE(review): the legend lists "num-arguments" twice; presumably the two
// fields are the minimum and maximum number of arguments - confirm against
// getCmdOpt().
12 // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
15 'Import / update / index osm data',
16 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
17 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
18 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Options controlling replication (diff) updates.
20 array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
21 array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
22 array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
23 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
24 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
25 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
27 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
// Options taking one argument: a file path ('realpath') or an integer ('int').
29 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
30 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
31 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Re-import of single OSM objects by id.
33 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
34 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
35 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
36 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing options.
38 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
39 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
40 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
// Maintenance options.
42 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
43 array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
44 array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
45 array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'),
46 array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
// Parse the process arguments against the table; the parsed values are read
// from $aResult by everything that follows.
49 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults when the options were not given: one indexing instance, start at rank 0.
51 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
53 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// Shared setup for all actions below: timezone, database handle, parsed DSN
// and the base command lines for osm2pgsql and the indexer.
// All date()/strtotime() calls in this script operate in UTC.
55 date_default_timezone_set('Etc/UTC');
57 $oDB = new Nominatim\DB();
// Split the configured DSN into its parts; port 5432 is assumed when the DSN
// does not carry one.
60 $aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
61 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
63 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Defaults to 2000 when --osm2pgsql-cache was not given.
64 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
// If the requested cache plus a margin of 500 exceeds what getTotalMemoryMB()
// reports, fall back to the value suggested by getCacheMemoryMB().
65 if ($iCacheMemory + 500 > getTotalMemoryMB()) {
66 $iCacheMemory = getCacheMemoryMB();
67 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line; the individual actions append the input file.
// NOTE(review): configuration and DSN values are concatenated without shell
// escaping, so values containing spaces or shell metacharacters would break
// (or alter) the command.
69 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -S '.CONST_Import_Style.' -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// User and host are only passed when the DSN names them.
70 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
71 $sOsm2pgsqlCmd .= ' -U ' . $aDSNInfo['username'];
73 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
74 $sOsm2pgsqlCmd .= ' -H ' . $aDSNInfo['hostspec'];
// The password is handed to child processes through the PGPASSWORD
// environment variable rather than on the command line. Because $_ENV is
// merged last, a PGPASSWORD already present in $_ENV takes precedence.
77 if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
78 $aProcEnv = array_merge(array('PGPASSWORD' => $aDSNInfo['password']), $_ENV);
// Optional flat-node file, passed through when configured.
81 if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
82 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// Path of the indexer script; connection arguments are appended where it is used.
85 $sIndexCmd = CONST_BasePath.'/nominatim/nominatim.py';
// --init-updates: verify the replication setup, optionally refresh the
// database functions for differential updates and record the replication
// sequence from which updates will start in import_status.
if ($aResult['init-updates']) {
    // sanity check that the replication URL is correct
    $sBaseState = file_get_contents(CONST_Replication_Url.'/state.txt');
    if ($sBaseState === false) {
        echo "\nCannot find state.txt file at the configured replication URL.\n";
        echo "Does the URL point to a directory containing OSM update data?\n\n";
        fail('replication URL not reachable.');
    }

    // sanity check for pyosmium-get-changes
    if (!CONST_Pyosmium_Binary) {
        echo "\nCONST_Pyosmium_Binary not configured.\n";
        echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
        echo "in your local settings file.\n\n";
        fail('CONST_Pyosmium_Binary not configured');
    }

    // exec() appends to an existing output array, so start from an empty one.
    $aOutput = array();
    $sCmd = CONST_Pyosmium_Binary.' --help';
    exec($sCmd, $aOutput, $iRet);
    if ($iRet != 0) {
        echo "Cannot execute pyosmium-get-changes.\n";
        echo "Make sure you have pyosmium installed correctly\n";
        echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
        fail('pyosmium-get-changes not found or not usable');
    }

    if (!$aResult['no-update-functions']) {
        // instantiate setupClass to use the function therein
        $cSetup = new SetupFunctions(array(
            'enable-diff-updates' => true,
            'verbose' => $aResult['verbose']
        ));
        $cSetup->createFunctions();
    }

    $sDatabaseDate = getDatabaseDate($oDB);
    if (!$sDatabaseDate) {
        fail('Cannot determine date of database.');
    }
    // Start three hours before the database date. gmdate() yields the same
    // UTC timestamp format as the deprecated strftime('%Y-%m-%dT%H:%M:%SZ').
    $sWindBack = gmdate('Y-m-d\TH:i:s\Z', strtotime($sDatabaseDate) - (3*60*60));

    // get the appropriate state id
    // Reset the buffer: without this, $aOutput[0] would still hold the first
    // line of the --help output captured above.
    $aOutput = array();
    $sCmd = CONST_Pyosmium_Binary.' -D '.$sWindBack.' --server '.CONST_Replication_Url;
    exec($sCmd, $aOutput, $iRet);
    if ($iRet != 0 || !isset($aOutput[0]) || $aOutput[0] == 'None') {
        fail('Error running pyosmium tools');
    }

    // The sequence id is written into SQL as a bare number; accept digits only.
    $sSequenceId = trim($aOutput[0]);
    if (!ctype_digit($sSequenceId)) {
        fail('Error running pyosmium tools');
    }

    $oDB->exec('TRUNCATE import_status');
    $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
    $sSQL .= $sDatabaseDate."',".$sSequenceId.', true)';

    try {
        $oDB->exec($sSQL);
    } catch (\Nominatim\DatabaseError $e) {
        fail('Could not enter sequence into database.');
    }

    echo "Done. Database updates will start at sequence $sSequenceId ($sWindBack)\n";
}
// --check-for-updates: look up the last imported replication sequence and
// ask the helper script whether the replication server has newer data.
149 if ($aResult['check-for-updates']) {
150 $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
// A missing/empty sequence id means --init-updates has not been run yet.
152 if (!$aLastState['sequence_id']) {
153 fail('Updates not set up. Please run ./utils/update.php --init-updates.');
// Run the helper with the replication URL and the stored sequence id; its
// exit status is captured in $iRet.
156 system(CONST_BasePath.'/utils/check_server_for_updates.py '.CONST_Replication_Url.' '.$aLastState['sequence_id'], $iRet);
// --import-diff / --import-file: feed a local OSM file or diff straight into
// osm2pgsql. --import-diff takes precedence when both options are given.
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
    // import diffs and files directly (e.g. from osmosis --rri)
    $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];

    if (!file_exists($sNextFile)) {
        fail("Cannot open $sNextFile\n");
    }

    // Import the file. The path comes from the command line, so quote it for
    // the shell; otherwise paths with spaces or metacharacters break the call.
    $sCMD = $sOsm2pgsqlCmd.' '.escapeshellarg($sNextFile);
    echo $sCMD."\n";
    $iErrorLevel = runWithEnv($sCMD, $aProcEnv);

    if ($iErrorLevel) {
        fail("Error from osm2pgsql, $iErrorLevel\n");
    }

    // Don't update the import status - we don't know what this file contains
}
// --calculate-postcodes: run the postcode update SQL script against the database.
if ($aResult['calculate-postcodes']) {
    info('Update postcodes centroids');
    // Load the script and hand it to the SQL runner in one step.
    runSQLScript(
        file_get_contents(CONST_BasePath.'/sql/update-postcodes.sql'),
        true,
        true
    );
}
// --import-node / --import-way / --import-relation: download a single OSM
// object (from Overpass, or from the main OSM API with --import-from-main-api)
// into a temporary file and import that file with osm2pgsql.
// When several of these options are given, the last matching one wins
// (relation over way over node).
$sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
$bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
$sContentURL = '';

if (isset($aResult['import-node']) && $aResult['import-node']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
    }
}

if (isset($aResult['import-way']) && $aResult['import-way']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
    }
}

if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        // Previously assigned to a misspelled variable, so relations were
        // never fetched from the main API.
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;';
    }
}

if ($sContentURL) {
    // Stop on a failed download instead of writing an empty file and
    // passing it on to osm2pgsql.
    $sContent = file_get_contents($sContentURL);
    if ($sContent === false) {
        fail("Cannot download $sContentURL\n");
    }
    if (file_put_contents($sTemporaryFile, $sContent) === false) {
        fail("Cannot write $sTemporaryFile\n");
    }
    $bHaveDiff = true;
}

if ($bHaveDiff) {
    // import generated change file
    $sCMD = $sOsm2pgsqlCmd.' '.escapeshellarg($sTemporaryFile);
    echo $sCMD."\n";
    $iErrorLevel = runWithEnv($sCMD, $aProcEnv);
    if ($iErrorLevel) {
        fail("osm2pgsql exited with error level $iErrorLevel\n");
    }
}
// --deduplicate: merge rows of the word table that share the same token.
// For every duplicated token the most frequently referenced word_id is kept
// and all other ids are rewritten to it before their word rows are deleted.
229 if ($aResult['deduplicate']) {
230 $oDB = new Nominatim\DB();
// NOTE(review): the check accepts any version from 9.3 upwards, while the
// message reads as if only 9.3 were supported.
233 if ($oDB->getPostgresVersion() < 9.3) {
234 fail('ERROR: deduplicate is only currently supported in postgresql 9.3');
// Collect the partition ids used for the per-partition search_name_* tables.
237 $sSQL = 'select partition from country_name order by country_code';
238 $aPartitions = $oDB->getCol($sSQL);
241 // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
242 foreach ($aPartitions as $i => $sPartition) {
243 $sSQL = 'select count(*) from search_name_'.$sPartition;
244 $nEntries = $oDB->getOne($sSQL);
245 if ($nEntries == 0) {
246 unset($aPartitions[$i]);
// Tokens that start with a space, carry no class/type/country and occur in
// more than one word row.
250 $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
251 $sSQL .= ' and class is null and type is null and country_code is null';
252 $sSQL .= ' group by word_token having count(*) > 1 order by word_token';
253 $aDuplicateTokens = $oDB->getAll($sSQL);
254 foreach ($aDuplicateTokens as $aToken) {
// Tokens that are blank or a lone dash after trimming are skipped.
255 if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
256 echo 'Deduping '.$aToken['word_token']."\n";
// List all word ids for this token, most used (by nameaddress_vector
// references in search_name) first.
// NOTE(review): the token is placed into the SQL string unescaped; a token
// containing a single quote would break the statement.
257 $sSQL = 'select word_id,';
258 $sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num';
259 $sSQL .= " from word where word_token = '".$aToken['word_token'];
260 $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
261 $aTokenSet = $oDB->getAll($sSQL);
// The first (most used) entry is kept; the remainder of the set is removed.
263 $aKeep = array_shift($aTokenSet);
264 $iKeepID = $aKeep['word_id'];
266 foreach ($aTokenSet as $aRemove) {
// Rows whose name_vector mentions the id: replace it in both vectors.
267 $sSQL = 'update search_name set';
268 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.'),';
269 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
270 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// Rows that mention the id in nameaddress_vector.
273 $sSQL = 'update search_name set';
274 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
275 $sSQL .= ' where nameaddress_vector @> ARRAY['.$aRemove['word_id'].']';
// Country-level area keywords.
278 $sSQL = 'update location_area_country set';
279 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
280 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Per-partition search_name tables (only the non-empty ones kept above).
283 foreach ($aPartitions as $sPartition) {
284 $sSQL = 'update search_name_'.$sPartition.' set';
285 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.')';
286 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// NOTE(review): this builds the same location_area_country statement as a
// few lines above; it looks like a repeat - confirm whether a
// per-partition table was intended.
289 $sSQL = 'update location_area_country set';
290 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
291 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Finally drop the now unreferenced duplicate word row.
295 $sSQL = 'delete from word where word_id = '.$aRemove['word_id'];
// --recompute-word-counts: rebuild the frequency counts of full-word search
// terms by running the corresponding SQL script.
if ($aResult['recompute-word-counts']) {
    info('Recompute frequency of full-word search terms');

    $sWordCountScript = file_get_contents(CONST_BasePath.'/sql/words_from_search_name.sql');
    runSQLScript($sWordCountScript, true, true);
}
// --index: run the indexer over the database and flag the import as indexed.
307 if ($aResult['index']) {
// Database, port, number of parallel instances and start rank for the indexer.
309 .' -d '.$aDSNInfo['database']
310 .' -P '.$aDSNInfo['port']
311 .' -t '.$aResult['index-instances']
312 .' -r '.$aResult['index-rank'];
// Host and user are only passed when the DSN names them.
313 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
314 $sCmd .= ' -H ' . $aDSNInfo['hostspec'];
316 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
317 $sCmd .= ' -U ' . $aDSNInfo['username'];
// NOTE(review): the return value of runWithEnv() is not inspected here, so
// as far as visible the status below is set to indexed even when the
// indexer reports an error.
320 runWithEnv($sCmd, $aProcEnv);
322 $oDB->exec('update import_status set indexed = true');
// --update-address-levels: re-read the address level configuration file and
// rebuild the address_levels table from it.
if ($aResult['update-address-levels']) {
    echo 'Updating address levels from ', CONST_Address_Level_Config, ".\n";

    $oAlParser = new \Nominatim\Setup\AddressLevelParser(CONST_Address_Level_Config);
    $oAlParser->createTable($oDB, 'address_levels');
}
// --recompute-importance: recalculate wikipedia/importance values for all
// rows of placex. The statements are collected into a single SQL string.
331 if ($aResult['recompute-importance']) {
332 echo "Updating importance values for database.\n";
333 $oDB = new Nominatim\DB();
// Triggers on placex are switched off for the duration of the bulk update
// and switched back on by the last statement.
336 $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
// Step 1: recompute both columns for every row via compute_importance().
337 $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
338 $sSQL .= ' (SELECT wikipedia, importance';
339 $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
// Step 2: copy the values from a row d onto the row s that d.linked_place_id
// points to, when d has a wikipedia entry and s has none or a lower importance.
340 $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
341 $sSQL .= ' FROM placex d';
342 $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
343 $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
344 $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
// --import-osmosis / --import-osmosis-all: download replication diffs,
// import them with osm2pgsql, index the new data and log each step.
// --import-osmosis processes one batch; --import-osmosis-all keeps going.
348 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Refuse update intervals below one day (86400 s) for Geofabrik extracts.
350 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
351 fail('Error: Update interval too low for download.geofabrik.de. ' .
352 "Please check install documentation (http://nominatim.org/release-docs/latest/Import-and-Update#setting-up-the-update-process)\n");
// File the downloaded diff is written to, and the three command lines used
// per batch: download, import, index.
355 $sImportFile = CONST_InstallPath.'/osmosischange.osc';
356 $sCMDDownload = CONST_Pyosmium_Binary.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' -s '.CONST_Replication_Max_Diff_size;
357 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
358 $sCMDIndex = $sIndexCmd
359 .' -d '.$aDSNInfo['database']
360 .' -P '.$aDSNInfo['port']
361 .' -t '.$aResult['index-instances'];
362 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
363 $sCMDIndex .= ' -H ' . $aDSNInfo['hostspec'];
365 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
366 $sCMDIndex .= ' -U ' . $aDSNInfo['username'];
// Start of one batch: remember when it began and read the current import
// state, with lastimportdate additionally as a Unix timestamp (unix_ts).
370 $fStartTime = time();
371 $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
// No sequence id stored means --init-updates has not been run.
373 if (!$aLastState['sequence_id']) {
374 echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
378 echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
// Until a new diff is imported, the batch end and sequence are those of the
// last recorded state.
380 $sBatchEnd = $aLastState['lastimportdate'];
381 $iEndSequence = $aLastState['sequence_id'];
// A new download is only attempted when the previous batch is fully indexed.
383 if ($aLastState['indexed']) {
384 // Sleep if the update interval has not yet been reached.
385 $fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
386 if ($fNextUpdate > $fStartTime) {
387 $iSleepTime = $fNextUpdate - $fStartTime;
388 echo "Waiting for next update for $iSleepTime sec.";
392 // Download the next batch of changes.
394 $fCMDStartTime = time();
395 $iNextSeq = (int) $aLastState['sequence_id'];
397 echo "$sCMDDownload -I $iNextSeq\n";
// Remove a diff file left over from an earlier run before downloading.
398 if (file_exists($sImportFile)) {
399 unlink($sImportFile);
// Download starting from the stored sequence id; output lines go to
// $aOutput, the exit status to $iResult.
401 exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult);
404 echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n";
405 sleep(CONST_Replication_Recheck_Interval);
406 } elseif ($iResult != 0) {
407 echo 'ERROR: updates failed.';
// The first output line of the download command is taken as the new end sequence.
410 $iEndSequence = (int)$aOutput[0];
414 // get the newest object from the diff file
// The helper's output lines are collected in $sBatchEnd (an array at this point).
417 exec(CONST_BasePath.'/utils/osm_file_date.py '.$sImportFile, $sBatchEnd, $iRet);
419 echo "Diff file is empty. skipping import.\n";
420 if (!$aResult['import-osmosis-all']) {
427 fail('Error getting date from diff file.');
// Reduce the captured output to its first line, the batch end date.
429 $sBatchEnd = $sBatchEnd[0];
// Import step: run osm2pgsql on the downloaded diff and time it.
432 $fCMDStartTime = time();
433 echo $sCMDImport."\n";
435 $iErrorLevel = runWithEnv($sCMDImport, $aProcEnv);
437 echo "Error executing osm2pgsql: $iErrorLevel\n";
441 // write the update logs
442 $iFileSize = filesize($sImportFile);
443 $sSQL = 'INSERT INTO import_osmosis_log';
444 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
445 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
446 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
447 $sSQL .= date('Y-m-d H:i:s')."','import')";
// Record the new import position and mark it as not yet indexed.
452 $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
455 echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Index step, skipped with --no-index.
459 if (!$aResult['no-index']) {
460 $sThisIndexCmd = $sCMDIndex;
461 $fCMDStartTime = time();
463 echo "$sThisIndexCmd\n";
464 $iErrorLevel = runWithEnv($sThisIndexCmd, $aProcEnv);
466 echo "Error: $iErrorLevel\n";
// Log the index step; no batch size is recorded for it (NULL).
470 $sSQL = 'INSERT INTO import_osmosis_log';
471 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
472 $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
473 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
474 $sSQL .= date('Y-m-d H:i:s')."','index')";
477 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
479 $sSQL = 'update import_status set indexed = true';
// Continuous imports cannot be combined with --no-index.
482 if ($aResult['import-osmosis-all']) {
483 echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
// Report the total batch duration; a single-batch run (--import-osmosis) ends here.
488 $fDuration = time() - $fStartTime;
489 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
490 if (!$aResult['import-osmosis-all']) exit(0);