3 require_once(CONST_BasePath.'/lib/init-cmd.php');
4 require_once(CONST_BasePath.'/lib/setup_functions.php');
5 require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
6 require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
8 ini_set('memory_limit', '800M');
10 use Nominatim\Setup\SetupFunctions as SetupFunctions;
// Command-line option table and option defaults for the update script.
// NOTE(review): this listing carries embedded line numbers with gaps; the
// statement that opens the option array is not visible here. The entries
// below are presumably the contents of $aCMDOptions, which is passed to
// getCmdOpt() further down - confirm against the upstream file.
12 // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
// NOTE(review): the two num-arguments columns are presumably the minimum and
// maximum number of arguments an option takes - confirm against getCmdOpt().
// The leading string is presumably the program description shown in the help.
15 'Import / update / index osm data',
16 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
17 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
18 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Options controlling replication updates.
20 array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
21 array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
22 array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
23 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
24 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
25 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
27 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
// Options importing data from local files; the realpath type takes one path argument.
29 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
30 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
31 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Options re-importing a single OSM object by its numeric id.
33 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
34 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
35 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
36 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing options.
38 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
39 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
40 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
// Maintenance options.
42 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
43 array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
44 array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
45 array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
// Parse the process arguments; the parsed values are read from $aResult by
// every section that follows.
48 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults when the option was not given: one indexing instance, start at rank 0.
50 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
51 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// Common setup: timezone, database handle, DSN details and the base osm2pgsql
// command line shared by all import sections.
// NOTE(review): this listing has gaps in its embedded numbering; closing
// braces and possibly further statements between the visible lines are missing.
// All dates produced by this script are formatted in UTC.
53 date_default_timezone_set('Etc/UTC');
55 $oDB = new Nominatim\DB();
// Split the configured DSN; fall back to port 5432 when none is given.
58 $aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
59 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
61 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Uses --osm2pgsql-cache when given, otherwise 2000. If that value plus 500
// exceeds getTotalMemoryMB(), it is replaced by getCacheMemoryMB().
62 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
63 if ($iCacheMemory + 500 > getTotalMemoryMB()) {
64 $iCacheMemory = getCacheMemoryMB();
65 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base command; the file to import is appended by the individual sections.
// NOTE(review): configuration and DSN values are concatenated into the shell
// command without escaping - consider escapeshellarg() for each argument.
67 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -S '.CONST_Import_Style.' -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// User and host are only added when present in the DSN.
68 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
69 $sOsm2pgsqlCmd .= ' -U ' . $aDSNInfo['username'];
71 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
72 $sOsm2pgsqlCmd .= ' -H ' . $aDSNInfo['hostspec'];
// The password is handed to child processes through the PGPASSWORD
// environment variable instead of the command line.
// NOTE(review): in the visible lines $aProcEnv is only assigned inside this
// branch, yet it is passed to runWithEnv() unconditionally later - confirm
// that it is initialised (embedded line 74 is not visible).
75 if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
76 $aProcEnv = array_merge(array('PGPASSWORD' => $aDSNInfo['password']), $_ENV);
// Add the flat node file option only when one is configured.
79 if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
80 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// --init-updates: check the replication configuration, optionally recreate
// the database functions, and store the starting sequence id in import_status.
// NOTE(review): this listing has gaps in its embedded numbering; closing
// braces, the condition guarding the pyosmium error messages and the
// statements around the catch clause are not visible.
83 if ($aResult['init-updates']) {
84 // sanity check that the replication URL is correct
85 $sBaseState = file_get_contents(CONST_Replication_Url.'/state.txt');
86 if ($sBaseState === false) {
87 echo "\nCannot find state.txt file at the configured replication URL.\n";
88 echo "Does the URL point to a directory containing OSM update data?\n\n";
89 fail('replication URL not reachable.');
91 // sanity check for pyosmium-get-changes
92 if (!CONST_Pyosmium_Binary) {
93 echo "\nCONST_Pyosmium_Binary not configured.\n";
94 echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
95 echo "in your local settings file.\n\n";
96 fail('CONST_Pyosmium_Binary not configured');
// Probe the binary by running it with --help; $iRet receives the exit status.
99 $sCmd = CONST_Pyosmium_Binary.' --help';
100 exec($sCmd, $aOutput, $iRet);
102 echo "Cannot execute pyosmium-get-changes.\n";
103 echo "Make sure you have pyosmium installed correctly\n";
104 echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
105 fail('pyosmium-get-changes not found or not usable');
// Unless --no-update-functions is given, recreate the database functions with
// diff-update support enabled.
108 if (!$aResult['no-update-functions']) {
109 // instantiate setupClass to use the function therein
110 $cSetup = new SetupFunctions(array(
111 'enable-diff-updates' => true,
112 'verbose' => $aResult['verbose']
115 $cSetup->createFunctions();
118 $sDatabaseDate = getDatabaseDate($oDB);
119 if (!$sDatabaseDate) {
120 fail('Cannot determine date of database.');
// Start three hours (3*60*60 seconds) before the database date.
// NOTE(review): strftime() is deprecated as of PHP 8.1; gmdate() with an
// equivalent format would be the replacement.
122 $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
124 // get the appropriate state id
// NOTE(review): exec() appends to an output array that already holds
// elements, and $aOutput was filled by the --help probe above. Confirm that
// it is reset before this call (embedded line 125 is not visible), otherwise
// $aOutput[0] is the first line of the help text.
126 $sCmd = CONST_Pyosmium_Binary.' -D '.$sWindBack.' --server '.CONST_Replication_Url;
127 exec($sCmd, $aOutput, $iRet);
128 if ($iRet != 0 || $aOutput[0] == 'None') {
129 fail('Error running pyosmium tools');
// Replace the content of import_status with a single row holding the database
// date and the sequence id printed by pyosmium.
// NOTE(review): the values are concatenated into the SQL text unescaped.
132 $oDB->exec('TRUNCATE import_status');
133 $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
134 $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
138 } catch (\Nominatim\DatabaseError $e) {
139 fail('Could not enter sequence into database.');
142 echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
// --check-for-updates: hand the stored sequence id to the helper script
// check_server_for_updates.py together with the replication URL.
// NOTE(review): this listing has gaps in its embedded numbering; what happens
// with $iRet after system() returns is not visible here.
145 if ($aResult['check-for-updates']) {
146 $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
// No stored sequence id means --init-updates has not been run yet.
148 if (!$aLastState['sequence_id']) {
149 fail('Updates not set up. Please run ./utils/update.php --init-updates.');
// system() passes the helper output through and stores its exit status in $iRet.
152 system(CONST_BasePath.'/utils/check_server_for_updates.py '.CONST_Replication_Url.' '.$aLastState['sequence_id'], $iRet);
// --import-diff / --import-file: feed a local file straight to osm2pgsql.
// NOTE(review): this listing has gaps in its embedded numbering; closing
// braces and the condition guarding the osm2pgsql error are not visible.
156 if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
157 // import diffs and files directly (e.g. from osmosis --rri)
// When both options are given, --import-diff takes precedence.
158 $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
160 if (!file_exists($sNextFile)) {
161 fail("Cannot open $sNextFile\n");
// Append the file to the shared osm2pgsql command line.
165 $sCMD = $sOsm2pgsqlCmd.' '.$sNextFile;
// $aProcEnv carries PGPASSWORD when the DSN contains a password.
167 $iErrorLevel = runWithEnv($sCMD, $aProcEnv);
170 fail("Error from osm2pgsql, $iErrorLevel\n");
173 // Don't update the import status - we don't know what this file contains
// --calculate-postcodes: run the SQL script that updates the postcode
// centroid table.
if ($aResult['calculate-postcodes']) {
    info('Update postcodes centroids');
    $sPostcodeScript = CONST_BasePath.'/sql/update-postcodes.sql';
    runSQLScript(file_get_contents($sPostcodeScript), true, true);
}
// --import-node / --import-way / --import-relation: download one OSM object
// (with its member ways and nodes where applicable) and import it with
// osm2pgsql.
//
// Fixes compared to the previous version:
//  * the OSM API URL for relations was assigned to a misspelled variable, so
//    --import-relation together with --import-from-main-api imported nothing;
//  * a failed download is now reported instead of writing an empty file.
//
// NOTE(review): the if/else structure was reconstructed from a listing in
// which brace-only and other short lines were not visible - confirm against
// the upstream file before merging.
$sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';

// With --import-from-main-api the object is fetched from the OSM API,
// otherwise from Overpass.
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];

// Stays empty when none of the three import options was given. When several
// are given, the last one assigned below wins.
$sContentURL = '';

if (isset($aResult['import-node']) && $aResult['import-node']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
    }
}

if (isset($aResult['import-way']) && $aResult['import-way']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
    }
}

if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;';
    }
}

if ($sContentURL) {
    // file_get_contents() returns false on failure; stop before handing
    // osm2pgsql an empty change file.
    $sContent = file_get_contents($sContentURL);
    if ($sContent === false) {
        fail("Cannot download $sContentURL\n");
    }
    file_put_contents($sTemporaryFile, $sContent);

    // import generated change file
    $sCMD = $sOsm2pgsqlCmd.' '.$sTemporaryFile;
    $iErrorLevel = runWithEnv($sCMD, $aProcEnv);
    if ($iErrorLevel) {
        fail("osm2pgsql exited with error level $iErrorLevel\n");
    }
}
// --deduplicate: merge word table entries that share the same token and
// rewrite all references to the removed word ids.
// NOTE(review): this listing has gaps in its embedded numbering; closing
// braces and the statements that execute each $sSQL string are not visible.
225 if ($aResult['deduplicate']) {
226 $oDB = new Nominatim\DB();
// NOTE(review): the check rejects versions below 9.3 while the message reads
// as if only 9.3 itself were supported - the wording could be clarified.
229 if ($oDB->getPostgresVersion() < 9.3) {
230 fail('ERROR: deduplicate is only currently supported in postgresql 9.3');
// Collect the partition ids used for the search_name_<partition> tables.
233 $sSQL = 'select partition from country_name order by country_code';
234 $aPartitions = $oDB->getCol($sSQL);
237 // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
238 foreach ($aPartitions as $i => $sPartition) {
239 $sSQL = 'select count(*) from search_name_'.$sPartition;
240 $nEntries = $oDB->getOne($sSQL);
241 if ($nEntries == 0) {
242 unset($aPartitions[$i]);
// Find tokens that start with a space, carry no class, type or country code,
// and occur more than once in the word table.
246 $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
247 $sSQL .= ' and class is null and type is null and country_code is null';
248 $sSQL .= ' group by word_token having count(*) > 1 order by word_token';
249 $aDuplicateTokens = $oDB->getAll($sSQL);
250 foreach ($aDuplicateTokens as $aToken) {
// Skip tokens that are empty or a lone dash after trimming.
251 if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
252 echo 'Deduping '.$aToken['word_token']."\n";
// List the word ids for this token, most referenced first; num counts the
// search_name rows whose nameaddress_vector contains the id.
// NOTE(review): word_token is concatenated into the SQL text unescaped; a
// token containing a single quote would break the statement - consider
// quoting it through the database layer.
253 $sSQL = 'select word_id,';
254 $sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num';
255 $sSQL .= " from word where word_token = '".$aToken['word_token'];
256 $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
257 $aTokenSet = $oDB->getAll($sSQL);
// The first row (highest num) is kept; every remaining row is merged into it.
259 $aKeep = array_shift($aTokenSet);
260 $iKeepID = $aKeep['word_id'];
262 foreach ($aTokenSet as $aRemove) {
// Replace the removed id in both vectors of rows whose name_vector contains it.
263 $sSQL = 'update search_name set';
264 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.'),';
265 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
266 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// Then handle rows that reference the id only in nameaddress_vector.
269 $sSQL = 'update search_name set';
270 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
271 $sSQL .= ' where nameaddress_vector @> ARRAY['.$aRemove['word_id'].']';
// Same replacement for the keywords of location_area_country.
274 $sSQL = 'update location_area_country set';
275 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
276 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Same replacement in every non-empty partition table.
279 foreach ($aPartitions as $sPartition) {
280 $sSQL = 'update search_name_'.$sPartition.' set';
281 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.')';
282 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// NOTE(review): this builds the same location_area_country statement as the
// one before the partition loop and does not depend on $sPartition; it looks
// redundant - confirm and consider removing it.
285 $sSQL = 'update location_area_country set';
286 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
287 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Finally drop the merged word entry itself.
291 $sSQL = 'delete from word where word_id = '.$aRemove['word_id'];
// --recompute-word-counts: run the SQL script words_from_search_name.sql to
// recompute the frequency of full-word search terms.
if ($aResult['recompute-word-counts']) {
    info('Recompute frequency of full-word search terms');
    $sWordCountScript = CONST_BasePath.'/sql/words_from_search_name.sql';
    runSQLScript(file_get_contents($sWordCountScript), true, true);
}
// --index: run the nominatim indexer and, once it succeeded, flag the
// database as fully indexed.
//
// The exit status of the indexer used to be ignored, so import_status was
// marked as indexed even when indexing failed. It is now checked the same way
// the osm2pgsql calls in this script are checked.
// NOTE(review): assumes runWithEnv() returns a zero/falsy status on success,
// as the other call sites in this file suggest - confirm.
if ($aResult['index']) {
    // Start at rank --index-rank using --index-instances parallel instances.
    $sCmd = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank'];
    if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
        $sCmd .= ' -H ' . $aDSNInfo['hostspec'];
    }
    if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
        $sCmd .= ' -U ' . $aDSNInfo['username'];
    }

    $iErrorLevel = runWithEnv($sCmd, $aProcEnv);
    if ($iErrorLevel) {
        fail("nominatim indexer exited with error level $iErrorLevel\n");
    }

    $oDB->exec('update import_status set indexed = true');
}
// --update-address-levels: parse the configured address level file and write
// it to the address_levels table.
if ($aResult['update-address-levels']) {
    printf("Updating address levels from %s.\n", CONST_Address_Level_Config);
    $oLevelParser = new \Nominatim\Setup\AddressLevelParser(CONST_Address_Level_Config);
    $oLevelParser->createTable($oDB, 'address_levels');
}
// --import-osmosis / --import-osmosis-all: download replication diffs with
// pyosmium, import them with osm2pgsql and index the result, once or forever.
// NOTE(review): this listing has gaps in its embedded numbering; the loop
// header, closing braces, several conditions and the statements that execute
// each $sSQL string, sleep, continue, break or exit are not visible. The
// comments below describe only the visible statements.
323 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Refuse update intervals below one day (86400 seconds) for the Geofabrik server.
325 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
326 fail('Error: Update interval too low for download.geofabrik.de. ' .
327 "Please check install documentation (http://nominatim.org/release-docs/latest/Import-and-Update#setting-up-the-update-process)\n");
// Prepare the three command lines: download, import and index.
// NOTE(review): configuration values are concatenated into the shell
// commands without escaping.
330 $sImportFile = CONST_InstallPath.'/osmosischange.osc';
331 $sCMDDownload = CONST_Pyosmium_Binary.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' -s '.CONST_Replication_Max_Diff_size;
332 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
333 $sCMDIndex = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
334 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
335 $sCMDIndex .= ' -H ' . $aDSNInfo['hostspec'];
337 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
338 $sCMDIndex .= ' -U ' . $aDSNInfo['username'];
// Read the current import state; unix_ts is lastimportdate as epoch seconds.
// NOTE(review): presumably this is the start of each update cycle - confirm.
342 $fStartTime = time();
343 $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
345 if (!$aLastState['sequence_id']) {
346 echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
350 echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
// Defaults taken from the stored state; both are overwritten below once a
// new diff has been downloaded.
352 $sBatchEnd = $aLastState['lastimportdate'];
353 $iEndSequence = $aLastState['sequence_id'];
// A new download is only attempted when the previous batch is fully indexed
// (the database reports the boolean as the string t).
355 if ($aLastState['indexed'] == 't') {
356 // Sleep if the update interval has not yet been reached.
357 $fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
358 if ($fNextUpdate > $fStartTime) {
359 $iSleepTime = $fNextUpdate - $fStartTime;
360 echo "Waiting for next update for $iSleepTime sec.";
364 // Download the next batch of changes.
366 $fCMDStartTime = time();
367 $iNextSeq = (int) $aLastState['sequence_id'];
369 echo "$sCMDDownload -I $iNextSeq\n";
// Remove a leftover diff file before downloading a new one.
370 if (file_exists($sImportFile)) {
371 unlink($sImportFile);
// NOTE(review): exec() appends to an output array that already holds
// elements; confirm $aOutput is reset before this call, since $aOutput[0] is
// read below as the new sequence id.
373 exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult);
376 echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n";
377 sleep(CONST_Replication_Recheck_Interval);
378 } elseif ($iResult != 0) {
379 echo 'ERROR: updates failed.';
// The first output line of the download command is taken as the sequence id
// reached by this batch.
382 $iEndSequence = (int)$aOutput[0];
386 // get the newest object from the diff file
// exec() stores the output lines of osm_file_date.py in $sBatchEnd; the first
// line is extracted further down.
389 exec(CONST_BasePath.'/utils/osm_file_date.py '.$sImportFile, $sBatchEnd, $iRet);
391 echo "Diff file is empty. skipping import.\n";
392 if (!$aResult['import-osmosis-all']) {
399 fail('Error getting date from diff file.');
401 $sBatchEnd = $sBatchEnd[0];
// Import the downloaded diff with osm2pgsql.
404 $fCMDStartTime = time();
405 echo $sCMDImport."\n";
407 $iErrorLevel = runWithEnv($sCMDImport, $aProcEnv);
409 echo "Error executing osm2pgsql: $iErrorLevel\n";
413 // write the update logs
414 $iFileSize = filesize($sImportFile);
415 $sSQL = 'INSERT INTO import_osmosis_log';
416 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
417 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
418 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
419 $sSQL .= date('Y-m-d H:i:s')."','import')";
// Record the new state; indexed is reset to false until the index step ran.
424 $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
427 echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Index the imported data unless --no-index was given.
431 if (!$aResult['no-index']) {
432 $sThisIndexCmd = $sCMDIndex;
433 $fCMDStartTime = time();
435 echo "$sThisIndexCmd\n";
436 $iErrorLevel = runWithEnv($sThisIndexCmd, $aProcEnv);
438 echo "Error: $iErrorLevel\n";
// Log the index step; batchsize is NULL because no file is involved.
442 $sSQL = 'INSERT INTO import_osmosis_log';
443 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
444 $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
445 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
446 $sSQL .= date('Y-m-d H:i:s')."','index')";
449 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
451 $sSQL = 'update import_status set indexed = true';
// NOTE(review): presumably the branch taken when --no-index is set; the
// message states that skipping the index is not allowed for continuous imports.
454 if ($aResult['import-osmosis-all']) {
455 echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
460 $fDuration = time() - $fStartTime;
461 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
// A single run (--import-osmosis) ends here with exit status 0.
462 if (!$aResult['import-osmosis-all']) exit(0);