3 require_once(CONST_BasePath.'/lib/init-cmd.php');
4 require_once(CONST_BasePath.'/lib/setup_functions.php');
5 require_once(CONST_BasePath.'/lib/setup/SetupClass.php');
6 require_once(CONST_BasePath.'/lib/setup/AddressLevelParser.php');
8 ini_set('memory_limit', '800M');
10 use Nominatim\Setup\SetupFunctions as SetupFunctions;
// Command-line option table handed to getCmdOpt() below. Every array entry
// uses the column layout named in the following comment; the first element
// is the text describing the script as a whole.
12 // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
15 'Import / update / index osm data',
16 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
17 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
18 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
// Replication set-up and replication-driven imports.
20 array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
21 array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
22 array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
23 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
24 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
25 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
27 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
// Imports from local files.
29 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
30 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
31 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
// Re-import of single OSM objects by id.
33 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
34 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
35 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
36 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
// Indexing.
38 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
39 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
40 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
// Maintenance tasks.
42 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
43 array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
44 array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
45 array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'),
46 array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
// Parse argv against the table; the rest of the script reads the parsed
// options from $aResult.
49 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults when the options were not given: one indexing instance, rank 0.
51 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
52 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// Shared set-up for all actions: timezone, database handle, DSN parts and the
// base osm2pgsql / indexer command lines.
// Use UTC as the default timezone for every date function called below.
54 date_default_timezone_set('Etc/UTC');
56 $oDB = new Nominatim\DB();
// Split the configured DSN into its parts; use port 5432 when none is given.
59 $aDSNInfo = Nominatim\DB::parseDSN(CONST_Database_DSN);
60 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
62 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Default cache size is 2000 unless --osm2pgsql-cache is given. If that value
// plus 500 exceeds getTotalMemoryMB(), the value from getCacheMemoryMB() is
// used instead and a warning is printed.
63 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
64 if ($iCacheMemory + 500 > getTotalMemoryMB()) {
65 $iCacheMemory = getCacheMemoryMB();
66 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line; the file to import is appended by each action.
// NOTE(review): configuration and DSN values are concatenated into a shell
// command without escapeshellarg() - confirm they never contain shell
// metacharacters or spaces.
68 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -S '.CONST_Import_Style.' -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// User and host are only passed when the DSN contains them.
69 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
70 $sOsm2pgsqlCmd .= ' -U ' . $aDSNInfo['username'];
72 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
73 $sOsm2pgsqlCmd .= ' -H ' . $aDSNInfo['hostspec'];
// The password goes into the PGPASSWORD entry of the environment array that is
// later passed to runWithEnv(), not onto the command line.
// NOTE(review): no assignment to $aProcEnv for the password-less case is
// visible in this excerpt - confirm it is initialised before runWithEnv() uses it.
76 if (isset($aDSNInfo['password']) && $aDSNInfo['password']) {
77 $aProcEnv = array_merge(array('PGPASSWORD' => $aDSNInfo['password']), $_ENV);
// Append the flat-node file option when one is configured.
80 if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
81 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// Path of the indexer script; connection arguments are appended by the
// actions that run it.
84 $sIndexCmd = CONST_BasePath.'/nominatim/nominatim.py';
// --init-updates: verify the replication set-up, optionally reinstall the SQL
// functions with diff-update support, and store the starting replication
// sequence in import_status.
86 if ($aResult['init-updates']) {
87 // sanity check that the replication URL is correct
88 $sBaseState = file_get_contents(CONST_Replication_Url.'/state.txt');
89 if ($sBaseState === false) {
90 echo "\nCannot find state.txt file at the configured replication URL.\n";
91 echo "Does the URL point to a directory containing OSM update data?\n\n";
92 fail('replication URL not reachable.');
94 // sanity check for pyosmium-get-changes
95 if (!CONST_Pyosmium_Binary) {
96 echo "\nCONST_Pyosmium_Binary not configured.\n";
97 echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
98 echo "in your local settings file.\n\n";
99 fail('CONST_Pyosmium_Binary not configured');
// Probe the configured binary by asking for its help text. The check on $iRet
// that guards the messages below is not visible in this excerpt.
102 $sCmd = CONST_Pyosmium_Binary.' --help';
103 exec($sCmd, $aOutput, $iRet);
105 echo "Cannot execute pyosmium-get-changes.\n";
106 echo "Make sure you have pyosmium installed correctly\n";
107 echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
108 fail('pyosmium-get-changes not found or not usable');
// Unless --no-update-functions was given, recreate the SQL functions with
// diff updates enabled.
111 if (!$aResult['no-update-functions']) {
112 // instantiate setupClass to use the function therein
113 $cSetup = new SetupFunctions(array(
114 'enable-diff-updates' => true,
115 'verbose' => $aResult['verbose']
118 $cSetup->createFunctions();
121 $sDatabaseDate = getDatabaseDate($oDB);
122 if (!$sDatabaseDate) {
123 fail('Cannot determine date of database.');
// Start three hours (3*60*60 seconds) before the database date.
// NOTE(review): strftime() is deprecated as of PHP 8.1; with the UTC default
// timezone set earlier, date()/gmdate() produce the same format.
125 $sWindBack = strftime('%Y-%m-%dT%H:%M:%SZ', strtotime($sDatabaseDate) - (3*60*60));
127 // get the appropriate state id
// NOTE(review): exec() appends to an existing output array, and $aOutput was
// already filled by the --help call above. Presumably it is reset on a line
// not visible here - confirm, otherwise $aOutput[0] is a line of help text.
129 $sCmd = CONST_Pyosmium_Binary.' -D '.$sWindBack.' --server '.CONST_Replication_Url;
130 exec($sCmd, $aOutput, $iRet);
131 if ($iRet != 0 || $aOutput[0] == 'None') {
132 fail('Error running pyosmium tools');
// Replace the stored import state with the database date and the sequence id
// taken from $aOutput[0], marked as indexed.
135 $oDB->exec('TRUNCATE import_status');
136 $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
137 $sSQL .= $sDatabaseDate."',".$aOutput[0].', true)';
141 } catch (\Nominatim\DatabaseError $e) {
142 fail('Could not enter sequence into database.');
145 echo "Done. Database updates will start at sequence $aOutput[0] ($sWindBack)\n";
// --check-for-updates: ask the helper script whether the replication server
// has data newer than the sequence id stored in import_status.
148 if ($aResult['check-for-updates']) {
149 $aLastState = $oDB->getRow('SELECT sequence_id FROM import_status');
// An empty sequence id is treated as "updates were never initialised".
151 if (!$aLastState['sequence_id']) {
152 fail('Updates not set up. Please run ./utils/update.php --init-updates.');
// Output of the helper goes straight to stdout; its exit status lands in $iRet.
155 system(CONST_BasePath.'/utils/check_server_for_updates.py '.CONST_Replication_Url.' '.$aLastState['sequence_id'], $iRet);
// --import-diff / --import-file: feed a local file to osm2pgsql.
// --import-diff takes precedence when both options are set.
159 if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
160 // import diffs and files directly (e.g. from osmosis --rri)
161 $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
163 if (!file_exists($sNextFile)) {
164 fail("Cannot open $sNextFile\n");
// Run the base osm2pgsql command on the file, with the environment prepared
// during set-up.
168 $sCMD = $sOsm2pgsqlCmd.' '.$sNextFile;
170 $iErrorLevel = runWithEnv($sCMD, $aProcEnv);
// The check on $iErrorLevel guarding this failure is not visible in this excerpt.
173 fail("Error from osm2pgsql, $iErrorLevel\n");
176 // Don't update the import status - we don't know what this file contains
// --calculate-postcodes: update the postcode centroid table by running
// sql/update-postcodes.sql through runSQLScript().
if ($aResult['calculate-postcodes']) {
    info('Update postcodes centroids');
    $sPostcodeScript = CONST_BasePath.'/sql/update-postcodes.sql';
    $sTemplate = file_get_contents($sPostcodeScript);
    // file_get_contents() returns false when the file cannot be read; stop
    // here instead of handing a non-script value to runSQLScript().
    if ($sTemplate === false) {
        fail("Cannot read SQL script $sPostcodeScript");
    }
    runSQLScript($sTemplate, true, true);
}
// Re-import of single OSM objects: pick the download URL for the requested
// node or way. The downloaded data is written to $sTemporaryFile further down.
185 $sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
// True when --import-from-main-api asks for the OSM API instead of Overpass.
187 $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
// Each object type has one URL per source (OSM API first, Overpass second).
// The branch lines choosing between them are not visible in this excerpt;
// presumably they test $bUseOSMApi.
189 if (isset($aResult['import-node']) && $aResult['import-node']) {
191 $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
193 $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
// For ways, both URLs also request the way's nodes ('/full' and 'node(w)').
197 if (isset($aResult['import-way']) && $aResult['import-way']) {
199 $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
201 $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
// --import-relation: pick the download URL for a single relation, including
// its member ways and nodes. The URL must end up in $sContentURL, the variable
// read by the file_get_contents() call that fetches the change file. The main
// API branch used to assign to a differently named variable, so the URL was
// never used.
if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        // Main OSM API, requested with --import-from-main-api.
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        // Overpass query: the relation, its ways, their nodes and its nodes.
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;';
    }
}
// Download the selected object into the temporary change file and import it
// with osm2pgsql. The conditions guarding these steps are not visible in this
// excerpt.
// NOTE(review): the result of file_get_contents() is written unchecked, so a
// failed download (false) ends up as an empty file - confirm this is intended.
214 file_put_contents($sTemporaryFile, file_get_contents($sContentURL));
219 // import generated change file
220 $sCMD = $sOsm2pgsqlCmd.' '.$sTemporaryFile;
222 $iErrorLevel = runWithEnv($sCMD, $aProcEnv);
224 fail("osm2pgsql exited with error level $iErrorLevel\n");
// --deduplicate: merge word entries that share the same token. For every
// duplicated token one word_id is kept and all other ids are replaced by it in
// the search tables before the surplus word rows are deleted.
// The lines that execute each assembled statement are not visible in this
// excerpt.
228 if ($aResult['deduplicate']) {
229 $oDB = new Nominatim\DB();
232 if ($oDB->getPostgresVersion() < 9.3) {
233 fail('ERROR: deduplicate is only currently supported in postgresql 9.3');
236 $sSQL = 'select partition from country_name order by country_code';
237 $aPartitions = $oDB->getCol($sSQL);
240 // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
241 foreach ($aPartitions as $i => $sPartition) {
242 $sSQL = 'select count(*) from search_name_'.$sPartition;
243 $nEntries = $oDB->getOne($sSQL);
244 if ($nEntries == 0) {
245 unset($aPartitions[$i]);
// Find tokens that start with a space, have no class, type or country code,
// and occur more than once in the word table.
249 $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
250 $sSQL .= ' and class is null and type is null and country_code is null';
251 $sSQL .= ' group by word_token having count(*) > 1 order by word_token';
252 $aDuplicateTokens = $oDB->getAll($sSQL);
253 foreach ($aDuplicateTokens as $aToken) {
// Skip tokens that are empty or a lone '-' once trimmed.
254 if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
255 echo 'Deduping '.$aToken['word_token']."\n";
// Fetch all word ids for this token, ordered by how many search_name rows
// reference each id in nameaddress_vector (most referenced first).
// NOTE(review): word_token is placed between single quotes without escaping;
// a token containing a quote would break the statement - confirm tokens
// cannot contain one.
256 $sSQL = 'select word_id,';
257 $sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num';
258 $sSQL .= " from word where word_token = '".$aToken['word_token'];
259 $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
260 $aTokenSet = $oDB->getAll($sSQL);
// The first (most referenced) id is kept; the rest are merged into it.
262 $aKeep = array_shift($aTokenSet);
263 $iKeepID = $aKeep['word_id'];
265 foreach ($aTokenSet as $aRemove) {
// search_name rows whose name_vector holds the id: replace in both vectors.
266 $sSQL = 'update search_name set';
267 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.'),';
268 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
269 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// search_name rows that hold the id in nameaddress_vector.
272 $sSQL = 'update search_name set';
273 $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$aRemove['word_id'].','.$iKeepID.')';
274 $sSQL .= ' where nameaddress_vector @> ARRAY['.$aRemove['word_id'].']';
// Keywords in location_area_country.
277 $sSQL = 'update location_area_country set';
278 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
279 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Same replacement in every non-empty search_name_<partition> table.
282 foreach ($aPartitions as $sPartition) {
283 $sSQL = 'update search_name_'.$sPartition.' set';
284 $sSQL .= ' name_vector = array_replace(name_vector,'.$aRemove['word_id'].','.$iKeepID.')';
285 $sSQL .= ' where name_vector @> ARRAY['.$aRemove['word_id'].']';
// NOTE(review): this builds the same location_area_country statement as the
// one before the partition loop and does not depend on $sPartition - looks
// redundant, confirm.
288 $sSQL = 'update location_area_country set';
289 $sSQL .= ' keywords = array_replace(keywords,'.$aRemove['word_id'].','.$iKeepID.')';
290 $sSQL .= ' where keywords @> ARRAY['.$aRemove['word_id'].']';
// Finally drop the merged word row.
294 $sSQL = 'delete from word where word_id = '.$aRemove['word_id'];
// --recompute-word-counts: recompute the frequency of full-word search terms
// by running sql/words_from_search_name.sql through runSQLScript().
if ($aResult['recompute-word-counts']) {
    info('Recompute frequency of full-word search terms');
    $sWordCountScript = CONST_BasePath.'/sql/words_from_search_name.sql';
    $sTemplate = file_get_contents($sWordCountScript);
    // file_get_contents() returns false when the file cannot be read; stop
    // here instead of handing a non-script value to runSQLScript().
    if ($sTemplate === false) {
        fail("Cannot read SQL script $sWordCountScript");
    }
    runSQLScript($sTemplate, true, true);
}
// --index: run the indexer on the database and mark the import state as indexed.
306 if ($aResult['index']) {
// Database, port, instance count and start rank arguments. The line that
// starts this expression is not visible in this excerpt; the result is used
// as $sCmd below.
308 .' -d '.$aDSNInfo['database']
309 .' -P '.$aDSNInfo['port']
310 .' -t '.$aResult['index-instances']
311 .' -r '.$aResult['index-rank'];
// Host and user are only passed when the DSN contains them.
312 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
313 $sCmd .= ' -H ' . $aDSNInfo['hostspec'];
315 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
316 $sCmd .= ' -U ' . $aDSNInfo['username'];
// NOTE(review): the return value of runWithEnv() is not inspected here, so
// the status row is updated whatever the indexer's result - confirm intended.
319 runWithEnv($sCmd, $aProcEnv);
321 $oDB->exec('update import_status set indexed = true');
// --update-address-levels: rebuild the address_levels table from the
// configured address level file.
if ($aResult['update-address-levels']) {
    printf("Updating address levels from %s.\n", CONST_Address_Level_Config);
    // Parse the configuration and write it to the database in one step.
    (new \Nominatim\Setup\AddressLevelParser(CONST_Address_Level_Config))
        ->createTable($oDB, 'address_levels');
}
// --recompute-importance: recompute wikipedia and importance for all placex
// rows. The line that executes the assembled SQL is not visible in this excerpt.
330 if ($aResult['recompute-importance']) {
331 echo "Updating importance values for database.\n";
332 $oDB = new Nominatim\DB();
// Triggers on placex are disabled around the bulk updates and re-enabled by
// the last statement.
335 $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
// Step 1: set wikipedia and importance of every row from compute_importance().
336 $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
337 $sSQL .= ' (SELECT wikipedia, importance';
338 $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
// Step 2: copy wikipedia and importance from a row d onto the row s that d's
// linked_place_id points to, when d has a wikipedia value and s has none or
// a lower importance.
339 $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
340 $sSQL .= ' FROM placex d';
341 $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
342 $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
343 $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
// --import-osmosis / --import-osmosis-all: replication cycle. Each cycle
// downloads the next diff with pyosmium, applies it with osm2pgsql, logs the
// step and, unless --no-index is given, runs the indexer.
// Several loop and branch lines of this section are not visible in this
// excerpt; comments only describe what the visible lines show.
347 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Refuse update intervals below 86400 seconds when the replication URL
// points at download.geofabrik.de.
349 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
350 fail('Error: Update interval too low for download.geofabrik.de. ' .
351 "Please check install documentation (http://nominatim.org/release-docs/latest/Import-and-Update#setting-up-the-update-process)\n");
// Assemble the three commands used by a cycle: download, import and index.
354 $sImportFile = CONST_InstallPath.'/osmosischange.osc';
355 $sCMDDownload = CONST_Pyosmium_Binary.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' -s '.CONST_Replication_Max_Diff_size;
356 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
357 $sCMDIndex = $sIndexCmd
358 .' -d '.$aDSNInfo['database']
359 .' -P '.$aDSNInfo['port']
360 .' -t '.$aResult['index-instances'];
361 if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
362 $sCMDIndex .= ' -H ' . $aDSNInfo['hostspec'];
364 if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
365 $sCMDIndex .= ' -U ' . $aDSNInfo['username'];
// Read the stored replication state; unix_ts is lastimportdate as epoch seconds.
369 $fStartTime = time();
370 $aLastState = $oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status');
372 if (!$aLastState['sequence_id']) {
373 echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
377 echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
// Start from the stored state; both values are overwritten once a new diff
// has been downloaded.
379 $sBatchEnd = $aLastState['lastimportdate'];
380 $iEndSequence = $aLastState['sequence_id'];
// The download steps below sit under this check that the last imported batch
// is already indexed.
382 if ($aLastState['indexed']) {
383 // Sleep if the update interval has not yet been reached.
384 $fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
385 if ($fNextUpdate > $fStartTime) {
386 $iSleepTime = $fNextUpdate - $fStartTime;
387 echo "Waiting for next update for $iSleepTime sec.";
391 // Download the next batch of changes.
393 $fCMDStartTime = time();
394 $iNextSeq = (int) $aLastState['sequence_id'];
396 echo "$sCMDDownload -I $iNextSeq\n";
// Remove a change file left over from a previous run before downloading.
397 if (file_exists($sImportFile)) {
398 unlink($sImportFile);
400 exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult);
// The condition selecting the "no new updates" branch is not visible here;
// any other non-zero $iResult is reported as an error.
403 echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n";
404 sleep(CONST_Replication_Recheck_Interval);
405 } elseif ($iResult != 0) {
406 echo 'ERROR: updates failed.';
// The first element of the download output array is taken as the new
// sequence id.
409 $iEndSequence = (int)$aOutput[0];
413 // get the newest object from the diff file
// exec() stores the helper's output lines in $sBatchEnd and its exit status
// in $iRet; the checks on $iRet guarding the messages below are not visible
// in this excerpt.
416 exec(CONST_BasePath.'/utils/osm_file_date.py '.$sImportFile, $sBatchEnd, $iRet);
418 echo "Diff file is empty. skipping import.\n";
419 if (!$aResult['import-osmosis-all']) {
426 fail('Error getting date from diff file.');
// Keep only the first output line as the batch end date.
428 $sBatchEnd = $sBatchEnd[0];
// Apply the downloaded diff with osm2pgsql.
431 $fCMDStartTime = time();
432 echo $sCMDImport."\n";
434 $iErrorLevel = runWithEnv($sCMDImport, $aProcEnv);
436 echo "Error executing osm2pgsql: $iErrorLevel\n";
440 // write the update logs
441 $iFileSize = filesize($sImportFile);
442 $sSQL = 'INSERT INTO import_osmosis_log';
443 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
444 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
445 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
446 $sSQL .= date('Y-m-d H:i:s')."','import')";
// Record the new import position and mark it as not yet indexed.
451 $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
454 echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Index the imported data unless --no-index was given.
458 if (!$aResult['no-index']) {
459 $sThisIndexCmd = $sCMDIndex;
460 $fCMDStartTime = time();
462 echo "$sThisIndexCmd\n";
463 $iErrorLevel = runWithEnv($sThisIndexCmd, $aProcEnv);
465 echo "Error: $iErrorLevel\n";
// Log the index step; batchsize is NULL for index events.
469 $sSQL = 'INSERT INTO import_osmosis_log';
470 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
471 $sSQL .= " values ('$sBatchEnd',$iEndSequence,NULL,'";
472 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
473 $sSQL .= date('Y-m-d H:i:s')."','index')";
476 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
478 $sSQL = 'update import_status set indexed = true';
// --no-index combined with --import-osmosis-all is reported as an error.
481 if ($aResult['import-osmosis-all']) {
482 echo "Error: --no-index cannot be used with continuous imports (--import-osmosis-all).\n";
// Report the total duration; a single-run import (--import-osmosis) exits
// here with status 0.
487 $fDuration = time() - $fStartTime;
488 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
489 if (!$aResult['import-osmosis-all']) exit(0);