4 require_once(dirname(dirname(__FILE__)).'/settings/settings.php');
5 require_once(CONST_BasePath.'/lib/init-cmd.php');
6 ini_set('memory_limit', '800M');
// Command-line option table. The first entry is the tool description shown
// in the help; every following array describes one option. Judging by the
// values, the fields are: long name, short name, four numeric argument-count
// settings, value type ('bool', 'int', 'realpath' or false) and help text.
// NOTE(review): confirm the exact field meaning against getCmdOpt().
10 'Import / update / index osm data',
11 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
12 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
13 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
15 array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
16 array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
17 array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
18 array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
19 array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
21 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
23 array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
24 array('import-diff', '', 0, 1, 1, 1, 'realpath', 'Import a diff (osc) file from local file system'),
25 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
27 array('import-node', '', 0, 1, 1, 1, 'int', 'Re-import node'),
28 array('import-way', '', 0, 1, 1, 1, 'int', 'Re-import way'),
29 array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
30 array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
32 array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
33 array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
34 array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
36 array('deduplicate', '', 0, 1, 0, 0, 'bool', 'Deduplicate tokens'),
37 array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
38 array('no-npi', '', 0, 1, 0, 0, 'bool', '(obsolete)'),
// Parse the script arguments against $aCMDOptions; the parsed options are
// read from $aResult throughout the rest of the script.
40 getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true);
// Defaults for the indexer: one indexing instance, starting at rank 0.
42 if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
44 if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
// Make every date()/strtotime() call in this script work in UTC.
46 date_default_timezone_set('Etc/UTC');
// Split the configured DSN into its parts; when it names no port, use 5432.
50 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
51 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
53 // cache memory to be used by osm2pgsql, should not be more than the available memory
// Take the --osm2pgsql-cache value when given, otherwise 2000
// (presumably MB, going by the helper names below -- TODO confirm).
54 $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000);
// When the requested cache plus 500 exceeds what getTotalMemoryMB() reports,
// replace it with the value from getCacheMemoryMB() and warn the user.
55 if ($iCacheMemory + 500 > getTotalMemoryMB()) {
56 $iCacheMemory = getCacheMemoryMB();
57 echo "WARNING: resetting cache memory to $iCacheMemory\n";
// Base osm2pgsql command line; the import steps below append the input file.
59 $sOsm2pgsqlCmd = CONST_Osm2pgsql_Binary.' -klas --number-processes 1 -C '.$iCacheMemory.' -O gazetteer -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
// Add the flat-node file option only when such a file is configured.
60 if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
61 $sOsm2pgsqlCmd .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
// --init-updates: check that the replication URL and pyosmium-get-changes are
// usable, let setup.php enable diff updates, and store the replication
// sequence to start from in import_status.
// Every fail() call is expected to abort the script.
if ($aResult['init-updates']) {
    // sanity check that the replication URL is correct
    $sBaseState = file_get_contents(CONST_Replication_Url.'/state.txt');
    if ($sBaseState === false) {
        echo "\nCannot find state.txt file at the configured replication URL.\n";
        echo "Does the URL point to a directory containing OSM update data?\n\n";
        fail('replication URL not reachable.');
    }

    // sanity check for pyosmium-get-changes
    if (!CONST_Pyosmium_Binary) {
        echo "\nCONST_Pyosmium_Binary not configured.\n";
        echo "You need to install pyosmium and set up the path to pyosmium-get-changes\n";
        echo "in your local settings file.\n\n";
        fail('CONST_Pyosmium_Binary not configured');
    }

    // exec() appends to an already filled output array, so every call below
    // starts from an empty one.
    $aOutput = array();
    $iRet = -1;
    $sCmd = CONST_Pyosmium_Binary.' --help';
    exec($sCmd, $aOutput, $iRet);
    if ($iRet != 0) {
        echo "Cannot execute pyosmium-get-changes.\n";
        echo "Make sure you have pyosmium installed correctly\n";
        echo "and have set up CONST_Pyosmium_Binary to point to pyosmium-get-changes.\n";
        fail('pyosmium-get-changes not found or not usable');
    }

    $sSetup = CONST_InstallPath.'/utils/setup.php';
    $iRet = -1;
    passthru($sSetup.' --create-functions --enable-diff-updates', $iRet);
    if ($iRet != 0) {
        fail('Error running setup script');
    }

    $sDatabaseDate = getDatabaseDate($oDB);
    if ($sDatabaseDate === false) {
        fail('Cannot determine date of database.');
    }
    $iDatabaseTime = strtotime($sDatabaseDate);
    if ($iDatabaseTime === false) {
        // An unparsable date would otherwise silently wind back to 1969.
        fail('Cannot determine date of database.');
    }
    // Wind back three hours from the database date. gmdate() always formats
    // in UTC, which is what the trailing 'Z' promises, and replaces
    // strftime(), which is deprecated as of PHP 8.1.
    $sWindBack = gmdate('Y-m-d\TH:i:s\Z', $iDatabaseTime - (3*60*60));

    // get the appropriate state id
    $aOutput = array();
    $iRet = -1;
    $sCmd = CONST_Pyosmium_Binary.' -D '.$sWindBack.' --server '.CONST_Replication_Url;
    exec($sCmd, $aOutput, $iRet);
    if ($iRet != 0 || !isset($aOutput[0]) || $aOutput[0] == 'None') {
        fail('Error running pyosmium tools');
    }
    // The sequence id goes into an SQL statement, so accept digits only.
    $sSequence = trim($aOutput[0]);
    if (!preg_match('/^[0-9]+$/', $sSequence)) {
        fail('Error running pyosmium tools');
    }

    if (!pg_query($oDB->connection, 'TRUNCATE import_status')) {
        fail('Could not enter sequence into database.');
    }
    $sSQL = "INSERT INTO import_status (lastimportdate, sequence_id, indexed) VALUES('";
    $sSQL .= $sDatabaseDate."',".$sSequence.', true)';
    if (!pg_query($oDB->connection, $sSQL)) {
        fail('Could not enter sequence into database.');
    }

    echo "Done. Database updates will start at sequence $sSequence ($sWindBack)\n";
}
// --check-for-updates: read the last imported replication sequence from
// import_status and pass it, together with the replication URL, to
// check_server_for_updates.py.
119 if ($aResult['check-for-updates']) {
120 $aLastState = chksql($oDB->getRow('SELECT sequence_id FROM import_status'));
// No sequence id stored: updates were never initialised.
122 if (!$aLastState['sequence_id']) {
123 fail('Updates not set up. Please run ./utils/update.php --init-updates.');
// The helper's exit status is returned in $iRet.
126 system(CONST_BasePath.'/utils/check_server_for_updates.py '.CONST_Replication_Url.' '.$aLastState['sequence_id'], $iRet);
// --import-diff / --import-file: hand a local file directly to osm2pgsql.
// fail() is expected to abort the script.
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
    // import diffs and files directly (e.g. from osmosis --rri)
    // --import-diff wins when both options are given.
    $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];

    if (!file_exists($sNextFile)) {
        fail("Cannot open $sNextFile\n");
    }

    // Quote the path so blanks or shell metacharacters in a user-supplied
    // file name cannot split or alter the osm2pgsql command line.
    $sCMD = $sOsm2pgsqlCmd.' '.escapeshellarg($sNextFile);
    $sJunk = array();
    exec($sCMD, $sJunk, $iErrorLevel);

    if ($iErrorLevel) {
        fail("Error from osm2pgsql, $iErrorLevel\n");
    }

    // Don't update the import status - we don't know what this file contains
}
// --calculate-postcodes: load sql/update-postcodes.sql and run it through
// runSQLScript().
150 if ($aResult['calculate-postcodes']) {
151 info('Update postcodes centroids');
152 $sTemplate = file_get_contents(CONST_BasePath.'/sql/update-postcodes.sql');
153 runSQLScript($sTemplate, true, true);
// --import-node / --import-way / --import-relation: download one OSM object
// into a temporary change file and import that file with osm2pgsql.
// With --import-from-main-api the object comes from the OSM API, otherwise
// from Overpass. When several of the options are given, the last one checked
// below (relation, then way, then node) determines the download.
$sTemporaryFile = CONST_BasePath.'/data/osmosischange.osc';
$bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
$sContentURL = '';

if (isset($aResult['import-node']) && $aResult['import-node']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/node/'.$aResult['import-node'];
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=node('.$aResult['import-node'].');out%20meta;';
    }
}

if (isset($aResult['import-way']) && $aResult['import-way']) {
    if ($bUseOSMApi) {
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/way/'.$aResult['import-way'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=(way('.$aResult['import-way'].');node(w););out%20meta;';
    }
}

if (isset($aResult['import-relation']) && $aResult['import-relation']) {
    if ($bUseOSMApi) {
        // This used to assign to a misspelt variable, so relations were never
        // fetched from the main API.
        $sContentURL = 'https://www.openstreetmap.org/api/0.6/relation/'.$aResult['import-relation'].'/full';
    } else {
        $sContentURL = 'https://overpass-api.de/api/interpreter?data=((rel('.$aResult['import-relation'].');way(r);node(w));node(r));out%20meta;';
    }
}

if ($sContentURL) {
    // Stop on a failed download or write instead of importing an empty file.
    $sContent = file_get_contents($sContentURL);
    if ($sContent === false) {
        fail("Cannot download $sContentURL\n");
    }
    if (file_put_contents($sTemporaryFile, $sContent) === false) {
        fail("Cannot write $sTemporaryFile\n");
    }
    $bHaveDiff = true;
}

if ($bHaveDiff) {
    // import generated change file
    $sCMD = $sOsm2pgsqlCmd.' '.$sTemporaryFile;
    $sJunk = array();
    exec($sCMD, $sJunk, $iErrorLevel);
    if ($iErrorLevel) {
        fail("osm2pgsql exited with error level $iErrorLevel\n");
    }
}
// --deduplicate: find word tokens that exist more than once in the word
// table, keep the entry referenced most often in search_name and rewrite all
// references to the other entries before deleting them.
if ($aResult['deduplicate']) {
    // array_replace(), used in the updates below, needs PostgreSQL 9.3.
    if (getPostgresVersion($oDB) < 9.3) {
        fail('ERROR: deduplicate is only currently supported in postgresql 9.3');
    }

    $sSQL = 'select partition from country_name order by country_code';
    // Several countries may share one partition; each table needs only one pass.
    $aPartitions = array_unique(chksql($oDB->getCol($sSQL)));

    // we don't care about empty search_name_* partitions, they can't contain mentions of duplicates
    foreach ($aPartitions as $i => $sPartition) {
        $sSQL = 'select count(*) from search_name_'.$sPartition;
        $nEntries = chksql($oDB->getOne($sSQL));
        if ($nEntries == 0) {
            unset($aPartitions[$i]);
        }
    }

    // Only tokens starting with a blank and without class, type or country
    // are considered.
    $sSQL = "select word_token,count(*) from word where substr(word_token, 1, 1) = ' '";
    $sSQL .= ' and class is null and type is null and country_code is null';
    $sSQL .= ' group by word_token having count(*) > 1 order by word_token';
    $aDuplicateTokens = chksql($oDB->getAll($sSQL));
    foreach ($aDuplicateTokens as $aToken) {
        if (trim($aToken['word_token']) == '' || trim($aToken['word_token']) == '-') continue;
        echo 'Deduping '.$aToken['word_token']."\n";

        // All word entries for this token, most used first. The token is
        // escaped because it may contain a quote character.
        $sSQL = 'select word_id,';
        $sSQL .= ' (select count(*) from search_name where nameaddress_vector @> ARRAY[word_id]) as num';
        $sSQL .= " from word where word_token = '".pg_escape_string($oDB->connection, $aToken['word_token']);
        $sSQL .= "' and class is null and type is null and country_code is null order by num desc";
        $aTokenSet = chksql($oDB->getAll($sSQL));

        // The first (most used) entry survives, all others are merged into it.
        $aKeep = array_shift($aTokenSet);
        $iKeepID = $aKeep['word_id'];

        foreach ($aTokenSet as $aRemove) {
            $iRemoveID = $aRemove['word_id'];

            $sSQL = 'update search_name set';
            $sSQL .= ' name_vector = array_replace(name_vector,'.$iRemoveID.','.$iKeepID.'),';
            $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$iRemoveID.','.$iKeepID.')';
            $sSQL .= ' where name_vector @> ARRAY['.$iRemoveID.']';
            chksql($oDB->query($sSQL));

            $sSQL = 'update search_name set';
            $sSQL .= ' nameaddress_vector = array_replace(nameaddress_vector,'.$iRemoveID.','.$iKeepID.')';
            $sSQL .= ' where nameaddress_vector @> ARRAY['.$iRemoveID.']';
            chksql($oDB->query($sSQL));

            // This statement was also repeated unchanged for every partition;
            // once it has run no row matches any more, so one run is enough.
            $sSQL = 'update location_area_country set';
            $sSQL .= ' keywords = array_replace(keywords,'.$iRemoveID.','.$iKeepID.')';
            $sSQL .= ' where keywords @> ARRAY['.$iRemoveID.']';
            chksql($oDB->query($sSQL));

            foreach ($aPartitions as $sPartition) {
                $sSQL = 'update search_name_'.$sPartition.' set';
                $sSQL .= ' name_vector = array_replace(name_vector,'.$iRemoveID.','.$iKeepID.')';
                $sSQL .= ' where name_vector @> ARRAY['.$iRemoveID.']';
                chksql($oDB->query($sSQL));
            }

            $sSQL = 'delete from word where word_id = '.$iRemoveID;
            chksql($oDB->query($sSQL));
        }
    }
}
// --recompute-word-counts: load sql/words_from_search_name.sql and run it
// through runSQLScript().
270 if ($aResult['recompute-word-counts']) {
271 info('Recompute frequency of full-word search terms');
272 $sTemplate = file_get_contents(CONST_BasePath.'/sql/words_from_search_name.sql');
273 runSQLScript($sTemplate, true, true);
// --index: run the nominatim indexer on the configured database, passing the
// number of instances (-t) and the starting rank (-r) from the command line
// (defaults 1 and 0, see the option defaults near the top of the script).
276 if ($aResult['index']) {
277 passthru(CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
// --import-osmosis / --import-osmosis-all: download replication diffs with
// pyosmium-get-changes, import them with osm2pgsql and, unless --no-index is
// given, index the new data. According to the option help, --import-osmosis
// imports once and --import-osmosis-all keeps importing forever.
280 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
// Geofabrik downloads require an update interval of at least 86400 s (one day).
282 if (strpos(CONST_Replication_Url, 'download.geofabrik.de') !== false && CONST_Replication_Update_Interval < 86400) {
283 fail('Error: Update interval too low for download.geofabrik.de. ' .
284 "Please check install documentation (http://nominatim.org/release-docs/latest/Import-and-Update#setting-up-the-update-process)\n");
// Command lines for the three steps: download, import and index.
287 $sImportFile = CONST_InstallPath.'/osmosischange.osc';
288 $sCMDDownload = CONST_Pyosmium_Binary.' --server '.CONST_Replication_Url.' -o '.$sImportFile.' -s '.CONST_Replication_Max_Diff_size;
289 $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
290 $sCMDIndex = CONST_InstallPath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
// Read the current import state; unix_ts is lastimportdate as epoch seconds.
293 $fStartTime = time();
294 $aLastState = chksql($oDB->getRow('SELECT *, EXTRACT (EPOCH FROM lastimportdate) as unix_ts FROM import_status'));
// No sequence id stored: updates were never initialised.
296 if (!$aLastState['sequence_id']) {
297 echo "Updates not set up. Please run ./utils/update.php --init-updates.\n";
301 echo 'Currently at sequence '.$aLastState['sequence_id'].' ('.$aLastState['lastimportdate'].') - '.$aLastState['indexed']." indexed\n";
// Start from the stored state; both values are replaced below once a new
// diff has been downloaded.
303 $sBatchEnd = $aLastState['lastimportdate'];
304 $iEndSequence = $aLastState['sequence_id'];
// 't' is presumably the database's text form of boolean true, i.e. the last
// import has been indexed completely -- NOTE(review): confirm.
306 if ($aLastState['indexed'] == 't') {
307 // Sleep if the update interval has not yet been reached.
// The next update is due one update interval after the last import date.
308 $fNextUpdate = $aLastState['unix_ts'] + CONST_Replication_Update_Interval;
309 if ($fNextUpdate > $fStartTime) {
310 $iSleepTime = $fNextUpdate - $fStartTime;
311 echo "Waiting for next update for $iSleepTime sec.";
315 // Download the next batch of changes.
317 $fCMDStartTime = time();
318 $iNextSeq = (int) $aLastState['sequence_id'];
320 echo "$sCMDDownload -I $iNextSeq\n";
// Remove a change file left over from a previous run before downloading.
321 if (file_exists($sImportFile)) {
322 unlink($sImportFile);
324 exec($sCMDDownload.' -I '.$iNextSeq, $aOutput, $iResult);
// NOTE(review): the condition that selects the "no new updates" branch is
// not visible in this extract.
327 echo 'No new updates. Sleeping for '.CONST_Replication_Recheck_Interval." sec.\n";
328 sleep(CONST_Replication_Recheck_Interval);
329 } elseif ($iResult != 0) {
330 echo 'ERROR: updates failed.';
// The first output line of the download command becomes the new end sequence.
333 $iEndSequence = (int)$aOutput[0];
337 // get the newest object from the diff file
// osm_file_date.py's output lines are collected in $sBatchEnd (an array at
// this point); the first line is taken as the batch end date below.
340 exec(CONST_BasePath.'/utils/osm_file_date.py '.$sImportFile, $sBatchEnd, $iRet);
342 echo "Diff file is empty. skipping import.\n";
343 if (!$aResult['import-osmosis-all']) {
350 fail('Error getting date from diff file.');
352 $sBatchEnd = $sBatchEnd[0];
// Import step: run osm2pgsql on the downloaded diff and time it.
355 $fCMDStartTime = time();
356 echo $sCMDImport."\n";
358 exec($sCMDImport, $sJunk, $iErrorLevel);
360 echo "Error executing osm2pgsql: $iErrorLevel\n";
364 // write the update logs
365 $iFileSize = filesize($sImportFile);
366 $sSQL = 'INSERT INTO import_osmosis_log';
367 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
368 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
369 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
370 $sSQL .= date('Y-m-d H:i:s')."','import')";
372 chksql($oDB->query($sSQL));
// Record the new position and mark it as not yet indexed.
375 $sSQL = "UPDATE import_status SET lastimportdate = '$sBatchEnd', indexed=false, sequence_id = $iEndSequence";
377 chksql($oDB->query($sSQL));
378 echo date('Y-m-d H:i:s')." Completed download step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Index step, skipped when --no-index is given.
382 if (!$aResult['no-index']) {
383 $sThisIndexCmd = $sCMDIndex;
384 $fCMDStartTime = time();
386 echo "$sThisIndexCmd\n";
387 exec($sThisIndexCmd, $sJunk, $iErrorLevel);
389 echo "Error: $iErrorLevel\n";
// Log entry for the index step, same columns as the import entry above.
393 $sSQL = 'INSERT INTO import_osmosis_log';
394 $sSQL .= '(batchend, batchseq, batchsize, starttime, endtime, event)';
395 $sSQL .= " values ('$sBatchEnd',$iEndSequence,$iFileSize,'";
396 $sSQL .= date('Y-m-d H:i:s', $fCMDStartTime)."','";
397 $sSQL .= date('Y-m-d H:i:s')."','index')";
400 echo date('Y-m-d H:i:s')." Completed index step for $sBatchEnd in ".round((time()-$fCMDStartTime)/60, 2)." minutes\n";
// Statement that flags the imported data as indexed.
402 $sSQL = 'update import_status set indexed = true';
// Report the total duration; a one-off import (--import-osmosis) ends here.
406 $fDuration = time() - $fStartTime;
407 echo date('Y-m-d H:i:s')." Completed all for $sBatchEnd in ".round($fDuration/60, 2)." minutes\n";
408 if (!$aResult['import-osmosis-all']) exit(0);