]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
more tolerant regexp for parsing replication state directories
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
22                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
23                 array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
24                 array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
25                 array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
26                 array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
27                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
28                 array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
29                 array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
30                 array('no-partitions', '', 0, 1, 0, 0, 'bool', "Do not partition search indices (speeds up import of single country extracts)"),
31                 array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
32                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
33                 array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
34                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
35                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
36                 array('create-roads', '', 0, 1, 0, 0, 'bool', ''),
37                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
38                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
39                 array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
40                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
41                 array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
42                 array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
43         );
44         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
45
46         $bDidSomething = false;
47
48         // Check if osm-file is set and points to a valid file if --all or --import-data is given
49         if ($aCMDResult['import-data'] || $aCMDResult['all'])
50         {
51                 if (!isset($aCMDResult['osm-file']))
52                 {
53                         fail('missing --osm-file for data import');
54                 }
55
56                 if (!file_exists($aCMDResult['osm-file']))
57                 {
58                         fail('the path supplied to --osm-file does not exist');
59                 }
60
61                 if (!is_readable($aCMDResult['osm-file']))
62                 {
63                         fail('osm-file "'.$aCMDResult['osm-file'].'" not readable');
64                 }
65         }
66
67
68         // This is a pretty hard core default - the number of processors in the box - 1
69         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
70         if ($iInstances < 1)
71         {
72                 $iInstances = 1;
73                 echo "WARNING: resetting threads to $iInstances\n";
74         }
75         if ($iInstances > getProcessorCount())
76         {
77                 $iInstances = getProcessorCount();
78                 echo "WARNING: resetting threads to $iInstances\n";
79         }
80
81         // Assume we can steal all the cache memory in the box (unless told otherwise)
82         $iCacheMemory = (isset($aCMDResult['osm2pgsql-cache'])?$aCMDResult['osm2pgsql-cache']:getCacheMemoryMB());
83         if ($iCacheMemory > getTotalMemoryMB())
84         {
85                 $iCacheMemory = getCacheMemoryMB();
86                 echo "WARNING: resetting cache memory to $iCacheMemory\n";
87         }
88
89         $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
90         if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
91
92         $fPostgisVersion = (float) CONST_Postgis_Version;
93
94         if ($aCMDResult['create-db'] || $aCMDResult['all'])
95         {
96                 echo "Create DB\n";
97                 $bDidSomething = true;
98                 $oDB =& DB::connect(CONST_Database_DSN, false);
99                 if (!PEAR::isError($oDB))
100                 {
101                         fail('database already exists ('.CONST_Database_DSN.')');
102                 }
103                 passthruCheckReturn('createdb -E UTF-8 -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']);
104         }
105
106         if ($aCMDResult['setup-db'] || $aCMDResult['all'])
107         {
108                 echo "Setup DB\n";
109                 $bDidSomething = true;
110                 // TODO: path detection, detection memory, etc.
111
112                 $oDB =& getDB();
113
114                 $sVersionString = $oDB->getOne('select version()');
115                 preg_match('#PostgreSQL ([0-9]+)[.]([0-9]+)[^0-9]#', $sVersionString, $aMatches);
116                 if (CONST_Postgresql_Version != $aMatches[1].'.'.$aMatches[2])
117                 {
118                         echo "ERROR: PostgreSQL version is not correct.  Expected ".CONST_Postgresql_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
119                         exit;
120                 }
121
122                 passthru('createlang plpgsql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']);
123                 $pgver = (float) CONST_Postgresql_Version;
124                 if ($pgver < 9.1) {
125                         pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
126                         pgsqlRunScriptFile(CONST_BasePath.'/sql/hstore_compatability_9_0.sql');
127                 } else {
128                         pgsqlRunScript('CREATE EXTENSION hstore');
129                 }
130
131                 if ($fPostgisVersion < 2.0) {
132                         pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
133                         pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
134                 } else {
135                         pgsqlRunScript('CREATE EXTENSION postgis');
136                 }
137                 if ($fPostgisVersion < 2.1) {
138                         // Function was renamed in 2.1 and throws an annoying deprecation warning
139                         pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
140                 }
141                 $sVersionString = $oDB->getOne('select postgis_full_version()');
142                 preg_match('#POSTGIS="([0-9]+)[.]([0-9]+)[.]([0-9]+)( r([0-9]+))?"#', $sVersionString, $aMatches);
143                 if (CONST_Postgis_Version != $aMatches[1].'.'.$aMatches[2])
144                 {
145                         echo "ERROR: PostGIS version is not correct.  Expected ".CONST_Postgis_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
146                         exit;
147                 }
148
149                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
150                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
151                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
152                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
153                 if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz'))
154                 {
155                         pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
156                 }
157                 else
158                 {
159                         echo "WARNING: external UK postcode table not found.\n";
160                 }
161                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
162                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
163                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
164
165                 if ($aCMDResult['no-partitions'])
166                 {
167                         pgsqlRunScript('update country_name set partition = 0');
168                 }
169
170                 // the following will be needed by create_functions later but
171                 // is only defined in the subsequently called create_tables.
172                 // Create dummies here that will be overwritten by the proper
173                 // versions in create-tables.
174                 pgsqlRunScript('CREATE TABLE place_boundingbox ()');
175                 pgsqlRunScript('create type wikipedia_article_match as ()');
176         }
177
178         if ($aCMDResult['import-data'] || $aCMDResult['all'])
179         {
180                 echo "Import\n";
181                 $bDidSomething = true;
182
183                 $osm2pgsql = CONST_Osm2pgsql_Binary;
184                 if (!file_exists($osm2pgsql))
185                 {
186                         echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
187                         fail("osm2pgsql not found in '$osm2pgsql'");
188                 }
189
190                 if (!is_null(CONST_Osm2pgsql_Flatnode_File))
191                 {
192                         $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
193                 }
194                 $osm2pgsql .= ' -lsc -O gazetteer --hstore';
195                 $osm2pgsql .= ' -C '.$iCacheMemory;
196                 $osm2pgsql .= ' -P '.$aDSNInfo['port'];
197                 $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
198                 passthruCheckReturn($osm2pgsql);
199
200                 $oDB =& getDB();
201                 $x = $oDB->getRow('select * from place limit 1');
202                 if (PEAR::isError($x)) {
203                         fail($x->getMessage());
204                 }
205                 if (!$x) fail('No Data');
206         }
207
208         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
209         {
210                 echo "Functions\n";
211                 $bDidSomething = true;
212                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
213                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
214                 $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sTemplate);
215                 if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate);
216                 if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
217                 if (CONST_Limit_Reindexing) $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
218                 pgsqlRunScript($sTemplate);
219                 if ($fPostgisVersion < 2.0) {
220                         echo "Helper functions for postgis < 2.0\n";
221                         $sTemplate = file_get_contents(CONST_BasePath.'/sql/postgis_15_aux.sql');
222                 } else {
223                         echo "Helper functions for postgis >= 2.0\n";
224                         $sTemplate = file_get_contents(CONST_BasePath.'/sql/postgis_20_aux.sql');
225                 }
226                 pgsqlRunScript($sTemplate);
227         }
228
229         if ($aCMDResult['create-minimal-tables'])
230         {
231                 echo "Minimal Tables\n";
232                 $bDidSomething = true;
233                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');
234
235                 $sScript = '';
236
237                 // Backstop the import process - easliest possible import id
238                 $sScript .= "insert into import_npi_log values (18022);\n";
239
240                 $hFile = @fopen(CONST_BasePath.'/settings/partitionedtags.def', "r");
241                 if (!$hFile) fail('unable to open list of partitions: '.CONST_BasePath.'/settings/partitionedtags.def');
242
243                 while (($sLine = fgets($hFile, 4096)) !== false && $sLine && substr($sLine,0,1) !='#')
244                 {
245                         list($sClass, $sType) = explode(' ', trim($sLine));
246                         $sScript .= "create table place_classtype_".$sClass."_".$sType." as ";
247                         $sScript .= "select place_id as place_id,geometry as centroid from placex limit 0;\n";
248
249                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_centroid ";
250                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING GIST (centroid);\n";
251
252                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_place_id ";
253                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING btree(place_id);\n";
254                 }
255                 fclose($hFile);
256                 pgsqlRunScript($sScript);
257         }
258
259         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
260         {
261                 echo "Tables\n";
262                 $bDidSomething = true;
263                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
264
265                 // re-run the functions
266                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
267                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
268                 pgsqlRunScript($sTemplate);
269         }
270
271         if ($aCMDResult['create-partition-tables'] || $aCMDResult['all'])
272         {
273                 echo "Partition Tables\n";
274                 $bDidSomething = true;
275                 $oDB =& getDB();
276                 $sSQL = 'select distinct partition from country_name';
277                 $aPartitions = $oDB->getCol($sSQL);
278                 if (PEAR::isError($aPartitions))
279                 {
280                         fail($aPartitions->getMessage());
281                 }
282                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
283
284                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');
285                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
286                 foreach($aMatches as $aMatch)
287                 {
288                         $sResult = '';
289                         foreach($aPartitions as $sPartitionName)
290                         {
291                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
292                         }
293                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
294                 }
295
296                 pgsqlRunScript($sTemplate);
297         }
298
299
300         if ($aCMDResult['create-partition-functions'] || $aCMDResult['all'])
301         {
302                 echo "Partition Functions\n";
303                 $bDidSomething = true;
304                 $oDB =& getDB();
305                 $sSQL = 'select distinct partition from country_name';
306                 $aPartitions = $oDB->getCol($sSQL);
307                 if (PEAR::isError($aPartitions))
308                 {
309                         fail($aPartitions->getMessage());
310                 }
311                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
312
313                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql');
314                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
315                 foreach($aMatches as $aMatch)
316                 {
317                         $sResult = '';
318                         foreach($aPartitions as $sPartitionName)
319                         {
320                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
321                         }
322                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
323                 }
324
325                 pgsqlRunScript($sTemplate);
326         }
327
328         if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
329         {
330                 $bDidSomething = true;
331                 $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
332                 $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';
333                 if (file_exists($sWikiArticlesFile))
334                 {
335                         echo "Importing wikipedia articles...";
336                         pgsqlRunDropAndRestore($sWikiArticlesFile);
337                         echo "...done\n";
338                 }
339                 else
340                 {
341                         echo "WARNING: wikipedia article dump file not found - places will have default importance\n";
342                 }
343                 if (file_exists($sWikiRedirectsFile))
344                 {
345                         echo "Importing wikipedia redirects...";
346                         pgsqlRunDropAndRestore($sWikiRedirectsFile);
347                         echo "...done\n";
348                 }
349                 else
350                 {
351                         echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n";
352                 }
353         }
354
355
356         if ($aCMDResult['load-data'] || $aCMDResult['all'])
357         {
358                 echo "Drop old Data\n";
359                 $bDidSomething = true;
360
361                 $oDB =& getDB();
362                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
363                 echo '.';
364                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
365                 echo '.';
366                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
367                 echo '.';
368                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
369                 echo '.';
370                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
371                 echo '.';
372                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
373                 echo '.';
374                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
375                 echo '.';
376                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
377                 echo '.';
378                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
379                 echo '.';
380
381                 $sSQL = 'select distinct partition from country_name';
382                 $aPartitions = $oDB->getCol($sSQL);
383                 if (PEAR::isError($aPartitions))
384                 {
385                         fail($aPartitions->getMessage());
386                 }
387                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
388                 foreach($aPartitions as $sPartition)
389                 {
390                         if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) fail(pg_last_error($oDB->connection));
391                         echo '.';
392                 }
393
394                 // used by getorcreate_word_id to ignore frequent partial words
395                 if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) fail(pg_last_error($oDB->connection));
396                 echo ".\n";
397
398                 // pre-create the word list
399                 if (!$aCMDResult['disable-token-precalc'])
400                 {
401                         echo "Loading word list\n";
402                         pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
403                 }
404
405                 echo "Load Data\n";
406                 $aDBInstances = array();
407                 for($i = 0; $i < $iInstances; $i++)
408                 {
409                         $aDBInstances[$i] =& getDB(true);
410                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
411                         $sSQL .= 'housenumber, street, addr_place, isin, postcode, country_code, extratags, ';
412                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
413                         if ($aCMDResult['verbose']) echo "$sSQL\n";
414                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
415                 }
416                 $bAnyBusy = true;
417                 while($bAnyBusy)
418                 {
419                         $bAnyBusy = false;
420                         for($i = 0; $i < $iInstances; $i++)
421                         {
422                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
423                         }
424                         sleep(1);
425                         echo '.';
426                 }
427                 echo "\n";
428                 echo "Reanalysing database...\n";
429                 pgsqlRunScript('ANALYSE');
430         }
431
432         if ($aCMDResult['create-roads'])
433         {
434                 $bDidSomething = true;
435
436                 $oDB =& getDB();
437                 $aDBInstances = array();
438                 for($i = 0; $i < $iInstances; $i++)
439                 {
440                         $aDBInstances[$i] =& getDB(true);
441                         if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
442                         $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, calculated_country_code, geometry) from ';
443                         $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
444                         if ($aCMDResult['verbose']) echo "$sSQL\n";
445                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
446                 }
447                 $bAnyBusy = true;
448                 while($bAnyBusy)
449                 {
450                         $bAnyBusy = false;
451                         for($i = 0; $i < $iInstances; $i++)
452                         {
453                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
454                         }
455                         sleep(1);
456                         echo '.';
457                 }
458                 echo "\n";
459         }
460
461         if ($aCMDResult['import-tiger-data'])
462         {
463                 $bDidSomething = true;
464
465                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_start.sql');
466
467                 $aDBInstances = array();
468                 for($i = 0; $i < $iInstances; $i++)
469                 {
470                         $aDBInstances[$i] =& getDB(true);
471                 }
472
473                 foreach(glob(CONST_BasePath.'/data/tiger2011/*.sql') as $sFile)
474                 {
475                         echo $sFile.': ';
476                         $hFile = fopen($sFile, "r");
477                         $sSQL = fgets($hFile, 100000);
478                         $iLines = 0;
479
480                         while(true)
481                         {
482                                 for($i = 0; $i < $iInstances; $i++)
483                                 {
484                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
485                                         {
486                                                 while(pg_get_result($aDBInstances[$i]->connection));
487                                                 $sSQL = fgets($hFile, 100000);
488                                                 if (!$sSQL) break 2;
489                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
490                                                 $iLines++;
491                                                 if ($iLines == 1000)
492                                                 {
493                                                         echo ".";
494                                                         $iLines = 0;
495                                                 }
496                                         }
497                                 }
498                                 usleep(10);
499                         }
500
501                         fclose($hFile);
502
503                         $bAnyBusy = true;
504                         while($bAnyBusy)
505                         {
506                                 $bAnyBusy = false;
507                                 for($i = 0; $i < $iInstances; $i++)
508                                 {
509                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
510                                 }
511                                 usleep(10);
512                         }
513                         echo "\n";
514                 }
515
516                 echo "Creating indexes\n";
517                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_finish.sql');
518         }
519
520         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
521         {
522                 $bDidSomething = true;
523                 $oDB =& getDB();
524                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
525                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
526                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
527                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
528                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
529                 $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
530                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
531
532                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
533                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
534                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
535                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
536         }
537
538         if ($aCMDResult['osmosis-init'] || $aCMDResult['all'])
539         {
540                 $bDidSomething = true;
541                 $oDB =& getDB();
542
543                 if (!file_exists(CONST_Osmosis_Binary))
544                 {
545                         echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
546                         if (!$aCMDResult['all'])
547                         {
548                                 fail("osmosis not found in '".CONST_Osmosis_Binary."'");
549                         }
550                 }
551                 else
552                 {
553                         if (file_exists(CONST_BasePath.'/settings/configuration.txt'))
554                         {
555                                 echo "settings/configuration.txt already exists\n";
556                         }
557                         else
558                         {
559                                 passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
560                                 // update osmosis configuration.txt with our settings
561                                 passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_BasePath.'/settings/configuration.txt');
562                                 passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_BasePath.'/settings/configuration.txt');
563                         }
564
565                         // Find the last node in the DB
566                         $iLastOSMID = $oDB->getOne("select max(id) from planet_osm_nodes");
567
568                         // Lookup the timestamp that node was created (less 3 hours for margin for changsets to be closed)
569                         $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1";
570                         $sLastNodeXML = file_get_contents($sLastNodeURL);
571                         preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate);
572                         $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60);
573
574                         // Search for the correct state file - uses file timestamps so need to sort by date descending
575                         $sRepURL = CONST_Replication_Url."/";
576                         $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
577                         // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
578                         // planet.openstreetmap.org: <a href="273/">273/</a>                    2013-03-11 07:41    -
579                         preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
580                         var_dump($aRepMatches);
581                         if ($aRepMatches)
582                         {
583                                 $aPrevRepMatch = false;
584                                 foreach($aRepMatches as $aRepMatch)
585                                 {
586                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
587                                         $aPrevRepMatch = $aRepMatch;
588                                 }
589                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
590
591                                 $sRepURL .= $aRepMatch[1];
592                                 $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
593                                 preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
594                                 var_dump($aRepMatches);
595                                 $aPrevRepMatch = false;
596                                 foreach($aRepMatches as $aRepMatch)
597                                 {
598                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
599                                         $aPrevRepMatch = $aRepMatch;
600                                 }
601                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
602
603                                 $sRepURL .= $aRepMatch[1];
604                                 $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
605                                 preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
606                                 var_dump($aRepMatches);
607                                 $aPrevRepMatch = false;
608                                 foreach($aRepMatches as $aRepMatch)
609                                 {
610                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
611                                         $aPrevRepMatch = $aRepMatch;
612                                 }
613                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
614
615                                 $sRepURL .= $aRepMatch[1].'.state.txt';
616                                 echo "Getting state file: $sRepURL\n";
617                                 $sStateFile = file_get_contents($sRepURL);
618                                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
619                                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
620                                 echo "Updating DB status\n";
621                                 pg_query($oDB->connection, 'TRUNCATE import_status');
622                                 $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')";
623                                 pg_query($oDB->connection, $sSQL);
624                         }
625                         else
626                         {
627                                 if (!$aCMDResult['all'])
628                                 {
629                                         fail("Cannot read state file directory.");
630                                 }
631                         }
632                 }
633         }
634
635         if ($aCMDResult['index'] || $aCMDResult['all'])
636         {
637                 $bDidSomething = true;
638                 $sOutputFile = '';
639                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
640                 $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$iInstances.$sOutputFile;
641                 passthruCheckReturn($sBaseCmd.' -R 4');
642                 if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
643                 passthruCheckReturn($sBaseCmd.' -r 5 -R 25');
644                 if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
645                 passthruCheckReturn($sBaseCmd.' -r 26');
646         }
647
648         if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
649         {
650                 echo "Search indices\n";
651                 $bDidSomething = true;
652                 $oDB =& getDB();
653                 $sSQL = 'select distinct partition from country_name';
654                 $aPartitions = $oDB->getCol($sSQL);
655                 if (PEAR::isError($aPartitions))
656                 {
657                         fail($aPartitions->getMessage());
658                 }
659                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
660
661                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');
662                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
663                 foreach($aMatches as $aMatch)
664                 {
665                         $sResult = '';
666                         foreach($aPartitions as $sPartitionName)
667                         {
668                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
669                         }
670                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
671                 }
672
673                 pgsqlRunScript($sTemplate);
674         }
675
676         if (isset($aCMDResult['create-website']))
677         {
678                 $bDidSomething = true;
679                 $sTargetDir = $aCMDResult['create-website'];
680                 if (!is_dir($sTargetDir))
681                 {
682                         echo "You must create the website directory before calling this function.\n";
683                         fail("Target directory does not exist.");
684                 }
685
686                 @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php');
687                 @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php');
688                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php');
689                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/index.php');
690                 @symlink(CONST_BasePath.'/website/deletable.php', $sTargetDir.'/deletable.php');
691                 @symlink(CONST_BasePath.'/website/polygons.php', $sTargetDir.'/polygons.php');
692                 @symlink(CONST_BasePath.'/website/status.php', $sTargetDir.'/status.php');
693                 @symlink(CONST_BasePath.'/website/images', $sTargetDir.'/images');
694                 @symlink(CONST_BasePath.'/website/js', $sTargetDir.'/js');
695                 @symlink(CONST_BasePath.'/website/css', $sTargetDir.'/css');
696                 echo "Symlinks created\n";
697
698                 $sTestFile = @file_get_contents(CONST_Website_BaseURL.'js/tiles.js');
699                 if (!$sTestFile)
700                 {
701                         echo "\nWARNING: Unable to access the website at ".CONST_Website_BaseURL."\n";
702                         echo "You may want to update settings/local.php with @define('CONST_Website_BaseURL', 'http://[HOST]/[PATH]/');\n";
703                 }
704         }
705
706         if (!$bDidSomething)
707         {
708                 showUsage($aCMDOptions, true);
709         }
710         else
711         {
712                 echo "Setup finished.\n";
713         }
714
715         function pgsqlRunScriptFile($sFilename)
716         {
717                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
718
719                 // Convert database DSN to psql parameters
720                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
721                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
722                 $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
723
724                 $ahGzipPipes = null;
725                 if (preg_match('/\\.gz$/', $sFilename))
726                 {
727                         $aDescriptors = array(
728                                 0 => array('pipe', 'r'),
729                                 1 => array('pipe', 'w'),
730                                 2 => array('file', '/dev/null', 'a')
731                         );
732                         $hGzipProcess = proc_open('zcat '.$sFilename, $aDescriptors, $ahGzipPipes);
733                         if (!is_resource($hGzipProcess)) fail('unable to start zcat');
734                         $aReadPipe = $ahGzipPipes[1];
735                         fclose($ahGzipPipes[0]);
736                 }
737                 else
738                 {
739                         $sCMD .= ' -f '.$sFilename;
740                         $aReadPipe = array('pipe', 'r');
741                 }
742
743                 $aDescriptors = array(
744                         0 => $aReadPipe,
745                         1 => array('pipe', 'w'),
746                         2 => array('file', '/dev/null', 'a')
747                 );
748                 $ahPipes = null;
749                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
750                 if (!is_resource($hProcess)) fail('unable to start pgsql');
751
752
753                 // TODO: error checking
754                 while(!feof($ahPipes[1]))
755                 {
756                         echo fread($ahPipes[1], 4096);
757                 }
758                 fclose($ahPipes[1]);
759
760                 $iReturn = proc_close($hProcess);
761                 if ($iReturn > 0)
762                 {
763                         fail("pgsql returned with error code ($iReturn)");
764                 }
765                 if ($ahGzipPipes)
766                 {
767                         fclose($ahGzipPipes[1]);
768                         proc_close($hGzipProcess);
769                 }
770
771         }
772
773         function pgsqlRunScript($sScript)
774         {
775                 global $aCMDResult;
776                 // Convert database DSN to psql parameters
777                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
778                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
779                 $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
780                 if (!$aCMDResult['ignore-errors'])
781                         $sCMD .= ' -v ON_ERROR_STOP=1';
782                 $aDescriptors = array(
783                         0 => array('pipe', 'r'),
784                         1 => STDOUT, 
785                         2 => STDERR
786                 );
787                 $ahPipes = null;
788                 $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
789                 if (!is_resource($hProcess)) fail('unable to start pgsql');
790
791                 while(strlen($sScript))
792                 {
793                         $written = fwrite($ahPipes[0], $sScript);
794                         if ($written <= 0) break;
795                         $sScript = substr($sScript, $written);
796                 }
797                 fclose($ahPipes[0]);
798                 $iReturn = proc_close($hProcess);
799                 if ($iReturn > 0)
800                 {
801                         fail("pgsql returned with error code ($iReturn)");
802                 }
803         }
804
805         function pgsqlRunRestoreData($sDumpFile)
806         {
807                 // Convert database DSN to psql parameters
808                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
809                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
810                 $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;
811
812                 $aDescriptors = array(
813                         0 => array('pipe', 'r'),
814                         1 => array('pipe', 'w'),
815                         2 => array('file', '/dev/null', 'a')
816                 );
817                 $ahPipes = null;
818                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
819                 if (!is_resource($hProcess)) fail('unable to start pg_restore');
820
821                 fclose($ahPipes[0]);
822
823                 // TODO: error checking
824                 while(!feof($ahPipes[1]))
825                 {
826                         echo fread($ahPipes[1], 4096);
827                 }
828                 fclose($ahPipes[1]);
829
830                 $iReturn = proc_close($hProcess);
831         }
832
833         function pgsqlRunDropAndRestore($sDumpFile)
834         {
835                 // Convert database DSN to psql parameters
836                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
837                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
838                 $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc --clean '.$sDumpFile;
839
840                 $aDescriptors = array(
841                         0 => array('pipe', 'r'),
842                         1 => array('pipe', 'w'),
843                         2 => array('file', '/dev/null', 'a')
844                 );
845                 $ahPipes = null;
846                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
847                 if (!is_resource($hProcess)) fail('unable to start pg_restore');
848
849                 fclose($ahPipes[0]);
850
851                 // TODO: error checking
852                 while(!feof($ahPipes[1]))
853                 {
854                         echo fread($ahPipes[1], 4096);
855                 }
856                 fclose($ahPipes[1]);
857
858                 $iReturn = proc_close($hProcess);
859         }
860
861         function passthruCheckReturn($cmd)
862         {
863                 $result = -1;
864                 passthru($cmd, $result);
865                 if ($result != 0) fail('Error executing external command: '.$cmd);
866         }