]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
second argument of array_merge can be empty
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
22                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
23                 array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
24                 array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
25                 array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
26                 array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
27                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
28                 array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
29                 array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
30                 array('no-partitions', '', 0, 1, 0, 0, 'bool', "Do not partition search indices (speeds up import of single country extracts)"),
31                 array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
32                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
33                 array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
34                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
35                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
36                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
37                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
38                 array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
39                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
40                 array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
41                 array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
42                 array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
43         );
44         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
45
46         $bDidSomething = false;
47
48         // Check if osm-file is set and points to a valid file if --all or --import-data is given
49         if ($aCMDResult['import-data'] || $aCMDResult['all'])
50         {
51                 if (!isset($aCMDResult['osm-file']))
52                 {
53                         fail('missing --osm-file for data import');
54                 }
55
56                 if (!file_exists($aCMDResult['osm-file']))
57                 {
58                         fail('the path supplied to --osm-file does not exist');
59                 }
60
61                 if (!is_readable($aCMDResult['osm-file']))
62                 {
63                         fail('osm-file "'.$aCMDResult['osm-file'].'" not readable');
64                 }
65         }
66
67
68         // This is a pretty hard core default - the number of processors in the box - 1
69         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
70         if ($iInstances < 1)
71         {
72                 $iInstances = 1;
73                 echo "WARNING: resetting threads to $iInstances\n";
74         }
75         if ($iInstances > getProcessorCount())
76         {
77                 $iInstances = getProcessorCount();
78                 echo "WARNING: resetting threads to $iInstances\n";
79         }
80
81         // Assume we can steal all the cache memory in the box (unless told otherwise)
82         if (isset($aCMDResult['osm2pgsql-cache']))
83         {
84                 $iCacheMemory = $aCMDResult['osm2pgsql-cache'];
85         }
86         else
87         {
88                 $iCacheMemory = getCacheMemoryMB();
89         }
90
91         $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
92         if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
93
94         $fPostgisVersion = (float) CONST_Postgis_Version;
95
96         if ($aCMDResult['create-db'] || $aCMDResult['all'])
97         {
98                 echo "Create DB\n";
99                 $bDidSomething = true;
100                 $oDB =& DB::connect(CONST_Database_DSN, false);
101                 if (!PEAR::isError($oDB))
102                 {
103                         fail('database already exists ('.CONST_Database_DSN.')');
104                 }
105                 passthruCheckReturn('createdb -E UTF-8 -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']);
106         }
107
108         if ($aCMDResult['setup-db'] || $aCMDResult['all'])
109         {
110                 echo "Setup DB\n";
111                 $bDidSomething = true;
112                 // TODO: path detection, detection memory, etc.
113
114                 $oDB =& getDB();
115
116                 $sVersionString = $oDB->getOne('select version()');
117                 preg_match('#PostgreSQL ([0-9]+)[.]([0-9]+)[^0-9]#', $sVersionString, $aMatches);
118                 if (CONST_Postgresql_Version != $aMatches[1].'.'.$aMatches[2])
119                 {
120                         echo "ERROR: PostgreSQL version is not correct.  Expected ".CONST_Postgresql_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
121                         exit;
122                 }
123
124                 passthru('createlang plpgsql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']);
125                 $pgver = (float) CONST_Postgresql_Version;
126                 if ($pgver < 9.1) {
127                         pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
128                         pgsqlRunScriptFile(CONST_BasePath.'/sql/hstore_compatability_9_0.sql');
129                 } else {
130                         pgsqlRunScript('CREATE EXTENSION hstore');
131                 }
132
133                 if ($fPostgisVersion < 2.0) {
134                         pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
135                         pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
136                 } else {
137                         pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');
138                 }
139                 if ($fPostgisVersion < 2.1) {
140                         // Function was renamed in 2.1 and throws an annoying deprecation warning
141                         pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
142                 }
143                 $sVersionString = $oDB->getOne('select postgis_full_version()');
144                 preg_match('#POSTGIS="([0-9]+)[.]([0-9]+)[.]([0-9]+)( r([0-9]+))?"#', $sVersionString, $aMatches);
145                 if (CONST_Postgis_Version != $aMatches[1].'.'.$aMatches[2])
146                 {
147                         echo "ERROR: PostGIS version is not correct.  Expected ".CONST_Postgis_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
148                         exit;
149                 }
150
151                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
152                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
153                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
154                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
155                 if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz'))
156                 {
157                         pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
158                 }
159                 else
160                 {
161                         echo "WARNING: external UK postcode table not found.\n";
162                 }
163                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
164                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
165                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
166
167                 if ($aCMDResult['no-partitions'])
168                 {
169                         pgsqlRunScript('update country_name set partition = 0');
170                 }
171
172                 // the following will be needed by create_functions later but
173                 // is only defined in the subsequently called create_tables.
174                 // Create dummies here that will be overwritten by the proper
175                 // versions in create-tables.
176                 pgsqlRunScript('CREATE TABLE place_boundingbox ()');
177                 pgsqlRunScript('create type wikipedia_article_match as ()');
178         }
179
180         if ($aCMDResult['import-data'] || $aCMDResult['all'])
181         {
182                 echo "Import\n";
183                 $bDidSomething = true;
184
185                 $osm2pgsql = CONST_Osm2pgsql_Binary;
186                 if (!file_exists($osm2pgsql))
187                 {
188                         echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
189                         fail("osm2pgsql not found in '$osm2pgsql'");
190                 }
191
192                 if (!is_null(CONST_Osm2pgsql_Flatnode_File))
193                 {
194                         $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
195                 }
196                 if (CONST_Tablespace_Osm2pgsql_Data)
197                         $osm2pgsql .= ' --tablespace-slim-data '.CONST_Tablespace_Osm2pgsql_Data;
198                 if (CONST_Tablespace_Osm2pgsql_Index)
199                         $osm2pgsql .= ' --tablespace-slim-index '.CONST_Tablespace_Osm2pgsql_Index;
200                 if (CONST_Tablespace_Place_Data)
201                         $osm2pgsql .= ' --tablespace-main-data '.CONST_Tablespace_Place_Data;
202                 if (CONST_Tablespace_Place_Index)
203                         $osm2pgsql .= ' --tablespace-main-index '.CONST_Tablespace_Place_Index;
204                 $osm2pgsql .= ' -lsc -O gazetteer --hstore --number-processes 1';
205                 $osm2pgsql .= ' -C 25000';
206                 $osm2pgsql .= ' -P '.$aDSNInfo['port'];
207                 $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
208                 passthruCheckReturn($osm2pgsql);
209
210                 $oDB =& getDB();
211                 $x = $oDB->getRow('select * from place limit 1');
212                 if (PEAR::isError($x)) {
213                         fail($x->getMessage());
214                 }
215                 if (!$x) fail('No Data');
216         }
217
218         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
219         {
220                 echo "Functions\n";
221                 $bDidSomething = true;
222                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
223                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
224                 $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sTemplate);
225                 if ($aCMDResult['enable-diff-updates']) $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate);
226                 if ($aCMDResult['enable-debug-statements']) $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
227                 if (CONST_Limit_Reindexing) $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
228                 pgsqlRunScript($sTemplate);
229
230                 if ($fPostgisVersion < 2.0) {
231                         echo "Helper functions for postgis < 2.0\n";
232                         $sTemplate = file_get_contents(CONST_BasePath.'/sql/postgis_15_aux.sql');
233                 } else {
234                         echo "Helper functions for postgis >= 2.0\n";
235                         $sTemplate = file_get_contents(CONST_BasePath.'/sql/postgis_20_aux.sql');
236                 }
237                 pgsqlRunScript($sTemplate);
238         }
239
240         if ($aCMDResult['create-minimal-tables'])
241         {
242                 echo "Minimal Tables\n";
243                 $bDidSomething = true;
244                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');
245
246                 $sScript = '';
247
248                 // Backstop the import process - easliest possible import id
249                 $sScript .= "insert into import_npi_log values (18022);\n";
250
251                 $hFile = @fopen(CONST_BasePath.'/settings/partitionedtags.def', "r");
252                 if (!$hFile) fail('unable to open list of partitions: '.CONST_BasePath.'/settings/partitionedtags.def');
253
254                 while (($sLine = fgets($hFile, 4096)) !== false && $sLine && substr($sLine,0,1) !='#')
255                 {
256                         list($sClass, $sType) = explode(' ', trim($sLine));
257                         $sScript .= "create table place_classtype_".$sClass."_".$sType." as ";
258                         $sScript .= "select place_id as place_id,geometry as centroid from placex limit 0;\n";
259
260                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_centroid ";
261                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING GIST (centroid);\n";
262
263                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_place_id ";
264                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING btree(place_id);\n";
265                 }
266                 fclose($hFile);
267                 pgsqlRunScript($sScript);
268         }
269
270         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
271         {
272                 $bDidSomething = true;
273
274                 echo "Tables\n";
275                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/tables.sql');
276                 $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
277                 $sTemplate = replace_tablespace('{ts:address-data}',
278                                                 CONST_Tablespace_Address_Data, $sTemplate);
279                 $sTemplate = replace_tablespace('{ts:address-index}',
280                                                 CONST_Tablespace_Address_Index, $sTemplate);
281                 $sTemplate = replace_tablespace('{ts:search-data}',
282                                                 CONST_Tablespace_Search_Data, $sTemplate);
283                 $sTemplate = replace_tablespace('{ts:search-index}',
284                                                 CONST_Tablespace_Search_Index, $sTemplate);
285                 $sTemplate = replace_tablespace('{ts:aux-data}',
286                                                 CONST_Tablespace_Aux_Data, $sTemplate);
287                 $sTemplate = replace_tablespace('{ts:aux-index}',
288                                                 CONST_Tablespace_Aux_Index, $sTemplate);
289                 pgsqlRunScript($sTemplate, false);
290
291                 // re-run the functions
292                 echo "Functions\n";
293                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
294                 $sTemplate = str_replace('{modulepath}',
295                                              CONST_BasePath.'/module', $sTemplate);
296                 pgsqlRunScript($sTemplate);
297         }
298
299         if ($aCMDResult['create-partition-tables'] || $aCMDResult['all'])
300         {
301                 echo "Partition Tables\n";
302                 $bDidSomething = true;
303                 $oDB =& getDB();
304                 $sSQL = 'select distinct partition from country_name';
305                 $aPartitions = $oDB->getCol($sSQL);
306                 if (PEAR::isError($aPartitions))
307                 {
308                         fail($aPartitions->getMessage());
309                 }
310                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
311
312                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');
313                 $sTemplate = replace_tablespace('{ts:address-data}',
314                                                 CONST_Tablespace_Address_Data, $sTemplate);
315                 $sTemplate = replace_tablespace('{ts:address-index}',
316                                                 CONST_Tablespace_Address_Index, $sTemplate);
317                 $sTemplate = replace_tablespace('{ts:search-data}',
318                                                 CONST_Tablespace_Search_Data, $sTemplate);
319                 $sTemplate = replace_tablespace('{ts:search-index}',
320                                                 CONST_Tablespace_Search_Index, $sTemplate);
321                 $sTemplate = replace_tablespace('{ts:aux-data}',
322                                                 CONST_Tablespace_Aux_Data, $sTemplate);
323                 $sTemplate = replace_tablespace('{ts:aux-index}',
324                                                 CONST_Tablespace_Aux_Index, $sTemplate);
325                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
326                 foreach($aMatches as $aMatch)
327                 {
328                         $sResult = '';
329                         foreach($aPartitions as $sPartitionName)
330                         {
331                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
332                         }
333                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
334                 }
335
336                 pgsqlRunScript($sTemplate);
337         }
338
339
340         if ($aCMDResult['create-partition-functions'] || $aCMDResult['all'])
341         {
342                 echo "Partition Functions\n";
343                 $bDidSomething = true;
344                 $oDB =& getDB();
345                 $sSQL = 'select distinct partition from country_name';
346                 $aPartitions = $oDB->getCol($sSQL);
347                 if (PEAR::isError($aPartitions))
348                 {
349                         fail($aPartitions->getMessage());
350                 }
351                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
352
353                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql');
354                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
355                 foreach($aMatches as $aMatch)
356                 {
357                         $sResult = '';
358                         foreach($aPartitions as $sPartitionName)
359                         {
360                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
361                         }
362                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
363                 }
364
365                 pgsqlRunScript($sTemplate);
366         }
367
368         if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all'])
369         {
370                 $bDidSomething = true;
371                 $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
372                 $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';
373                 if (file_exists($sWikiArticlesFile))
374                 {
375                         echo "Importing wikipedia articles...";
376                         pgsqlRunDropAndRestore($sWikiArticlesFile);
377                         echo "...done\n";
378                 }
379                 else
380                 {
381                         echo "WARNING: wikipedia article dump file not found - places will have default importance\n";
382                 }
383                 if (file_exists($sWikiRedirectsFile))
384                 {
385                         echo "Importing wikipedia redirects...";
386                         pgsqlRunDropAndRestore($sWikiRedirectsFile);
387                         echo "...done\n";
388                 }
389                 else
390                 {
391                         echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n";
392                 }
393         }
394
395
396         if ($aCMDResult['load-data'] || $aCMDResult['all'])
397         {
398                 echo "Drop old Data\n";
399                 $bDidSomething = true;
400
401                 $oDB =& getDB();
402                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
403                 echo '.';
404                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
405                 echo '.';
406                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
407                 echo '.';
408                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
409                 echo '.';
410                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
411                 echo '.';
412                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
413                 echo '.';
414                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
415                 echo '.';
416                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
417                 echo '.';
418                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
419                 echo '.';
420
421                 $sSQL = 'select distinct partition from country_name';
422                 $aPartitions = $oDB->getCol($sSQL);
423                 if (PEAR::isError($aPartitions))
424                 {
425                         fail($aPartitions->getMessage());
426                 }
427                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
428                 foreach($aPartitions as $sPartition)
429                 {
430                         if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) fail(pg_last_error($oDB->connection));
431                         echo '.';
432                 }
433
434                 // used by getorcreate_word_id to ignore frequent partial words
435                 if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) fail(pg_last_error($oDB->connection));
436                 echo ".\n";
437
438                 // pre-create the word list
439                 if (!$aCMDResult['disable-token-precalc'])
440                 {
441                         echo "Loading word list\n";
442                         pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
443                 }
444
445                 echo "Load Data\n";
446                 $aDBInstances = array();
447                 for($i = 0; $i < $iInstances; $i++)
448                 {
449                         $aDBInstances[$i] =& getDB(true);
450                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
451                         $sSQL .= 'housenumber, street, addr_place, isin, postcode, country_code, extratags, ';
452                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
453                         if ($aCMDResult['verbose']) echo "$sSQL\n";
454                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
455                 }
456                 $bAnyBusy = true;
457                 while($bAnyBusy)
458                 {
459                         $bAnyBusy = false;
460                         for($i = 0; $i < $iInstances; $i++)
461                         {
462                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
463                         }
464                         sleep(1);
465                         echo '.';
466                 }
467                 echo "\n";
468                 echo "Reanalysing database...\n";
469                 pgsqlRunScript('ANALYSE');
470         }
471
472         if ($aCMDResult['import-tiger-data'])
473         {
474                 $bDidSomething = true;
475
476                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
477                 $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
478                 $sTemplate = replace_tablespace('{ts:aux-data}',
479                                                 CONST_Tablespace_Aux_Data, $sTemplate);
480                 $sTemplate = replace_tablespace('{ts:aux-index}',
481                                                 CONST_Tablespace_Aux_Index, $sTemplate);
482                 pgsqlRunScript($sTemplate, false);
483
484                 $aDBInstances = array();
485                 for($i = 0; $i < $iInstances; $i++)
486                 {
487                         $aDBInstances[$i] =& getDB(true);
488                 }
489
490                 foreach(glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile)
491                 {
492                         echo $sFile.': ';
493                         $hFile = fopen($sFile, "r");
494                         $sSQL = fgets($hFile, 100000);
495                         $iLines = 0;
496
497                         while(true)
498                         {
499                                 for($i = 0; $i < $iInstances; $i++)
500                                 {
501                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
502                                         {
503                                                 while(pg_get_result($aDBInstances[$i]->connection));
504                                                 $sSQL = fgets($hFile, 100000);
505                                                 if (!$sSQL) break 2;
506                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
507                                                 $iLines++;
508                                                 if ($iLines == 1000)
509                                                 {
510                                                         echo ".";
511                                                         $iLines = 0;
512                                                 }
513                                         }
514                                 }
515                                 usleep(10);
516                         }
517
518                         fclose($hFile);
519
520                         $bAnyBusy = true;
521                         while($bAnyBusy)
522                         {
523                                 $bAnyBusy = false;
524                                 for($i = 0; $i < $iInstances; $i++)
525                                 {
526                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
527                                 }
528                                 usleep(10);
529                         }
530                         echo "\n";
531                 }
532
533                 echo "Creating indexes\n";
534                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql');
535                 $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
536                 $sTemplate = replace_tablespace('{ts:aux-data}',
537                                                 CONST_Tablespace_Aux_Data, $sTemplate);
538                 $sTemplate = replace_tablespace('{ts:aux-index}',
539                                                 CONST_Tablespace_Aux_Index, $sTemplate);
540                 pgsqlRunScript($sTemplate, false);
541         }
542
543         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
544         {
545                 $bDidSomething = true;
546                 $oDB =& getDB();
547                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
548                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
549                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
550                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
551                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
552                 $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
553                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
554
555                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
556                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
557                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
558                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
559         }
560
561         if ($aCMDResult['osmosis-init'] || ($aCMDResult['all'] && !$aCMDResult['drop'])) // no use doing osmosis-init when dropping update tables
562         {
563                 $bDidSomething = true;
564                 $oDB =& getDB();
565
566                 if (!file_exists(CONST_Osmosis_Binary))
567                 {
568                         echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
569                         if (!$aCMDResult['all'])
570                         {
571                                 fail("osmosis not found in '".CONST_Osmosis_Binary."'");
572                         }
573                 }
574                 else
575                 {
576                         if (file_exists(CONST_BasePath.'/settings/configuration.txt'))
577                         {
578                                 echo "settings/configuration.txt already exists\n";
579                         }
580                         else
581                         {
582                                 passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
583                                 // update osmosis configuration.txt with our settings
584                                 passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_BasePath.'/settings/configuration.txt');
585                                 passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_BasePath.'/settings/configuration.txt');
586                         }
587
588                         // Find the last node in the DB
589                         $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'");
590
591                         // Lookup the timestamp that node was created (less 3 hours for margin for changsets to be closed)
592                         $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1";
593                         $sLastNodeXML = file_get_contents($sLastNodeURL);
594                         preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate);
595                         $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60);
596
597                         // Search for the correct state file - uses file timestamps so need to sort by date descending
598                         $sRepURL = CONST_Replication_Url."/";
599                         $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
600                         // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
601                         // planet.openstreetmap.org: <a href="273/">273/</a>                    2013-03-11 07:41    -
602                         preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
603                         if ($aRepMatches)
604                         {
605                                 $aPrevRepMatch = false;
606                                 foreach($aRepMatches as $aRepMatch)
607                                 {
608                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
609                                         $aPrevRepMatch = $aRepMatch;
610                                 }
611                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
612
613                                 $sRepURL .= $aRepMatch[1];
614                                 $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
615                                 preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
616                                 $aPrevRepMatch = false;
617                                 foreach($aRepMatches as $aRepMatch)
618                                 {
619                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
620                                         $aPrevRepMatch = $aRepMatch;
621                                 }
622                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
623
624                                 $sRepURL .= $aRepMatch[1];
625                                 $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1");
626                                 preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>\s*([-0-9a-zA-Z]+ [0-9]{2}:[0-9]{2})#', $sRep, $aRepMatches, PREG_SET_ORDER);
627                                 $aPrevRepMatch = false;
628                                 foreach($aRepMatches as $aRepMatch)
629                                 {
630                                         if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
631                                         $aPrevRepMatch = $aRepMatch;
632                                 }
633                                 if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
634
635                                 $sRepURL .= $aRepMatch[1].'.state.txt';
636                                 echo "Getting state file: $sRepURL\n";
637                                 $sStateFile = file_get_contents($sRepURL);
638                                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
639                                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
640                                 echo "Updating DB status\n";
641                                 pg_query($oDB->connection, 'TRUNCATE import_status');
642                                 $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')";
643                                 pg_query($oDB->connection, $sSQL);
644                         }
645                         else
646                         {
647                                 if (!$aCMDResult['all'])
648                                 {
649                                         fail("Cannot read state file directory.");
650                                 }
651                         }
652                 }
653         }
654
655         if ($aCMDResult['index'] || $aCMDResult['all'])
656         {
657                 $bDidSomething = true;
658                 $sOutputFile = '';
659                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
660                 $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$iInstances.$sOutputFile;
661                 passthruCheckReturn($sBaseCmd.' -R 4');
662                 if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
663                 passthruCheckReturn($sBaseCmd.' -r 5 -R 25');
664                 if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
665                 passthruCheckReturn($sBaseCmd.' -r 26');
666         }
667
668         if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
669         {
670                 echo "Search indices\n";
671                 $bDidSomething = true;
672                 $oDB =& getDB();
673                 $sSQL = 'select distinct partition from country_name';
674                 $aPartitions = $oDB->getCol($sSQL);
675                 if (PEAR::isError($aPartitions))
676                 {
677                         fail($aPartitions->getMessage());
678                 }
679                 if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;
680
681                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');
682                 $sTemplate = replace_tablespace('{ts:address-index}',
683                                                 CONST_Tablespace_Address_Index, $sTemplate);
684                 $sTemplate = replace_tablespace('{ts:search-index}',
685                                                 CONST_Tablespace_Search_Index, $sTemplate);
686                 $sTemplate = replace_tablespace('{ts:aux-index}',
687                                                 CONST_Tablespace_Aux_Index, $sTemplate);
688                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
689                 foreach($aMatches as $aMatch)
690                 {
691                         $sResult = '';
692                         foreach($aPartitions as $sPartitionName)
693                         {
694                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
695                         }
696                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
697                 }
698
699                 pgsqlRunScript($sTemplate);
700         }
701
702         if (isset($aCMDResult['create-website']))
703         {
704                 $bDidSomething = true;
705                 $sTargetDir = $aCMDResult['create-website'];
706                 if (!is_dir($sTargetDir))
707                 {
708                         echo "You must create the website directory before calling this function.\n";
709                         fail("Target directory does not exist.");
710                 }
711
712                 @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php');
713                 @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php');
714                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php');
715                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/index.php');
716                 @symlink(CONST_BasePath.'/website/lookup.php', $sTargetDir.'/lookup.php');
717                 @symlink(CONST_BasePath.'/website/deletable.php', $sTargetDir.'/deletable.php');
718                 @symlink(CONST_BasePath.'/website/polygons.php', $sTargetDir.'/polygons.php');
719                 @symlink(CONST_BasePath.'/website/status.php', $sTargetDir.'/status.php');
720                 @symlink(CONST_BasePath.'/website/images', $sTargetDir.'/images');
721                 @symlink(CONST_BasePath.'/website/js', $sTargetDir.'/js');
722                 @symlink(CONST_BasePath.'/website/css', $sTargetDir.'/css');
723                 echo "Symlinks created\n";
724
725                 $sTestFile = @file_get_contents(CONST_Website_BaseURL.'js/tiles.js');
726                 if (!$sTestFile)
727                 {
728                         echo "\nWARNING: Unable to access the website at ".CONST_Website_BaseURL."\n";
729                         echo "You may want to update settings/local.php with @define('CONST_Website_BaseURL', 'http://[HOST]/[PATH]/');\n";
730                 }
731         }
732
733         if ($aCMDResult['drop'])
734         {
735                 // The implementation is potentially a bit dangerous because it uses
736                 // a positive selection of tables to keep, and deletes everything else.
737                 // Including any tables that the unsuspecting user might have manually
738                 // created. USE AT YOUR OWN PERIL.
739                 $bDidSomething = true;
740
741                 // tables we want to keep. everything else goes.
742                 $aKeepTables = array(
743                    "*columns",
744                    "import_polygon_*",
745                    "import_status",
746                    "place_addressline",
747                    "location_property*",
748                    "placex",
749                    "search_name",
750                    "seq_*",
751                    "word",
752                    "query_log",
753                    "new_query_log",
754                    "gb_postcode",
755                    "spatial_ref_sys",
756                    "country_name",
757                    "place_classtype_*"
758                 );
759
760                 $oDB =& getDB();
761                 $aDropTables = array();
762                 $aHaveTables = $oDB->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'");
763                 if (PEAR::isError($aHaveTables))
764                 {
765                         fail($aPartitions->getMessage());
766                 }
767                 foreach($aHaveTables as $sTable)
768                 {
769                         $bFound = false;
770                         foreach ($aKeepTables as $sKeep)
771                         {
772                                 if (fnmatch($sKeep, $sTable))
773                                 {
774                                         $bFound = true;
775                                         break;
776                                 }
777                         }
778                         if (!$bFound) array_push($aDropTables, $sTable);
779                 }
780
781                 foreach ($aDropTables as $sDrop)
782                 {
783                         if ($aCMDResult['verbose']) echo "dropping table $sDrop\n";
784                         @pg_query($oDB->connection, "DROP TABLE $sDrop CASCADE");
785                         // ignore warnings/errors as they might be caused by a table having
786                         // been deleted already by CASCADE
787                 }
788
789                 if (!is_null(CONST_Osm2pgsql_Flatnode_File))
790                 {
791                         if ($aCMDResult['verbose']) echo "deleting ".CONST_Osm2pgsql_Flatnode_File."\n";
792                         unlink(CONST_Osm2pgsql_Flatnode_File);
793                 }
794         }
795
796         if (!$bDidSomething)
797         {
798                 showUsage($aCMDOptions, true);
799         }
800         else
801         {
802                 echo "Setup finished.\n";
803         }
804
805         function pgsqlRunScriptFile($sFilename)
806         {
807                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
808
809                 // Convert database DSN to psql parameters
810                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
811                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
812                 $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
813
814                 $ahGzipPipes = null;
815                 if (preg_match('/\\.gz$/', $sFilename))
816                 {
817                         $aDescriptors = array(
818                                 0 => array('pipe', 'r'),
819                                 1 => array('pipe', 'w'),
820                                 2 => array('file', '/dev/null', 'a')
821                         );
822                         $hGzipProcess = proc_open('zcat '.$sFilename, $aDescriptors, $ahGzipPipes);
823                         if (!is_resource($hGzipProcess)) fail('unable to start zcat');
824                         $aReadPipe = $ahGzipPipes[1];
825                         fclose($ahGzipPipes[0]);
826                 }
827                 else
828                 {
829                         $sCMD .= ' -f '.$sFilename;
830                         $aReadPipe = array('pipe', 'r');
831                 }
832
833                 $aDescriptors = array(
834                         0 => $aReadPipe,
835                         1 => array('pipe', 'w'),
836                         2 => array('file', '/dev/null', 'a')
837                 );
838                 $ahPipes = null;
839                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
840                 if (!is_resource($hProcess)) fail('unable to start pgsql');
841
842
843                 // TODO: error checking
844                 while(!feof($ahPipes[1]))
845                 {
846                         echo fread($ahPipes[1], 4096);
847                 }
848                 fclose($ahPipes[1]);
849
850                 $iReturn = proc_close($hProcess);
851                 if ($iReturn > 0)
852                 {
853                         fail("pgsql returned with error code ($iReturn)");
854                 }
855                 if ($ahGzipPipes)
856                 {
857                         fclose($ahGzipPipes[1]);
858                         proc_close($hGzipProcess);
859                 }
860
861         }
862
863         function pgsqlRunScript($sScript, $bfatal = true)
864         {
865                 global $aCMDResult;
866                 // Convert database DSN to psql parameters
867                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
868                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
869                 $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'];
870                 if ($bfatal && !$aCMDResult['ignore-errors'])
871                         $sCMD .= ' -v ON_ERROR_STOP=1';
872                 $aDescriptors = array(
873                         0 => array('pipe', 'r'),
874                         1 => STDOUT, 
875                         2 => STDERR
876                 );
877                 $ahPipes = null;
878                 $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
879                 if (!is_resource($hProcess)) fail('unable to start pgsql');
880
881                 while(strlen($sScript))
882                 {
883                         $written = fwrite($ahPipes[0], $sScript);
884                         if ($written <= 0) break;
885                         $sScript = substr($sScript, $written);
886                 }
887                 fclose($ahPipes[0]);
888                 $iReturn = proc_close($hProcess);
889                 if ($bfatal && $iReturn > 0)
890                 {
891                         fail("pgsql returned with error code ($iReturn)");
892                 }
893         }
894
895         function pgsqlRunRestoreData($sDumpFile)
896         {
897                 // Convert database DSN to psql parameters
898                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
899                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
900                 $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc -a '.$sDumpFile;
901
902                 $aDescriptors = array(
903                         0 => array('pipe', 'r'),
904                         1 => array('pipe', 'w'),
905                         2 => array('file', '/dev/null', 'a')
906                 );
907                 $ahPipes = null;
908                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
909                 if (!is_resource($hProcess)) fail('unable to start pg_restore');
910
911                 fclose($ahPipes[0]);
912
913                 // TODO: error checking
914                 while(!feof($ahPipes[1]))
915                 {
916                         echo fread($ahPipes[1], 4096);
917                 }
918                 fclose($ahPipes[1]);
919
920                 $iReturn = proc_close($hProcess);
921         }
922
923         function pgsqlRunDropAndRestore($sDumpFile)
924         {
925                 // Convert database DSN to psql parameters
926                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
927                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
928                 $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.$aDSNInfo['database'].' -Fc --clean '.$sDumpFile;
929
930                 $aDescriptors = array(
931                         0 => array('pipe', 'r'),
932                         1 => array('pipe', 'w'),
933                         2 => array('file', '/dev/null', 'a')
934                 );
935                 $ahPipes = null;
936                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
937                 if (!is_resource($hProcess)) fail('unable to start pg_restore');
938
939                 fclose($ahPipes[0]);
940
941                 // TODO: error checking
942                 while(!feof($ahPipes[1]))
943                 {
944                         echo fread($ahPipes[1], 4096);
945                 }
946                 fclose($ahPipes[1]);
947
948                 $iReturn = proc_close($hProcess);
949         }
950
951         function passthruCheckReturn($cmd)
952         {
953                 $result = -1;
954                 passthru($cmd, $result);
955                 if ($result != 0) fail('Error executing external command: '.$cmd);
956         }
957
958         function replace_tablespace($sTemplate, $sTablespace, $sSql)
959         {
960                 if ($sTablespace)
961                         $sSql = str_replace($sTemplate, 'TABLESPACE "'.$sTablespace.'"',
962                                             $sSql);
963                 else
964                         $sSql = str_replace($sTemplate, '', $sSql);
965
966                 return $sSql;
967         }
968