]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
script for scraper blocking using apache log files
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
// Command line options understood by this script. The first element is the
// usage banner; each following row describes one option: long name, short
// name, four numeric columns, the value type and the help text.
// NOTE(review): the meaning of the numeric columns is defined by getCmdOpt()
// in the command line library, which is not visible here.
$aCMDOptions = array(
    "Create and setup nominatim search system",
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
    array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

    array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

    array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
    array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
    array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
    array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
    array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
    array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
    array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
    array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
    array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
    array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
    array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
    array('no-partitions', '', 0, 1, 0, 0, 'bool', "Do not partition search indices (speeds up import of single country extracts)"),
    array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
    array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
    array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
    array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
    // The help text used to be empty, leaving this option undocumented in --help.
    array('create-roads', '', 0, 1, 0, 0, 'bool', 'Populate the road location tables from placex highways (not included in \'all\')'),
    array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
    array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
    array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
    array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
    array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
    array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
);
// Parse the command line; the rest of the script reads the parsed values
// from $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
45
// Set to true by every stage that runs.
$bDidSomething = false;

// --all and --import-data both read the OSM file, so make sure it was
// supplied and is usable before any stage starts.
$bNeedsOsmFile = $aCMDResult['import-data'] || $aCMDResult['all'];
if ($bNeedsOsmFile) {
    if (!isset($aCMDResult['osm-file'])) {
        fail('missing --osm-file for data import');
    }

    if (!file_exists($aCMDResult['osm-file'])) {
        fail('the path supplied to --osm-file does not exist');
    }

    if (!is_readable($aCMDResult['osm-file'])) {
        fail('osm-file "'.$aCMDResult['osm-file'].'" not readable');
    }
}
66
67
// Worker count: --threads if given, otherwise one less than the number of
// processors. Either way the value is clamped to [1, processor count].
if (isset($aCMDResult['threads'])) {
    $iInstances = $aCMDResult['threads'];
} else {
    $iInstances = getProcessorCount() - 1;
}
if ($iInstances < 1) {
    $iInstances = 1;
    echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances > getProcessorCount()) {
    $iInstances = getProcessorCount();
    echo "WARNING: resetting threads to $iInstances\n";
}

// Cache size: --osm2pgsql-cache if given, otherwise all of the cache memory
// in the box. A request larger than total memory falls back to the default.
if (isset($aCMDResult['osm2pgsql-cache'])) {
    $iCacheMemory = $aCMDResult['osm2pgsql-cache'];
} else {
    $iCacheMemory = getCacheMemoryMB();
}
if ($iCacheMemory > getTotalMemoryMB()) {
    $iCacheMemory = getCacheMemoryMB();
    echo "WARNING: resetting cache memory to $iCacheMemory\n";
}

// Connection details for the command line tools; a missing or empty port
// becomes 5432.
$aDSNInfo = DB::parseDSN(CONST_Database_DSN);
if (empty($aDSNInfo['port'])) {
    $aDSNInfo['port'] = 5432;
}

$fPostgisVersion = (float) CONST_Postgis_Version;
93
// Stage: create the (empty) database with createdb.
if ($aCMDResult['create-db'] || $aCMDResult['all']) {
    echo "Create DB\n";
    $bDidSomething = true;

    // A connection attempt that does not produce an error means the
    // database is already there.
    $oDB =& DB::connect(CONST_Database_DSN, false);
    $bAlreadyExists = !PEAR::isError($oDB);
    if ($bAlreadyExists) {
        fail('database already exists ('.CONST_Database_DSN.')');
    }

    $sCreateCommand = 'createdb -E UTF-8 -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
    passthruCheckReturn($sCreateCommand);
}
105
// Stage: install the required extensions and load the static country,
// postcode and US reference data into a blank database.
if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
    echo "Setup DB\n";
    $bDidSomething = true;
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();

    // The running server must match the configured PostgreSQL version.
    $sVersionString = $oDB->getOne('select version()');
    if (PEAR::isError($sVersionString)) {
        fail($sVersionString->getMessage());
    }
    if (!preg_match('#PostgreSQL ([0-9]+)[.]([0-9]+)[^0-9]#', $sVersionString, $aMatches)) {
        fail('unable to parse PostgreSQL version from: '.$sVersionString);
    }
    if (CONST_Postgresql_Version != $aMatches[1].'.'.$aMatches[2]) {
        echo "ERROR: PostgreSQL version is not correct.  Expected ".CONST_Postgresql_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
        // Non-zero status so calling scripts can detect the failure.
        exit(1);
    }

    // passthru() rather than passthruCheckReturn(): a failure of createlang
    // does not stop the setup.
    passthru('createlang plpgsql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database']);

    // hstore: SQL scripts before PostgreSQL 9.1, an extension afterwards.
    $pgver = (float) CONST_Postgresql_Version;
    if ($pgver < 9.1) {
        pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/sql/hstore_compatability_9_0.sql');
    } else {
        pgsqlRunScript('CREATE EXTENSION hstore');
    }

    // PostGIS: SQL scripts before 2.0, an extension afterwards.
    if ($fPostgisVersion < 2.0) {
        pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
        pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
    } else {
        pgsqlRunScript('CREATE EXTENSION postgis');
    }
    if ($fPostgisVersion < 2.1) {
        // Function was renamed in 2.1 and throws an annoying deprecation warning
        pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
    }

    // The installed PostGIS must match the configured version as well.
    $sVersionString = $oDB->getOne('select postgis_full_version()');
    if (PEAR::isError($sVersionString)) {
        fail($sVersionString->getMessage());
    }
    if (!preg_match('#POSTGIS="([0-9]+)[.]([0-9]+)[.]([0-9]+)( r([0-9]+))?"#', $sVersionString, $aMatches)) {
        fail('unable to parse PostGIS version from: '.$sVersionString);
    }
    if (CONST_Postgis_Version != $aMatches[1].'.'.$aMatches[2]) {
        echo "ERROR: PostGIS version is not correct.  Expected ".CONST_Postgis_Version." found ".$aMatches[1].'.'.$aMatches[2]."\n";
        exit(1);
    }

    pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
    // The UK postcode data is optional.
    if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz')) {
        pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz');
    } else {
        echo "WARNING: external UK postcode table not found.\n";
    }
    pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
    pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');

    // Without partitioning every country goes into partition 0.
    if ($aCMDResult['no-partitions']) {
        pgsqlRunScript('update country_name set partition = 0');
    }

    // the following will be needed by create_functions later but
    // is only defined in the subsequently called create_tables.
    // Create dummies here that will be overwritten by the proper
    // versions in create-tables.
    pgsqlRunScript('CREATE TABLE place_boundingbox ()');
    pgsqlRunScript('create type wikipedia_article_match as ()');
}
177
// Stage: run osm2pgsql to load the OSM file into the place table.
if ($aCMDResult['import-data'] || $aCMDResult['all']) {
    echo "Import\n";
    $bDidSomething = true;

    $osm2pgsql = CONST_Osm2pgsql_Binary;
    if (!file_exists($osm2pgsql)) {
        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
        fail("osm2pgsql not found in '$osm2pgsql'");
    }

    if (!is_null(CONST_Osm2pgsql_Flatnode_File)) {
        $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
    }
    // NOTE(review): the tablespace names 'ssd' and 'data' are hard-coded and
    // must exist in the target database - confirm for other installations.
    $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
    $osm2pgsql .= ' -lsc -O gazetteer --hstore';
    // Use the cache size computed at the top of the script (from
    // --osm2pgsql-cache or the machine default) instead of a fixed 25000,
    // which made the command line option ineffective.
    $osm2pgsql .= ' -C '.(int) $iCacheMemory;
    $osm2pgsql .= ' -P '.$aDSNInfo['port'];
    // The file name comes from the command line; quote it so paths with
    // spaces or shell metacharacters reach osm2pgsql intact.
    $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.escapeshellarg($aCMDResult['osm-file']);
    passthruCheckReturn($osm2pgsql);

    // An import that produced no rows in place counts as a failure.
    $oDB =& getDB();
    $x = $oDB->getRow('select * from place limit 1');
    if (PEAR::isError($x)) {
        fail($x->getMessage());
    }
    if (!$x) {
        fail('No Data');
    }
}
208
// Stage: install the SQL functions from the functions.sql template plus the
// PostGIS helper functions.
if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
    echo "Functions\n";
    $bDidSomething = true;
    if (!file_exists(CONST_BasePath.'/module/nominatim.so')) {
        fail("nominatim module not built");
    }

    // Load the template and point it at the compiled module.
    $sTemplate = str_replace(
        '{modulepath}',
        CONST_BasePath.'/module',
        file_get_contents(CONST_BasePath.'/sql/functions.sql')
    );
    // Optional switches rewrite marker strings in the template.
    if ($aCMDResult['enable-diff-updates']) {
        $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate);
    }
    if ($aCMDResult['enable-debug-statements']) {
        $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
    }
    if (CONST_Limit_Reindexing) {
        $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
    }
    pgsqlRunScript($sTemplate);

    // A different helper script is used for PostGIS before 2.0 and from 2.0 on.
    if ($fPostgisVersion < 2.0) {
        echo "Helper functions for postgis < 2.0\n";
        $sAuxScript = 'postgis_15_aux.sql';
    } else {
        echo "Helper functions for postgis >= 2.0\n";
        $sAuxScript = 'postgis_20_aux.sql';
    }
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/'.$sAuxScript);
    pgsqlRunScript($sTemplate);
}
229
// Stage: create the minimal main tables and one place_classtype_* table
// (with its two indices) per entry of settings/partitionedtags.def.
if ($aCMDResult['create-minimal-tables']) {
    echo "Minimal Tables\n";
    $bDidSomething = true;
    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');

    // Backstop the import process - earliest possible import id
    $sScript = "insert into import_npi_log values (18022);\n";

    $sPartitionDefFile = CONST_BasePath.'/settings/partitionedtags.def';
    $hFile = @fopen($sPartitionDefFile, "r");
    if (!$hFile) {
        fail('unable to open list of partitions: '.$sPartitionDefFile);
    }

    // Each line holds "class type". Reading stops at end of file, at an
    // empty string, or at the first line that starts with '#'.
    while (true) {
        $sLine = fgets($hFile, 4096);
        if ($sLine === false || !$sLine || substr($sLine, 0, 1) == '#') {
            break;
        }

        list($sClass, $sType) = explode(' ', trim($sLine));
        $sTable = 'place_classtype_'.$sClass.'_'.$sType;

        $sScript .= 'create table '.$sTable.' as '
            .'select place_id as place_id,geometry as centroid from placex limit 0;'."\n";

        $sScript .= 'CREATE INDEX idx_'.$sTable.'_centroid '
            .'ON '.$sTable.' USING GIST (centroid);'."\n";

        $sScript .= 'CREATE INDEX idx_'.$sTable.'_place_id '
            .'ON '.$sTable.' USING btree(place_id);'."\n";
    }
    fclose($hFile);
    pgsqlRunScript($sScript);
}
259
// Stage: create the main tables, then reinstall the SQL functions.
if ($aCMDResult['create-tables'] || $aCMDResult['all']) {
    echo "Tables\n";
    $bDidSomething = true;
    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

    // re-run the functions
    // Apply the same template substitutions as the create-functions stage.
    // Without them this run would replace functions installed with
    // --enable-diff-updates / --enable-debug-statements by plain ones,
    // silently undoing those switches when --all is used.
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    $sTemplate = str_replace('{modulepath}', CONST_BasePath.'/module', $sTemplate);
    if ($aCMDResult['enable-diff-updates']) {
        $sTemplate = str_replace('RETURN NEW; -- @DIFFUPDATES@', '--', $sTemplate);
    }
    if ($aCMDResult['enable-debug-statements']) {
        $sTemplate = str_replace('--DEBUG:', '', $sTemplate);
    }
    if (CONST_Limit_Reindexing) {
        $sTemplate = str_replace('--LIMIT INDEXING:', '', $sTemplate);
    }
    pgsqlRunScript($sTemplate);
}
271
// Stage: expand the partition table template once per partition and run it.
if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
    echo "Partition Tables\n";
    $bDidSomething = true;

    $oDB =& getDB();
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions)) {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is added to the list unless partitioning is switched off.
    if (!$aCMDResult['no-partitions']) {
        $aPartitions[] = 0;
    }

    // Every "-- start ... -- end" section of the template is repeated for
    // each partition, with '-partition-' replaced by the partition name.
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach ($aMatches as $aMatch) {
        $aExpanded = array();
        foreach ($aPartitions as $sPartitionName) {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $aMatch[1]);
        }
        $sTemplate = str_replace($aMatch[0], implode('', $aExpanded), $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
299
300
// Stage: expand the partition function template once per partition and run it.
if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
    echo "Partition Functions\n";
    $bDidSomething = true;

    $oDB =& getDB();
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions)) {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is added to the list unless partitioning is switched off.
    if (!$aCMDResult['no-partitions']) {
        $aPartitions[] = 0;
    }

    // Every "-- start ... -- end" section of the template is repeated for
    // each partition, with '-partition-' replaced by the partition name.
    $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach ($aMatches as $aMatch) {
        $aExpanded = array();
        foreach ($aPartitions as $sPartitionName) {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $aMatch[1]);
        }
        $sTemplate = str_replace($aMatch[0], implode('', $aExpanded), $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
328
// Stage: restore the optional wikipedia article and redirect dumps.
if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $sWikiArticlesFile = CONST_BasePath.'/data/wikipedia_article.sql.bin';
    $sWikiRedirectsFile = CONST_BasePath.'/data/wikipedia_redirect.sql.bin';

    // Per dump: file, progress message, warning printed when it is missing.
    $aWikiDumps = array(
        array(
            $sWikiArticlesFile,
            "Importing wikipedia articles...",
            "WARNING: wikipedia article dump file not found - places will have default importance\n",
        ),
        array(
            $sWikiRedirectsFile,
            "Importing wikipedia redirects...",
            "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n",
        ),
    );

    foreach ($aWikiDumps as $aWikiDump) {
        list($sDumpFile, $sProgress, $sMissingWarning) = $aWikiDump;
        // A missing dump only produces a warning.
        if (!file_exists($sDumpFile)) {
            echo $sMissingWarning;
            continue;
        }
        echo $sProgress;
        pgsqlRunDropAndRestore($sDumpFile);
        echo "...done\n";
    }
}
355
356
// Stage: empty the live tables and copy the imported rows from place into
// placex, split over $iInstances parallel database connections.
if ($aCMDResult['load-data'] || $aCMDResult['all']) {
    echo "Drop old Data\n";
    $bDidSomething = true;

    $oDB =& getDB();

    // Executed in this order; one progress dot per statement.
    $aResetStatements = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach ($aResetStatements as $sStatement) {
        if (!pg_query($oDB->connection, $sStatement)) {
            fail(pg_last_error($oDB->connection));
        }
        echo '.';
    }

    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions)) {
        fail($aPartitions->getMessage());
    }
    if (!$aCMDResult['no-partitions']) {
        $aPartitions[] = 0;
    }
    foreach ($aPartitions as $sPartition) {
        if (!pg_query($oDB->connection, 'TRUNCATE location_road_'.$sPartition)) {
            fail(pg_last_error($oDB->connection));
        }
        echo '.';
    }

    // used by getorcreate_word_id to ignore frequent partial words
    if (!pg_query($oDB->connection, 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS $$ SELECT '.CONST_Max_Word_Frequency.' as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE')) {
        fail(pg_last_error($oDB->connection));
    }
    echo ".\n";

    // pre-create the word list
    if (!$aCMDResult['disable-token-precalc']) {
        echo "Loading word list\n";
        pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql');
    }

    echo "Load Data\n";
    // Each connection copies the rows whose osm_id modulo the number of
    // instances equals its own index; the queries run asynchronously.
    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++) {
        $aDBInstances[$i] =& getDB(true);
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, addr_place, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
        if ($aCMDResult['verbose']) {
            echo "$sSQL\n";
        }
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) {
            // Report the error of the connection that failed, not of $oDB.
            fail(pg_last_error($aDBInstances[$i]->connection));
        }
    }

    // Poll once per second until every connection has finished.
    $bAnyBusy = true;
    while ($bAnyBusy) {
        $bAnyBusy = false;
        for ($i = 0; $i < $iInstances; $i++) {
            if (pg_connection_busy($aDBInstances[$i]->connection)) {
                $bAnyBusy = true;
            }
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // Collect the asynchronous results so that a failed INSERT stops the
    // setup instead of leaving placex silently incomplete.
    for ($i = 0; $i < $iInstances; $i++) {
        while ($hResult = pg_get_result($aDBInstances[$i]->connection)) {
            $sResultError = pg_result_error($hResult);
            if ($sResultError) {
                fail($sResultError);
            }
        }
    }

    echo "Reanalysing database...\n";
    pgsqlRunScript('ANALYSE');
}
432
// Stage: call insertLocationRoad() for every highway row in placex
// (rank_search 26 to 27), split over $iInstances parallel connections.
if ($aCMDResult['create-roads']) {
    $bDidSomething = true;

    $oDB =& getDB();
    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++) {
        $aDBInstances[$i] =& getDB(true);
        $hConnection = $aDBInstances[$i]->connection;

        // Errors are read from the connection that ran the statement; the
        // original reported pg_last_error() of the unrelated $oDB connection.
        if (!pg_query($hConnection, 'set enable_bitmapscan = off')) {
            fail(pg_last_error($hConnection));
        }
        $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, calculated_country_code, geometry) from ';
        $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
        if ($aCMDResult['verbose']) {
            echo "$sSQL\n";
        }
        if (!pg_send_query($hConnection, $sSQL)) {
            fail(pg_last_error($hConnection));
        }
    }

    // Poll once per second until every connection has finished.
    $bAnyBusy = true;
    while ($bAnyBusy) {
        $bAnyBusy = false;
        for ($i = 0; $i < $iInstances; $i++) {
            if (pg_connection_busy($aDBInstances[$i]->connection)) {
                $bAnyBusy = true;
            }
        }
        sleep(1);
        echo '.';
    }
    echo "\n";

    // Collect the asynchronous results so that a failed query is reported.
    for ($i = 0; $i < $iInstances; $i++) {
        while ($hResult = pg_get_result($aDBInstances[$i]->connection)) {
            $sResultError = pg_result_error($hResult);
            if ($sResultError) {
                fail($sResultError);
            }
        }
    }
}
461
// Stage: feed the statements of every data/tiger2011/*.sql file, one line at
// a time, to whichever of the $iInstances connections is idle.
if ($aCMDResult['import-tiger-data']) {
    $bDidSomething = true;

    pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_start.sql');

    $aDBInstances = array();
    for ($i = 0; $i < $iInstances; $i++) {
        $aDBInstances[$i] =& getDB(true);
    }

    // glob() returns false on error; treat that as "no files".
    $aTigerFiles = glob(CONST_BasePath.'/data/tiger2011/*.sql');
    if ($aTigerFiles === false) {
        $aTigerFiles = array();
    }

    foreach ($aTigerFiles as $sFile) {
        echo $sFile.': ';
        $hFile = fopen($sFile, "r");
        if (!$hFile) {
            fail('unable to open tiger data file: '.$sFile);
        }
        // NOTE(review): this first line is read and then overwritten before
        // anything is sent, so it is never executed - confirm the data files
        // start with a line that is meant to be skipped.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        while (true) {
            for ($i = 0; $i < $iInstances; $i++) {
                $hConnection = $aDBInstances[$i]->connection;
                if (!pg_connection_busy($hConnection)) {
                    // Drain the results of the previous statement first.
                    while (pg_get_result($hConnection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) {
                        break 2;
                    }
                    // $oDB is not defined in this stage; report the error
                    // of the connection that rejected the statement.
                    if (!pg_send_query($hConnection, $sSQL)) {
                        fail(pg_last_error($hConnection));
                    }
                    $iLines++;
                    // One progress dot per 1000 statements.
                    if ($iLines == 1000) {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait for the statements still in flight before the next file.
        $bAnyBusy = true;
        while ($bAnyBusy) {
            $bAnyBusy = false;
            for ($i = 0; $i < $iInstances; $i++) {
                if (pg_connection_busy($aDBInstances[$i]->connection)) {
                    $bAnyBusy = true;
                }
            }
            usleep(10);
        }
        echo "\n";
    }

    echo "Creating indexes\n";
    pgsqlRunScriptFile(CONST_BasePath.'/sql/tiger_import_finish.sql');
}
520
// Stage: replace the artificial postcode rows (osm_type 'P') in placex.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
    $bDidSomething = true;
    $oDB =& getDB();

    // Both inserts write the same column list.
    $sInsertInto = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";

    $aStatements = array(
        // Remove the rows written by an earlier run.
        'DELETE from placex where osm_type=\'P\'',

        // One point per (country, postcode): the average of the centroids of
        // all rows carrying that postcode; country code 'ie' is excluded.
        $sInsertInto
            ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"
            ."ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"
            ."avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "
            ."from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x",

        // US postcodes are taken from the us_postcode table.
        $sInsertInto
            ."select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',"
            ."ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode",
    );

    foreach ($aStatements as $sSQL) {
        if (!pg_query($oDB->connection, $sSQL)) {
            fail(pg_last_error($oDB->connection));
        }
    }
}
538
539         if ($aCMDResult['osmosis-init'] || $aCMDResult['all'])
540         {
541                 $bDidSomething = true;
542                 $oDB =& getDB();
543
544                 if (!file_exists(CONST_Osmosis_Binary))
545                 {
546                         echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
547                         if (!$aCMDResult['all'])
548                         {
549                                 fail("osmosis not found in '".CONST_Osmosis_Binary."'");
550                         }
551                 }
552                 else
553                 {
554                         if (file_exists(CONST_BasePath.'/settings/configuration.txt'))
555                         {
556                                 echo "settings/configuration.txt already exists\n";
557                         }
558                         else
559                         {
560                                 passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
561                                 // update osmosis configuration.txt with our settings
562                                 passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_BasePath.'/settings/configuration.txt');
563                                 passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_BasePath.'/settings/configuration.txt');
564                         }
565
566                         // Find the last node in the DB
567                         $iLastOSMID = $oDB->getOne("select max(id) from planet_osm_nodes");
568
569                         // Lookup the timestamp that node was created (less 3 hours for margin for changsets to be closed)
570                         $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1";
571                         $sLastNodeXML = file_get_contents($sLastNodeURL);
572                         preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate);
573                         $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60);
574
575                         // Search for the correct state file - uses file timestamps so need to sort by date descending
576                         $sRepURL = CONST_Replication_Url."/";
577                         $sRep = file_get_contents($sRepURL."?C=M;O=D");
578                         // download.geofabrik.de:    <a href="000/">000/</a></td><td align="right">26-Feb-2013 11:53  </td>
579                         // planet.openstreetmap.org: <a href="273/">273/</a>                    22-Mar-2013 07:41    -
580                         preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
581                         $aPrevRepMatch = false;
582                         foreach($aRepMatches as $aRepMatch)
583                         {
584                                 if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
585                                 $aPrevRepMatch = $aRepMatch;
586                         }
587                         if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
588
589                         $sRepURL .= $aRepMatch[1];
590                         $sRep = file_get_contents($sRepURL."?C=M;O=D");
591                         preg_match_all('#<a href="[0-9]{3}/">([0-9]{3}/)</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
592                         $aPrevRepMatch = false;
593                         foreach($aRepMatches as $aRepMatch)
594                         {
595                                 if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
596                                 $aPrevRepMatch = $aRepMatch;
597                         }
598                         if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
599
600                         $sRepURL .= $aRepMatch[1];
601                         $sRep = file_get_contents($sRepURL."?C=M;O=D");
602                         preg_match_all('#<a href="[0-9]{3}.state.txt">([0-9]{3}).state.txt</a>.*(([0-9]{2})-([A-z]{3})-([0-9]{4}) ([0-9]{2}):([0-9]{2}))#', $sRep, $aRepMatches, PREG_SET_ORDER);
603                         $aPrevRepMatch = false;
604                         foreach($aRepMatches as $aRepMatch)
605                         {
606                                 if (strtotime($aRepMatch[2]) < $iLastNodeTimestamp) break;
607                                 $aPrevRepMatch = $aRepMatch;
608                         }
609                         if ($aPrevRepMatch) $aRepMatch = $aPrevRepMatch;
610
611                         $sRepURL .= $aRepMatch[1].'.state.txt';
612                         echo "Getting state file: $sRepURL\n";
613                         $sStateFile = file_get_contents($sRepURL);
614                         if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
615                         file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
616                         echo "Updating DB status\n";
617                         pg_query($oDB->connection, 'TRUNCATE import_status');
618                         $sSQL = "INSERT INTO import_status VALUES('".$aRepMatch[2]."')";
619                         pg_query($oDB->connection, $sSQL);
620                 }
621         }
622
// Index step: runs the external nominatim indexer in three passes
// (flags `-R 4`, then `-r 5 -R 25`, then `-r 26` -- presumably lower/upper
// rank bounds; confirm against the indexer's option list) and issues an SQL
// ANALYSE between passes unless --index-noanalyse was given.
// $aDSNInfo and $iInstances are defined outside this block.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    // The output file is a user-supplied path: quote it so that spaces or
    // shell metacharacters cannot split or alter the command line.
    $sOutputFile = '';
    if (isset($aCMDResult['index-output']))
    {
        $sOutputFile = ' -F '.escapeshellarg($aCMDResult['index-output']);
    }

    // The binary path and database name are quoted for the same reason.
    $sBaseCmd = escapeshellarg(CONST_BasePath.'/nominatim/nominatim').' -i'
        .' -d '.escapeshellarg($aDSNInfo['database'])
        .' -P '.$aDSNInfo['port']
        .' -t '.$iInstances
        .$sOutputFile;

    passthruCheckReturn($sBaseCmd.' -R 4');
    if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
    passthruCheckReturn($sBaseCmd.' -r 5 -R 25');
    if (!$aCMDResult['index-noanalyse']) pgsqlRunScript('ANALYSE');
    passthruCheckReturn($sBaseCmd.' -r 26');
}
635
// Search index step: expands every "-- start" ... "-- end" section of
// sql/indices.src.sql once per partition (replacing the "-partition-"
// placeholder with the partition value) and runs the resulting SQL.
if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
{
    echo "Search indices\n";
    $bDidSomething = true;
    $oDB =& getDB();
    $sSQL = 'select distinct partition from country_name';
    $aPartitions = $oDB->getCol($sSQL);
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    // Partition 0 is added to the list unless --no-partitions was requested.
    if (!$aCMDResult['no-partitions']) $aPartitions[] = 0;

    // Stop here when the template cannot be read; otherwise an empty script
    // would be sent to psql and the step would appear to succeed.
    $sTemplateFile = CONST_BasePath.'/sql/indices.src.sql';
    $sTemplate = file_get_contents($sTemplateFile);
    if ($sTemplate === false)
    {
        fail('unable to read '.$sTemplateFile);
    }

    preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
    foreach($aMatches as $aMatch)
    {
        // Build one copy of the section body per partition ...
        $sResult = '';
        foreach($aPartitions as $sPartitionName)
        {
            $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
        }
        // ... and substitute it for the whole marked section.
        $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
    }

    pgsqlRunScript($sTemplate);
}
663
// Website step: symlinks the website scripts and asset directories from
// CONST_BasePath/website into an existing target directory, then probes
// CONST_Website_BaseURL to check that the site is reachable.
if (isset($aCMDResult['create-website']))
{
    $bDidSomething = true;
    $sTargetDir = $aCMDResult['create-website'];
    if (!is_dir($sTargetDir))
    {
        echo "You must create the website directory before calling this function.\n";
        fail("Target directory does not exist.");
    }

    // Link name inside the target directory => entry below CONST_BasePath/website.
    // index.php deliberately points at search.php.
    $aWebsiteLinks = array(
        'details.php' => 'details.php',
        'reverse.php' => 'reverse.php',
        'search.php' => 'search.php',
        'index.php' => 'search.php',
        'deletable.php' => 'deletable.php',
        'polygons.php' => 'polygons.php',
        'status.php' => 'status.php',
        'images' => 'images',
        'js' => 'js',
        'css' => 'css'
    );
    foreach ($aWebsiteLinks as $sLinkName => $sWebsiteEntry)
    {
        // Failures (e.g. a link that already exists) are suppressed.
        @symlink(CONST_BasePath.'/website/'.$sWebsiteEntry, $sTargetDir.'/'.$sLinkName);
    }
    echo "Symlinks created\n";

    // Fetch a known static file through the configured base URL as a reachability probe.
    if (!@file_get_contents(CONST_Website_BaseURL.'js/tiles.js'))
    {
        echo "\nWARNING: Unable to access the website at ".CONST_Website_BaseURL."\n";
        echo "You may want to update settings/local.php with @define('CONST_Website_BaseURL', 'http://[HOST]/[PATH]/');\n";
    }
}
693
// Final report: confirm completion when at least one step ran, otherwise
// show the command line usage.
if ($bDidSomething)
{
    echo "Setup finished.\n";
}
else
{
    showUsage($aCMDOptions, true);
}
702
/**
 * Feed an SQL script file to psql, relaying psql's standard output.
 *
 * Files whose name ends in ".gz" are decompressed with zcat and piped into
 * psql's stdin; any other file is handed to psql via -f. The standard error
 * of both child processes is discarded (/dev/null). Aborts through fail()
 * when the file is missing, a process cannot be started, psql exits with a
 * status above 0, or zcat reports an error.
 *
 * @param string $sFilename Path of the SQL script (optionally gzip-compressed).
 */
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    // Quote the database name so it always reaches psql as a single argument.
    $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.escapeshellarg($aDSNInfo['database']);

    $ahGzipPipes = null;
    $hGzipProcess = null;
    if (preg_match('/\\.gz$/', $sFilename))
    {
        $aDescriptors = array(
            0 => array('pipe', 'r'),
            1 => array('pipe', 'w'),
            2 => array('file', '/dev/null', 'a')
        );
        // Quote the path: file names with spaces or shell metacharacters
        // would otherwise break (or alter) the command.
        $hGzipProcess = proc_open('zcat '.escapeshellarg($sFilename), $aDescriptors, $ahGzipPipes);
        if (!is_resource($hGzipProcess)) fail('unable to start zcat');
        // zcat's stdout becomes psql's stdin; zcat itself needs no input.
        $aReadPipe = $ahGzipPipes[1];
        fclose($ahGzipPipes[0]);
    }
    else
    {
        $sCMD .= ' -f '.escapeshellarg($sFilename);
        $aReadPipe = array('pipe', 'r');
    }

    $aDescriptors = array(
        0 => $aReadPipe,
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // In the -f case a stdin pipe was created but nothing is ever written to
    // it; close it so it is not leaked and psql sees EOF if it reads stdin.
    // (In the gzip case descriptor 0 is an existing stream, so no entry exists.)
    if (isset($ahPipes[0])) fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    $iReturn = proc_close($hProcess);

    $iGzipReturn = 0;
    if ($ahGzipPipes)
    {
        fclose($ahGzipPipes[1]);
        $iGzipReturn = proc_close($hGzipProcess);
    }

    if ($iReturn > 0)
    {
        fail("pgsql returned with error code ($iReturn)");
    }
    // gzip documents exit status 1 as "error" and 2 as "warning"; only a real
    // error means psql may have received truncated input.
    if ($iGzipReturn == 1)
    {
        fail("zcat returned with error code ($iGzipReturn)");
    }
}
760
/**
 * Execute an SQL script given as a string by writing it to psql's stdin.
 *
 * psql's stdout and stderr are connected to this process' STDOUT/STDERR.
 * Unless the global command line option 'ignore-errors' is set, psql is run
 * with ON_ERROR_STOP=1. Aborts through fail() when psql cannot be started,
 * exits with a status above 0, or did not accept the complete script.
 *
 * @param string $sScript SQL text to execute.
 */
function pgsqlRunScript($sScript)
{
    global $aCMDResult;
    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    // Quote the database name so it always reaches psql as a single argument.
    $sCMD = 'psql -p '.$aDSNInfo['port'].' -d '.escapeshellarg($aDSNInfo['database']);
    if (!$aCMDResult['ignore-errors'])
        $sCMD .= ' -v ON_ERROR_STOP=1';
    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => STDOUT,
        2 => STDERR
    );
    $ahPipes = null;
    $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // fwrite() may accept only part of the data, so keep writing the
    // remainder. Remember a failed write instead of silently dropping the
    // rest of the script.
    $bWriteFailed = false;
    while(strlen($sScript))
    {
        $written = fwrite($ahPipes[0], $sScript);
        if ($written === false || $written <= 0)
        {
            $bWriteFailed = true;
            break;
        }
        $sScript = substr($sScript, $written);
    }
    fclose($ahPipes[0]);
    $iReturn = proc_close($hProcess);
    if ($iReturn > 0)
    {
        fail("pgsql returned with error code ($iReturn)");
    }
    if ($bWriteFailed)
    {
        fail('unable to send the complete script to pgsql');
    }
}
792
/**
 * Load the data of a custom-format dump into the database
 * (pg_restore -Fc -a), relaying pg_restore's standard output.
 *
 * pg_restore's standard error is discarded (/dev/null) and its exit status
 * is not evaluated. Aborts through fail() only when the process cannot be
 * started.
 *
 * @param string $sDumpFile Path of the dump file to restore.
 */
function pgsqlRunRestoreData($sDumpFile)
{
    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    // Quote database name and dump path so that spaces or shell
    // metacharacters cannot split or alter the command line.
    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.escapeshellarg($aDSNInfo['database'])
        .' -Fc -a '.escapeshellarg($sDumpFile);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // pg_restore reads the dump from the file argument, not from stdin.
    fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    // Exit status is deliberately left unevaluated here (see TODO above).
    proc_close($hProcess);
}
820
/**
 * Restore a custom-format dump with pg_restore -Fc --clean, relaying
 * pg_restore's standard output.
 *
 * pg_restore's standard error is discarded (/dev/null) and its exit status
 * is not evaluated. Aborts through fail() only when the process cannot be
 * started.
 *
 * @param string $sDumpFile Path of the dump file to restore.
 */
function pgsqlRunDropAndRestore($sDumpFile)
{
    // Convert database DSN to psql parameters
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
    // Quote database name and dump path so that spaces or shell
    // metacharacters cannot split or alter the command line.
    $sCMD = 'pg_restore -p '.$aDSNInfo['port'].' -d '.escapeshellarg($aDSNInfo['database'])
        .' -Fc --clean '.escapeshellarg($sDumpFile);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pg_restore');

    // pg_restore reads the dump from the file argument, not from stdin.
    fclose($ahPipes[0]);

    // TODO: error checking
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    // Exit status is deliberately left unevaluated here (see TODO above).
    proc_close($hProcess);
}
848
/**
 * Run an external command with passthru() and report through fail() unless
 * the command finished with exit status 0.
 *
 * @param string $cmd Complete shell command line to execute.
 */
function passthruCheckReturn($cmd)
{
    // Start from a non-zero sentinel so a command that never reports a
    // status is treated as failed.
    $iExitStatus = -1;
    passthru($cmd, $iExitStatus);
    if ($iExitStatus == 0)
    {
        return;
    }
    fail('Error executing external command: '.$cmd);
}