]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
extend scripts for Tiger 2011 data
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
22                 array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
23                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
24                 array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
25                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
26                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
27                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
28                 array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
29                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
30                 array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
31                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
32                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
33                 array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
34                 array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
35         );
36         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
37
38         $bDidSomething = false;
39
40         // This is a pretty hard core defult - the number of processors in the box - 1
41         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
42         if ($iInstances < 1)
43         {
44                 $iInstances = 1;
45                 echo "WARNING: resetting threads to $iInstances\n";
46         }
47         if ($iInstances > getProcessorCount())
48         {
49                 $iInstances = getProcessorCount();
50                 echo "WARNING: resetting threads to $iInstances\n";
51         }
52         if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
53         {
54                 $sBaseFile = basename($aCMDResult['osm-file']);
55                 if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
56                 {
57                         $iTime = mktime(0, 0, 0, $aMatch[2], $aMatch[3], '20'.$aMatch[1]);
58                         $iTime -= (60*60*24);
59                         $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
60                 }
61         }
62         $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
63         if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
64
65         if ($aCMDResult['create-db'] || $aCMDResult['all'])
66         {
67                 echo "Create DB\n";
68                 $bDidSomething = true;
69                 $oDB =& DB::connect(CONST_Database_DSN, false);
70                 if (!PEAR::isError($oDB))
71                 {
72                         fail('database already exists ('.CONST_Database_DSN.')');
73                 }
74                 passthru('createdb '.$aDSNInfo['database']);
75         }
76
77         if ($aCMDResult['create-db'] || $aCMDResult['all'])
78         {
79                 echo "Create DB (2)\n";
80                 $bDidSomething = true;
81                 // TODO: path detection, detection memory, etc.
82
83                 $oDB =& getDB();
84                 passthru('createlang plpgsql '.$aDSNInfo['database']);
85         $pgver = (float) CONST_Postgresql_Version;
86                 if ($pgver < 9.1) {
87                         pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
88                 } else {
89                         pgsqlRunScript('CREATE EXTENSION hstore');
90                 }
91                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
92                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
93                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
94                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
95                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
96                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
97                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
98                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
99                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
100                 pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
101         }
102
103         if ($aCMDResult['import-data'] || $aCMDResult['all'])
104         {
105                 echo "Import\n";
106                 $bDidSomething = true;
107
108                 $osm2pgsql = CONST_Osm2pgsql_Binary;
109                 if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");
110                 passthru($osm2pgsql.' -lsc -O gazetteer -C 12000 --hstore -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']);
111
112                 $oDB =& getDB();
113                 $x = $oDB->getRow('select * from place limit 1');
114                 if (!$x || PEAR::isError($x)) fail('No Data');
115         }
116
117         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
118         {
119                 echo "Functions\n";
120                 $bDidSomething = true;
121                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
122                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
123                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
124                 pgsqlRunScript($sTemplate);
125         }
126
127         if ($aCMDResult['create-minimal-tables'])
128         {
129                 echo "Minimal Tables\n";
130                 $bDidSomething = true;
131                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');
132
133                 $sScript = '';
134
135                 // Backstop the import process - easliest possible import id
136                 $sScript .= "insert into import_npi_log values (18022);\n";
137
138                 $hFile = @fopen(CONST_BasePath.'/settings/partitionedtags.def', "r");
139                 if (!$hFile) fail('unable to open list of partitions: '.CONST_BasePath.'/settings/partitionedtags.def');
140
141                 while (($sLine = fgets($hFile, 4096)) !== false && $sLine && substr($sLine,0,1) !='#')
142                 {
143                         list($sClass, $sType) = explode(' ', trim($sLine));
144                         $sScript .= "create table place_classtype_".$sClass."_".$sType." as ";
145                         $sScript .= "select place_id as place_id,geometry as centroid from placex limit 0;\n";
146
147                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_centroid ";
148                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING GIST (centroid);\n";
149
150                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_place_id ";
151                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING btree(place_id);\n";
152                 }
153                 fclose($hFile);
154                 pgsqlRunScript($sScript);
155         }
156
157         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
158         {
159                 echo "Tables\n";
160                 $bDidSomething = true;
161                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
162
163                 // re-run the functions
164                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
165                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
166                 pgsqlRunScript($sTemplate);
167         }
168
169         if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
170         {
171                 echo "Partitions\n";
172                 $bDidSomething = true;
173                 $oDB =& getDB();
174                 $sSQL = 'select partition from country_name order by country_code';
175                 $aPartitions = $oDB->getCol($sSQL);
176                 if (PEAR::isError($aPartitions))
177                 {
178                         fail($aPartitions->getMessage());
179                 }
180                 $aPartitions[] = 0;
181
182                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
183                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
184                 foreach($aMatches as $aMatch)
185                 {
186                         $sResult = '';
187                         foreach($aPartitions as $sPartitionName)
188                         {
189                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
190                         }
191                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
192                 }
193
194                 pgsqlRunScript($sTemplate);
195         }
196
197         if ($aCMDResult['load-data'] || $aCMDResult['all'])
198         {
199                 echo "Load Data\n";
200                 $bDidSomething = true;
201
202                 $oDB =& getDB();
203                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
204                 echo '.';
205                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
206                 echo '.';
207                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
208                 echo '.';
209                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
210                 echo '.';
211                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
212                 echo '.';
213                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
214                 echo '.';
215                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
216                 echo '.';
217                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
218                 echo '.';
219                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
220                 echo '.';
221
222                 $aDBInstances = array();
223                 for($i = 0; $i < $iInstances; $i++)
224                 {
225                         $aDBInstances[$i] =& getDB(true);
226                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
227                         $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
228                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
229                         if ($aCMDResult['verbose']) echo "$sSQL\n";
230                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
231                 }
232                 $bAnyBusy = true;
233                 while($bAnyBusy)
234                 {
235                         $bAnyBusy = false;
236                         for($i = 0; $i < $iInstances; $i++)
237                         {
238                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
239                         }
240                         sleep(1);
241                         echo '.';
242                 }
243                 echo "\n";
244                 echo "Reanalysing database...\n";
245                 pgsqlRunScript('ANALYSE');
246         }
247
248         if ($aCMDResult['create-roads'])
249         {
250                 $bDidSomething = true;
251
252                 $oDB =& getDB();
253                 $aDBInstances = array();
254                 for($i = 0; $i < $iInstances; $i++)
255                 {
256                         $aDBInstances[$i] =& getDB(true);
257                         if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
258                         $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
259                         $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
260                         if ($aCMDResult['verbose']) echo "$sSQL\n";
261                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
262                 }
263                 $bAnyBusy = true;
264                 while($bAnyBusy)
265                 {
266                         $bAnyBusy = false;
267                         for($i = 0; $i < $iInstances; $i++)
268                         {
269                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
270                         }
271                         sleep(1);
272                         echo '.';
273                 }
274                 echo "\n";
275         }
276
277         if ($aCMDResult['import-tiger-data'])
278         {
279                 $bDidSomething = true;
280
281                 $aDBInstances = array();
282                 for($i = 0; $i < $iInstances; $i++)
283                 {
284                         $aDBInstances[$i] =& getDB(true);
285                 }
286
287                 foreach(glob(CONST_BasePath.'/data/tiger2011/*.sql') as $sFile)
288                 {
289                         echo $sFile.': ';
290                         $hFile = fopen($sFile, "r");
291                         $sSQL = fgets($hFile, 100000);
292                         $iLines = 0;
293
294                         while(true)
295                         {
296                                 for($i = 0; $i < $iInstances; $i++)
297                                 {
298                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
299                                         {
300                                                 while(pg_get_result($aDBInstances[$i]->connection));
301                                                 $sSQL = fgets($hFile, 100000);
302                                                 if (!$sSQL) break 2;
303                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
304                                                 $iLines++;
305                                                 if ($iLines == 1000)
306                                                 {
307                                                         echo ".";
308                                                         $iLines = 0;
309                                                 }
310                                         }
311                                 }
312                                 usleep(10);
313                         }
314
315                         fclose($hFile);
316         
317                         $bAnyBusy = true;
318                         while($bAnyBusy)
319                         {
320                                 $bAnyBusy = false;
321                                 for($i = 0; $i < $iInstances; $i++)
322                                 {
323                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
324                                 }
325                                 usleep(10);
326                         }
327                         echo "\n";
328                 }
329         }
330
331         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
332         {
333                 $bDidSomething = true;
334                 $oDB =& getDB();
335                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
336                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
337                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
338                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
339                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
340                 $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
341                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
342
343                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
344                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
345                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
346                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
347         }
348
349         if ($aCMDResult['osmosis-init'] && isset($aCMDResult['osmosis-init-date']))
350         {
351                 $bDidSomething = true;
352
353                 if (!file_exists(CONST_Osmosis_Binary)) fail("please download osmosis");
354                 if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n";
355                 else passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
356
357                 $sDate = $aCMDResult['osmosis-init-date'];
358                 $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
359                 echo "Getting state file: $sURL\n";
360                 $sStateFile = file_get_contents($sURL);
361                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
362                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
363         }
364
365         if ($aCMDResult['index'] || $aCMDResult['all'])
366         {
367                 $bDidSomething = true;
368                 $sOutputFile = '';
369                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
370                 $sBaseCmd = CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile;
371                 passthru($sBaseCmd.' -R 4');
372                 pgsqlRunScript('ANALYSE');
373                 passthru($sBaseCmd.' -r 5 -R 25');
374                 pgsqlRunScript('ANALYSE');
375                 passthru($sBaseCmd.' -r 26');
376         }
377
378         if ($aCMDResult['create-search-indices'] || $aCMDResult['all'])
379         {
380                 echo "Search indices\n";
381                 $bDidSomething = true;
382                 $oDB =& getDB();
383                 $sSQL = 'select partition from country_name order by country_code';
384                 $aPartitions = $oDB->getCol($sSQL);
385                 if (PEAR::isError($aPartitions))
386                 {
387                         fail($aPartitions->getMessage());
388                 }
389                 $aPartitions[] = 0;
390
391                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/indices.src.sql');
392                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
393                 foreach($aMatches as $aMatch)
394                 {
395                         $sResult = '';
396                         foreach($aPartitions as $sPartitionName)
397                         {
398                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
399                         }
400                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
401                 }
402
403                 pgsqlRunScript($sTemplate);
404         }
405
406         if (isset($aCMDResult['create-website']))
407         {
408                 $bDidSomething = true;
409                 $sTargetDir = $aCMDResult['create-website'];
410                 if (!is_dir($sTargetDir)) fail('please specify a directory to setup');
411                 @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php');
412                 @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php');
413                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php');
414                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/index.php');
415                 @symlink(CONST_BasePath.'/website/images', $sTargetDir.'/images');
416                 @symlink(CONST_BasePath.'/website/js', $sTargetDir.'/js');
417                 echo "Symlinks created\n";
418         }
419
420         if (!$bDidSomething)
421         {
422                 showUsage($aCMDOptions, true);
423         }
424
425         function pgsqlRunScriptFile($sFilename)
426         {
427                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
428
429                 // Convert database DSN to psql paramaters
430                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
431                 $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
432
433                 $aDescriptors = array(
434                         0 => array('pipe', 'r'),
435                         1 => array('pipe', 'w'),
436                         2 => array('file', '/dev/null', 'a')
437                 );
438                 $ahPipes = null;
439                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
440                 if (!is_resource($hProcess)) fail('unable to start pgsql');
441
442                 fclose($ahPipes[0]);
443
444                 // TODO: error checking
445                 while(!feof($ahPipes[1]))
446                 {
447                         echo fread($ahPipes[1], 4096);
448                 }
449                 fclose($ahPipes[1]);
450
451                 proc_close($hProcess);
452         }
453
454         function pgsqlRunScript($sScript)
455         {
456                 // Convert database DSN to psql paramaters
457                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
458                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
459                 $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
460                 $aDescriptors = array(
461                         0 => array('pipe', 'r'),
462                         1 => STDOUT, 
463                         2 => STDERR
464                 );
465                 $ahPipes = null;
466                 $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
467                 if (!is_resource($hProcess)) fail('unable to start pgsql');
468
469                 while(strlen($sScript))
470                 {
471                         $written = fwrite($ahPipes[0], $sScript);
472                         $sScript = substr($sScript, $written);
473                 }
474                 fclose($ahPipes[0]);
475                 proc_close($hProcess);
476         }