]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
improve import performance for special phrases
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
22                 array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
23                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
24                 array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
25                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
26                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
27                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
28                 array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
29                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
30                 array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
31                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
32                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
33                 array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
34         );
35         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
36
37         $bDidSomething = false;
38
39         // This is a pretty hard core defult - the number of processors in the box - 1
40         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
41         if ($iInstances < 1)
42         {
43                 $iInstances = 1;
44                 echo "WARNING: resetting threads to $iInstances\n";
45         }
46         if ($iInstances > getProcessorCount())
47         {
48                 $iInstances = getProcessorCount();
49                 echo "WARNING: resetting threads to $iInstances\n";
50         }
51         if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
52         {
53                 $sBaseFile = basename($aCMDResult['osm-file']);
54                 if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
55                 {
56                         $iTime = mktime(0, 0, 0, $aMatch[2], $aMatch[3], '20'.$aMatch[1]);
57                         $iTime -= (60*60*24);
58                         $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
59                 }
60         }
61         $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
62         if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
63
64         if ($aCMDResult['create-db'] || $aCMDResult['all'])
65         {
66                 echo "Create DB\n";
67                 $bDidSomething = true;
68                 $oDB =& DB::connect(CONST_Database_DSN, false);
69                 if (!PEAR::isError($oDB))
70                 {
71                         fail('database already exists ('.CONST_Database_DSN.')');
72                 }
73                 passthru('createdb '.$aDSNInfo['database']);
74         }
75
76         if ($aCMDResult['create-db'] || $aCMDResult['all'])
77         {
78                 echo "Create DB (2)\n";
79                 $bDidSomething = true;
80                 // TODO: path detection, detection memory, etc.
81
82                 $oDB =& getDB();
83                 passthru('createlang plpgsql '.$aDSNInfo['database']);
84         $pgver = (float) CONST_Postgresql_Version;
85                 if ($pgver < 9.1) {
86                         pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
87                 } else {
88                         pgsqlRunScript('CREATE EXTENSION hstore');
89                 }
90                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
91                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
92                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
93                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
94                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
95                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
96                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
97                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
98                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
99                 pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
100         }
101
102         if ($aCMDResult['import-data'] || $aCMDResult['all'])
103         {
104                 echo "Import\n";
105                 $bDidSomething = true;
106
107                 $osm2pgsql = CONST_Osm2pgsql_Binary;
108                 if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");
109                 passthru($osm2pgsql.' -lsc -O gazetteer -C 12000 --hstore -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']);
110         pgsqlRunScript('ANALYSE');
111
112                 $oDB =& getDB();
113                 $x = $oDB->getRow('select * from place limit 1');
114                 if (!$x || PEAR::isError($x)) fail('No Data');
115         }
116
117         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
118         {
119                 echo "Functions\n";
120                 $bDidSomething = true;
121                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
122                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
123                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
124                 pgsqlRunScript($sTemplate);
125         }
126
127         if ($aCMDResult['create-minimal-tables'])
128         {
129                 echo "Minimal Tables\n";
130                 $bDidSomething = true;
131                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');
132
133                 $sScript = '';
134
135                 // Backstop the import process - easliest possible import id
136                 $sScript .= "insert into import_npi_log values (18022);\n";
137
138                 $hFile = @fopen(CONST_BasePath.'/settings/partitionedtags.def', "r");
139                 if (!$hFile) fail('unable to open list of partitions: '.CONST_BasePath.'/settings/partitionedtags.def');
140
141                 while (($sLine = fgets($hFile, 4096)) !== false && $sLine && substr($sLine,0,1) !='#')
142                 {
143                         list($sClass, $sType) = explode(' ', trim($sLine));
144                         $sScript .= "create table place_classtype_".$sClass."_".$sType." as ";
145                         $sScript .= "select place_id as place_id,geometry as centroid from placex limit 0;\n";
146
147                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_centroid ";
148                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING GIST (centroid);\n";
149
150                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_place_id ";
151                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING btree(place_id);\n";
152                 }
153                 fclose($hFile);
154                 pgsqlRunScript($sScript);
155         }
156
157         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
158         {
159                 echo "Tables\n";
160                 $bDidSomething = true;
161                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
162
163                 // re-run the functions
164                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
165                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
166                 pgsqlRunScript($sTemplate);
167         }
168
169         if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
170         {
171                 echo "Partitions\n";
172                 $bDidSomething = true;
173                 $oDB =& getDB();
174                 $sSQL = 'select partition from country_name order by country_code';
175                 $aPartitions = $oDB->getCol($sSQL);
176                 if (PEAR::isError($aPartitions))
177                 {
178                         fail($aPartitions->getMessage());
179                 }
180                 $aPartitions[] = 0;
181
182                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
183                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
184                 foreach($aMatches as $aMatch)
185                 {
186                         $sResult = '';
187                         foreach($aPartitions as $sPartitionName)
188                         {
189                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
190                         }
191                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
192                 }
193
194                 pgsqlRunScript($sTemplate);
195         }
196
197         if ($aCMDResult['load-data'] || $aCMDResult['all'])
198         {
199                 echo "Load Data\n";
200                 $bDidSomething = true;
201
202                 $oDB =& getDB();
203                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
204                 echo '.';
205                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
206                 echo '.';
207                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
208                 echo '.';
209                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
210                 echo '.';
211                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
212                 echo '.';
213                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
214                 echo '.';
215                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
216                 echo '.';
217                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
218                 echo '.';
219                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
220                 echo '.';
221
222                 $aDBInstances = array();
223                 for($i = 0; $i < $iInstances; $i++)
224                 {
225                         $aDBInstances[$i] =& getDB(true);
226                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
227                         $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
228                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
229                         if ($aCMDResult['verbose']) echo "$sSQL\n";
230                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
231                 }
232                 $bAnyBusy = true;
233                 while($bAnyBusy)
234                 {
235                         $bAnyBusy = false;
236                         for($i = 0; $i < $iInstances; $i++)
237                         {
238                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
239                         }
240                         sleep(1);
241                         echo '.';
242                 }
243                 echo "\n";
244         }
245
246         if ($aCMDResult['create-roads'])
247         {
248                 $bDidSomething = true;
249
250                 $oDB =& getDB();
251                 $aDBInstances = array();
252                 for($i = 0; $i < $iInstances; $i++)
253                 {
254                         $aDBInstances[$i] =& getDB(true);
255                         if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
256                         $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
257                         $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
258                         if ($aCMDResult['verbose']) echo "$sSQL\n";
259                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
260                 }
261                 $bAnyBusy = true;
262                 while($bAnyBusy)
263                 {
264                         $bAnyBusy = false;
265                         for($i = 0; $i < $iInstances; $i++)
266                         {
267                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
268                         }
269                         sleep(1);
270                         echo '.';
271                 }
272                 echo "\n";
273         }
274
275         if ($aCMDResult['import-tiger-data'])
276         {
277                 $bDidSomething = true;
278
279                 $aDBInstances = array();
280                 for($i = 0; $i < $iInstances; $i++)
281                 {
282                         $aDBInstances[$i] =& getDB(true);
283                 }
284
285                 foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
286                 {
287                         echo $sFile.': ';
288                         $hFile = fopen($sFile, "r");
289                         $sSQL = fgets($hFile, 100000);
290                         $iLines = 0;
291
292                         while(true)
293                         {
294                                 for($i = 0; $i < $iInstances; $i++)
295                                 {
296                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
297                                         {
298                                                 while(pg_get_result($aDBInstances[$i]->connection));
299                                                 $sSQL = fgets($hFile, 100000);
300                                                 if (!$sSQL) break 2;
301                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
302                                                 $iLines++;
303                                                 if ($iLines == 1000)
304                                                 {
305                                                         echo ".";
306                                                         $iLines = 0;
307                                                 }
308                                         }
309                                 }
310                                 usleep(10);
311                         }
312
313                         fclose($hFile);
314         
315                         $bAnyBusy = true;
316                         while($bAnyBusy)
317                         {
318                                 $bAnyBusy = false;
319                                 for($i = 0; $i < $iInstances; $i++)
320                                 {
321                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
322                                 }
323                                 usleep(10);
324                         }
325                         echo "\n";
326                 }
327         }
328
329         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
330         {
331                 $bDidSomething = true;
332                 $oDB =& getDB();
333                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
334                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
335                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
336                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
337                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
338                 $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
339                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
340
341                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
342                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
343                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
344                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
345         }
346
347         if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
348         {
349                 $bDidSomething = true;
350
351                 if (!file_exists(CONST_Osmosis_Binary)) fail("please download osmosis");
352                 if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n";
353                 else passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_BasePath.'/settings');
354
355                 $sDate = $aCMDResult['osmosis-init-date'];
356                 $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
357                 echo "Getting state file: $sURL\n";
358                 $sStateFile = file_get_contents($sURL);
359                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
360                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
361         }
362
363         if ($aCMDResult['index'] || $aCMDResult['all'])
364         {
365                 $bDidSomething = true;
366                 $sOutputFile = '';
367                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
368                 passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile);
369         }
370
371         if (isset($aCMDResult['create-website']))
372         {
373                 $bDidSomething = true;
374                 $sTargetDir = $aCMDResult['create-website'];
375                 if (!is_dir($sTargetDir)) fail('please specify a directory to setup');
376                 @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php');
377                 @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php');
378                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php');
379                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/index.php');
380                 @symlink(CONST_BasePath.'/website/images', $sTargetDir.'/images');
381                 @symlink(CONST_BasePath.'/website/js', $sTargetDir.'/js');
382                 echo "Symlinks created\n";
383         }
384
385         if (!$bDidSomething)
386         {
387                 showUsage($aCMDOptions, true);
388         }
389
390         function pgsqlRunScriptFile($sFilename)
391         {
392                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
393
394                 // Convert database DSN to psql paramaters
395                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
396                 $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
397
398                 $aDescriptors = array(
399                         0 => array('pipe', 'r'),
400                         1 => array('pipe', 'w'),
401                         2 => array('file', '/dev/null', 'a')
402                 );
403                 $ahPipes = null;
404                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
405                 if (!is_resource($hProcess)) fail('unable to start pgsql');
406
407                 fclose($ahPipes[0]);
408
409                 // TODO: error checking
410                 while(!feof($ahPipes[1]))
411                 {
412                         echo fread($ahPipes[1], 4096);
413                 }
414                 fclose($ahPipes[1]);
415
416                 proc_close($hProcess);
417         }
418
419         function pgsqlRunScript($sScript)
420         {
421                 // Convert database DSN to psql paramaters
422                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
423                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
424                 $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
425                 $aDescriptors = array(
426                         0 => array('pipe', 'r'),
427                         1 => STDOUT, 
428                         2 => STDERR
429                 );
430                 $ahPipes = null;
431                 $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
432                 if (!is_resource($hProcess)) fail('unable to start pgsql');
433
434                 while(strlen($sScript))
435                 {
436                         $written = fwrite($ahPipes[0], $sScript);
437                         $sScript = substr($sScript, $written);
438                 }
439                 fclose($ahPipes[0]);
440                 proc_close($hProcess);
441         }