]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
update place_id to BIGINT
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
7         $aCMDOptions = array(
8                 "Create and setup nominatim search system",
9                 array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
10                 array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
11                 array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
12
13                 array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
14                 array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
15
16                 array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
17
18                 array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
19                 array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
20                 array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
21                 array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
22                 array('create-minimal-tables', '', 0, 1, 0, 0, 'bool', 'Create minimal main tables'),
23                 array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
24                 array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
25                 array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
26                 array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
27                 array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
28                 array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
29                 array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
30                 array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
31                 array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
32                 array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
33                 array('create-website', '', 0, 1, 1, 1, 'realpath', 'Create symlinks to setup web directory'),
34         );
35         getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
36
37         $bDidSomething = false;
38
39         // This is a pretty hard core defult - the number of processors in the box - 1
40         $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
41         if ($iInstances < 1)
42         {
43                 $iInstances = 1;
44                 echo "WARNING: resetting threads to $iInstances\n";
45         }
46         if ($iInstances > getProcessorCount())
47         {
48                 $iInstances = getProcessorCount();
49                 echo "WARNING: resetting threads to $iInstances\n";
50         }
51         if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
52         {
53                 $sBaseFile = basename($aCMDResult['osm-file']);
54                 if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
55                 {
56                         $iTime = mktime(0, 0, 0, $aMatch[2], $aMatch[3], '20'.$aMatch[1]);
57                         $iTime -= (60*60*24);
58                         $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
59                 }
60         }
61         $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
62         if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
63
64         if ($aCMDResult['create-db'] || $aCMDResult['all'])
65         {
66                 echo "Create DB\n";
67                 $bDidSomething = true;
68                 $oDB =& DB::connect(CONST_Database_DSN, false);
69                 if (!PEAR::isError($oDB))
70                 {
71                         fail('database already exists ('.CONST_Database_DSN.')');
72                 }
73                 passthru('createdb '.$aDSNInfo['database']);
74         }
75
76         if ($aCMDResult['create-db'] || $aCMDResult['all'])
77         {
78                 echo "Create DB (2)\n";
79                 $bDidSomething = true;
80                 // TODO: path detection, detection memory, etc.
81
82                 $oDB =& getDB();
83                 passthru('createlang plpgsql '.$aDSNInfo['database']);
84                 pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/_int.sql');
85                 pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
86                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
87                 pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
88                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
89                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
90                 pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
91                 pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
92                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
93                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
94                 pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
95                 pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
96         }
97
98         if ($aCMDResult['import-data'] || $aCMDResult['all'])
99         {
100                 echo "Import\n";
101                 $bDidSomething = true;
102
103                 $osm2pgsql = CONST_BasePath.'/osm2pgsql/osm2pgsql';
104                 if (!file_exists($osm2pgsql)) $osm2pgsql = trim(`which osm2pgsql`);
105                 if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");
106                 passthru($osm2pgsql.' -lsc -O gazetteer -C 10000 --hstore -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']);
107
108                 $oDB =& getDB();
109                 $x = $oDB->getRow('select * from place limit 1');
110                 if (!$x || PEAR::isError($x)) fail('No Data');
111         }
112
113         if ($aCMDResult['create-functions'] || $aCMDResult['all'])
114         {
115                 echo "Functions\n";
116                 $bDidSomething = true;
117                 if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
118                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
119                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
120                 pgsqlRunScript($sTemplate);
121         }
122
123         if ($aCMDResult['create-minimal-tables'])
124         {
125                 echo "Minimal Tables\n";
126                 $bDidSomething = true;
127                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables-minimal.sql');
128
129                 $sScript = '';
130
131                 // Backstop the import process - easliest possible import id
132                 $sScript .= "insert into import_npi_log values (18022);\n";
133
134                 $hFile = @fopen(CONST_BasePath.'/settings/partitionedtags.def', "r");
135                 if (!$hFile) fail('unable to open list of partitions: '.CONST_BasePath.'/settings/partitionedtags.def');
136
137                 while (($sLine = fgets($hFile, 4096)) !== false && $sLine && substr($sLine,0,1) !='#')
138                 {
139                         list($sClass, $sType) = explode(' ', trim($sLine));
140                         $sScript .= "create table place_classtype_".$sClass."_".$sType." as ";
141                         $sScript .= "select place_id as place_id,geometry as centroid from placex limit 0;\n";
142
143                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_centroid ";
144                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING GIST (centroid);\n";
145
146                         $sScript .= "CREATE INDEX idx_place_classtype_".$sClass."_".$sType."_place_id ";
147                         $sScript .= "ON place_classtype_".$sClass."_".$sType." USING btree(place_id);\n";
148                 }
149                 fclose($hFile);
150                 pgsqlRunScript($sScript);
151         }
152
153         if ($aCMDResult['create-tables'] || $aCMDResult['all'])
154         {
155                 echo "Tables\n";
156                 $bDidSomething = true;
157                 pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
158
159                 // re-run the functions
160                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
161                 $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
162                 pgsqlRunScript($sTemplate);
163         }
164
165         if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
166         {
167                 echo "Partitions\n";
168                 $bDidSomething = true;
169                 $oDB =& getDB();
170                 $sSQL = 'select partition from country_name order by country_code';
171                 $aPartitions = $oDB->getCol($sSQL);
172                 if (PEAR::isError($aPartitions))
173                 {
174                         fail($aPartitions->getMessage());
175                 }
176                 $aPartitions[] = 0;
177
178                 $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
179                 preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
180                 foreach($aMatches as $aMatch)
181                 {
182                         $sResult = '';
183                         foreach($aPartitions as $sPartitionName)
184                         {
185                                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
186                         }
187                         $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
188                 }
189
190                 pgsqlRunScript($sTemplate);
191         }
192
193         if ($aCMDResult['load-data'] || $aCMDResult['all'])
194         {
195                 echo "Load Data\n";
196                 $bDidSomething = true;
197
198                 $oDB =& getDB();
199                 if (!pg_query($oDB->connection, 'TRUNCATE word')) fail(pg_last_error($oDB->connection));
200                 echo '.';
201                 if (!pg_query($oDB->connection, 'TRUNCATE placex')) fail(pg_last_error($oDB->connection));
202                 echo '.';
203                 if (!pg_query($oDB->connection, 'TRUNCATE place_addressline')) fail(pg_last_error($oDB->connection));
204                 echo '.';
205                 if (!pg_query($oDB->connection, 'TRUNCATE place_boundingbox')) fail(pg_last_error($oDB->connection));
206                 echo '.';
207                 if (!pg_query($oDB->connection, 'TRUNCATE location_area')) fail(pg_last_error($oDB->connection));
208                 echo '.';
209                 if (!pg_query($oDB->connection, 'TRUNCATE search_name')) fail(pg_last_error($oDB->connection));
210                 echo '.';
211                 if (!pg_query($oDB->connection, 'TRUNCATE search_name_blank')) fail(pg_last_error($oDB->connection));
212                 echo '.';
213                 if (!pg_query($oDB->connection, 'DROP SEQUENCE seq_place')) fail(pg_last_error($oDB->connection));
214                 echo '.';
215                 if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
216                 echo '.';
217
218                 $aDBInstances = array();
219                 for($i = 0; $i < $iInstances; $i++)
220                 {
221                         $aDBInstances[$i] =& getDB(true);
222                         $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
223                         $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
224                         $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
225                         if ($aCMDResult['verbose']) echo "$sSQL\n";
226                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
227                 }
228                 $bAnyBusy = true;
229                 while($bAnyBusy)
230                 {
231                         $bAnyBusy = false;
232                         for($i = 0; $i < $iInstances; $i++)
233                         {
234                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
235                         }
236                         sleep(1);
237                         echo '.';
238                 }
239                 echo "\n";
240         }
241
242         if ($aCMDResult['create-roads'])
243         {
244                 $bDidSomething = true;
245
246                 $oDB =& getDB();
247                 $aDBInstances = array();
248                 for($i = 0; $i < $iInstances; $i++)
249                 {
250                         $aDBInstances[$i] =& getDB(true);
251                         if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
252                         $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
253                         $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
254                         if ($aCMDResult['verbose']) echo "$sSQL\n";
255                         if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
256                 }
257                 $bAnyBusy = true;
258                 while($bAnyBusy)
259                 {
260                         $bAnyBusy = false;
261                         for($i = 0; $i < $iInstances; $i++)
262                         {
263                                 if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
264                         }
265                         sleep(1);
266                         echo '.';
267                 }
268                 echo "\n";
269         }
270
271         if ($aCMDResult['import-tiger-data'])
272         {
273                 $bDidSomething = true;
274
275                 $aDBInstances = array();
276                 for($i = 0; $i < $iInstances; $i++)
277                 {
278                         $aDBInstances[$i] =& getDB(true);
279                 }
280
281                 foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
282                 {
283                         echo $sFile.': ';
284                         $hFile = fopen($sFile, "r");
285                         $sSQL = fgets($hFile, 100000);
286                         $iLines = 0;
287
288                         while(true)
289                         {
290                                 for($i = 0; $i < $iInstances; $i++)
291                                 {
292                                         if (!pg_connection_busy($aDBInstances[$i]->connection))
293                                         {
294                                                 while(pg_get_result($aDBInstances[$i]->connection));
295                                                 $sSQL = fgets($hFile, 100000);
296                                                 if (!$sSQL) break 2;
297                                                 if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
298                                                 $iLines++;
299                                                 if ($iLines == 1000)
300                                                 {
301                                                         echo ".";
302                                                         $iLines = 0;
303                                                 }
304                                         }
305                                 }
306                                 usleep(10);
307                         }
308
309                         fclose($hFile);
310         
311                         $bAnyBusy = true;
312                         while($bAnyBusy)
313                         {
314                                 $bAnyBusy = false;
315                                 for($i = 0; $i < $iInstances; $i++)
316                                 {
317                                         if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
318                                 }
319                                 usleep(10);
320                         }
321                         echo "\n";
322                 }
323         }
324
325         if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
326         {
327                 $bDidSomething = true;
328                 $oDB =& getDB();
329                 if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
330                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
331                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
332                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
333                 $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
334                 $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
335                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
336
337                 $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
338                 $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
339                 $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
340                 if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
341         }
342
343         if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
344         {
345                 $bDidSomething = true;
346
347                 if (!file_exists(CONST_BasePath.'/osmosis-0.38/bin/osmosis')) fail("please download osmosis");
348                 if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n";
349                 else passthru(CONST_BasePath.'/osmosis-0.38/bin/osmosis --read-replication-interval-init '.CONST_BasePath.'/settings');
350
351                 $sDate = $aCMDResult['osmosis-init-date'];
352                 $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
353                 echo "Getting state file: $sURL\n";
354                 $sStateFile = file_get_contents($sURL);
355                 if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
356                 file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
357         }
358
359         if ($aCMDResult['index'] || $aCMDResult['all'])
360         {
361                 $bDidSomething = true;
362                 $sOutputFile = '';
363                 if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
364                 passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$iInstances.$sOutputFile);
365         }
366
367         if (isset($aCMDResult['create-website']))
368         {
369                 $bDidSomething = true;
370                 $sTargetDir = $aCMDResult['create-website'];
371                 if (!is_dir($sTargetDir)) fail('please specify a directory to setup');
372                 @symlink(CONST_BasePath.'/website/details.php', $sTargetDir.'/details.php');
373                 @symlink(CONST_BasePath.'/website/reverse.php', $sTargetDir.'/reverse.php');
374                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/search.php');
375                 @symlink(CONST_BasePath.'/website/search.php', $sTargetDir.'/index.php');
376                 @symlink(CONST_BasePath.'/website/images', $sTargetDir.'/images');
377                 @symlink(CONST_BasePath.'/website/js', $sTargetDir.'/js');
378                 echo "Symlinks created\n";
379         }
380
381         if (!$bDidSomething)
382         {
383                 showUsage($aCMDOptions, true);
384         }
385
386         function pgsqlRunScriptFile($sFilename)
387         {
388                 if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
389
390                 // Convert database DSN to psql paramaters
391                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
392                 $sCMD = 'psql -f '.$sFilename.' '.$aDSNInfo['database'];
393
394                 $aDescriptors = array(
395                         0 => array('pipe', 'r'),
396                         1 => array('pipe', 'w'),
397                         2 => array('file', '/dev/null', 'a')
398                 );
399                 $ahPipes = null;
400                 $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
401                 if (!is_resource($hProcess)) fail('unable to start pgsql');
402
403                 fclose($ahPipes[0]);
404
405                 // TODO: error checking
406                 while(!feof($ahPipes[1]))
407                 {
408                         echo fread($ahPipes[1], 4096);
409                 }
410                 fclose($ahPipes[1]);
411
412                 proc_close($hProcess);
413         }
414
415         function pgsqlRunScript($sScript)
416         {
417                 // Convert database DSN to psql paramaters
418                 $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
419                 if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
420                 $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
421                 $aDescriptors = array(
422                         0 => array('pipe', 'r'),
423                         1 => STDOUT, 
424                         2 => STDERR
425                 );
426                 $ahPipes = null;
427                 $hProcess = @proc_open($sCMD, $aDescriptors, $ahPipes);
428                 if (!is_resource($hProcess)) fail('unable to start pgsql');
429
430                 while(strlen($sScript))
431                 {
432                         $written = fwrite($ahPipes[0], $sScript);
433                         $sScript = substr($sScript, $written);
434                 }
435                 fclose($ahPipes[0]);
436                 proc_close($hProcess);
437         }