]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
aux properties, memory leaks, tweaks to search order
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
4         require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
5         ini_set('memory_limit', '800M');
6
// Command line definition handed to getCmdOpt(): the first entry is the
// usage banner, each following entry describes one option (long name,
// short name, ..., help text).
// NOTE(review): the meaning of the numeric columns and of the type column is
// defined by getCmdOpt(), which is not visible here - confirm before editing.
$aCMDOptions = array(
        "Create and setup nominatim search system",
        array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
        array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
        array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

        array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
        array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

        array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

        array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
        array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
        array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
        array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
        array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
        array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
        array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
        array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
        array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
        // The help text used to be a copy of the calculate-postcodes text.
        array('create-roads', '', 0, 1, 0, 0, 'bool', 'Insert road locations for highways (not included in \'all\')'),
        array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
        // The help text used to be a copy of the osmosis-init text.
        array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Date of the osmosis state file to fetch (e.g. 2011-01-31T22:00:00Z)'),
        array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
        array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
);
// Parse the script arguments; the parsed options end up in $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
34
// Set to true by every step that runs; if it is still false at the end of
// the script the usage text is shown instead.
$bDidSomething = false;

// Worker count: --threads when given, otherwise a fairly aggressive default
// of "number of processors in the box minus one".
if (isset($aCMDResult['threads']))
{
        $iInstances = $aCMDResult['threads'];
}
else
{
        $iInstances = getProcessorCount() - 1;
}

// Clamp to at least one worker ...
if ($iInstances < 1)
{
        $iInstances = 1;
        echo "WARNING: resetting threads to $iInstances\n";
}
// ... and to no more workers than there are processors.
if ($iInstances > getProcessorCount())
{
        $iInstances = getProcessorCount();
        echo "WARNING: resetting threads to $iInstances\n";
}

// When no explicit osmosis date was given, derive one from a file name of
// the form planet-YYMMDD.*: 22:00:00Z on the day before the planet date.
if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
{
        $sPlanetName = basename($aCMDResult['osm-file']);
        $aDateParts = array();
        if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sPlanetName, $aDateParts))
        {
                // $aDateParts: 1 = two digit year, 2 = month, 3 = day
                $iPlanetDay = mktime(0, 0, 0, $aDateParts[2], $aDateParts[3], '20'.$aDateParts[1]);
                $iDayBefore = $iPlanetDay - (60*60*24);
                $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iDayBefore).'T22:00:00Z';
        }
}
59
// Step: create the (empty) database.
if ($aCMDResult['create-db'] || $aCMDResult['all'])
{
        echo "Create DB\n";
        $bDidSomething = true;
        // A connection that succeeds means the database is already there,
        // which is treated as a fatal error.
        $oDB =& DB::connect(CONST_Database_DSN, false);
        if (!PEAR::isError($oDB))
        {
                fail('database already exists');
        }
        passthru('createdb nominatim');
}

// Step: load the language, extensions and static data into the database.
// --setup-db is declared in the option list but was never checked, so using
// it on its own only printed the usage text. It now triggers this step as
// well; --create-db and --all keep running it as before.
if ($aCMDResult['setup-db'] || $aCMDResult['create-db'] || $aCMDResult['all'])
{
        echo "Create DB (2)\n";
        $bDidSomething = true;
        // TODO: path detection, detection memory, etc.

        $oDB =& getDB();
        passthru('createlang plpgsql nominatim');

        // PostgreSQL contrib modules and PostGIS.
        pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/_int.sql');
        pgsqlRunScriptFile(CONST_Path_Postgresql_Contrib.'/hstore.sql');
        pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
        pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');

        // Static data shipped in the data/ directory.
        pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
        pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
}
93
// Step: import the OSM file into the database with osm2pgsql.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
        echo "Import\n";
        $bDidSomething = true;

        // Without a file osm2pgsql would be started with nothing to import.
        if (!isset($aCMDResult['osm-file'])) fail('please specify the file to import with --osm-file');

        $sOsm2pgsql = CONST_BasePath.'/osm2pgsql/osm2pgsql';
        if (!file_exists($sOsm2pgsql)) fail("please download and build osm2pgsql");
        // The file name comes from the command line: quote it so that spaces
        // or shell metacharacters in the path cannot break the command.
        passthru($sOsm2pgsql.' -lsc -O gazetteer -C 10000 --hstore -d nominatim '.escapeshellarg($aCMDResult['osm-file']));

        // Sanity check: the import must have produced at least one place row.
        $oDB =& getDB();
        $x = $oDB->getRow('select * from place limit 1');
        if (!$x || PEAR::isError($x)) fail('No Data');
}
106
// Step: install the SQL functions. sql/functions.sql is a template whose
// {modulepath} placeholder is replaced with the module directory.
if ($aCMDResult['create-functions'] || $aCMDResult['all'])
{
        echo "Functions\n";
        $bDidSomething = true;
        if (!file_exists(CONST_BasePath.'/module/nominatim.so'))
        {
                fail("nominatim module not built");
        }
        $sFunctionsSQL = str_replace(
                '{modulepath}',
                CONST_BasePath.'/module',
                file_get_contents(CONST_BasePath.'/sql/functions.sql')
        );
        pgsqlRunScript($sFunctionsSQL);
}

// Step: create the main tables, then install the functions once more.
if ($aCMDResult['create-tables'] || $aCMDResult['all'])
{
        echo "Tables\n";
        $bDidSomething = true;
        pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

        // re-run the functions
        $sFunctionsSQL = str_replace(
                '{modulepath}',
                CONST_BasePath.'/module',
                file_get_contents(CONST_BasePath.'/sql/functions.sql')
        );
        pgsqlRunScript($sFunctionsSQL);
}
128
// Step: create the partition tables and triggers by expanding the template
// sql/partitions.src.sql once per partition.
// (Leftover debug output - echo "here"/"there" and several var_dump()
// calls - has been removed from this step.)
if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
{
        echo "Partitions\n";
        $bDidSomething = true;

        $oDB =& getDB();
        $sSQL = 'select partition from country_name order by country_code';
        $aPartitions = $oDB->getCol($sSQL);
        if (PEAR::isError($aPartitions))
        {
                fail($aPartitions->getMessage());
        }
        // Partition 0 is always generated in addition to those listed in
        // country_name.
        $aPartitions[] = 0;

        // Every "-- start" ... "-- end" section of the template is repeated
        // for each partition with "-partition-" replaced by the partition.
        $sTemplate = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
        preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
        foreach($aMatches as $aMatch)
        {
                $sResult = '';
                foreach($aPartitions as $sPartitionName)
                {
                        $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
                }
                $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
        }

        pgsqlRunScript($sTemplate);
}
162
// Step: empty the live tables and copy the imported rows from place into
// placex, split over $iInstances parallel connections.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
        echo "Load Data\n";
        $bDidSomething = true;

        $oDB =& getDB();

        // Reset the live tables and the place sequence; one dot of progress
        // is printed per statement.
        $aResetStatements = array(
                'TRUNCATE word',
                'TRUNCATE placex',
                'TRUNCATE place_addressline',
                'TRUNCATE place_boundingbox',
                'TRUNCATE location_area',
                'TRUNCATE search_name',
                'TRUNCATE search_name_blank',
                'DROP SEQUENCE seq_place',
                'CREATE SEQUENCE seq_place start 100000',
        );
        foreach($aResetStatements as $sResetSQL)
        {
                if (!pg_query($oDB->connection, $sResetSQL)) fail(pg_last_error($oDB->connection));
                echo '.';
        }

        // Start one asynchronous insert per worker; worker $i handles the
        // rows with osm_id % $iInstances = $i.
        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
                $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
                $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
                $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
                if ($aCMDResult['verbose']) echo "$sSQL\n";
                // Report the error of the connection the query was sent on
                // (this used to read the error of the unrelated $oDB).
                if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
        }

        // Poll once per second until no worker connection is busy any more.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
                $bAnyBusy = false;
                for($i = 0; $i < $iInstances; $i++)
                {
                        if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                }
                sleep(1);
                echo '.';
        }
        echo "\n";
}
211
// Step (not part of --all): call insertLocationRoad() for every highway in
// placex with rank_search 26 or 27, split over $iInstances connections.
if ($aCMDResult['create-roads'])
{
        $bDidSomething = true;

        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
                // Errors are read from the worker connection the statement
                // ran on (they used to be read from a separate, unrelated
                // connection that served no other purpose in this step).
                if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($aDBInstances[$i]->connection));
                // Worker $i handles the rows with osm_id % $iInstances = $i.
                $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
                $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
                if ($aCMDResult['verbose']) echo "$sSQL\n";
                if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
        }

        // Poll once per second until no worker connection is busy any more.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
                $bAnyBusy = false;
                for($i = 0; $i < $iInstances; $i++)
                {
                        if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                }
                sleep(1);
                echo '.';
        }
        echo "\n";
}
240
// Step (not part of --all): feed the statements of every
// data/tiger2009/*.sql file, one line at a time, to whichever worker
// connection is currently idle.
if ($aCMDResult['import-tiger-data'])
{
        $bDidSomething = true;

        $aDBInstances = array();
        for($i = 0; $i < $iInstances; $i++)
        {
                $aDBInstances[$i] =& getDB(true);
        }

        foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
        {
                echo $sFile.': ';
                $hFile = fopen($sFile, "r");
                if (!$hFile) fail('unable to open '.$sFile);
                // NOTE(review): this first line is read but never sent - the
                // loop below reads the next line before its first
                // pg_send_query(). Presumably the first line of each file is
                // a header; confirm against the tiger data files.
                $sSQL = fgets($hFile, 100000);
                $iLines = 0;

                while(true)
                {
                        for($i = 0; $i < $iInstances; $i++)
                        {
                                if (!pg_connection_busy($aDBInstances[$i]->connection))
                                {
                                        // Drain pending results before reusing the connection.
                                        while(pg_get_result($aDBInstances[$i]->connection));
                                        $sSQL = fgets($hFile, 100000);
                                        if (!$sSQL) break 2;
                                        // $oDB is not assigned in this step, so the error has
                                        // to come from the worker connection itself.
                                        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($aDBInstances[$i]->connection));
                                        $iLines++;
                                        // One dot of progress per 1000 statements.
                                        if ($iLines == 1000)
                                        {
                                                echo ".";
                                                $iLines = 0;
                                        }
                                }
                        }
                        usleep(10);
                }

                fclose($hFile);

                // Wait for the statements still in flight before moving on
                // to the next file.
                $bAnyBusy = true;
                while($bAnyBusy)
                {
                        $bAnyBusy = false;
                        for($i = 0; $i < $iInstances; $i++)
                        {
                                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
                        }
                        usleep(10);
                }
                echo "\n";
        }
}
294
// Step: rebuild the artificial postcode rows (osm_type 'P') in placex.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
        // This flag was missing here, so running --calculate-postcodes on
        // its own did the work and then still printed the usage text.
        $bDidSomething = true;

        $oDB =& getDB();
        if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));

        // One row per (country_code, postcode), placed at the average of the
        // centroids of all placex rows carrying that postcode.
        $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
        $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
        $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
        $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
        $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
        if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));

        // Add the postcodes from the us_postcode table with country 'us'.
        $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
        $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
        $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
        if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
}
311
// Step: prepare the osmosis replication settings. Only runs when an
// osmosis date is known (given explicitly or derived from the planet file).
if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
{
        $bDidSomething = true;

        $sOsmosisBin = CONST_BasePath.'/osmosis-0.38/bin/osmosis';
        $sSettingsDir = CONST_BasePath.'/settings';

        if (!file_exists($sOsmosisBin))
        {
                fail("please download osmosis");
        }

        // Only let osmosis initialise the settings directory when there is
        // no configuration.txt in it yet.
        if (!file_exists($sSettingsDir.'/configuration.txt'))
        {
                passthru($sOsmosisBin.' --read-replication-interval-init '.$sSettingsDir);
        }
        else
        {
                echo "settings/configuration.txt already exists\n";
        }

        // Download the state file for the requested date; an empty answer or
        // one longer than 1000 bytes is rejected.
        $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$aCMDResult['osmosis-init-date'];
        echo "Getting state file: $sURL\n";
        $sStateFile = file_get_contents($sURL);
        $bStateFileUsable = $sStateFile && strlen($sStateFile) <= 1000;
        if (!$bStateFileUsable)
        {
                fail("unable to obtain state file");
        }
        file_put_contents($sSettingsDir.'/state.txt', $sStateFile);
}
327
// Step: run the external nominatim indexer with $iInstances threads.
if ($aCMDResult['index'] || $aCMDResult['all'])
{
        $bDidSomething = true;
        $sOutputFile = '';
        // The output file name comes from the command line: quote it so that
        // spaces or shell metacharacters cannot break the command.
        if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.escapeshellarg($aCMDResult['index-output']);
        passthru(CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances.$sOutputFile);
}

// None of the requested options selected a step: show the usage text.
if (!$bDidSomething)
{
        showUsage($aCMDOptions, true);
}
340
/**
 * Run an SQL script file with the psql command line client against the
 * database named in CONST_Database_DSN.
 *
 * The output of psql is echoed, its error output is discarded. Calls fail()
 * when the file does not exist or psql cannot be started.
 *
 * @param string $sFilename Path of the SQL file to execute.
 */
function pgsqlRunScriptFile($sFilename)
{
        if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

        // Convert database DSN to psql parameters. Both values are quoted so
        // that paths or names containing spaces or shell metacharacters
        // reach psql as a single argument each.
        $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
        $sCMD = 'psql -f '.escapeshellarg($sFilename).' '.escapeshellarg($aDSNInfo['database']);

        // stdin and stdout are pipes, stderr goes to /dev/null.
        $aDescriptors = array(
                0 => array('pipe', 'r'),
                1 => array('pipe', 'w'),
                2 => array('file', '/dev/null', 'a')
        );
        $ahPipes = null;
        $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
        if (!is_resource($hProcess)) fail('unable to start pgsql');

        // psql reads the script from the file, so nothing is sent on stdin.
        fclose($ahPipes[0]);

        // TODO: error checking
        while(!feof($ahPipes[1]))
        {
                echo fread($ahPipes[1], 4096);
        }
        fclose($ahPipes[1]);

        proc_close($hProcess);
}
369
/**
 * Run SQL text with the psql command line client against the database named
 * in CONST_Database_DSN.
 *
 * The script is written to the standard input of psql; the output of psql is
 * echoed, its error output is discarded. Calls fail() when psql cannot be
 * started.
 *
 * @param string $sScript SQL statements to execute.
 */
function pgsqlRunScript($sScript)
{
        // Convert database DSN to psql parameters. The database name is
        // quoted so that it reaches psql as a single argument.
        $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
        $sCMD = 'psql '.escapeshellarg($aDSNInfo['database']);

        // stdin and stdout are pipes, stderr goes to /dev/null.
        $aDescriptors = array(
                0 => array('pipe', 'r'),
                1 => array('pipe', 'w'),
                2 => array('file', '/dev/null', 'a')
        );
        $ahPipes = null;
        $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
        if (!is_resource($hProcess)) fail('unable to start pgsql');

        // NOTE(review): the whole script is written before any output is
        // read; if psql produces a lot of output while the script is still
        // being written both sides could end up waiting on a full pipe -
        // confirm whether this matters for the scripts passed in.
        fwrite($ahPipes[0], $sScript);
        fclose($ahPipes[0]);

        // TODO: error checking
        while(!feof($ahPipes[1]))
        {
                echo fread($ahPipes[1], 4096);
        }
        fclose($ahPipes[1]);

        proc_close($hProcess);
}
396         }