]> git.openstreetmap.org Git - nominatim.git/blob - utils/setup.php
postcode/zipcode improvements, finish work on handling extratags
[nominatim.git] / utils / setup.php
1 #!/usr/bin/php -Cq
2 <?php
3
// Load the shared command-line bootstrap from ../lib (relative to this script)
// and raise the PHP memory limit for the setup run.
require_once dirname(dirname(__FILE__)).'/lib/init-cmd.php';
ini_set('memory_limit', '800M');

// Option table handed to getCmdOpt(): the first entry is the program
// description, every following entry describes one command-line switch.
$aCMDOptions = array(
    'Create and setup nominatim search system',

    // General switches
    array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
    array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
    array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),

    // Switches that take a value
    array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
    array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),

    // Run every stage below, except the tiger import
    array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),

    // Individual stages
    array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
    array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
    array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
    array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
    array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
    array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
    array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
    array('import-tiger-data', '', 0, 1, 0, 0, 'bool', "Import tiger data (not included in 'all')"),
    array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
    array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
);

// Parse the command line; the parsed switches end up in $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
30
// Set to true by each stage that runs; if it is still false at the end of the
// script the usage text is printed instead.
$bDidSomething = false;

// This is a pretty hard core default - the number of processors in the box - 1
$iProcessors = getProcessorCount();
$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:($iProcessors-1);

// Apply the upper bound first and the lower bound last, so the final value is
// always at least 1 - even if getProcessorCount() reports 0. (With the bounds
// in the opposite order a processor count of 0 left $iInstances at 0, and the
// parallel stages would then start no workers at all.)
if ($iInstances > $iProcessors)
{
    $iInstances = $iProcessors;
    echo "WARNING: resetting threads to $iInstances\n";
}
if ($iInstances < 1)
{
    $iInstances = 1;
    echo "WARNING: resetting threads to $iInstances\n";
}
45
// Stage: create the database. Runs for --create-db and as part of --all.
if ($aCMDResult['all'] || $aCMDResult['create-db'])
{
    $bDidSomething = true;

    // A connection attempt that does NOT fail means the database is already
    // there, in which case we refuse to go on.
    $oDB =& DB::connect(CONST_Database_DSN, false);
    $bConnectFailed = PEAR::isError($oDB);
    if (!$bConnectFailed)
    {
        fail('database already exists');
    }

    passthru('createdb nominatim');
}
56
// Stage: turn the empty database into a blank nominatim database by installing
// plpgsql, the contrib / PostGIS SQL scripts and the reference data shipped in
// the data/ directory. Runs for --setup-db and as part of --all.
//
// This stage previously tested 'create-db' (a copy of the condition of the
// stage above), so the declared --setup-db switch was never honoured.
if ($aCMDResult['setup-db'] || $aCMDResult['all'])
{
    $bDidSomething = true;
    // TODO: path detection, detection memory, etc.

    $oDB =& getDB();
    passthru('createlang plpgsql nominatim');

    // The scripts are run strictly in the order listed here.
    $aSetupScripts = array(
        CONST_Path_Postgresql_Contrib.'/_int.sql',
        CONST_Path_Postgresql_Contrib.'/hstore.sql',
        CONST_Path_Postgresql_Postgis.'/postgis.sql',
        CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql',
        CONST_BasePath.'/data/country_name.sql',
        CONST_BasePath.'/data/country_naturalearthdata.sql',
        CONST_BasePath.'/data/country_osm_grid.sql',
        CONST_BasePath.'/data/gb_postcode.sql',
        CONST_BasePath.'/data/us_statecounty.sql',
        CONST_BasePath.'/data/us_state.sql',
        CONST_BasePath.'/data/us_postcode.sql',
        CONST_BasePath.'/data/worldboundaries.sql',
    );
    foreach($aSetupScripts as $sSetupScript)
    {
        pgsqlRunScriptFile($sSetupScript);
    }
}
77
// Stage: import the OSM file into the database with osm2pgsql (gazetteer
// output) and verify that the place table received at least one row.
// Runs for --import-data and as part of --all.
if ($aCMDResult['import-data'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    // Without this check a missing --osm-file would start osm2pgsql with no
    // input file at all.
    if (empty($aCMDResult['osm-file']))
    {
        fail('missing --osm-file for data import');
    }

    // Quote the path so that spaces or shell metacharacters in it cannot
    // break (or extend) the command line.
    $sCMD = CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim ';
    $sCMD .= escapeshellarg($aCMDResult['osm-file']);
    passthru($sCMD);

    // Sanity check: an empty place table means the import did not work.
    $oDB =& getDB();
    $x = $oDB->getRow('select * from place limit 1');
    if (!$x || PEAR::isError($x)) fail('No Data');
}
87
// Stage: install the SQL functions. Runs for --create-functions and as part
// of --all.
if ($aCMDResult['all'] || $aCMDResult['create-functions'])
{
    $bDidSomething = true;

    // The compiled module has to be present before the functions are created.
    $sModulePath = CONST_BasePath.'/module';
    if (!file_exists($sModulePath.'/nominatim.so'))
    {
        fail("nominatim module not built");
    }

    // Fill the {modulepath} placeholder of the template with the module
    // directory and feed the resulting script to psql.
    $sFunctionsSQL = file_get_contents(CONST_BasePath.'/sql/functions.sql');
    pgsqlRunScript(str_replace('{modulepath}', $sModulePath, $sFunctionsSQL));
}
96
// Stage: create the main tables. Runs for --create-tables and as part of --all.
if ($aCMDResult['all'] || $aCMDResult['create-tables'])
{
    $bDidSomething = true;

    pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');

    // Once the tables exist, run the functions script again with the
    // {modulepath} placeholder filled in.
    $sFunctionsSQL = str_replace(
        '{modulepath}',
        CONST_BasePath.'/module',
        file_get_contents(CONST_BasePath.'/sql/functions.sql')
    );
    pgsqlRunScript($sFunctionsSQL);
}
107
// Stage: generate and run the partition SQL. Runs for --create-partitions and
// as part of --all.
if ($aCMDResult['all'] || $aCMDResult['create-partitions'])
{
    $bDidSomething = true;

    // Fetch the partition of every country; partition 0 is appended on top.
    $oDB =& getDB();
    $aPartitions = $oDB->getCol('select partition from country_name order by country_code');
    if (PEAR::isError($aPartitions))
    {
        fail($aPartitions->getMessage());
    }
    array_push($aPartitions, 0);

    // Each "-- start" ... "-- end" section of the template is replaced by one
    // copy of its body per partition, with '-partition-' substituted by the
    // partition value.
    $sPartitionSQL = file_get_contents(CONST_BasePath.'/sql/partitions.src.sql');
    preg_match_all('#^-- start(.*?)^-- end#ms', $sPartitionSQL, $aSections, PREG_SET_ORDER);
    foreach($aSections as $aSection)
    {
        list($sWholeSection, $sSectionBody) = $aSection;

        $aExpanded = array();
        foreach($aPartitions as $sPartitionName)
        {
            $aExpanded[] = str_replace('-partition-', $sPartitionName, $sSectionBody);
        }
        $sPartitionSQL = str_replace($sWholeSection, implode('', $aExpanded), $sPartitionSQL);
    }

    pgsqlRunScript($sPartitionSQL);
}
134
// Stage: copy the imported rows from place into placex, using $iInstances
// database connections in parallel. Runs for --load-data and as part of --all.
if ($aCMDResult['load-data'] || $aCMDResult['all'])
{
    $bDidSomething = true;

    $oDB =& getDB();

    // Empty the live tables and recreate the place sequence. The statements
    // run in this order and one progress dot is printed after each of them.
    $aResetSQL = array(
        'TRUNCATE word',
        'TRUNCATE placex',
        'TRUNCATE place_addressline',
        'TRUNCATE place_boundingbox',
        'TRUNCATE location_area',
        'TRUNCATE search_name',
        'TRUNCATE search_name_blank',
        'DROP SEQUENCE seq_place',
        'CREATE SEQUENCE seq_place start 100000',
    );
    foreach($aResetSQL as $sResetSQL)
    {
        if (!pg_query($oDB->connection, $sResetSQL)) fail(pg_last_error($oDB->connection));
        echo '.';
    }

    // Start one asynchronous insert per connection; connection $i copies the
    // rows for which osm_id % $iInstances equals $i.
    // NOTE(review): rows with a negative osm_id and a non-zero remainder would
    // match none of the workers - confirm place never contains negative ids.
    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
        $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
        if ($aCMDResult['verbose']) echo "$sSQL\n";
        // Report the error of the connection that actually failed (this used
        // to read the error from the unrelated $oDB connection).
        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
        {
            fail(pg_last_error($aDBInstances[$i]->connection));
        }
    }

    // Poll once per second until none of the connections is busy any more.
    $bAnyBusy = true;
    while($bAnyBusy)
    {
        $bAnyBusy = false;
        for($i = 0; $i < $iInstances; $i++)
        {
            if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
        }
        sleep(1);
        echo '.';
    }

    // Collect the results of the asynchronous inserts so that a failed insert
    // aborts the setup instead of going unnoticed.
    for($i = 0; $i < $iInstances; $i++)
    {
        while($hResult = pg_get_result($aDBInstances[$i]->connection))
        {
            $sError = pg_result_error($hResult);
            if ($sError) fail($sError);
        }
    }
    echo "\n";
}
182
// Stage: import the tiger SQL files from data/tiger2009, sending their lines
// to $iInstances database connections in parallel. Runs only for
// --import-tiger-data; it is not part of --all.
if ($aCMDResult['import-tiger-data'])
{
    $bDidSomething = true;

    $aDBInstances = array();
    for($i = 0; $i < $iInstances; $i++)
    {
        $aDBInstances[$i] =& getDB(true);
    }

    // glob() returns false on error; treat that like "no files".
    $aTigerFiles = glob(CONST_BasePath.'/data/tiger2009/*.sql');
    if (!$aTigerFiles) $aTigerFiles = array();

    foreach($aTigerFiles as $sFile)
    {
        echo $sFile.': ';

        // NOTE(review): hard-coded cut-off - every file whose name starts with
        // a number <= 53033 is skipped. This looks like a leftover "resume"
        // hack; confirm whether it should be removed.
        if ((int)basename($sFile) <= 53033)
        {
            echo "skipped\n";
            continue;
        }

        $hFile = fopen($sFile, "r");
        if (!$hFile) fail('unable to open '.$sFile);

        // NOTE(review): the first line of the file is read here and never sent
        // to the database (it is overwritten by the next fgets() below) -
        // presumably a header line; verify against the data files.
        $sSQL = fgets($hFile, 100000);
        $iLines = 0;

        // Hand the next line of the file to whichever connection is idle,
        // until the file is exhausted.
        while(true)
        {
            for($i = 0; $i < $iInstances; $i++)
            {
                if (!pg_connection_busy($aDBInstances[$i]->connection))
                {
                    // Drain the results of the previous query on this connection.
                    while(pg_get_result($aDBInstances[$i]->connection));
                    $sSQL = fgets($hFile, 100000);
                    if (!$sSQL) break 2;
                    // Report the error of the connection that failed; $oDB is
                    // never assigned in this stage.
                    if (!pg_send_query($aDBInstances[$i]->connection, $sSQL))
                    {
                        fail(pg_last_error($aDBInstances[$i]->connection));
                    }
                    // One progress dot per 1000 lines sent.
                    $iLines++;
                    if ($iLines == 1000)
                    {
                        echo ".";
                        $iLines = 0;
                    }
                }
            }
            usleep(10);
        }

        fclose($hFile);

        // Wait for the queries still in flight before starting the next file.
        $bAnyBusy = true;
        while($bAnyBusy)
        {
            $bAnyBusy = false;
            for($i = 0; $i < $iInstances; $i++)
            {
                if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
            }
            usleep(10);
        }
        echo "\n";
    }
}
237
// Stage: rebuild the artificial postcode rows (osm_type 'P') in placex.
// Runs for --calculate-postcodes and as part of --all.
if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
{
    // Mark the run as having done work; without this, calling the script with
    // only --calculate-postcodes did the work and then printed the usage text.
    $bDidSomething = true;

    $oDB =& getDB();

    // Remove the postcode rows of any previous run.
    if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));

    // One row per (country_code, postcode) found in place, located at the
    // average of the centroids of all places carrying that postcode.
    $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
    $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
    $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
    $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
    $sSQL .= "from place where postcode is not null group by country_code,postcode) as x";
    if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));

    // Additionally one row per entry of the us_postcode table, all with
    // country code 'us'.
    $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
    $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
    $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
    if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
}
254
// Stage: run the external indexer with $iInstances threads. Runs for --index
// and as part of --all.
if ($aCMDResult['all'] || $aCMDResult['index'])
{
    $bDidSomething = true;

    $sIndexCMD = CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances;
    passthru($sIndexCMD);
}

// No stage was selected on the command line: print the usage text.
if (!$bDidSomething) showUsage($aCMDOptions, true);
265
// Run a SQL script file with the psql client against the database named in
// CONST_Database_DSN.
//
// $sFilename  path of the script, passed to "psql -f"
//
// Everything psql writes to standard output is echoed; its standard error is
// discarded. fail() is called when the file does not exist, when psql cannot
// be started or when psql exits with a non-zero status.
function pgsqlRunScriptFile($sFilename)
{
    if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

    // Convert database DSN to psql parameters. Both values are quoted so that
    // spaces or shell metacharacters cannot break the command line.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    $sCMD = 'psql -f '.escapeshellarg($sFilename).' '.escapeshellarg($aDSNInfo['database']);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // psql reads the script from the file, so nothing is sent on its stdin.
    fclose($ahPipes[0]);

    // Relay psql's output until it closes its end of the pipe.
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    // A positive exit status means psql itself failed (for example because it
    // could not connect); do not carry on silently in that case.
    $iReturn = proc_close($hProcess);
    if ($iReturn > 0) fail('psql exited with status '.$iReturn.' while running '.$sFilename);
}
294
// Run SQL held in a string with the psql client against the database named in
// CONST_Database_DSN.
//
// $sScript  the SQL text, written to psql's standard input
//
// Everything psql writes to standard output is echoed; its standard error is
// discarded. fail() is called when psql cannot be started or when it exits
// with a non-zero status.
function pgsqlRunScript($sScript)
{
    // Convert database DSN to psql parameters. The database name is quoted so
    // that spaces or shell metacharacters cannot break the command line.
    $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
    $sCMD = 'psql '.escapeshellarg($aDSNInfo['database']);

    $aDescriptors = array(
        0 => array('pipe', 'r'),
        1 => array('pipe', 'w'),
        2 => array('file', '/dev/null', 'a')
    );
    $ahPipes = null;
    $hProcess = proc_open($sCMD, $aDescriptors, $ahPipes);
    if (!is_resource($hProcess)) fail('unable to start pgsql');

    // NOTE(review): the whole script is written before any output is read. If
    // psql produces more output than the stdout pipe can buffer while the
    // script is still being written, both sides could block - consider
    // interleaving reads and writes if very large scripts are ever passed in.
    fwrite($ahPipes[0], $sScript);
    fclose($ahPipes[0]);

    // Relay psql's output until it closes its end of the pipe.
    while(!feof($ahPipes[1]))
    {
        echo fread($ahPipes[1], 4096);
    }
    fclose($ahPipes[1]);

    // A positive exit status means psql itself failed (for example because it
    // could not connect); do not carry on silently in that case.
    $iReturn = proc_close($hProcess);
    if ($iReturn > 0) fail('psql exited with status '.$iReturn);
}