]> git.openstreetmap.org Git - nominatim.git/blob - lib-php/setup/SetupClass.php
Merge pull request #2197 from lonvia/use-jinja-for-sql-preprocessing
[nominatim.git] / lib-php / setup / SetupClass.php
1 <?php
2
3 namespace Nominatim\Setup;
4
5 require_once(CONST_LibDir.'/Shell.php');
6
/**
 * Collection of database setup steps that are driven from PHP.
 *
 * Provides the Tiger data import, postcode computation, creation of
 * country name search terms and the refresh of SQL functions via the
 * nominatim command line tool.
 */
class SetupFunctions
{
    /** @var int Number of parallel database connections used by the Tiger import. */
    protected $iInstances;
    /** @var array Parsed DATABASE_DSN setting; 'port' is always set (default 5432). */
    protected $aDSNInfo;
    /** @var bool True when the 'quiet' command line option was given. */
    protected $bQuiet;
    /** @var bool True when the 'verbose' command line option was given. */
    protected $bVerbose;
    /** @var mixed Value of the 'ignore-errors' option, false when not given. */
    protected $sIgnoreErrors;
    /** @var mixed Value of the 'enable-diff-updates' option, false when not given. */
    protected $bEnableDiffUpdates;
    /** @var mixed Value of the 'enable-debug-statements' option, false when not given. */
    protected $bEnableDebugStatements;
    /** @var bool True when the 'drop' command line option was given. */
    protected $bDrop;
    /** @var \Nominatim\DB|null Database connection, created on first use by db(). */
    protected $oDB = null;
    /** @var \Nominatim\Shell Command for NOMINATIM_TOOL with --quiet/--verbose preset. */
    protected $oNominatimCmd;

    /**
     * Initialise the setup helper from parsed command line options.
     *
     * @param array $aCMDResult Parsed command line options. The keys read are
     *                          'threads', 'quiet', 'verbose', 'ignore-errors',
     *                          'enable-debug-statements', 'enable-diff-updates'
     *                          and 'drop'; every one of them may be absent.
     */
    public function __construct(array $aCMDResult)
    {
        // by default, use all but one processor, but never more than 15.
        $this->iInstances = isset($aCMDResult['threads'])
            ? (int) $aCMDResult['threads']
            : (min(16, getProcessorCount()) - 1);

        if ($this->iInstances < 1) {
            $this->iInstances = 1;
            warn('resetting threads to '.$this->iInstances);
        }

        // parse database string
        $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
        if (!isset($this->aDSNInfo['port'])) {
            $this->aDSNInfo['port'] = 5432;
        }

        // setting member variables based on command line options stored in $aCMDResult
        $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
        // guarded like 'quiet': not every caller passes a 'verbose' key
        $this->bVerbose = isset($aCMDResult['verbose']) && $aCMDResult['verbose'];

        //setting default values which are not set by the update.php array
        $this->sIgnoreErrors = isset($aCMDResult['ignore-errors'])
            ? $aCMDResult['ignore-errors']
            : false;
        $this->bEnableDebugStatements = isset($aCMDResult['enable-debug-statements'])
            ? $aCMDResult['enable-debug-statements']
            : false;
        $this->bEnableDiffUpdates = isset($aCMDResult['enable-diff-updates'])
            ? $aCMDResult['enable-diff-updates']
            : false;

        $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];

        $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
        if ($this->bQuiet) {
            $this->oNominatimCmd->addParams('--quiet');
        }
        if ($this->bVerbose) {
            $this->oNominatimCmd->addParams('--verbose');
        }
    }

    /**
     * Import Tiger data from SQL files.
     *
     * Runs tiger_import_start.sql, then sends the lines of every '*.sql'
     * file found in the given directory to a set of asynchronous PostgreSQL
     * connections (one per configured instance) and finally runs
     * tiger_import_finish.sql.
     *
     * @param string $sTigerPath Directory that is searched for '*.sql' files.
     *
     * @return void
     */
    public function importTigerData($sTigerPath)
    {
        info('Import Tiger data');

        $aFilenames = glob($sTigerPath.'/*.sql');
        if ($aFilenames === false) {
            // glob() reports errors with false, treat that like "nothing found"
            $aFilenames = array();
        }
        info('Found '.count($aFilenames).' SQL files in path '.$sTigerPath);
        if (empty($aFilenames)) {
            warn('Tiger data import selected but no files found in path '.$sTigerPath);
            return;
        }
        $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_start.sql');
        $sTemplate = $this->replaceSqlPatterns($sTemplate);

        $this->pgsqlRunScript($sTemplate, false);

        // Convert the DSN setting into the space-separated form handed to pg_connect().
        // https://secure.php.net/manual/en/function.pg-connect.php
        $sConnInfo = getSetting('DATABASE_DSN');
        $sConnInfo = preg_replace('/^pgsql:/', '', $sConnInfo);
        $sConnInfo = preg_replace('/;/', ' ', $sConnInfo);

        $aDBInstances = array();
        for ($i = 0; $i < $this->iInstances; $i++) {
            $aDBInstances[$i] = pg_connect($sConnInfo, PGSQL_CONNECT_FORCE_NEW | PGSQL_CONNECT_ASYNC);
            if ($aDBInstances[$i] === false) {
                fail('unable to connect to database for Tiger import');
            }
            pg_ping($aDBInstances[$i]);
        }

        foreach ($aFilenames as $sFile) {
            $hFile = fopen($sFile, 'r');
            if ($hFile === false) {
                warn('unable to open Tiger file '.$sFile.'. Skipping.');
                continue;
            }
            echo $sFile.': ';
            // NOTE(review): the first line of every file is read and thrown
            // away here, it is never sent to the database. Behaviour kept
            // as found - confirm that the files start with a line to skip.
            fgets($hFile, 100000);
            $iLines = 0;
            while (true) {
                for ($i = 0; $i < $this->iInstances; $i++) {
                    if (!pg_connection_busy($aDBInstances[$i])) {
                        // fetch and discard pending results of the previous query
                        while (pg_get_result($aDBInstances[$i]));
                        $sSQL = fgets($hFile, 100000);
                        if (!$sSQL) break 2;
                        if (!pg_send_query($aDBInstances[$i], $sSQL)) fail(pg_last_error($aDBInstances[$i]));
                        $iLines++;
                        // progress indicator: one dot per 1000 lines sent
                        if ($iLines == 1000) {
                            echo '.';
                            $iLines = 0;
                        }
                    }
                }
                usleep(10);
            }
            fclose($hFile);

            // wait until no connection reports a running query any more
            $bAnyBusy = true;
            while ($bAnyBusy) {
                $bAnyBusy = false;
                for ($i = 0; $i < $this->iInstances; $i++) {
                    if (pg_connection_busy($aDBInstances[$i])) $bAnyBusy = true;
                }
                usleep(10);
            }
            echo "\n";
        }

        for ($i = 0; $i < $this->iInstances; $i++) {
            pg_close($aDBInstances[$i]);
        }

        info('Creating indexes on Tiger data');
        $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_finish.sql');
        $sTemplate = $this->replaceSqlPatterns($sTemplate);

        $this->pgsqlRunScript($sTemplate, false);
    }

    /**
     * Recompute the content of the location_postcode table.
     *
     * Runs postcode_tables.sql, loads the optional external GB and US
     * postcode files from the install directory, empties location_postcode
     * and refills it from placex. Postcodes from us_postcode and gb_postcode
     * that are not yet present are added afterwards. Finally
     * getorcreate_postcode_id() is called for every distinct postcode.
     *
     * @param bool $bCMDResultAll When false, postcode entries of the word
     *                            table that have no match in
     *                            location_postcode are deleted.
     *
     * @return void
     */
    public function calculatePostcodes($bCMDResultAll)
    {
        info('Calculate Postcodes');
        $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');

        // optional external postcode data, loaded in the order GB, US
        foreach (array('gb', 'us') as $sCountry) {
            $sPostcodeFilename = CONST_InstallDir.'/'.$sCountry.'_postcode_data.sql.gz';
            if (file_exists($sPostcodeFilename)) {
                $this->pgsqlRunScriptFile($sPostcodeFilename);
            } else {
                warn('optional external '.strtoupper($sCountry).' postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
            }
        }

        $this->db()->exec('TRUNCATE location_postcode');

        $sSQL  = 'INSERT INTO location_postcode';
        $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
        $sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
        $sSQL .= "       upper(trim (both ' ' from address->'postcode')) as pc,";
        $sSQL .= '       ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
        $sSQL .= '  FROM placex';
        $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
        $sSQL .= '       AND geometry IS NOT null';
        $sSQL .= ' GROUP BY country_code, pc';
        $this->db()->exec($sSQL);

        // only add postcodes that are not yet available in OSM
        $sSQL  = 'INSERT INTO location_postcode';
        $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
        $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
        $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
        $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
        $sSQL .= '        (SELECT postcode FROM location_postcode';
        $sSQL .= "          WHERE country_code = 'us')";
        $this->db()->exec($sSQL);

        // add missing postcodes for GB (if available)
        $sSQL  = 'INSERT INTO location_postcode';
        $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
        $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
        $sSQL .= '  FROM gb_postcode WHERE postcode NOT IN';
        $sSQL .= '           (SELECT postcode FROM location_postcode';
        $sSQL .= "             WHERE country_code = 'gb')";
        $this->db()->exec($sSQL);

        if (!$bCMDResultAll) {
            $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
            // leading blank keeps the two statement parts separated
            $sSQL .= ' and word NOT IN (SELECT postcode FROM location_postcode)';
            $this->db()->exec($sSQL);
        }

        $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
        $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
        $this->db()->exec($sSQL);
    }

    /**
     * Create search terms for the default country names.
     *
     * Calls getorcreate_country() for the fixed names 'uk' and
     * 'united states', for every country code, for the 'name' entry of
     * each country and for the localised 'name:<lang>' entries. The
     * localised entries are restricted to the languages of the LANGUAGES
     * setting when that setting is non-empty.
     *
     * @return void
     */
    public function createCountryNames()
    {
        info('Create search index for default country names');

        $this->pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
        $this->pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
        $this->pgsqlRunScript('select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x');
        $this->pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");
        $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v),'
            .'country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
        $sLanguages = getSetting('LANGUAGES');
        if ($sLanguages) {
            // restrict to the configured languages: k in ('name:xx','name:yy',...)
            $sSQL .= 'in ';
            $sDelim = '(';
            foreach (explode(',', $sLanguages) as $sLang) {
                $sSQL .= $sDelim."'name:$sLang'";
                $sDelim = ',';
            }
            $sSQL .= ')';
        } else {
            // otherwise include all simple name tags
            $sSQL .= "like 'name:%'";
        }
        $sSQL .= ') v';
        $this->pgsqlRunScript($sSQL);
    }

    /**
     * Return the connection to the database.
     *
     * @return Database object.
     *
     * Creates a new connection if none exists yet. Otherwise reuses the
     * already established connection.
     */
    private function db()
    {
        if (is_null($this->oDB)) {
            $this->oDB = new \Nominatim\DB();
            $this->oDB->connect();
        }

        return $this->oDB;
    }

    /**
     * Run an SQL script given as a string through runSQLScript().
     *
     * The verbosity and ignore-errors options of this object are passed on.
     *
     * @param string $sScript SQL to execute.
     * @param bool   $bfatal  Passed through to runSQLScript() unchanged.
     *
     * @return void
     */
    private function pgsqlRunScript($sScript, $bfatal = true)
    {
        runSQLScript(
            $sScript,
            $bfatal,
            $this->bVerbose,
            $this->sIgnoreErrors
        );
    }

    /**
     * Refresh the SQL functions by running 'refresh --functions' with the
     * nominatim tool.
     *
     * Adds '--no-diff-updates' unless diff updates are enabled and
     * '--enable-debug-statements' when debug statements were requested.
     *
     * @return void
     */
    public function createSqlFunctions()
    {
        // work on a copy so that the shared base command stays untouched
        $oCmd = (clone($this->oNominatimCmd))
                ->addParams('refresh', '--functions');

        if (!$this->bEnableDiffUpdates) {
            $oCmd->addParams('--no-diff-updates');
        }

        if ($this->bEnableDebugStatements) {
            $oCmd->addParams('--enable-debug-statements');
        }

        $oCmd->run(!$this->sIgnoreErrors);
    }

    /**
     * Execute an SQL file with the psql command line client.
     *
     * Files ending in '.gz' are decompressed with zcat and piped into psql,
     * all other files are handed over with '--file'. Output of psql on
     * stdout is echoed, stderr is discarded. The function fails when the
     * file does not exist, a process cannot be started or psql returns an
     * exit code greater than 0.
     *
     * @param string $sFilename Path of the SQL file to run.
     *
     * @return void
     */
    private function pgsqlRunScriptFile($sFilename)
    {
        if (!file_exists($sFilename)) fail('unable to find '.$sFilename);

        $oCmd = (new \Nominatim\Shell('psql'))
                ->addParams('--port', $this->aDSNInfo['port'])
                ->addParams('--dbname', $this->aDSNInfo['database']);

        if (!$this->bVerbose) {
            $oCmd->addParams('--quiet');
        }
        if (isset($this->aDSNInfo['hostspec'])) {
            $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
        }
        if (isset($this->aDSNInfo['username'])) {
            $oCmd->addParams('--username', $this->aDSNInfo['username']);
        }
        if (isset($this->aDSNInfo['password'])) {
            // handed over via environment instead of a command line parameter
            $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
        }
        $ahGzipPipes = null;
        if (preg_match('/\\.gz$/', $sFilename)) {
            $aDescriptors = array(
                             0 => array('pipe', 'r'),
                             1 => array('pipe', 'w'),
                             2 => array('file', '/dev/null', 'a')
                            );
            $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);

            $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
            if (!is_resource($hGzipProcess)) fail('unable to start zcat');
            // stdout of zcat becomes stdin of psql
            $aReadPipe = $ahGzipPipes[1];
            fclose($ahGzipPipes[0]);
        } else {
            $oCmd->addParams('--file', $sFilename);
            $aReadPipe = array('pipe', 'r');
        }
        $aDescriptors = array(
                         0 => $aReadPipe,
                         1 => array('pipe', 'w'),
                         2 => array('file', '/dev/null', 'a')
                        );
        $ahPipes = null;

        $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
        if (!is_resource($hProcess)) fail('unable to start pgsql');
        // In the '--file' case a stdin pipe was created that nothing is ever
        // written to. Close it so that it is not left open until proc_close().
        if (isset($ahPipes[0])) {
            fclose($ahPipes[0]);
        }
        // TODO: error checking
        while (!feof($ahPipes[1])) {
            echo fread($ahPipes[1], 4096);
        }
        fclose($ahPipes[1]);
        $iReturn = proc_close($hProcess);
        if ($iReturn > 0) {
            fail("pgsql returned with error code ($iReturn)");
        }
        if ($ahGzipPipes) {
            fclose($ahGzipPipes[1]);
            proc_close($hGzipProcess);
        }
    }

    /**
     * Substitute the placeholders of an SQL template.
     *
     * '{www-user}' is replaced with the DATABASE_WEBUSER setting. Each
     * '{ts:...}' placeholder is replaced with a 'TABLESPACE "<name>"'
     * clause when the matching TABLESPACE_* setting is non-empty and is
     * removed otherwise.
     *
     * @param string $sSql SQL template text.
     *
     * @return string SQL with all known placeholders replaced.
     */
    private function replaceSqlPatterns($sSql)
    {
        $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);

        $aPatterns = array(
                      '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
                      '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
                      '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
                      '{ts:search-index}' =>  getSetting('TABLESPACE_SEARCH_INDEX'),
                      '{ts:aux-data}' =>  getSetting('TABLESPACE_AUX_DATA'),
                      '{ts:aux-index}' =>  getSetting('TABLESPACE_AUX_INDEX')
        );

        foreach ($aPatterns as $sPattern => $sTablespace) {
            if ($sTablespace) {
                $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
            } else {
                $sSql = str_replace($sPattern, '', $sSql);
            }
        }

        return $sSql;
    }
}