]> git.openstreetmap.org Git - nominatim.git/blob - lib-php/setup/SetupClass.php
port load-data function to python
[nominatim.git] / lib-php / setup / SetupClass.php
1 <?php
2
3 namespace Nominatim\Setup;
4
5 require_once(CONST_LibDir.'/Shell.php');
6
7 class SetupFunctions
8 {
9     protected $iInstances;
10     protected $aDSNInfo;
11     protected $bQuiet;
12     protected $bVerbose;
13     protected $sIgnoreErrors;
14     protected $bEnableDiffUpdates;
15     protected $bEnableDebugStatements;
16     protected $bNoPartitions;
17     protected $bDrop;
18     protected $oDB = null;
19     protected $oNominatimCmd;
20
21     public function __construct(array $aCMDResult)
22     {
23         // by default, use all but one processor, but never more than 15.
24         $this->iInstances = isset($aCMDResult['threads'])
25             ? $aCMDResult['threads']
26             : (min(16, getProcessorCount()) - 1);
27
28         if ($this->iInstances < 1) {
29             $this->iInstances = 1;
30             warn('resetting threads to '.$this->iInstances);
31         }
32
33         // parse database string
34         $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
35         if (!isset($this->aDSNInfo['port'])) {
36             $this->aDSNInfo['port'] = 5432;
37         }
38
39         // setting member variables based on command line options stored in $aCMDResult
40         $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
41         $this->bVerbose = $aCMDResult['verbose'];
42
43         //setting default values which are not set by the update.php array
44         if (isset($aCMDResult['ignore-errors'])) {
45             $this->sIgnoreErrors = $aCMDResult['ignore-errors'];
46         } else {
47             $this->sIgnoreErrors = false;
48         }
49         if (isset($aCMDResult['enable-debug-statements'])) {
50             $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements'];
51         } else {
52             $this->bEnableDebugStatements = false;
53         }
54         if (isset($aCMDResult['no-partitions'])) {
55             $this->bNoPartitions = $aCMDResult['no-partitions'];
56         } else {
57             $this->bNoPartitions = false;
58         }
59         if (isset($aCMDResult['enable-diff-updates'])) {
60             $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
61         } else {
62             $this->bEnableDiffUpdates = false;
63         }
64
65         $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
66
67         $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
68         if ($this->bQuiet) {
69             $this->oNominatimCmd->addParams('--quiet');
70         }
71         if ($this->bVerbose) {
72             $this->oNominatimCmd->addParams('--verbose');
73         }
74     }
75
76     public function createFunctions()
77     {
78         info('Create Functions');
79
80         // Try accessing the C module, so we know early if something is wrong
81         $this->checkModulePresence(); // raises exception on failure
82
83         $this->createSqlFunctions();
84     }
85
86     public function createTables($bReverseOnly = false)
87     {
88         info('Create Tables');
89
90         $sTemplate = file_get_contents(CONST_SqlDir.'/tables.sql');
91         $sTemplate = $this->replaceSqlPatterns($sTemplate);
92
93         $this->pgsqlRunScript($sTemplate, false);
94
95         if ($bReverseOnly) {
96             $this->dropTable('search_name');
97         }
98
99         (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run();
100     }
101
102     public function createTableTriggers()
103     {
104         info('Create Tables');
105
106         $sTemplate = file_get_contents(CONST_SqlDir.'/table-triggers.sql');
107         $sTemplate = $this->replaceSqlPatterns($sTemplate);
108
109         $this->pgsqlRunScript($sTemplate, false);
110     }
111
112     public function createPartitionTables()
113     {
114         info('Create Partition Tables');
115
116         $sTemplate = file_get_contents(CONST_SqlDir.'/partition-tables.src.sql');
117         $sTemplate = $this->replaceSqlPatterns($sTemplate);
118
119         $this->pgsqlRunPartitionScript($sTemplate);
120     }
121
122     public function importTigerData($sTigerPath)
123     {
124         info('Import Tiger data');
125
126         $aFilenames = glob($sTigerPath.'/*.sql');
127         info('Found '.count($aFilenames).' SQL files in path '.$sTigerPath);
128         if (empty($aFilenames)) {
129             warn('Tiger data import selected but no files found in path '.$sTigerPath);
130             return;
131         }
132         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_start.sql');
133         $sTemplate = $this->replaceSqlPatterns($sTemplate);
134
135         $this->pgsqlRunScript($sTemplate, false);
136
137         $aDBInstances = array();
138         for ($i = 0; $i < $this->iInstances; $i++) {
139             // https://secure.php.net/manual/en/function.pg-connect.php
140             $DSN = getSetting('DATABASE_DSN');
141             $DSN = preg_replace('/^pgsql:/', '', $DSN);
142             $DSN = preg_replace('/;/', ' ', $DSN);
143             $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW | PGSQL_CONNECT_ASYNC);
144             pg_ping($aDBInstances[$i]);
145         }
146
147         foreach ($aFilenames as $sFile) {
148             echo $sFile.': ';
149             $hFile = fopen($sFile, 'r');
150             $sSQL = fgets($hFile, 100000);
151             $iLines = 0;
152             while (true) {
153                 for ($i = 0; $i < $this->iInstances; $i++) {
154                     if (!pg_connection_busy($aDBInstances[$i])) {
155                         while (pg_get_result($aDBInstances[$i]));
156                         $sSQL = fgets($hFile, 100000);
157                         if (!$sSQL) break 2;
158                         if (!pg_send_query($aDBInstances[$i], $sSQL)) fail(pg_last_error($aDBInstances[$i]));
159                         $iLines++;
160                         if ($iLines == 1000) {
161                             echo '.';
162                             $iLines = 0;
163                         }
164                     }
165                 }
166                 usleep(10);
167             }
168             fclose($hFile);
169
170             $bAnyBusy = true;
171             while ($bAnyBusy) {
172                 $bAnyBusy = false;
173                 for ($i = 0; $i < $this->iInstances; $i++) {
174                     if (pg_connection_busy($aDBInstances[$i])) $bAnyBusy = true;
175                 }
176                 usleep(10);
177             }
178             echo "\n";
179         }
180
181         for ($i = 0; $i < $this->iInstances; $i++) {
182             pg_close($aDBInstances[$i]);
183         }
184
185         info('Creating indexes on Tiger data');
186         $sTemplate = file_get_contents(CONST_SqlDir.'/tiger_import_finish.sql');
187         $sTemplate = $this->replaceSqlPatterns($sTemplate);
188
189         $this->pgsqlRunScript($sTemplate, false);
190     }
191
192     public function calculatePostcodes($bCMDResultAll)
193     {
194         info('Calculate Postcodes');
195         $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');
196
197         $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz';
198         if (file_exists($sPostcodeFilename)) {
199             $this->pgsqlRunScriptFile($sPostcodeFilename);
200         } else {
201             warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
202         }
203
204         $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz';
205         if (file_exists($sPostcodeFilename)) {
206             $this->pgsqlRunScriptFile($sPostcodeFilename);
207         } else {
208             warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
209         }
210
211
212         $this->db()->exec('TRUNCATE location_postcode');
213
214         $sSQL  = 'INSERT INTO location_postcode';
215         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
216         $sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
217         $sSQL .= "       upper(trim (both ' ' from address->'postcode')) as pc,";
218         $sSQL .= '       ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
219         $sSQL .= '  FROM placex';
220         $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
221         $sSQL .= '       AND geometry IS NOT null';
222         $sSQL .= ' GROUP BY country_code, pc';
223         $this->db()->exec($sSQL);
224
225         // only add postcodes that are not yet available in OSM
226         $sSQL  = 'INSERT INTO location_postcode';
227         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
228         $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
229         $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
230         $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
231         $sSQL .= '        (SELECT postcode FROM location_postcode';
232         $sSQL .= "          WHERE country_code = 'us')";
233         $this->db()->exec($sSQL);
234
235         // add missing postcodes for GB (if available)
236         $sSQL  = 'INSERT INTO location_postcode';
237         $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
238         $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
239         $sSQL .= '  FROM gb_postcode WHERE postcode NOT IN';
240         $sSQL .= '           (SELECT postcode FROM location_postcode';
241         $sSQL .= "             WHERE country_code = 'gb')";
242         $this->db()->exec($sSQL);
243
244         if (!$bCMDResultAll) {
245             $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
246             $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
247             $this->db()->exec($sSQL);
248         }
249
250         $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
251         $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
252         $this->db()->exec($sSQL);
253     }
254
255     public function createSearchIndices()
256     {
257         info('Create Search indices');
258
259         $sSQL = 'SELECT relname FROM pg_class, pg_index ';
260         $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid';
261         $aInvalidIndices = $this->db()->getCol($sSQL);
262
263         foreach ($aInvalidIndices as $sIndexName) {
264             info("Cleaning up invalid index $sIndexName");
265             $this->db()->exec("DROP INDEX $sIndexName;");
266         }
267
268         $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql');
269         if (!$this->bDrop) {
270             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql');
271         }
272         if (!$this->dbReverseOnly()) {
273             $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql');
274         }
275         $sTemplate = $this->replaceSqlPatterns($sTemplate);
276
277         $this->pgsqlRunScript($sTemplate);
278     }
279
280     public function createCountryNames()
281     {
282         info('Create search index for default country names');
283
284         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('uk'), 'gb')");
285         $this->pgsqlRunScript("select getorcreate_country(make_standard_name('united states'), 'us')");
286         $this->pgsqlRunScript('select count(*) from (select getorcreate_country(make_standard_name(country_code), country_code) from country_name where country_code is not null) as x');
287         $this->pgsqlRunScript("select count(*) from (select getorcreate_country(make_standard_name(name->'name'), country_code) from country_name where name ? 'name') as x");
288         $sSQL = 'select count(*) from (select getorcreate_country(make_standard_name(v),'
289             .'country_code) from (select country_code, skeys(name) as k, svals(name) as v from country_name) x where k ';
290         $sLanguages = getSetting('LANGUAGES');
291         if ($sLanguages) {
292             $sSQL .= 'in ';
293             $sDelim = '(';
294             foreach (explode(',', $sLanguages) as $sLang) {
295                 $sSQL .= $sDelim."'name:$sLang'";
296                 $sDelim = ',';
297             }
298             $sSQL .= ')';
299         } else {
300             // all include all simple name tags
301             $sSQL .= "like 'name:%'";
302         }
303         $sSQL .= ') v';
304         $this->pgsqlRunScript($sSQL);
305     }
306
307     /**
308      * Return the connection to the database.
309      *
310      * @return Database object.
311      *
312      * Creates a new connection if none exists yet. Otherwise reuses the
313      * already established connection.
314      */
315     private function db()
316     {
317         if (is_null($this->oDB)) {
318             $this->oDB = new \Nominatim\DB();
319             $this->oDB->connect();
320         }
321
322         return $this->oDB;
323     }
324
325     private function pgsqlRunScript($sScript, $bfatal = true)
326     {
327         runSQLScript(
328             $sScript,
329             $bfatal,
330             $this->bVerbose,
331             $this->sIgnoreErrors
332         );
333     }
334
335     private function createSqlFunctions()
336     {
337         $oCmd = (clone($this->oNominatimCmd))
338                 ->addParams('refresh', '--functions');
339
340         if (!$this->bEnableDiffUpdates) {
341             $oCmd->addParams('--no-diff-updates');
342         }
343
344         if ($this->bEnableDebugStatements) {
345             $oCmd->addParams('--enable-debug-statements');
346         }
347
348         $oCmd->run(!$this->sIgnoreErrors);
349     }
350
351     private function pgsqlRunPartitionScript($sTemplate)
352     {
353         $sSQL = 'select distinct partition from country_name';
354         $aPartitions = $this->db()->getCol($sSQL);
355         if (!$this->bNoPartitions) $aPartitions[] = 0;
356
357         preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER);
358         foreach ($aMatches as $aMatch) {
359             $sResult = '';
360             foreach ($aPartitions as $sPartitionName) {
361                 $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]);
362             }
363             $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
364         }
365
366         $this->pgsqlRunScript($sTemplate);
367     }
368
369     private function pgsqlRunScriptFile($sFilename)
370     {
371         if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
372
373         $oCmd = (new \Nominatim\Shell('psql'))
374                 ->addParams('--port', $this->aDSNInfo['port'])
375                 ->addParams('--dbname', $this->aDSNInfo['database']);
376
377         if (!$this->bVerbose) {
378             $oCmd->addParams('--quiet');
379         }
380         if (isset($this->aDSNInfo['hostspec'])) {
381             $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
382         }
383         if (isset($this->aDSNInfo['username'])) {
384             $oCmd->addParams('--username', $this->aDSNInfo['username']);
385         }
386         if (isset($this->aDSNInfo['password'])) {
387             $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
388         }
389         $ahGzipPipes = null;
390         if (preg_match('/\\.gz$/', $sFilename)) {
391             $aDescriptors = array(
392                              0 => array('pipe', 'r'),
393                              1 => array('pipe', 'w'),
394                              2 => array('file', '/dev/null', 'a')
395                             );
396             $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);
397
398             $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
399             if (!is_resource($hGzipProcess)) fail('unable to start zcat');
400             $aReadPipe = $ahGzipPipes[1];
401             fclose($ahGzipPipes[0]);
402         } else {
403             $oCmd->addParams('--file', $sFilename);
404             $aReadPipe = array('pipe', 'r');
405         }
406         $aDescriptors = array(
407                          0 => $aReadPipe,
408                          1 => array('pipe', 'w'),
409                          2 => array('file', '/dev/null', 'a')
410                         );
411         $ahPipes = null;
412
413         $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
414         if (!is_resource($hProcess)) fail('unable to start pgsql');
415         // TODO: error checking
416         while (!feof($ahPipes[1])) {
417             echo fread($ahPipes[1], 4096);
418         }
419         fclose($ahPipes[1]);
420         $iReturn = proc_close($hProcess);
421         if ($iReturn > 0) {
422             fail("pgsql returned with error code ($iReturn)");
423         }
424         if ($ahGzipPipes) {
425             fclose($ahGzipPipes[1]);
426             proc_close($hGzipProcess);
427         }
428     }
429
430     private function replaceSqlPatterns($sSql)
431     {
432         $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);
433
434         $aPatterns = array(
435                       '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
436                       '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
437                       '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
438                       '{ts:search-index}' =>  getSetting('TABLESPACE_SEARCH_INDEX'),
439                       '{ts:aux-data}' =>  getSetting('TABLESPACE_AUX_DATA'),
440                       '{ts:aux-index}' =>  getSetting('TABLESPACE_AUX_INDEX')
441         );
442
443         foreach ($aPatterns as $sPattern => $sTablespace) {
444             if ($sTablespace) {
445                 $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
446             } else {
447                 $sSql = str_replace($sPattern, '', $sSql);
448             }
449         }
450
451         return $sSql;
452     }
453
454     /**
455      * Drop table with the given name if it exists.
456      *
457      * @param string $sName Name of table to remove.
458      *
459      * @return null
460      */
461     private function dropTable($sName)
462     {
463         if ($this->bVerbose) echo "Dropping table $sName\n";
464         $this->db()->deleteTable($sName);
465     }
466
467     /**
468      * Check if the database is in reverse-only mode.
469      *
470      * @return True if there is no search_name table and infrastructure.
471      */
472     private function dbReverseOnly()
473     {
474         return !($this->db()->tableExists('search_name'));
475     }
476
477     /**
478      * Try accessing the C module, so we know early if something is wrong.
479      *
480      * Raises Nominatim\DatabaseError on failure
481      */
482     private function checkModulePresence()
483     {
484         $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module');
485         $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '";
486         $sSQL .= $sModulePath . "/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT";
487         $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);';
488
489         $oDB = new \Nominatim\DB();
490         $oDB->connect();
491         $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module');
492     }
493 }