]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/setup/SetupClass.php
update documentation for new wikipedia data
[nominatim.git] / lib / setup / SetupClass.php
index d3f59296a23d35a91128ed7e0d083e6ded444fd4..c1c15d9a5f846384db100800b75ccb6fadd937fd 100755 (executable)
@@ -80,13 +80,15 @@ class SetupFunctions
             fail('database already exists ('.CONST_Database_DSN.')');
         }
 
-        $sCreateDBCmd = 'createdb -E UTF-8 -p '.$this->aDSNInfo['port'].' '.$this->aDSNInfo['database'];
+        $sCreateDBCmd = 'createdb -E UTF-8'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' '.escapeshellarg($this->aDSNInfo['database']);
         if (isset($this->aDSNInfo['username'])) {
-            $sCreateDBCmd .= ' -U '.$this->aDSNInfo['username'];
+            $sCreateDBCmd .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCreateDBCmd .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCreateDBCmd .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
 
         $result = $this->runWithPgEnv($sCreateDBCmd);
@@ -106,34 +108,19 @@ class SetupFunctions
         $fPostgresVersion = $this->oDB->getPostgresVersion();
         echo 'Postgres version found: '.$fPostgresVersion."\n";
 
-        if ($fPostgresVersion < 9.01) {
-            fail('Minimum supported version of Postgresql is 9.1.');
+        if ($fPostgresVersion < 9.03) {
+            fail('Minimum supported version of Postgresql is 9.3.');
         }
 
         $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore');
         $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis');
 
-        // For extratags and namedetails the hstore_to_json converter is
-        // needed which is only available from Postgresql 9.3+. For older
-        // versions add a dummy function that returns nothing.
-        $iNumFunc = $this->oDB->getOne("select count(*) from pg_proc where proname = 'hstore_to_json'");
-
-        if ($iNumFunc == 0) {
-            $this->pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable");
-            warn('Postgresql is too old. extratags and namedetails API not available.');
-        }
-
-
         $fPostgisVersion = $this->oDB->getPostgisVersion();
         echo 'Postgis version found: '.$fPostgisVersion."\n";
 
-        if ($fPostgisVersion < 2.1) {
-            // Functions were renamed in 2.1 and throw an annoying deprecation warning
-            $this->pgsqlRunScript('ALTER FUNCTION st_line_interpolate_point(geometry, double precision) RENAME TO ST_LineInterpolatePoint');
-            $this->pgsqlRunScript('ALTER FUNCTION ST_Line_Locate_Point(geometry, geometry) RENAME TO ST_LineLocatePoint');
-        }
         if ($fPostgisVersion < 2.2) {
-            $this->pgsqlRunScript('ALTER FUNCTION ST_Distance_Spheroid(geometry, geometry, spheroid) RENAME TO ST_DistanceSpheroid');
+            echo "Minimum required Postgis version 2.2\n";
+            exit(1);
         }
 
         $i = $this->oDB->getOne("select count(*) from pg_user where usename = '".CONST_Database_Web_User."'");
@@ -152,18 +139,22 @@ class SetupFunctions
             exit(1);
         }
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
-        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql.gz');
+        $this->pgsqlRunScriptFile(CONST_ExtraDataPath.'/country_osm_grid.sql.gz');
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
+        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode_table.sql');
 
         $sPostcodeFilename = CONST_BasePath.'/data/gb_postcode_data.sql.gz';
         if (file_exists($sPostcodeFilename)) {
             $this->pgsqlRunScriptFile($sPostcodeFilename);
         } else {
-            warn('optional external UK postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
+            warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
         }
 
-        if (CONST_Use_Extra_US_Postcodes) {
-            $this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
+        $sPostcodeFilename = CONST_BasePath.'/data/us_postcode_data.sql.gz';
+        if (file_exists($sPostcodeFilename)) {
+            $this->pgsqlRunScriptFile($sPostcodeFilename);
+        } else {
+            warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
         }
 
         if ($this->bNoPartitions) {
@@ -189,30 +180,30 @@ class SetupFunctions
             fail("osm2pgsql not found in '$osm2pgsql'");
         }
 
-        $osm2pgsql .= ' -S '.CONST_Import_Style;
+        $osm2pgsql .= ' -S '.escapeshellarg(CONST_Import_Style);
 
         if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
-            $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
+            $osm2pgsql .= ' --flat-nodes '.escapeshellarg(CONST_Osm2pgsql_Flatnode_File);
         }
 
         if (CONST_Tablespace_Osm2pgsql_Data)
-            $osm2pgsql .= ' --tablespace-slim-data '.CONST_Tablespace_Osm2pgsql_Data;
+            $osm2pgsql .= ' --tablespace-slim-data '.escapeshellarg(CONST_Tablespace_Osm2pgsql_Data);
         if (CONST_Tablespace_Osm2pgsql_Index)
-            $osm2pgsql .= ' --tablespace-slim-index '.CONST_Tablespace_Osm2pgsql_Index;
+            $osm2pgsql .= ' --tablespace-slim-index '.escapeshellarg(CONST_Tablespace_Osm2pgsql_Index);
         if (CONST_Tablespace_Place_Data)
-            $osm2pgsql .= ' --tablespace-main-data '.CONST_Tablespace_Place_Data;
+            $osm2pgsql .= ' --tablespace-main-data '.escapeshellarg(CONST_Tablespace_Place_Data);
         if (CONST_Tablespace_Place_Index)
-            $osm2pgsql .= ' --tablespace-main-index '.CONST_Tablespace_Place_Index;
+            $osm2pgsql .= ' --tablespace-main-index '.escapeshellarg(CONST_Tablespace_Place_Index);
         $osm2pgsql .= ' -lsc -O gazetteer --hstore --number-processes 1';
-        $osm2pgsql .= ' -C '.$this->iCacheMemory;
-        $osm2pgsql .= ' -P '.$this->aDSNInfo['port'];
+        $osm2pgsql .= ' -C '.escapeshellarg($this->iCacheMemory);
+        $osm2pgsql .= ' -P '.escapeshellarg($this->aDSNInfo['port']);
         if (isset($this->aDSNInfo['username'])) {
-            $osm2pgsql .= ' -U '.$this->aDSNInfo['username'];
+            $osm2pgsql .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $osm2pgsql .= ' -H '.$this->aDSNInfo['hostspec'];
+            $osm2pgsql .= ' -H '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
-        $osm2pgsql .= ' -d '.$this->aDSNInfo['database'].' '.$sOSMFile;
+        $osm2pgsql .= ' -d '.escapeshellarg($this->aDSNInfo['database']).' '.escapeshellarg($sOSMFile);
 
         $this->runWithPgEnv($osm2pgsql);
 
@@ -332,19 +323,12 @@ class SetupFunctions
 
     public function importWikipediaArticles()
     {
-        $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikipedia_article.sql.bin';
-        $sWikiRedirectsFile = CONST_Wikipedia_Data_Path.'/wikipedia_redirect.sql.bin';
+        $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikimedia_importance.sql.gz';
         if (file_exists($sWikiArticlesFile)) {
-            info('Importing wikipedia articles');
+            info('Importing wikipedia articles and redirects');
             $this->pgsqlRunDropAndRestore($sWikiArticlesFile);
         } else {
-            warn('wikipedia article dump file not found - places will have default importance');
-        }
-        if (file_exists($sWikiRedirectsFile)) {
-            info('Importing wikipedia redirects');
-            $this->pgsqlRunDropAndRestore($sWikiRedirectsFile);
-        } else {
-            warn('wikipedia redirect dump file not found - some place importance values may be missing');
+            warn('wikipedia importance dump file not found - places will have default importance');
         }
     }
 
@@ -479,6 +463,10 @@ class SetupFunctions
     {
         info('Import Tiger data');
 
+        $aFilenames = glob(CONST_Tiger_Data_Path.'/*.sql');
+        info('Found '.count($aFilenames).' SQL files in path '.CONST_Tiger_Data_Path);
+        if (empty($aFilenames)) return;
+
         $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
         $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
         $sTemplate = $this->replaceTablespace(
@@ -503,7 +491,7 @@ class SetupFunctions
             pg_ping($aDBInstances[$i]);
         }
 
-        foreach (glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile) {
+        foreach ($aFilenames as $sFile) {
             echo $sFile.': ';
             $hFile = fopen($sFile, 'r');
             $sSQL = fgets($hFile, 100000);
@@ -573,17 +561,15 @@ class SetupFunctions
         $sSQL .= ' GROUP BY country_code, pc';
         $this->pgExec($sSQL);
 
-        if (CONST_Use_Extra_US_Postcodes) {
-            // only add postcodes that are not yet available in OSM
-            $sSQL  = 'INSERT INTO location_postcode';
-            $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
-            $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
-            $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
-            $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
-            $sSQL .= '        (SELECT postcode FROM location_postcode';
-            $sSQL .= "          WHERE country_code = 'us')";
-            $this->pgExec($sSQL);
-        }
+        // only add postcodes that are not yet available in OSM
+        $sSQL  = 'INSERT INTO location_postcode';
+        $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
+        $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
+        $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
+        $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
+        $sSQL .= '        (SELECT postcode FROM location_postcode';
+        $sSQL .= "          WHERE country_code = 'us')";
+        $this->pgExec($sSQL);
 
         // add missing postcodes for GB (if available)
         $sSQL  = 'INSERT INTO location_postcode';
@@ -608,13 +594,15 @@ class SetupFunctions
     public function index($bIndexNoanalyse)
     {
         $sOutputFile = '';
-        $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i -d '.$this->aDSNInfo['database'].' -P '
-            .$this->aDSNInfo['port'].' -t '.$this->iInstances.$sOutputFile;
+        $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i'
+            .' -d '.escapeshellarg($this->aDSNInfo['database'])
+            .' -P '.escapeshellarg($this->aDSNInfo['port'])
+            .' -t '.escapeshellarg($this->iInstances.$sOutputFile);
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sBaseCmd .= ' -H '.$this->aDSNInfo['hostspec'];
+            $sBaseCmd .= ' -H '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sBaseCmd .= ' -U '.$this->aDSNInfo['username'];
+            $sBaseCmd .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         info('Index ranks 0 - 4');
@@ -738,9 +726,7 @@ class SetupFunctions
         }
         foreach ($aDropTables as $sDrop) {
             if ($this->bVerbose) echo "Dropping table $sDrop\n";
-            $this->oDB->exec("DROP TABLE $sDrop CASCADE");
-            // ignore warnings/errors as they might be caused by a table having
-            // been deleted already by CASCADE
+            $this->oDB->exec("DROP TABLE IF EXISTS $sDrop CASCADE");
         }
 
         if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
@@ -753,15 +739,18 @@ class SetupFunctions
 
     private function pgsqlRunDropAndRestore($sDumpFile)
     {
-        $sCMD = 'pg_restore -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'].' --no-owner -Fc --clean '.$sDumpFile;
+        $sCMD = 'pg_restore'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' -d '.escapeshellarg($this->aDSNInfo['database'])
+            .' --no-owner -Fc --clean '.escapeshellarg($sDumpFile);
         if ($this->oDB->getPostgresVersion() >= 9.04) {
             $sCMD .= ' --if-exists';
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCMD .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sCMD .= ' -U '.$this->aDSNInfo['username'];
+            $sCMD .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         $this->runWithPgEnv($sCMD);
@@ -825,15 +814,17 @@ class SetupFunctions
     {
         if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
 
-        $sCMD = 'psql -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'];
+        $sCMD = 'psql'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' -d '.escapeshellarg($this->aDSNInfo['database']);
         if (!$this->bVerbose) {
             $sCMD .= ' -q';
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCMD .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sCMD .= ' -U '.$this->aDSNInfo['username'];
+            $sCMD .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
         $aProcEnv = null;
         if (isset($this->aDSNInfo['password'])) {
@@ -846,12 +837,12 @@ class SetupFunctions
                              1 => array('pipe', 'w'),
                              2 => array('file', '/dev/null', 'a')
                             );
-            $hGzipProcess = proc_open('zcat '.$sFilename, $aDescriptors, $ahGzipPipes);
+            $hGzipProcess = proc_open('zcat '.escapeshellarg($sFilename), $aDescriptors, $ahGzipPipes);
             if (!is_resource($hGzipProcess)) fail('unable to start zcat');
             $aReadPipe = $ahGzipPipes[1];
             fclose($ahGzipPipes[0]);
         } else {
-            $sCMD .= ' -f '.$sFilename;
+            $sCMD .= ' -f '.escapeshellarg($sFilename);
             $aReadPipe = array('pipe', 'r');
         }
         $aDescriptors = array(