]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/setup/SetupClass.php
update documentation for new wikipedia data
[nominatim.git] / lib / setup / SetupClass.php
index 1447e383d7c81142c4c00c7f3b96effcc80f2fb7..c1c15d9a5f846384db100800b75ccb6fadd937fd 100755 (executable)
@@ -80,13 +80,15 @@ class SetupFunctions
             fail('database already exists ('.CONST_Database_DSN.')');
         }
 
-        $sCreateDBCmd = 'createdb -E UTF-8 -p '.$this->aDSNInfo['port'].' '.$this->aDSNInfo['database'];
+        $sCreateDBCmd = 'createdb -E UTF-8'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' '.escapeshellarg($this->aDSNInfo['database']);
         if (isset($this->aDSNInfo['username'])) {
-            $sCreateDBCmd .= ' -U '.$this->aDSNInfo['username'];
+            $sCreateDBCmd .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCreateDBCmd .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCreateDBCmd .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
 
         $result = $this->runWithPgEnv($sCreateDBCmd);
@@ -137,18 +139,22 @@ class SetupFunctions
             exit(1);
         }
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
-        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql.gz');
+        $this->pgsqlRunScriptFile(CONST_ExtraDataPath.'/country_osm_grid.sql.gz');
         $this->pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql');
+        $this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode_table.sql');
 
         $sPostcodeFilename = CONST_BasePath.'/data/gb_postcode_data.sql.gz';
         if (file_exists($sPostcodeFilename)) {
             $this->pgsqlRunScriptFile($sPostcodeFilename);
         } else {
-            warn('optional external UK postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
+            warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
         }
 
-        if (CONST_Use_Extra_US_Postcodes) {
-            $this->pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
+        $sPostcodeFilename = CONST_BasePath.'/data/us_postcode_data.sql.gz';
+        if (file_exists($sPostcodeFilename)) {
+            $this->pgsqlRunScriptFile($sPostcodeFilename);
+        } else {
+            warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
         }
 
         if ($this->bNoPartitions) {
@@ -174,30 +180,30 @@ class SetupFunctions
             fail("osm2pgsql not found in '$osm2pgsql'");
         }
 
-        $osm2pgsql .= ' -S '.CONST_Import_Style;
+        $osm2pgsql .= ' -S '.escapeshellarg(CONST_Import_Style);
 
         if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
-            $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
+            $osm2pgsql .= ' --flat-nodes '.escapeshellarg(CONST_Osm2pgsql_Flatnode_File);
         }
 
         if (CONST_Tablespace_Osm2pgsql_Data)
-            $osm2pgsql .= ' --tablespace-slim-data '.CONST_Tablespace_Osm2pgsql_Data;
+            $osm2pgsql .= ' --tablespace-slim-data '.escapeshellarg(CONST_Tablespace_Osm2pgsql_Data);
         if (CONST_Tablespace_Osm2pgsql_Index)
-            $osm2pgsql .= ' --tablespace-slim-index '.CONST_Tablespace_Osm2pgsql_Index;
+            $osm2pgsql .= ' --tablespace-slim-index '.escapeshellarg(CONST_Tablespace_Osm2pgsql_Index);
         if (CONST_Tablespace_Place_Data)
-            $osm2pgsql .= ' --tablespace-main-data '.CONST_Tablespace_Place_Data;
+            $osm2pgsql .= ' --tablespace-main-data '.escapeshellarg(CONST_Tablespace_Place_Data);
         if (CONST_Tablespace_Place_Index)
-            $osm2pgsql .= ' --tablespace-main-index '.CONST_Tablespace_Place_Index;
+            $osm2pgsql .= ' --tablespace-main-index '.escapeshellarg(CONST_Tablespace_Place_Index);
         $osm2pgsql .= ' -lsc -O gazetteer --hstore --number-processes 1';
-        $osm2pgsql .= ' -C '.$this->iCacheMemory;
-        $osm2pgsql .= ' -P '.$this->aDSNInfo['port'];
+        $osm2pgsql .= ' -C '.escapeshellarg($this->iCacheMemory);
+        $osm2pgsql .= ' -P '.escapeshellarg($this->aDSNInfo['port']);
         if (isset($this->aDSNInfo['username'])) {
-            $osm2pgsql .= ' -U '.$this->aDSNInfo['username'];
+            $osm2pgsql .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $osm2pgsql .= ' -H '.$this->aDSNInfo['hostspec'];
+            $osm2pgsql .= ' -H '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
-        $osm2pgsql .= ' -d '.$this->aDSNInfo['database'].' '.$sOSMFile;
+        $osm2pgsql .= ' -d '.escapeshellarg($this->aDSNInfo['database']).' '.escapeshellarg($sOSMFile);
 
         $this->runWithPgEnv($osm2pgsql);
 
@@ -317,19 +323,12 @@ class SetupFunctions
 
     public function importWikipediaArticles()
     {
-        $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikipedia_article.sql.bin';
-        $sWikiRedirectsFile = CONST_Wikipedia_Data_Path.'/wikipedia_redirect.sql.bin';
+        $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikimedia_importance.sql.gz';
         if (file_exists($sWikiArticlesFile)) {
-            info('Importing wikipedia articles');
+            info('Importing wikipedia articles and redirects');
             $this->pgsqlRunDropAndRestore($sWikiArticlesFile);
         } else {
-            warn('wikipedia article dump file not found - places will have default importance');
-        }
-        if (file_exists($sWikiRedirectsFile)) {
-            info('Importing wikipedia redirects');
-            $this->pgsqlRunDropAndRestore($sWikiRedirectsFile);
-        } else {
-            warn('wikipedia redirect dump file not found - some place importance values may be missing');
+            warn('wikipedia importance dump file not found - places will have default importance');
         }
     }
 
@@ -464,6 +463,10 @@ class SetupFunctions
     {
         info('Import Tiger data');
 
+        $aFilenames = glob(CONST_Tiger_Data_Path.'/*.sql');
+        info('Found '.count($aFilenames).' SQL files in path '.CONST_Tiger_Data_Path);
+        if (empty($aFilenames)) return;
+
         $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql');
         $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate);
         $sTemplate = $this->replaceTablespace(
@@ -488,7 +491,7 @@ class SetupFunctions
             pg_ping($aDBInstances[$i]);
         }
 
-        foreach (glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile) {
+        foreach ($aFilenames as $sFile) {
             echo $sFile.': ';
             $hFile = fopen($sFile, 'r');
             $sSQL = fgets($hFile, 100000);
@@ -558,17 +561,15 @@ class SetupFunctions
         $sSQL .= ' GROUP BY country_code, pc';
         $this->pgExec($sSQL);
 
-        if (CONST_Use_Extra_US_Postcodes) {
-            // only add postcodes that are not yet available in OSM
-            $sSQL  = 'INSERT INTO location_postcode';
-            $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
-            $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
-            $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
-            $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
-            $sSQL .= '        (SELECT postcode FROM location_postcode';
-            $sSQL .= "          WHERE country_code = 'us')";
-            $this->pgExec($sSQL);
-        }
+        // only add postcodes that are not yet available in OSM
+        $sSQL  = 'INSERT INTO location_postcode';
+        $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
+        $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
+        $sSQL .= '       ST_SetSRID(ST_Point(x,y),4326)';
+        $sSQL .= '  FROM us_postcode WHERE postcode NOT IN';
+        $sSQL .= '        (SELECT postcode FROM location_postcode';
+        $sSQL .= "          WHERE country_code = 'us')";
+        $this->pgExec($sSQL);
 
         // add missing postcodes for GB (if available)
         $sSQL  = 'INSERT INTO location_postcode';
@@ -593,13 +594,15 @@ class SetupFunctions
     public function index($bIndexNoanalyse)
     {
         $sOutputFile = '';
-        $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i -d '.$this->aDSNInfo['database'].' -P '
-            .$this->aDSNInfo['port'].' -t '.$this->iInstances.$sOutputFile;
+        $sBaseCmd = CONST_InstallPath.'/nominatim/nominatim -i'
+            .' -d '.escapeshellarg($this->aDSNInfo['database'])
+            .' -P '.escapeshellarg($this->aDSNInfo['port'])
+            .' -t '.escapeshellarg($this->iInstances.$sOutputFile);
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sBaseCmd .= ' -H '.$this->aDSNInfo['hostspec'];
+            $sBaseCmd .= ' -H '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sBaseCmd .= ' -U '.$this->aDSNInfo['username'];
+            $sBaseCmd .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         info('Index ranks 0 - 4');
@@ -723,9 +726,7 @@ class SetupFunctions
         }
         foreach ($aDropTables as $sDrop) {
             if ($this->bVerbose) echo "Dropping table $sDrop\n";
-            $this->oDB->exec("DROP TABLE $sDrop CASCADE");
-            // ignore warnings/errors as they might be caused by a table having
-            // been deleted already by CASCADE
+            $this->oDB->exec("DROP TABLE IF EXISTS $sDrop CASCADE");
         }
 
         if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) {
@@ -738,15 +739,18 @@ class SetupFunctions
 
     private function pgsqlRunDropAndRestore($sDumpFile)
     {
-        $sCMD = 'pg_restore -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'].' --no-owner -Fc --clean '.$sDumpFile;
+        $sCMD = 'pg_restore'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' -d '.escapeshellarg($this->aDSNInfo['database'])
+            .' --no-owner -Fc --clean '.escapeshellarg($sDumpFile);
         if ($this->oDB->getPostgresVersion() >= 9.04) {
             $sCMD .= ' --if-exists';
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCMD .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sCMD .= ' -U '.$this->aDSNInfo['username'];
+            $sCMD .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
 
         $this->runWithPgEnv($sCMD);
@@ -810,15 +814,17 @@ class SetupFunctions
     {
         if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
 
-        $sCMD = 'psql -p '.$this->aDSNInfo['port'].' -d '.$this->aDSNInfo['database'];
+        $sCMD = 'psql'
+            .' -p '.escapeshellarg($this->aDSNInfo['port'])
+            .' -d '.escapeshellarg($this->aDSNInfo['database']);
         if (!$this->bVerbose) {
             $sCMD .= ' -q';
         }
         if (isset($this->aDSNInfo['hostspec'])) {
-            $sCMD .= ' -h '.$this->aDSNInfo['hostspec'];
+            $sCMD .= ' -h '.escapeshellarg($this->aDSNInfo['hostspec']);
         }
         if (isset($this->aDSNInfo['username'])) {
-            $sCMD .= ' -U '.$this->aDSNInfo['username'];
+            $sCMD .= ' -U '.escapeshellarg($this->aDSNInfo['username']);
         }
         $aProcEnv = null;
         if (isset($this->aDSNInfo['password'])) {
@@ -831,12 +837,12 @@ class SetupFunctions
                              1 => array('pipe', 'w'),
                              2 => array('file', '/dev/null', 'a')
                             );
-            $hGzipProcess = proc_open('zcat '.$sFilename, $aDescriptors, $ahGzipPipes);
+            $hGzipProcess = proc_open('zcat '.escapeshellarg($sFilename), $aDescriptors, $ahGzipPipes);
             if (!is_resource($hGzipProcess)) fail('unable to start zcat');
             $aReadPipe = $ahGzipPipes[1];
             fclose($ahGzipPipes[0]);
         } else {
-            $sCMD .= ' -f '.$sFilename;
+            $sCMD .= ' -f '.escapeshellarg($sFilename);
             $aReadPipe = array('pipe', 'r');
         }
         $aDescriptors = array(