git.openstreetmap.org Git - nominatim.git/blobdiff - utils/setup.php
* use osm2pgsql from $PATH if none present locally
[nominatim.git] / utils / setup.php
index 4f3886f00bb6da28f7ac2311fbca9b17aab1053e..c7667b15902e3b1ea8e9223bef8be6759fdd57a0 100755 (executable)
                array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
                array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
 
                array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
                array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
 
-               array('all', '', 0, 1, 1, 1, 'realpath', 'Do the complete process'),
+               array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
+               array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
+
+               array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
 
                array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
                array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
 
                array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
                array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
-               array('import-data', '', 0, 1, 1, 1, 'realpath', 'Import a osm file'),
+               array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
                array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
                array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
                array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
                array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
                array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
                array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
                array('create-partitions', '', 0, 1, 0, 0, 'bool', 'Create required partition tables and triggers'),
                array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
+               array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
+               array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
+               array('create-roads', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
+               array('osmosis-init', '', 0, 1, 0, 0, 'bool', 'Generate default osmosis configuration'),
+               array('osmosis-init-date', '', 0, 1, 1, 1, 'string', 'Generate default osmosis configuration'),
+               array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
+               array('index-output', '', 0, 1, 1, 1, 'string', 'File to dump index information to'),
        );
        getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
 
        $bDidSomething = false;
 
        );
        getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
 
        $bDidSomething = false;
 
-       if ($aCMDResult['create-db'] || isset($aCMDResult['all']))
+       // This is a pretty aggressive default - the number of processors in the box minus 1
+       $iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1);
+       if ($iInstances < 1)
+       {
+               $iInstances = 1;
+               echo "WARNING: resetting threads to $iInstances\n";
+       }
+       if ($iInstances > getProcessorCount())
+       {
+               $iInstances = getProcessorCount();
+               echo "WARNING: resetting threads to $iInstances\n";
+       }
+       if (isset($aCMDResult['osm-file']) && !isset($aCMDResult['osmosis-init-date']))
+       {
+               $sBaseFile = basename($aCMDResult['osm-file']);
+               if (preg_match('#^planet-([0-9]{2})([0-9]{2})([0-9]{2})[.]#', $sBaseFile, $aMatch))
+               {
+                       $iTime = mktime(0, 0, 0, $aMatch[2], $aMatch[3], '20'.$aMatch[1]);
+                       $iTime -= (60*60*24);
+                       $aCMDResult['osmosis-init-date'] = date('Y-m-d', $iTime).'T22:00:00Z';
+               }
+       }
+
+       if ($aCMDResult['create-db'] || $aCMDResult['all'])
        {
        {
+               echo "Create DB\n";
                $bDidSomething = true;
                $oDB =& DB::connect(CONST_Database_DSN, false);
                if (!PEAR::isError($oDB))
                $bDidSomething = true;
                $oDB =& DB::connect(CONST_Database_DSN, false);
                if (!PEAR::isError($oDB))
@@ -35,8 +69,9 @@
                passthru('createdb nominatim');
        }
 
                passthru('createdb nominatim');
        }
 
-       if ($aCMDResult['create-db'] || isset($aCMDResult['all']))
+       if ($aCMDResult['create-db'] || $aCMDResult['all'])
        {
        {
+               echo "Create DB (2)\n";
                $bDidSomething = true;
                // TODO: path detection, detection memory, etc.
 
                $bDidSomething = true;
                // TODO: path detection, detection memory, etc.
 
                pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
                pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
                pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/postgis.sql');
                pgsqlRunScriptFile(CONST_Path_Postgresql_Postgis.'/spatial_ref_sys.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql');
-               pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturaleathdata.sql');
+               pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/us_statecounty.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/us_state.sql');
+               pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql');
                pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
        }
 
                pgsqlRunScriptFile(CONST_BasePath.'/data/worldboundaries.sql');
        }
 
-       if (isset($aCMDResult['all']) && !isset($aCMDResult['import-data'])) $aCMDResult['import-data'] = $aCMDResult['all'];
-       if (isset($aCMDResult['import-data']) && $aCMDResult['import-data'])
+       if ($aCMDResult['import-data'] || $aCMDResult['all'])
        {
        {
+               echo "Import\n";
                $bDidSomething = true;
                $bDidSomething = true;
-               passthru(CONST_BasePath.'/osm2pgsql/osm2pgsql -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['import-data']);
+
+        $osm2pgsql = CONST_BasePath.'/osm2pgsql/osm2pgsql';
+        if (!file_exists($osm2pgsql)) $osm2pgsql = trim(`which osm2pgsql`);
+        if (!file_exists($osm2pgsql)) fail("please download and build osm2pgsql");
+        passthru($osm2pgsql.' -lsc -O gazetteer -C 10000 --hstore -d nominatim '.$aCMDResult['osm-file']);
+
+               $oDB =& getDB();
+               $x = $oDB->getRow('select * from place limit 1');
+               if (!$x || PEAR::isError($x)) fail('No Data');
        }
 
        }
 
-       if ($aCMDResult['create-functions'] || isset($aCMDResult['all']))
+       if ($aCMDResult['create-functions'] || $aCMDResult['all'])
        {
        {
+               echo "Functions\n";
                $bDidSomething = true;
                $bDidSomething = true;
+               if (!file_exists(CONST_BasePath.'/module/nominatim.so')) fail("nominatim module not built");
                $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
                $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
                pgsqlRunScript($sTemplate);
        }
 
                $sTemplate = file_get_contents(CONST_BasePath.'/sql/functions.sql');
                $sTemplate = str_replace('{modulepath}',CONST_BasePath.'/module', $sTemplate);
                pgsqlRunScript($sTemplate);
        }
 
-       if ($aCMDResult['create-tables'] || isset($aCMDResult['all']))
+       if ($aCMDResult['create-tables'] || $aCMDResult['all'])
        {
        {
+               echo "Tables\n";
                $bDidSomething = true;
                pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
 
                $bDidSomething = true;
                pgsqlRunScriptFile(CONST_BasePath.'/sql/tables.sql');
 
                pgsqlRunScript($sTemplate);
        }
 
                pgsqlRunScript($sTemplate);
        }
 
-       if ($aCMDResult['create-partitions'] || isset($aCMDResult['all']))
+       if ($aCMDResult['create-partitions'] || $aCMDResult['all'])
        {
        {
+               echo "Partitions\n";
                $bDidSomething = true;
                $oDB =& getDB();
                $sSQL = 'select partition from country_name order by country_code';
                $bDidSomething = true;
                $oDB =& getDB();
                $sSQL = 'select partition from country_name order by country_code';
                        }
                        $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
                }
                        }
                        $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate);
                }
+
                pgsqlRunScript($sTemplate);
        }
 
                pgsqlRunScript($sTemplate);
        }
 
-       if ($aCMDResult['load-data'] || isset($aCMDResult['all']))
+       if ($aCMDResult['load-data'] || $aCMDResult['all'])
        {
        {
+               echo "Load Data\n";
                $bDidSomething = true;
 
                $oDB =& getDB();
                $bDidSomething = true;
 
                $oDB =& getDB();
                if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
                echo '.';
 
                if (!pg_query($oDB->connection, 'CREATE SEQUENCE seq_place start 100000')) fail(pg_last_error($oDB->connection));
                echo '.';
 
-               $iInstances = 16;
                $aDBInstances = array();
                for($i = 0; $i < $iInstances; $i++)
                {
                $aDBInstances = array();
                for($i = 0; $i < $iInstances; $i++)
                {
                        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
                        $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
                        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
                        $sSQL = 'insert into placex (osm_type, osm_id, class, type, name, admin_level, ';
                        $sSQL .= 'housenumber, street, isin, postcode, country_code, extratags, ';
                        $sSQL .= 'geometry) select * from place where osm_id % '.$iInstances.' = '.$i;
-var_dump($sSQL);
+                       if ($aCMDResult['verbose']) echo "$sSQL\n";
+                       if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
+               }
+               $bAnyBusy = true;
+               while($bAnyBusy)
+               {
+                       $bAnyBusy = false;
+                       for($i = 0; $i < $iInstances; $i++)
+                       {
+                               if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
+                       }
+                       sleep(1);
+                       echo '.';
+               }
+               echo "\n";
+       }
+
+       if ($aCMDResult['create-roads'])
+       {
+               $bDidSomething = true;
+
+               $oDB =& getDB();
+               $aDBInstances = array();
+               for($i = 0; $i < $iInstances; $i++)
+               {
+                       $aDBInstances[$i] =& getDB(true);
+                       if (!pg_query($aDBInstances[$i]->connection, 'set enable_bitmapscan = off')) fail(pg_last_error($oDB->connection));
+                       $sSQL = 'select count(*) from (select insertLocationRoad(partition, place_id, country_code, geometry) from ';
+                       $sSQL .= 'placex where osm_id % '.$iInstances.' = '.$i.' and rank_search between 26 and 27 and class = \'highway\') as x ';
+                       if ($aCMDResult['verbose']) echo "$sSQL\n";
                        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
                }
                $bAnyBusy = true;
                        if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
                }
                $bAnyBusy = true;
@@ -156,6 +234,102 @@ var_dump($sSQL);
                echo "\n";
        }
 
                echo "\n";
        }
 
+       if ($aCMDResult['import-tiger-data'])
+       {
+               $bDidSomething = true;
+
+               $aDBInstances = array();
+               for($i = 0; $i < $iInstances; $i++)
+               {
+                       $aDBInstances[$i] =& getDB(true);
+               }
+
+               foreach(glob(CONST_BasePath.'/data/tiger2009/*.sql') as $sFile)
+               {
+                       echo $sFile.': ';
+                       $hFile = fopen($sFile, "r");
+                       $sSQL = fgets($hFile, 100000);
+                       $iLines = 0;
+
+                       while(true)
+                       {
+                               for($i = 0; $i < $iInstances; $i++)
+                               {
+                                       if (!pg_connection_busy($aDBInstances[$i]->connection))
+                                       {
+                                               while(pg_get_result($aDBInstances[$i]->connection));
+                                               $sSQL = fgets($hFile, 100000);
+                                               if (!$sSQL) break 2;
+                                               if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection));
+                                               $iLines++;
+                                               if ($iLines == 1000)
+                                               {
+                                                       echo ".";
+                                                       $iLines = 0;
+                                               }
+                                       }
+                               }
+                               usleep(10);
+                       }
+
+                       fclose($hFile);
+       
+                       $bAnyBusy = true;
+                       while($bAnyBusy)
+                       {
+                               $bAnyBusy = false;
+                               for($i = 0; $i < $iInstances; $i++)
+                               {
+                                       if (pg_connection_busy($aDBInstances[$i]->connection)) $bAnyBusy = true;
+                               }
+                               usleep(10);
+                       }
+                       echo "\n";
+               }
+       }
+
+       if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all'])
+       {
+               $bDidSomething = true;
+               $oDB =& getDB();
+               if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection));
+               $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
+               $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,";
+               $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,";
+               $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
+               $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x";
+               if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
+
+               $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) ";
+               $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',";
+               $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode";
+               if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
+       }
+
+       if (($aCMDResult['osmosis-init'] || $aCMDResult['all']) && isset($aCMDResult['osmosis-init-date']))
+       {
+               $bDidSomething = true;
+
+               if (!file_exists(CONST_BasePath.'/osmosis-0.38/bin/osmosis')) fail("please download osmosis");
+               if (file_exists(CONST_BasePath.'/settings/configuration.txt')) echo "settings/configuration.txt already exists\n";
+               else passthru(CONST_BasePath.'/osmosis-0.38/bin/osmosis --read-replication-interval-init '.CONST_BasePath.'/settings');
+
+               $sDate = $aCMDResult['osmosis-init-date'];
+               $sURL = 'http://toolserver.org/~mazder/replicate-sequences/?'.$sDate;
+               echo "Getting state file: $sURL\n";
+               $sStateFile = file_get_contents($sURL);
+               if (!$sStateFile || strlen($sStateFile) > 1000) fail("unable to obtain state file");
+               file_put_contents(CONST_BasePath.'/settings/state.txt', $sStateFile);
+       }
+
+       if ($aCMDResult['index'] || $aCMDResult['all'])
+       {
+               $bDidSomething = true;
+               $sOutputFile = '';
+               if (isset($aCMDResult['index-output'])) $sOutputFile = ' -F '.$aCMDResult['index-output'];
+               passthru(CONST_BasePath.'/nominatim/nominatim -i -d nominatim -t '.$iInstances.$sOutputFile);
+       }
+
        if (!$bDidSomething)
        {
                showUsage($aCMDOptions, true);
        if (!$bDidSomething)
        {
                showUsage($aCMDOptions, true);
@@ -194,8 +368,8 @@ var_dump($sSQL);
        {
                // Convert database DSN to psql parameters
                $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
        {
                // Convert database DSN to psql parameters
                $aDSNInfo = DB::parseDSN(CONST_Database_DSN);
-               $sCMD = 'psql '.$aDSNInfo['database'];
-
+               if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
+               $sCMD = 'psql -p '.$aDSNInfo['port'].' '.$aDSNInfo['database'];
                $aDescriptors = array(
                        0 => array('pipe', 'r'),
                        1 => array('pipe', 'w'),
                $aDescriptors = array(
                        0 => array('pipe', 'r'),
                        1 => array('pipe', 'w'),