X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4c7145c2938022d8355f49eb89a5ae1a68217a18..203b5f7de12ca09d275103eb2eb70fe5a4c20466:/utils/setup.php?ds=sidebyside diff --git a/utils/setup.php b/utils/setup.php index 6a809c73..e2810737 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -7,7 +7,7 @@ ini_set('memory_limit', '800M'); $aCMDOptions = array( - "Create and setup nominatim search system", + 'Create and setup nominatim search system', array('help', 'h', 0, 1, 0, 0, false, 'Show Help'), array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), @@ -28,7 +28,7 @@ $aCMDOptions array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'), array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'), array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'), - array('no-partitions', '', 0, 1, 0, 0, 'bool', "Do not partition search indices (speeds up import of single country extracts)"), + array('no-partitions', '', 0, 1, 0, 0, 'bool', 'Do not partition search indices (speeds up import of single country extracts)'), array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'), array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'), array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'), @@ -61,15 +61,15 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) { } -// This is a pretty hard core default - the number of processors in the box - 1 -$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(getProcessorCount()-1); +// by default, use all but one processor, but never more than 15. +$iInstances = isset($aCMDResult['threads'])?$aCMDResult['threads']:(min(16,getProcessorCount())-1); if ($iInstances < 1) { $iInstances = 1; - echo "WARNING: resetting threads to $iInstances\n"; + warn("resetting threads to $iInstances"); } if ($iInstances > getProcessorCount()) { $iInstances = getProcessorCount(); - echo "WARNING: resetting threads to $iInstances\n"; + warn("resetting threads to $iInstances"); } // Assume we can steal all the cache memory in the box (unless told otherwise) @@ -83,7 +83,7 @@ $aDSNInfo = DB::parseDSN(CONST_Database_DSN); if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; if ($aCMDResult['create-db'] || $aCMDResult['all']) { - echo "Create DB\n"; + info('Create DB'); $bDidSomething = true; $oDB = DB::connect(CONST_Database_DSN, false); if (!PEAR::isError($oDB)) { @@ -93,18 +93,16 @@ if ($aCMDResult['create-db'] || $aCMDResult['all']) { } if ($aCMDResult['setup-db'] || $aCMDResult['all']) { - echo "Setup DB\n"; + info('Setup DB'); $bDidSomething = true; - // TODO: path detection, detection memory, etc. - // $oDB =& getDB(); $fPostgresVersion = getPostgresVersion($oDB); echo 'Postgres version found: '.$fPostgresVersion."\n"; if ($fPostgresVersion < 9.1) { - fail("Minimum supported version of Postgresql is 9.1."); + fail('Minimum supported version of Postgresql is 9.1.'); } pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore'); @@ -117,7 +115,7 @@ if ($aCMDResult['setup-db'] || $aCMDResult['all']) { if ($iNumFunc == 0) { pgsqlRunScript("create function hstore_to_json(dummy hstore) returns text AS 'select null::text' language sql immutable"); - echo "WARNING: Postgresql is too old. extratags and namedetails API not available."; + warn('Postgresql is too old. extratags and namedetails API not available.'); } $fPostgisVersion = getPostgisVersion($oDB); @@ -132,14 +130,40 @@ if ($aCMDResult['setup-db'] || $aCMDResult['all']) { pgsqlRunScript('ALTER FUNCTION ST_Distance_Spheroid(geometry, geometry, spheroid) RENAME TO ST_DistanceSpheroid'); } + $i = chksql($oDB->getOne("select count(*) from pg_user where usename = '".CONST_Database_Web_User."'")); + if ($i == 0) { + echo "\nERROR: Web user '".CONST_Database_Web_User."' does not exist. Create it with:\n"; + echo "\n createuser ".CONST_Database_Web_User."\n\n"; + exit(1); + } + + // Try accessing the C module, so we know early if something is wrong + // and can simply error out. + $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '"; + $sSQL .= CONST_InstallPath."/module/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT"; + $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);'; + $oResult = $oDB->query($sSQL); + + if (PEAR::isError($oResult)) { + echo "\nERROR: Failed to load nominatim module. Reason:\n"; + echo $oResult->userinfo."\n\n"; + exit(1); + } + + if (!file_exists(CONST_ExtraDataPath.'/country_osm_grid.sql.gz')) { + echo 'Error: you need to download the country_osm_grid first:'; + echo "\n wget -O ".CONST_ExtraDataPath."/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz\n"; + exit(1); + } + pgsqlRunScriptFile(CONST_BasePath.'/data/country_name.sql'); pgsqlRunScriptFile(CONST_BasePath.'/data/country_naturalearthdata.sql'); - pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql'); + pgsqlRunScriptFile(CONST_BasePath.'/data/country_osm_grid.sql.gz'); pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_table.sql'); if (file_exists(CONST_BasePath.'/data/gb_postcode_data.sql.gz')) { pgsqlRunScriptFile(CONST_BasePath.'/data/gb_postcode_data.sql.gz'); } else { - echo "WARNING: external UK postcode table not found.\n"; + warn('external UK postcode table not found.'); } if (CONST_Use_Extra_US_Postcodes) { pgsqlRunScriptFile(CONST_BasePath.'/data/us_postcode.sql'); @@ -158,7 +182,7 @@ if ($aCMDResult['setup-db'] || $aCMDResult['all']) { } if ($aCMDResult['import-data'] || $aCMDResult['all']) { - echo "Import\n"; + info('Import data'); $bDidSomething = true; $osm2pgsql = CONST_Osm2pgsql_Binary; @@ -168,7 +192,7 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) { fail("osm2pgsql not found in '$osm2pgsql'"); } - if (!is_null(CONST_Osm2pgsql_Flatnode_File)) { + if (!is_null(CONST_Osm2pgsql_Flatnode_File) && CONST_Osm2pgsql_Flatnode_File) { $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } if (CONST_Tablespace_Osm2pgsql_Data) @@ -192,16 +216,18 @@ if ($aCMDResult['import-data'] || $aCMDResult['all']) { } if ($aCMDResult['create-functions'] || $aCMDResult['all']) { - echo "Functions\n"; + info('Create Functions'); $bDidSomething = true; - if (!file_exists(CONST_InstallPath.'/module/nominatim.so')) fail("nominatim module not built"); + if (!file_exists(CONST_InstallPath.'/module/nominatim.so')) { + fail('nominatim module not built'); + } create_sql_functions($aCMDResult); } if ($aCMDResult['create-tables'] || $aCMDResult['all']) { + info('Create Tables'); $bDidSomething = true; - echo "Tables\n"; $sTemplate = file_get_contents(CONST_BasePath.'/sql/tables.sql'); $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate); $sTemplate = replace_tablespace( @@ -237,12 +263,12 @@ if ($aCMDResult['create-tables'] || $aCMDResult['all']) { pgsqlRunScript($sTemplate, false); // re-run the functions - echo "Functions\n"; + info('Recreate Functions'); create_sql_functions($aCMDResult); } if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { - echo "Partition Tables\n"; + info('Create Partition Tables'); $bDidSomething = true; $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-tables.src.sql'); @@ -282,7 +308,7 @@ if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) { - echo "Partition Functions\n"; + info('Create Partition Functions'); $bDidSomething = true; $sTemplate = file_get_contents(CONST_BasePath.'/sql/partition-functions.src.sql'); @@ -295,24 +321,22 @@ if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) { $sWikiArticlesFile = CONST_Wikipedia_Data_Path.'/wikipedia_article.sql.bin'; $sWikiRedirectsFile = CONST_Wikipedia_Data_Path.'/wikipedia_redirect.sql.bin'; if (file_exists($sWikiArticlesFile)) { - echo "Importing wikipedia articles..."; + info('Importing wikipedia articles'); pgsqlRunDropAndRestore($sWikiArticlesFile); - echo "...done\n"; } else { - echo "WARNING: wikipedia article dump file not found - places will have default importance\n"; + warn('wikipedia article dump file not found - places will have default importance'); } if (file_exists($sWikiRedirectsFile)) { - echo "Importing wikipedia redirects..."; + info('Importing wikipedia redirects'); pgsqlRunDropAndRestore($sWikiRedirectsFile); - echo "...done\n"; } else { - echo "WARNING: wikipedia redirect dump file not found - some place importance values may be missing\n"; + warn('wikipedia redirect dump file not found - some place importance values may be missing'); } } if ($aCMDResult['load-data'] || $aCMDResult['all']) { - echo "Drop old Data\n"; + info('Drop old Data'); $bDidSomething = true; $oDB =& getDB(); @@ -355,11 +379,11 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { // pre-create the word list if (!$aCMDResult['disable-token-precalc']) { - echo "Loading word list\n"; + info('Loading word list'); pgsqlRunScriptFile(CONST_BasePath.'/data/words.sql'); } - echo "Load Data\n"; + info('Load Data'); $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'; $aDBInstances = array(); @@ -367,7 +391,9 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { for ($i = 0; $i < $iLoadThreads; $i++) { $aDBInstances[$i] =& getDB(true); $sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i"; - $sSQL .= " and not (class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString')"; + $sSQL .= " and not (class='place' and type='houses' and osm_type='W'"; + $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')"; + $sSQL .= ' and ST_IsValid(geometry)'; if ($aCMDResult['verbose']) echo "$sSQL\n"; if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) { fail(pg_last_error($aDBInstances[$i]->connection)); @@ -394,11 +420,22 @@ if ($aCMDResult['load-data'] || $aCMDResult['all']) { echo '.'; } echo "\n"; - echo "Reanalysing database...\n"; + info('Reanalysing database'); pgsqlRunScript('ANALYSE'); + + $sDatabaseDate = getDatabaseDate($oDB); + pg_query($oDB->connection, 'TRUNCATE import_status'); + if ($sDatabaseDate === false) { + warn('could not determine database date.'); + } else { + $sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')"; + pg_query($oDB->connection, $sSQL); + echo "Latest data imported from $sDatabaseDate.\n"; + } } if ($aCMDResult['import-tiger-data']) { + info('Import Tiger data'); $bDidSomething = true; $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_start.sql'); @@ -422,7 +459,7 @@ if ($aCMDResult['import-tiger-data']) { foreach (glob(CONST_Tiger_Data_Path.'/*.sql') as $sFile) { echo $sFile.': '; - $hFile = fopen($sFile, "r"); + $hFile = fopen($sFile, 'r'); $sSQL = fgets($hFile, 100000); $iLines = 0; @@ -435,7 +472,7 @@ if ($aCMDResult['import-tiger-data']) { if (!pg_send_query($aDBInstances[$i]->connection, $sSQL)) fail(pg_last_error($oDB->connection)); $iLines++; if ($iLines == 1000) { - echo "."; + echo '.'; $iLines = 0; } } @@ -456,7 +493,7 @@ if ($aCMDResult['import-tiger-data']) { echo "\n"; } - echo "Creating indexes\n"; + info('Creating indexes on Tiger data'); $sTemplate = file_get_contents(CONST_BasePath.'/sql/tiger_import_finish.sql'); $sTemplate = str_replace('{www-user}', CONST_Database_Web_User, $sTemplate); $sTemplate = replace_tablespace( @@ -473,120 +510,93 @@ if ($aCMDResult['import-tiger-data']) { } if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { + info('Calculate Postcodes'); $bDidSomething = true; $oDB =& getDB(); - if (!pg_query($oDB->connection, 'DELETE from placex where osm_type=\'P\'')) fail(pg_last_error($oDB->connection)); - $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,country_code,"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select country_code,postcode,"; - $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by country_code,postcode) as x "; - $sSQL .= "where ST_Point(x,y) is not null"; - if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); + if (!pg_query($oDB->connection, 'TRUNCATE location_postcode')) { + fail(pg_last_error($oDB->connection)); + } + + $sSQL = 'INSERT INTO location_postcode'; + $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; + $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; + $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,"; + $sSQL .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))'; + $sSQL .= ' FROM placex'; + $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; + $sSQL .= ' AND geometry IS NOT null'; + $sSQL .= ' GROUP BY country_code, pc'; + + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); + } if (CONST_Use_Extra_US_Postcodes) { - $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,country_code,geometry) "; - $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,'us',"; - $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from us_postcode"; + // only add postcodes that are not yet available in OSM + $sSQL = 'INSERT INTO location_postcode'; + $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; + $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,"; + $sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)'; + $sSQL .= ' FROM us_postcode WHERE postcode NOT IN'; + $sSQL .= ' (SELECT postcode FROM location_postcode'; + $sSQL .= " WHERE country_code = 'us')"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); } -} -if ($aCMDResult['osmosis-init'] || ($aCMDResult['all'] && !$aCMDResult['drop'])) { // no use doing osmosis-init when dropping update tables - $bDidSomething = true; - $oDB =& getDB(); + // add missing postcodes for GB (if available) + $sSQL = 'INSERT INTO location_postcode'; + $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; + $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry"; + $sSQL .= ' FROM gb_postcode WHERE postcode NOT IN'; + $sSQL .= ' (SELECT postcode FROM location_postcode'; + $sSQL .= " WHERE country_code = 'gb')"; + if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); - if (!file_exists(CONST_Osmosis_Binary)) { - echo "Please download osmosis.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n"; - if (!$aCMDResult['all']) { - fail("osmosis not found in '".CONST_Osmosis_Binary."'"); - } - } else { - if (file_exists(CONST_InstallPath.'/settings/configuration.txt')) { - echo "settings/configuration.txt already exists\n"; - } else { - passthru(CONST_Osmosis_Binary.' --read-replication-interval-init '.CONST_InstallPath.'/settings'); - // update osmosis configuration.txt with our settings - passthru("sed -i 's!baseUrl=.*!baseUrl=".CONST_Replication_Url."!' ".CONST_InstallPath.'/settings/configuration.txt'); - passthru("sed -i 's:maxInterval = .*:maxInterval = ".CONST_Replication_MaxInterval.":' ".CONST_InstallPath.'/settings/configuration.txt'); + if (!$aCMDResult['all']) { + $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'"; + $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)'; + if (!pg_query($oDB->connection, $sSQL)) { + fail(pg_last_error($oDB->connection)); } + } + $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM '; + $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p'; - // Find the last node in the DB - $iLastOSMID = $oDB->getOne("select max(osm_id) from place where osm_type = 'N'"); - - // Lookup the timestamp that node was created (less 3 hours for margin for changsets to be closed) - $sLastNodeURL = 'http://www.openstreetmap.org/api/0.6/node/'.$iLastOSMID."/1"; - $sLastNodeXML = file_get_contents($sLastNodeURL); - preg_match('#timestamp="(([0-9]{4})-([0-9]{2})-([0-9]{2})T([0-9]{2}):([0-9]{2}):([0-9]{2})Z)"#', $sLastNodeXML, $aLastNodeDate); - $iLastNodeTimestamp = strtotime($aLastNodeDate[1]) - (3*60*60); - - // Search for the correct state file - uses file timestamps so need to sort by date descending - $sRepURL = CONST_Replication_Url."/"; - $sRep = file_get_contents($sRepURL."?C=M;O=D;F=1"); - // download.geofabrik.de: 000/