From: Sarah Hoffmann Date: Thu, 4 Mar 2021 15:37:05 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~178 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/4a2873617dcbaf58ff6135aa7d8dcb115c0cc5ba?hp=c678cd1572bc3b15bb8ffa369bb63905718bd0fa Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index d62ecf86..414783d9 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -4,9 +4,9 @@ runs: using: "composite" steps: - - name: Install prerequisits + - name: Install prerequisites run: | - sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv + sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev python3-psycopg2 python3-pyosmium python3-dotenv python3-psutil python3-jinja2 shell: bash - name: Download dependencies diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 00000000..da6dbe03 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,11 @@ +[MASTER] + +extension-pkg-whitelist=osmium + +[MESSAGES CONTROL] + +[TYPECHECK] + +# closing added here because it sometimes triggers a false positive with +# 'with' statements. +ignored-classes=NominatimArgs,closing diff --git a/CMakeLists.txt b/CMakeLists.txt index 0e4234b5..fdc67ffe 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -198,7 +198,7 @@ if (BUILD_TESTS) if (PYLINT) message(STATUS "Using pylint binary ${PYLINT}") add_test(NAME pylint - COMMAND ${PYLINT} --extension-pkg-whitelist=osmium nominatim + COMMAND ${PYLINT} nominatim WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}) else() message(WARNING "pylint not found. Python linting tests disabled.") diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index 0013e993..eadaaff1 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -39,12 +39,14 @@ For running Nominatim: * [PostgreSQL](https://www.postgresql.org) (9.3+) * [PostGIS](https://postgis.net) (2.2+) * [Python 3](https://www.python.org/) (3.5+) - * [Psycopg2](https://www.psycopg.org) + * [Psycopg2](https://www.psycopg.org) (2.7+) * [Python Dotenv](https://github.com/theskumar/python-dotenv) + * [psutil](https://github.com/giampaolo/psutil) + * [Jinja2](https://palletsprojects.com/p/jinja/) * [PHP](https://php.net) (7.0 or later) * PHP-pgsql * PHP-intl (bundled with PHP) - ( PHP-cgi (for running queries from the command line) + * PHP-cgi (for running queries from the command line) For running continuous updates: diff --git a/docs/api/Status.md b/docs/api/Status.md index 0241b6fa..8c3e25e9 100644 --- a/docs/api/Status.md +++ b/docs/api/Status.md @@ -35,10 +35,16 @@ will return HTTP code 200 and a structure { "status": 0, "message": "OK", - "data_updated": "2020-05-04T14:47:00+00:00" + "data_updated": "2020-05-04T14:47:00+00:00", + "software_version": "3.6.0-0", + "database_version": "3.6.0-0" } ``` +The `software_version` field contains the version of Nominatim used to serve +the API. The `database_version` field contains the version of the data format +in the database. + On error will also return HTTP status code 200 and a structure with error code and message, e.g. 
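The pair of version fields added above lends itself to a quick client-side consistency check: when `database_version` lags behind `software_version`, the data format needs a migration before the new code can serve it reliably. A minimal PHP sketch of such a check (not part of this patch; the endpoint URL is an assumption, adjust it to your own install):

```
<?php
// Illustrative only - not part of this merge. Compares the two new fields
// from the /status.php endpoint; the base URL is an assumed local install.
$sBody = file_get_contents('http://localhost/nominatim/status.php?format=json');
if ($sBody === false) {
    fwrite(STDERR, "status endpoint unreachable\n");
    exit(1);
}

$aStatus = json_decode($sBody, true);
if (!is_array($aStatus) || $aStatus['status'] !== 0) {
    fwrite(STDERR, 'status error: '.($aStatus['message'] ?? 'unparseable response')."\n");
    exit(1);
}

echo 'Data last updated: '.$aStatus['data_updated']."\n";

// database_version only appears once the property is present in the database.
if (isset($aStatus['database_version'])
    && $aStatus['database_version'] !== $aStatus['software_version']
) {
    echo "Database format lags software version - a migration may be pending.\n";
}
```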
diff --git a/lib-php/DB.php b/lib-php/DB.php index 0454a0ff..abd23179 100644 --- a/lib-php/DB.php +++ b/lib-php/DB.php @@ -240,16 +240,6 @@ class DB return ($this->getOne($sSQL, array(':tablename' => $sTableName)) == 1); } - /** - * Returns a list of table names in the database - * - * @return array[] - */ - public function getListOfTables() - { - return $this->getCol("SELECT tablename FROM pg_tables WHERE schemaname='public'"); - } - /** * Deletes a table. Returns true if deleted or didn't exist. * @@ -262,76 +252,6 @@ class DB return $this->exec('DROP TABLE IF EXISTS '.$sTableName.' CASCADE') == 0; } - /** - * Check if an index exists in the database. Optional filtered by tablename - * - * @param string $sTableName - * - * @return boolean - */ - public function indexExists($sIndexName, $sTableName = null) - { - return in_array($sIndexName, $this->getListOfIndices($sTableName)); - } - - /** - * Returns a list of index names in the database, optional filtered by tablename - * - * @param string $sTableName - * - * @return array - */ - public function getListOfIndices($sTableName = null) - { - // table_name | index_name | column_name - // -----------------------+---------------------------------+-------------- - // country_name | idx_country_name_country_code | country_code - // country_osm_grid | idx_country_osm_grid_geometry | geometry - // import_polygon_delete | idx_import_polygon_delete_osmid | osm_id - // import_polygon_delete | idx_import_polygon_delete_osmid | osm_type - // import_polygon_error | idx_import_polygon_error_osmid | osm_id - // import_polygon_error | idx_import_polygon_error_osmid | osm_type - $sSql = <<< END -SELECT - t.relname as table_name, - i.relname as index_name, - a.attname as column_name -FROM - pg_class t, - pg_class i, - pg_index ix, - pg_attribute a -WHERE - t.oid = ix.indrelid - and i.oid = ix.indexrelid - and a.attrelid = t.oid - and a.attnum = ANY(ix.indkey) - and t.relkind = 'r' - and i.relname NOT LIKE 'pg_%' - FILTERS - ORDER BY - t.relname, - i.relname, - a.attname -END; - - $aRows = null; - if ($sTableName) { - $sSql = str_replace('FILTERS', 'and t.relname = :tablename', $sSql); - $aRows = $this->getAll($sSql, array(':tablename' => $sTableName)); - } else { - $sSql = str_replace('FILTERS', '', $sSql); - $aRows = $this->getAll($sSql); - } - - $aIndexNames = array_unique(array_map(function ($aRow) { - return $aRow['index_name']; - }, $aRows)); - sort($aIndexNames); - - return $aIndexNames; - } - /** * Tries to connect to the database but on failure doesn't throw an exception. 
* diff --git a/lib-php/Shell.php b/lib-php/Shell.php index 72f90735..b43db135 100644 --- a/lib-php/Shell.php +++ b/lib-php/Shell.php @@ -48,7 +48,7 @@ class Shell return join(' ', $aEscaped); } - public function run() + public function run($bExitOnFail = false) { $sCmd = $this->escapedCmd(); // $aEnv does not need escaping, proc_open seems to handle it fine @@ -67,6 +67,11 @@ class Shell fclose($aPipes[0]); // no stdin $iStat = proc_close($hProc); + + if ($iStat != 0 && $bExitOnFail) { + exit($iStat); + } + return $iStat; } diff --git a/lib-php/Status.php b/lib-php/Status.php index a276c4d5..2d9e78db 100644 --- a/lib-php/Status.php +++ b/lib-php/Status.php @@ -56,4 +56,10 @@ class Status return $iDataDateEpoch; } + + public function databaseVersion() + { + $sSQL = 'SELECT value FROM nominatim_properties WHERE property = \'database_version\''; + return $this->oDB->getOne($sSQL); + } } diff --git a/lib-php/admin/check_import_finished.php b/lib-php/admin/check_import_finished.php index f189fc9a..d5d011c4 100644 --- a/lib-php/admin/check_import_finished.php +++ b/lib-php/admin/check_import_finished.php @@ -5,197 +5,6 @@ require_once(CONST_LibDir.'/init-cmd.php'); loadSettings(getcwd()); -$term_colors = array( - 'green' => "\033[92m", - 'red' => "\x1B[31m", - 'normal' => "\033[0m" -); - -$print_success = function ($message = 'OK') use ($term_colors) { - echo $term_colors['green'].$message.$term_colors['normal']."\n"; -}; - -$print_fail = function ($message = 'Failed') use ($term_colors) { - echo $term_colors['red'].$message.$term_colors['normal']."\n"; -}; - -$oDB = new Nominatim\DB; - - -function isReverseOnlyInstallation() -{ - global $oDB; - return !$oDB->tableExists('search_name'); -} - -// Check (guess) if the setup.php included --drop -function isNoUpdateInstallation() -{ - global $oDB; - return $oDB->tableExists('placex') && !$oDB->tableExists('planet_osm_rels') ; -} - - -echo 'Checking database got created ... '; -if ($oDB->checkConnection()) { - $print_success(); -} else { - $print_fail(); - echo <<< END - Hints: - * Is the database server started? - * Check the NOMINATIM_DATABASE_DSN variable in your local .env - * Try connecting to the database with the same settings - -END; - exit(1); -} - - -echo 'Checking nominatim.so module installed ... '; -$sStandardWord = $oDB->getOne("SELECT make_standard_name('a')"); -if ($sStandardWord === 'a') { - $print_success(); -} else { - $print_fail(); - echo <<< END - The Postgresql extension nominatim.so was not found in the database. - Hints: - * Check the output of the CMmake/make installation step - * Does nominatim.so exist? - * Does nominatim.so exist on the database server? - * Can nominatim.so be accessed by the database user? - -END; - exit(1); -} - -if (!isNoUpdateInstallation()) { - echo 'Checking place table ... '; - if ($oDB->tableExists('place')) { - $print_success(); - } else { - $print_fail(); - echo <<< END - * The import didn't finish. - Hints: - * Check the output of the utils/setup.php you ran. - Usually the osm2pgsql step failed. Check for errors related to - * the file you imported not containing any places - * harddrive full - * out of memory (RAM) - * osm2pgsql killed by other scripts, for consuming to much memory - - END; - exit(1); - } -} - - -echo 'Checking indexing status ... '; -$iUnindexed = $oDB->getOne('SELECT count(*) FROM placex WHERE indexed_status > 0'); -if ($iUnindexed == 0) { - $print_success(); -} else { - $print_fail(); - echo <<< END - The indexing didn't finish. There is still $iUnindexed places. 
See the - question 'Can a stopped/killed import process be resumed?' in the - troubleshooting guide. - -END; - exit(1); -} - -echo "Search index creation\n"; -$aExpectedIndices = array( - // sql/indices.src.sql - 'idx_word_word_id', - 'idx_place_addressline_address_place_id', - 'idx_placex_rank_search', - 'idx_placex_rank_address', - 'idx_placex_parent_place_id', - 'idx_placex_geometry_reverse_lookuppolygon', - 'idx_placex_geometry_reverse_placenode', - 'idx_osmline_parent_place_id', - 'idx_osmline_parent_osm_id', - 'idx_postcode_id', - 'idx_postcode_postcode' -); -if (!isReverseOnlyInstallation()) { - $aExpectedIndices = array_merge($aExpectedIndices, array( - // sql/indices_search.src.sql - 'idx_search_name_nameaddress_vector', - 'idx_search_name_name_vector', - 'idx_search_name_centroid' - )); -} -if (!isNoUpdateInstallation()) { - $aExpectedIndices = array_merge($aExpectedIndices, array( - 'idx_placex_pendingsector', - 'idx_location_area_country_place_id', - 'idx_place_osm_unique', - )); -} - -foreach ($aExpectedIndices as $sExpectedIndex) { - echo "Checking index $sExpectedIndex ... "; - if ($oDB->indexExists($sExpectedIndex)) { - $print_success(); - } else { - $print_fail(); - echo <<< END - Hints: - * Run './utils/setup.php --create-search-indices --ignore-errors' to - create missing indices. - -END; - exit(1); - } -} - -echo 'Checking search indices are valid ... '; -$sSQL = <<< END - SELECT relname - FROM pg_class, pg_index - WHERE pg_index.indisvalid = false - AND pg_index.indexrelid = pg_class.oid; -END; -$aInvalid = $oDB->getCol($sSQL); -if (empty($aInvalid)) { - $print_success(); -} else { - $print_fail(); - echo <<< END - At least one index is invalid. That can happen, e.g. when index creation was - disrupted and later restarted. You should delete the affected indices and - run the index stage of setup again. - See the question 'Can a stopped/killed import process be resumed?' in the - troubleshooting guide. - Affected indices: -END; - echo join(', ', $aInvalid) . "\n"; - exit(1); -} - - - -if (getSettingBool('USE_US_TIGER_DATA')) { - echo 'Checking TIGER table exists ... '; - if ($oDB->tableExists('location_property_tiger')) { - $print_success(); - } else { - $print_fail(); - echo <<< END - Table 'location_property_tiger' does not exist. Run the TIGER data - import again. - -END; - exit(1); - } -} - - - - -exit(0); +(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'))) + ->addParams('admin', '--check-database') + ->run(); diff --git a/lib-php/admin/setup.php b/lib-php/admin/setup.php index 241b873c..f81c0ca8 100644 --- a/lib-php/admin/setup.php +++ b/lib-php/admin/setup.php @@ -56,6 +56,29 @@ setupHTTPProxy(); $bDidSomething = false; +$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); + +// by default, use all but one processor, but never more than 15. +$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1)); + +function run($oCmd) +{ + global $iInstances; + global $aCMDResult; + $oCmd->addParams('--threads', $iInstances); + if ($aCMDResult['ignore-errors'] ?? false) { + $oCmd->addParams('--ignore-errors'); + } + if ($aCMDResult['quiet'] ?? false) { + $oCmd->addParams('--quiet'); + } + if ($aCMDResult['verbose'] ?? 
false) { + $oCmd->addParams('--verbose'); + } + $oCmd->run(true); +} + + //******************************************************* // Making some sanity check: // Check if osm-file is set and points to a valid file @@ -72,49 +95,67 @@ $oSetup = new SetupFunctions($aCMDResult); // go through complete process if 'all' is selected or start selected functions if ($aCMDResult['create-db'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createDB(); + run((clone($oNominatimCmd))->addParams('transition', '--create-db')); } if ($aCMDResult['setup-db'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->setupDB(); + $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db'); + + if ($aCMDResult['no-partitions'] ?? false) { + $oCmd->addParams('--no-partitions'); + } + + run($oCmd); } if ($aCMDResult['import-data'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->importData($aCMDResult['osm-file']); + $oCmd = (clone($oNominatimCmd)) + ->addParams('transition', '--import-data') + ->addParams('--osm-file', $aCMDResult['osm-file']); + if ($aCMDResult['drop'] ?? false) { + $oCmd->addParams('--drop'); + } + + run($oCmd); } if ($aCMDResult['create-functions'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createFunctions(); + $oSetup->createSqlFunctions(); } if ($aCMDResult['create-tables'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createTables($aCMDResult['reverse-only']); - $oSetup->createFunctions(); - $oSetup->createTableTriggers(); + $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables'); + + if ($aCMDResult['reverse-only'] ?? false) { + $oCmd->addParams('--reverse-only'); + } + + run($oCmd); } if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createPartitionTables(); + run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables')); } if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createPartitionFunctions(); + $oSetup->createSqlFunctions(); // also create partition functions } if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->importWikipediaArticles(); + // ignore errors! + (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run(); } if ($aCMDResult['load-data'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->loadData($aCMDResult['disable-token-precalc']); + run((clone($oNominatimCmd))->addParams('transition', '--load-data')); } if ($aCMDResult['import-tiger-data']) { @@ -130,17 +171,29 @@ if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { if ($aCMDResult['index'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->index($aCMDResult['index-noanalyse']); + $oCmd = (clone($oNominatimCmd))->addParams('transition', '--index'); + if ($aCMDResult['index-noanalyse'] ?? false) { + $oCmd->addParams('--no-analyse'); + } + + run($oCmd); } if ($aCMDResult['drop']) { $bDidSomething = true; - $oSetup->drop($aCMDResult); + run((clone($oNominatimCmd))->addParams('freeze')); } if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { $bDidSomething = true; - $oSetup->createSearchIndices(); + + $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices'); + + if ($aCMDResult['drop'] ?? 
false) {
+        $oCmd->addParams('--drop');
+    }
+
+    run($oCmd);
 }
 
 if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
@@ -150,7 +203,7 @@ if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
 
 if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
     $bDidSomething = true;
-    $oSetup->setupWebsite();
+    run((clone($oNominatimCmd))->addParams('refresh', '--website'));
 }
 
 // ******************************************************
diff --git a/lib-php/admin/update.php b/lib-php/admin/update.php
index e8d873cd..fae4a783 100644
--- a/lib-php/admin/update.php
+++ b/lib-php/admin/update.php
@@ -105,11 +105,17 @@ if ($fPostgresVersion >= 11.0) {
 }
 
 $oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
-if ($aResult['quiet']) {
-    $oNominatimCmd->addParams('--quiet');
-}
-if ($aResult['verbose']) {
-    $oNominatimCmd->addParams('--verbose');
+
+function run($oCmd)
+{
+    global $aResult;
+    if ($aResult['quiet'] ?? false) {
+        $oCmd->addParams('--quiet');
+    }
+    if ($aResult['verbose'] ?? false) {
+        $oCmd->addParams('--verbose');
+    }
+    $oCmd->run(true);
 }
 
 
@@ -120,7 +126,7 @@ if ($aResult['init-updates']) {
         $oCmd->addParams('--no-update-functions');
     }
 
-    $oCmd->run();
+    run($oCmd);
 }
 
 if ($aResult['check-for-updates']) {
@@ -148,7 +154,7 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
 }
 
 if ($aResult['calculate-postcodes']) {
-    (clone($oNominatimCmd))->addParams('refresh', '--postcodes')->run();
+    run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
 }
 
 $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
@@ -197,35 +203,21 @@ if ($bHaveDiff) {
 }
 
 if ($aResult['recompute-word-counts']) {
-    (clone($oNominatimCmd))->addParams('refresh', '--word-counts')->run();
+    run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
 }
 
 if ($aResult['index']) {
-    (clone $oNominatimCmd)
+    run((clone $oNominatimCmd)
       ->addParams('index', '--minrank', $aResult['index-rank'])
-      ->addParams('--threads', $aResult['index-instances'])
-      ->run();
+      ->addParams('--threads', $aResult['index-instances']));
 }
 
 if ($aResult['update-address-levels']) {
-    (clone($oNominatimCmd))->addParams('refresh', '--address-levels')->run();
+    run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
 }
 
 if ($aResult['recompute-importance']) {
-    echo "Updating importance values for database.\n";
-    $oDB = new Nominatim\DB();
-    $oDB->connect();
-
-    $sSQL = 'ALTER TABLE placex DISABLE TRIGGER ALL;';
-    $sSQL .= 'UPDATE placex SET (wikipedia, importance) =';
-    $sSQL .= ' (SELECT wikipedia, importance';
-    $sSQL .= ' FROM compute_importance(extratags, country_code, osm_type, osm_id));';
-    $sSQL .= 'UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance';
-    $sSQL .= ' FROM placex d';
-    $sSQL .= ' WHERE s.place_id = d.linked_place_id and d.wikipedia is not null';
-    $sSQL .= ' and (s.wikipedia is null or s.importance < d.importance);';
-    $sSQL .= 'ALTER TABLE placex ENABLE TRIGGER ALL;';
-    $oDB->exec($sSQL);
+    run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
 }
 
 if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
@@ -241,5 +233,5 @@ if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
         $oCmd->addParams('--no-index');
     }
 
-    exit($oCmd->run());
+    run($oCmd);
 }
diff --git a/lib-php/lib.php b/lib-php/lib.php
index 6798e749..a1f528fa 100644
--- a/lib-php/lib.php
+++ b/lib-php/lib.php
@@ -132,24 +132,6 @@ function addQuotes($s)
     return "'".$s."'";
 }
 
-function fwriteConstDef($rFile, $sConstName, $value)
-{
-    
$sEscapedValue; - - if (is_bool($value)) { - $sEscapedValue = $value ? 'true' : 'false'; - } elseif (is_numeric($value)) { - $sEscapedValue = strval($value); - } elseif (!$value) { - $sEscapedValue = 'false'; - } else { - $sEscapedValue = addQuotes(str_replace("'", "\\'", (string)$value)); - } - - fwrite($rFile, "@define('CONST_$sConstName', $sEscapedValue);\n"); -} - - function parseLatLon($sQuery) { $sFound = null; @@ -226,17 +208,6 @@ function parseLatLon($sQuery) return array($sFound, $fQueryLat, $fQueryLon); } -function createPointsAroundCenter($fLon, $fLat, $fRadius) -{ - $iSteps = max(8, min(100, ($fRadius * 40000)^2)); - $fStepSize = (2*pi())/$iSteps; - $aPolyPoints = array(); - for ($f = 0; $f < 2*pi(); $f += $fStepSize) { - $aPolyPoints[] = array('', $fLon+($fRadius*sin($f)), $fLat+($fRadius*cos($f)) ); - } - return $aPolyPoints; -} - function closestHouseNumber($aRow) { $fHouse = $aRow['startnumber'] @@ -256,25 +227,3 @@ function closestHouseNumber($aRow) return max(min($aRow['endnumber'], $iHn), $aRow['startnumber']); } - -function getSearchRankLabel($iRank) -{ - if (!isset($iRank)) return 'unknown'; - if ($iRank < 2) return 'continent'; - if ($iRank < 4) return 'sea'; - if ($iRank < 8) return 'country'; - if ($iRank < 12) return 'state'; - if ($iRank < 16) return 'county'; - if ($iRank == 16) return 'city'; - if ($iRank == 17) return 'town / island'; - if ($iRank == 18) return 'village / hamlet'; - if ($iRank == 20) return 'suburb'; - if ($iRank == 21) return 'postcode area'; - if ($iRank == 22) return 'croft / farm / locality / islet'; - if ($iRank == 23) return 'postcode area'; - if ($iRank == 25) return 'postcode point'; - if ($iRank == 26) return 'street / major landmark'; - if ($iRank == 27) return 'minory street / path'; - if ($iRank == 28) return 'house / building'; - return 'other: ' . $iRank; -} diff --git a/lib-php/setup/SetupClass.php b/lib-php/setup/SetupClass.php index fedbb644..4b6439a9 100755 --- a/lib-php/setup/SetupClass.php +++ b/lib-php/setup/SetupClass.php @@ -6,7 +6,6 @@ require_once(CONST_LibDir.'/Shell.php'); class SetupFunctions { - protected $iCacheMemory; protected $iInstances; protected $aDSNInfo; protected $bQuiet; @@ -14,7 +13,6 @@ class SetupFunctions protected $sIgnoreErrors; protected $bEnableDiffUpdates; protected $bEnableDebugStatements; - protected $bNoPartitions; protected $bDrop; protected $oDB = null; protected $oNominatimCmd; @@ -31,16 +29,6 @@ class SetupFunctions warn('resetting threads to '.$this->iInstances); } - if (isset($aCMDResult['osm2pgsql-cache'])) { - $this->iCacheMemory = $aCMDResult['osm2pgsql-cache']; - } elseif (getSetting('FLATNODE_FILE')) { - // When flatnode files are enabled then disable cache per default. - $this->iCacheMemory = 0; - } else { - // Otherwise: Assume we can steal all the cache memory in the box. 
- $this->iCacheMemory = getCacheMemoryMB(); - } - // parse database string $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); if (!isset($this->aDSNInfo['port'])) { @@ -62,11 +50,6 @@ class SetupFunctions } else { $this->bEnableDebugStatements = false; } - if (isset($aCMDResult['no-partitions'])) { - $this->bNoPartitions = $aCMDResult['no-partitions']; - } else { - $this->bNoPartitions = false; - } if (isset($aCMDResult['enable-diff-updates'])) { $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates']; } else { @@ -84,349 +67,6 @@ class SetupFunctions } } - public function createDB() - { - info('Create DB'); - $oDB = new \Nominatim\DB; - - if ($oDB->checkConnection()) { - fail('database already exists ('.getSetting('DATABASE_DSN').')'); - } - - $oCmd = (new \Nominatim\Shell('createdb')) - ->addParams('-E', 'UTF-8') - ->addParams('-p', $this->aDSNInfo['port']); - - if (isset($this->aDSNInfo['username'])) { - $oCmd->addParams('-U', $this->aDSNInfo['username']); - } - if (isset($this->aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']); - } - if (isset($this->aDSNInfo['hostspec'])) { - $oCmd->addParams('-h', $this->aDSNInfo['hostspec']); - } - $oCmd->addParams($this->aDSNInfo['database']); - - $result = $oCmd->run(); - if ($result != 0) fail('Error executing external command: '.$oCmd->escapedCmd()); - } - - public function setupDB() - { - info('Setup DB'); - - $fPostgresVersion = $this->db()->getPostgresVersion(); - echo 'Postgres version found: '.$fPostgresVersion."\n"; - - if ($fPostgresVersion < 9.03) { - fail('Minimum supported version of Postgresql is 9.3.'); - } - - $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS hstore'); - $this->pgsqlRunScript('CREATE EXTENSION IF NOT EXISTS postgis'); - - $fPostgisVersion = $this->db()->getPostgisVersion(); - echo 'Postgis version found: '.$fPostgisVersion."\n"; - - if ($fPostgisVersion < 2.2) { - echo "Minimum required Postgis version 2.2\n"; - exit(1); - } - - $sPgUser = getSetting('DATABASE_WEBUSER'); - $i = $this->db()->getOne("select count(*) from pg_user where usename = '$sPgUser'"); - if ($i == 0) { - echo "\nERROR: Web user '".$sPgUser."' does not exist. Create it with:\n"; - echo "\n createuser ".$sPgUser."\n\n"; - exit(1); - } - - if (!getSetting('DATABASE_MODULE_PATH')) { - // If no custom module path is set then copy the module into the - // project directory, but only if it is not the same file already - // (aka we are running from the build dir). - $sDest = CONST_InstallDir.'/module'; - if ($sDest != CONST_Default_ModulePath) { - if (!file_exists($sDest)) { - mkdir($sDest); - } - if (!copy(CONST_Default_ModulePath.'/nominatim.so', $sDest.'/nominatim.so')) { - echo "Failed to copy database module to $sDest."; - exit(1); - } - chmod($sDest.'/nominatim.so', 0755); - info("Database module installed at $sDest."); - } else { - info('Running from build directory. 
Leaving database module as is.'); - } - } else { - info('Using database module from DATABASE_MODULE_PATH ('.getSetting('DATABASE_MODULE_PATH').').'); - } - // Try accessing the C module, so we know early if something is wrong - $this->checkModulePresence(); // raises exception on failure - - $this->pgsqlRunScriptFile(CONST_DataDir.'/country_name.sql'); - $this->pgsqlRunScriptFile(CONST_DataDir.'/country_osm_grid.sql.gz'); - - if ($this->bNoPartitions) { - $this->pgsqlRunScript('update country_name set partition = 0'); - } - } - - public function importData($sOSMFile) - { - info('Import data'); - - if (!file_exists(getOsm2pgsqlBinary())) { - echo "Check NOMINATIM_OSM2PGSQL_BINARY in your local .env file.\n"; - echo "Normally you should not need to set this manually.\n"; - fail("osm2pgsql not found in '".getOsm2pgsqlBinary()."'"); - } - - $oCmd = new \Nominatim\Shell(getOsm2pgsqlBinary()); - $oCmd->addParams('--style', getImportStyle()); - - if (getSetting('FLATNODE_FILE')) { - $oCmd->addParams('--flat-nodes', getSetting('FLATNODE_FILE')); - } - if (getSetting('TABLESPACE_OSM_DATA')) { - $oCmd->addParams('--tablespace-slim-data', getSetting('TABLESPACE_OSM_DATA')); - } - if (getSetting('TABLESPACE_OSM_INDEX')) { - $oCmd->addParams('--tablespace-slim-index', getSetting('TABLESPACE_OSM_INDEX')); - } - if (getSetting('TABLESPACE_PLACE_DATA')) { - $oCmd->addParams('--tablespace-main-data', getSetting('TABLESPACE_PLACE_DATA')); - } - if (getSetting('TABLESPACE_PLACE_INDEX')) { - $oCmd->addParams('--tablespace-main-index', getSetting('TABLESPACE_PLACE_INDEX')); - } - $oCmd->addParams('--latlong', '--slim', '--create'); - $oCmd->addParams('--output', 'gazetteer'); - $oCmd->addParams('--hstore'); - $oCmd->addParams('--number-processes', 1); - $oCmd->addParams('--with-forward-dependencies', 'false'); - $oCmd->addParams('--log-progress', 'true'); - $oCmd->addParams('--cache', $this->iCacheMemory); - $oCmd->addParams('--port', $this->aDSNInfo['port']); - - if (isset($this->aDSNInfo['username'])) { - $oCmd->addParams('--username', $this->aDSNInfo['username']); - } - if (isset($this->aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']); - } - if (isset($this->aDSNInfo['hostspec'])) { - $oCmd->addParams('--host', $this->aDSNInfo['hostspec']); - } - $oCmd->addParams('--database', $this->aDSNInfo['database']); - $oCmd->addParams($sOSMFile); - $oCmd->run(); - - if (!$this->sIgnoreErrors && !$this->db()->getRow('select * from place limit 1')) { - fail('No Data'); - } - - if ($this->bDrop) { - $this->dropTable('planet_osm_nodes'); - $this->removeFlatnodeFile(); - } - } - - public function createFunctions() - { - info('Create Functions'); - - // Try accessing the C module, so we know early if something is wrong - $this->checkModulePresence(); // raises exception on failure - - $this->createSqlFunctions(); - } - - public function createTables($bReverseOnly = false) - { - info('Create Tables'); - - $sTemplate = file_get_contents(CONST_SqlDir.'/tables.sql'); - $sTemplate = $this->replaceSqlPatterns($sTemplate); - - $this->pgsqlRunScript($sTemplate, false); - - if ($bReverseOnly) { - $this->dropTable('search_name'); - } - - (clone($this->oNominatimCmd))->addParams('refresh', '--address-levels')->run(); - } - - public function createTableTriggers() - { - info('Create Tables'); - - $sTemplate = file_get_contents(CONST_SqlDir.'/table-triggers.sql'); - $sTemplate = $this->replaceSqlPatterns($sTemplate); - - $this->pgsqlRunScript($sTemplate, false); - } - - public function 
createPartitionTables() - { - info('Create Partition Tables'); - - $sTemplate = file_get_contents(CONST_SqlDir.'/partition-tables.src.sql'); - $sTemplate = $this->replaceSqlPatterns($sTemplate); - - $this->pgsqlRunPartitionScript($sTemplate); - } - - public function createPartitionFunctions() - { - info('Create Partition Functions'); - $this->createSqlFunctions(); // also create partition functions - } - - public function importWikipediaArticles() - { - $sWikiArticlePath = getSetting('WIKIPEDIA_DATA_PATH', CONST_InstallDir); - $sWikiArticlesFile = $sWikiArticlePath.'/wikimedia-importance.sql.gz'; - if (file_exists($sWikiArticlesFile)) { - info('Importing wikipedia articles and redirects'); - $this->dropTable('wikipedia_article'); - $this->dropTable('wikipedia_redirect'); - $this->pgsqlRunScriptFile($sWikiArticlesFile); - } else { - warn('wikipedia importance dump file not found - places will have default importance'); - } - } - - public function loadData($bDisableTokenPrecalc) - { - info('Drop old Data'); - - $oDB = $this->db(); - - $oDB->exec('TRUNCATE word'); - echo '.'; - $oDB->exec('TRUNCATE placex'); - echo '.'; - $oDB->exec('TRUNCATE location_property_osmline'); - echo '.'; - $oDB->exec('TRUNCATE place_addressline'); - echo '.'; - $oDB->exec('TRUNCATE location_area'); - echo '.'; - if (!$this->dbReverseOnly()) { - $oDB->exec('TRUNCATE search_name'); - echo '.'; - } - $oDB->exec('TRUNCATE search_name_blank'); - echo '.'; - $oDB->exec('DROP SEQUENCE seq_place'); - echo '.'; - $oDB->exec('CREATE SEQUENCE seq_place start 100000'); - echo '.'; - - $sSQL = 'select distinct partition from country_name'; - $aPartitions = $oDB->getCol($sSQL); - - if (!$this->bNoPartitions) $aPartitions[] = 0; - foreach ($aPartitions as $sPartition) { - $oDB->exec('TRUNCATE location_road_'.$sPartition); - echo '.'; - } - - // used by getorcreate_word_id to ignore frequent partial words - $sSQL = 'CREATE OR REPLACE FUNCTION get_maxwordfreq() RETURNS integer AS '; - $sSQL .= '$$ SELECT '.getSetting('MAX_WORD_FREQUENCY').' 
as maxwordfreq; $$ LANGUAGE SQL IMMUTABLE'; - $oDB->exec($sSQL); - echo ".\n"; - - // pre-create the word list - if (!$bDisableTokenPrecalc) { - info('Loading word list'); - $this->pgsqlRunScriptFile(CONST_DataDir.'/words.sql'); - } - - info('Load Data'); - $sColumns = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry'; - - $aDBInstances = array(); - $iLoadThreads = max(1, $this->iInstances - 1); - for ($i = 0; $i < $iLoadThreads; $i++) { - // https://secure.php.net/manual/en/function.pg-connect.php - $DSN = getSetting('DATABASE_DSN'); - $DSN = preg_replace('/^pgsql:/', '', $DSN); - $DSN = preg_replace('/;/', ' ', $DSN); - $aDBInstances[$i] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW); - pg_ping($aDBInstances[$i]); - } - - for ($i = 0; $i < $iLoadThreads; $i++) { - $sSQL = "INSERT INTO placex ($sColumns) SELECT $sColumns FROM place WHERE osm_id % $iLoadThreads = $i"; - $sSQL .= " and not (class='place' and type='houses' and osm_type='W'"; - $sSQL .= " and ST_GeometryType(geometry) = 'ST_LineString')"; - $sSQL .= ' and ST_IsValid(geometry)'; - if ($this->bVerbose) echo "$sSQL\n"; - if (!pg_send_query($aDBInstances[$i], $sSQL)) { - fail(pg_last_error($aDBInstances[$i])); - } - } - - // last thread for interpolation lines - // https://secure.php.net/manual/en/function.pg-connect.php - $DSN = getSetting('DATABASE_DSN'); - $DSN = preg_replace('/^pgsql:/', '', $DSN); - $DSN = preg_replace('/;/', ' ', $DSN); - $aDBInstances[$iLoadThreads] = pg_connect($DSN, PGSQL_CONNECT_FORCE_NEW); - pg_ping($aDBInstances[$iLoadThreads]); - $sSQL = 'insert into location_property_osmline'; - $sSQL .= ' (osm_id, address, linegeo)'; - $sSQL .= ' SELECT osm_id, address, geometry from place where '; - $sSQL .= "class='place' and type='houses' and osm_type='W' and ST_GeometryType(geometry) = 'ST_LineString'"; - if ($this->bVerbose) echo "$sSQL\n"; - if (!pg_send_query($aDBInstances[$iLoadThreads], $sSQL)) { - fail(pg_last_error($aDBInstances[$iLoadThreads])); - } - - $bFailed = false; - for ($i = 0; $i <= $iLoadThreads; $i++) { - while (($hPGresult = pg_get_result($aDBInstances[$i])) !== false) { - $resultStatus = pg_result_status($hPGresult); - // PGSQL_EMPTY_QUERY, PGSQL_COMMAND_OK, PGSQL_TUPLES_OK, - // PGSQL_COPY_OUT, PGSQL_COPY_IN, PGSQL_BAD_RESPONSE, - // PGSQL_NONFATAL_ERROR and PGSQL_FATAL_ERROR - // echo 'Query result ' . $i . ' is: ' . $resultStatus . "\n"; - if ($resultStatus != PGSQL_COMMAND_OK && $resultStatus != PGSQL_TUPLES_OK) { - $resultError = pg_result_error($hPGresult); - echo '-- error text ' . $i . ': ' . $resultError . 
"\n"; - $bFailed = true; - } - } - } - if ($bFailed) { - fail('SQL errors loading placex and/or location_property_osmline tables'); - } - - for ($i = 0; $i < $this->iInstances; $i++) { - pg_close($aDBInstances[$i]); - } - - echo "\n"; - info('Reanalysing database'); - $this->pgsqlRunScript('ANALYSE'); - - $sDatabaseDate = getDatabaseDate($oDB); - $oDB->exec('TRUNCATE import_status'); - if (!$sDatabaseDate) { - warn('could not determine database date.'); - } else { - $sSQL = "INSERT INTO import_status (lastimportdate) VALUES('".$sDatabaseDate."')"; - $oDB->exec($sSQL); - echo "Latest data imported from $sDatabaseDate.\n"; - } - } - public function importTigerData($sTigerPath) { info('Import Tiger data'); @@ -560,74 +200,6 @@ class SetupFunctions $this->db()->exec($sSQL); } - public function index($bIndexNoanalyse) - { - $this->checkModulePresence(); // raises exception on failure - - $oBaseCmd = (clone $this->oNominatimCmd)->addParams('index'); - - info('Index ranks 0 - 4'); - $oCmd = (clone $oBaseCmd)->addParams('--maxrank', 4); - - $iStatus = $oCmd->run(); - if ($iStatus != 0) { - fail('error status ' . $iStatus . ' running nominatim!'); - } - if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); - - info('Index administrative boundaries'); - $oCmd = (clone $oBaseCmd)->addParams('--boundaries-only'); - $iStatus = $oCmd->run(); - if ($iStatus != 0) { - fail('error status ' . $iStatus . ' running nominatim!'); - } - - info('Index ranks 5 - 25'); - $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 5, '--maxrank', 25); - $iStatus = $oCmd->run(); - if ($iStatus != 0) { - fail('error status ' . $iStatus . ' running nominatim!'); - } - - if (!$bIndexNoanalyse) $this->pgsqlRunScript('ANALYSE'); - - info('Index ranks 26 - 30'); - $oCmd = (clone $oBaseCmd)->addParams('--no-boundaries', '--minrank', 26); - $iStatus = $oCmd->run(); - if ($iStatus != 0) { - fail('error status ' . $iStatus . ' running nominatim!'); - } - - info('Index postcodes'); - $sSQL = 'UPDATE location_postcode SET indexed_status = 0'; - $this->db()->exec($sSQL); - } - - public function createSearchIndices() - { - info('Create Search indices'); - - $sSQL = 'SELECT relname FROM pg_class, pg_index '; - $sSQL .= 'WHERE pg_index.indisvalid = false AND pg_index.indexrelid = pg_class.oid'; - $aInvalidIndices = $this->db()->getCol($sSQL); - - foreach ($aInvalidIndices as $sIndexName) { - info("Cleaning up invalid index $sIndexName"); - $this->db()->exec("DROP INDEX $sIndexName;"); - } - - $sTemplate = file_get_contents(CONST_SqlDir.'/indices.src.sql'); - if (!$this->bDrop) { - $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_updates.src.sql'); - } - if (!$this->dbReverseOnly()) { - $sTemplate .= file_get_contents(CONST_SqlDir.'/indices_search.src.sql'); - } - $sTemplate = $this->replaceSqlPatterns($sTemplate); - - $this->pgsqlRunScript($sTemplate); - } - public function createCountryNames() { info('Create search index for default country names'); @@ -655,105 +227,6 @@ class SetupFunctions $this->pgsqlRunScript($sSQL); } - public function drop() - { - info('Drop tables only required for updates'); - - // The implementation is potentially a bit dangerous because it uses - // a positive selection of tables to keep, and deletes everything else. - // Including any tables that the unsuspecting user might have manually - // created. USE AT YOUR OWN PERIL. - // tables we want to keep. everything else goes. 
- $aKeepTables = array( - '*columns', - 'import_polygon_*', - 'import_status', - 'place_addressline', - 'location_postcode', - 'location_property*', - 'placex', - 'search_name', - 'seq_*', - 'word', - 'query_log', - 'new_query_log', - 'spatial_ref_sys', - 'country_name', - 'place_classtype_*', - 'country_osm_grid' - ); - - $aDropTables = array(); - $aHaveTables = $this->db()->getListOfTables(); - - foreach ($aHaveTables as $sTable) { - $bFound = false; - foreach ($aKeepTables as $sKeep) { - if (fnmatch($sKeep, $sTable)) { - $bFound = true; - break; - } - } - if (!$bFound) array_push($aDropTables, $sTable); - } - foreach ($aDropTables as $sDrop) { - $this->dropTable($sDrop); - } - - $this->removeFlatnodeFile(); - } - - /** - * Setup the directory for the API scripts. - * - * @return null - */ - public function setupWebsite() - { - if (!is_dir(CONST_InstallDir.'/website')) { - info('Creating directory for website scripts at: '.CONST_InstallDir.'/website'); - mkdir(CONST_InstallDir.'/website'); - } - - $aScripts = array( - 'deletable.php', - 'details.php', - 'lookup.php', - 'polygons.php', - 'reverse.php', - 'search.php', - 'status.php' - ); - - foreach ($aScripts as $sScript) { - $rFile = fopen(CONST_InstallDir.'/website/'.$sScript, 'w'); - - fwrite($rFile, "oDB; } - private function removeFlatnodeFile() - { - $sFName = getSetting('FLATNODE_FILE'); - if ($sFName && file_exists($sFName)) { - if ($this->bVerbose) echo 'Deleting '.$sFName."\n"; - unlink($sFName); - } - } - private function pgsqlRunScript($sScript, $bfatal = true) { runSQLScript( @@ -791,7 +255,7 @@ class SetupFunctions ); } - private function createSqlFunctions() + public function createSqlFunctions() { $oCmd = (clone($this->oNominatimCmd)) ->addParams('refresh', '--functions'); @@ -804,25 +268,7 @@ class SetupFunctions $oCmd->addParams('--enable-debug-statements'); } - $oCmd->run(); - } - - private function pgsqlRunPartitionScript($sTemplate) - { - $sSQL = 'select distinct partition from country_name'; - $aPartitions = $this->db()->getCol($sSQL); - if (!$this->bNoPartitions) $aPartitions[] = 0; - - preg_match_all('#^-- start(.*?)^-- end#ms', $sTemplate, $aMatches, PREG_SET_ORDER); - foreach ($aMatches as $aMatch) { - $sResult = ''; - foreach ($aPartitions as $sPartitionName) { - $sResult .= str_replace('-partition-', $sPartitionName, $aMatch[1]); - } - $sTemplate = str_replace($aMatch[0], $sResult, $sTemplate); - } - - $this->pgsqlRunScript($sTemplate); + $oCmd->run(!$this->sIgnoreErrors); } private function pgsqlRunScriptFile($sFilename) @@ -909,44 +355,4 @@ class SetupFunctions return $sSql; } - - /** - * Drop table with the given name if it exists. - * - * @param string $sName Name of table to remove. - * - * @return null - */ - private function dropTable($sName) - { - if ($this->bVerbose) echo "Dropping table $sName\n"; - $this->db()->deleteTable($sName); - } - - /** - * Check if the database is in reverse-only mode. - * - * @return True if there is no search_name table and infrastructure. - */ - private function dbReverseOnly() - { - return !($this->db()->tableExists('search_name')); - } - - /** - * Try accessing the C module, so we know early if something is wrong. - * - * Raises Nominatim\DatabaseError on failure - */ - private function checkModulePresence() - { - $sModulePath = getSetting('DATABASE_MODULE_PATH', CONST_InstallDir.'/module'); - $sSQL = "CREATE FUNCTION nominatim_test_import_func(text) RETURNS text AS '"; - $sSQL .= $sModulePath . 
"/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT"; - $sSQL .= ';DROP FUNCTION nominatim_test_import_func(text);'; - - $oDB = new \Nominatim\DB(); - $oDB->connect(); - $oDB->exec($sSQL, null, 'Database server failed to load '.$sModulePath.'/nominatim.so module'); - } } diff --git a/lib-php/website/details.php b/lib-php/website/details.php index 91440b54..bd7df12c 100644 --- a/lib-php/website/details.php +++ b/lib-php/website/details.php @@ -53,7 +53,7 @@ if ($sOsmType && $iOsmId > 0) { // Be nice about our error messages for broken geometry - if (!$sPlaceId) { + if (!$sPlaceId && $oDB->tableExists('import_polygon_error')) { $sSQL = 'SELECT '; $sSQL .= ' osm_type, '; $sSQL .= ' osm_id, '; @@ -81,10 +81,15 @@ if ($sOsmType && $iOsmId > 0) { exit; } } -} - -if ($sPlaceId === false) userError('Please select a place id'); + if ($sPlaceId === false) { + throw new Exception('No place with that OSM ID found.', 404); + } +} else { + if ($sPlaceId === false) { + userError('Required parameters missing. Need either osmtype/osmid or place_id.'); + } +} $iPlaceID = (int)$sPlaceId; @@ -140,11 +145,10 @@ $sSQL .= " WHERE place_id = $iPlaceID"; $aPointDetails = $oDB->getRow($sSQL, null, 'Could not get details of place object.'); if (!$aPointDetails) { - userError('Unknown place id.'); + throw new Exception('No place with that place ID found.', 404); } $aPointDetails['localname'] = $aPointDetails['localname']?$aPointDetails['localname']:$aPointDetails['housenumber']; -$aPointDetails['rank_search_label'] = getSearchRankLabel($aPointDetails['rank_search']); // only used in HTML format // Get all alternative names (languages, etc) $sSQL = 'SELECT (each(name)).key,(each(name)).value FROM placex '; diff --git a/lib-php/website/status.php b/lib-php/website/status.php index 9f030fb3..7c7eb928 100644 --- a/lib-php/website/status.php +++ b/lib-php/website/status.php @@ -37,8 +37,13 @@ if ($sOutputFormat == 'json') { $aResponse = array( 'status' => 0, 'message' => 'OK', - 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339) + 'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339), + 'software_version' => CONST_NominatimVersion ); + $sDatabaseVersion = $oStatus->databaseVersion(); + if ($sDatabaseVersion) { + $aResponse['database_version'] = $sDatabaseVersion; + } javascript_renderData($aResponse); } else { echo 'OK'; diff --git a/lib-sql/functions.sql b/lib-sql/functions.sql new file mode 100644 index 00000000..750af9f0 --- /dev/null +++ b/lib-sql/functions.sql @@ -0,0 +1,20 @@ +{% include('functions/utils.sql') %} +{% include('functions/normalization.sql') %} +{% include('functions/ranking.sql') %} +{% include('functions/importance.sql') %} +{% include('functions/address_lookup.sql') %} +{% include('functions/interpolation.sql') %} + +{% if 'place' in db.tables %} + {% include 'functions/place_triggers.sql' %} +{% endif %} + +{% if 'placex' in db.tables %} + {% include 'functions/placex_triggers.sql' %} +{% endif %} + +{% if 'location_postcode' in db.tables %} + {% include 'functions/postcode_triggers.sql' %} +{% endif %} + +{% include('functions/partition-functions.sql') %} diff --git a/lib-sql/functions/address_lookup.sql b/lib-sql/functions/address_lookup.sql index b832aed8..3206e8db 100644 --- a/lib-sql/functions/address_lookup.sql +++ b/lib-sql/functions/address_lookup.sql @@ -121,7 +121,7 @@ BEGIN END IF; --then query tiger data - -- %NOTIGERDATA% IF 0 THEN + {% if config.get_bool('USE_US_TIGER_DATA') %} IF place IS NULL AND in_housenumber >= 0 THEN SELECT 
parent_place_id as place_id, 'us' as country_code, in_housenumber as housenumber, postcode, @@ -133,9 +133,10 @@ BEGIN WHERE place_id = in_place_id AND in_housenumber between startnumber and endnumber; END IF; - -- %NOTIGERDATA% END IF; + {% endif %} - -- %NOAUXDATA% IF 0 THEN + -- then additional data + {% if config.get_bool('USE_AUX_LOCATION_DATA') %} IF place IS NULL THEN SELECT parent_place_id as place_id, 'us' as country_code, housenumber, postcode, @@ -146,7 +147,7 @@ BEGIN FROM location_property_aux WHERE place_id = in_place_id; END IF; - -- %NOAUXDATA% END IF; + {% endif %} -- postcode table IF place IS NULL THEN diff --git a/lib-sql/functions/normalization.sql b/lib-sql/functions/normalization.sql index 8bb4915b..6fcdf552 100644 --- a/lib-sql/functions/normalization.sql +++ b/lib-sql/functions/normalization.sql @@ -1,12 +1,12 @@ -- Functions for term normalisation and access to the 'word' table. CREATE OR REPLACE FUNCTION transliteration(text) RETURNS text - AS '{modulepath}/nominatim.so', 'transliteration' + AS '{{ modulepath }}/nominatim.so', 'transliteration' LANGUAGE c IMMUTABLE STRICT; CREATE OR REPLACE FUNCTION gettokenstring(text) RETURNS text - AS '{modulepath}/nominatim.so', 'gettokenstring' + AS '{{ modulepath }}/nominatim.so', 'gettokenstring' LANGUAGE c IMMUTABLE STRICT; diff --git a/lib-sql/partition-functions.src.sql b/lib-sql/functions/partition-functions.sql similarity index 80% rename from lib-sql/partition-functions.src.sql rename to lib-sql/functions/partition-functions.sql index 703c08af..cfa151de 100644 --- a/lib-sql/partition-functions.src.sql +++ b/lib-sql/functions/partition-functions.sql @@ -37,13 +37,13 @@ DECLARE r nearfeaturecentr%rowtype; BEGIN --- start - IF in_partition = -partition- THEN +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN FOR r IN SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid - FROM location_area_large_-partition- + FROM location_area_large_{{ partition }} WHERE geometry && feature AND is_relevant_geometry(ST_Relate(geometry, feature), ST_GeometryType(feature)) AND rank_address < maxrank @@ -56,7 +56,7 @@ BEGIN END LOOP; RETURN; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END @@ -80,12 +80,12 @@ BEGIN CONTINUE; END IF; --- start - IF in_partition = -partition- THEN +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN SELECT place_id, keywords, rank_address, rank_search, min(ST_Distance(feature, centroid)) as distance, isguess, postcode, centroid INTO r - FROM location_area_large_-partition- + FROM location_area_large_{{ partition }} WHERE geometry && ST_Expand(feature, item.extent) AND rank_address between item.from_rank and item.to_rank AND word_ids_from_name(item.name) && keywords @@ -103,7 +103,7 @@ BEGIN END IF; CONTINUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END LOOP; @@ -120,12 +120,12 @@ BEGIN RETURN TRUE; END IF; --- start - IF in_partition = -partition- THEN - DELETE from location_area_large_-partition- WHERE place_id = in_place_id; +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + DELETE from location_area_large_{{ partition }} WHERE place_id = in_place_id; RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; @@ -150,13 +150,13 @@ BEGIN RETURN TRUE; END IF; --- start - IF in_partition = -partition- THEN - INSERT INTO 
location_area_large_-partition- (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry) +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + INSERT INTO location_area_large_{{ partition }} (partition, place_id, country_code, keywords, rank_search, rank_address, isguess, postcode, centroid, geometry) values (in_partition, in_place_id, in_country_code, in_keywords, in_rank_search, in_rank_address, in_estimate, postcode, in_centroid, in_geometry); RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; RETURN FALSE; @@ -173,9 +173,9 @@ DECLARE parent BIGINT; BEGIN --- start - IF in_partition = -partition- THEN - SELECT place_id FROM search_name_-partition- +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + SELECT place_id FROM search_name_{{ partition }} INTO parent WHERE name_vector && isin_token AND centroid && ST_Expand(point, 0.015) @@ -183,7 +183,7 @@ BEGIN ORDER BY ST_Distance(centroid, point) ASC limit 1; RETURN parent; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END @@ -199,18 +199,18 @@ DECLARE parent BIGINT; BEGIN --- start - IF in_partition = -partition- THEN +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN SELECT place_id INTO parent - FROM search_name_-partition- + FROM search_name_{{ partition }} WHERE name_vector && isin_token AND centroid && ST_Expand(point, 0.04) AND address_rank between 16 and 25 ORDER BY ST_Distance(centroid, point) ASC limit 1; RETURN parent; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END @@ -223,16 +223,16 @@ create or replace function insertSearchName( RETURNS BOOLEAN AS $$ DECLARE BEGIN --- start - IF in_partition = -partition- THEN - DELETE FROM search_name_-partition- values WHERE place_id = in_place_id; +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + DELETE FROM search_name_{{ partition }} values WHERE place_id = in_place_id; IF in_rank_address > 0 THEN - INSERT INTO search_name_-partition- (place_id, address_rank, name_vector, centroid) + INSERT INTO search_name_{{ partition }} (place_id, address_rank, name_vector, centroid) values (in_place_id, in_rank_address, in_name_vector, in_geometry); END IF; RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; RETURN FALSE; @@ -243,12 +243,12 @@ LANGUAGE plpgsql; create or replace function deleteSearchName(in_partition INTEGER, in_place_id BIGINT) RETURNS BOOLEAN AS $$ DECLARE BEGIN --- start - IF in_partition = -partition- THEN - DELETE from search_name_-partition- WHERE place_id = in_place_id; +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + DELETE from search_name_{{ partition }} WHERE place_id = in_place_id; RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; @@ -262,14 +262,14 @@ create or replace function insertLocationRoad( DECLARE BEGIN --- start - IF in_partition = -partition- THEN - DELETE FROM location_road_-partition- where place_id = in_place_id; - INSERT INTO location_road_-partition- (partition, place_id, country_code, geometry) +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + DELETE FROM location_road_{{ partition }} where place_id = in_place_id; + INSERT INTO location_road_{{ partition }} (partition, place_id, country_code, geometry) values 
(in_partition, in_place_id, in_country_code, in_geometry); RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; RETURN FALSE; @@ -281,12 +281,12 @@ create or replace function deleteRoad(in_partition INTEGER, in_place_id BIGINT) DECLARE BEGIN --- start - IF in_partition = -partition- THEN - DELETE FROM location_road_-partition- where place_id = in_place_id; +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN + DELETE FROM location_road_{{ partition }} where place_id = in_place_id; RETURN TRUE; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; @@ -303,12 +303,12 @@ DECLARE search_diameter FLOAT; BEGIN --- start - IF in_partition = -partition- THEN +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN search_diameter := 0.00005; WHILE search_diameter < 0.1 LOOP FOR r IN - SELECT place_id FROM location_road_-partition- + SELECT place_id FROM location_road_{{ partition }} WHERE ST_DWithin(geometry, point, search_diameter) ORDER BY ST_Distance(geometry, point) ASC limit 1 LOOP @@ -318,7 +318,7 @@ BEGIN END LOOP; RETURN NULL; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END @@ -345,12 +345,12 @@ BEGIN p2 := ST_LineInterpolatePoint(line,0.5); p3 := ST_LineInterpolatePoint(line,1); --- start - IF in_partition = -partition- THEN +{% for partition in db.partitions %} + IF in_partition = {{ partition }} THEN search_diameter := 0.0005; WHILE search_diameter < 0.01 LOOP FOR r IN - SELECT place_id FROM location_road_-partition- + SELECT place_id FROM location_road_{{ partition }} WHERE ST_DWithin(line, geometry, search_diameter) ORDER BY (ST_distance(geometry, p1)+ ST_distance(geometry, p2)+ @@ -362,7 +362,7 @@ BEGIN END LOOP; RETURN NULL; END IF; --- end +{% endfor %} RAISE EXCEPTION 'Unknown partition %', in_partition; END diff --git a/lib-sql/functions/place_triggers.sql b/lib-sql/functions/place_triggers.sql index eaba12be..53163746 100644 --- a/lib-sql/functions/place_triggers.sql +++ b/lib-sql/functions/place_triggers.sql @@ -12,8 +12,10 @@ DECLARE partition INTEGER; BEGIN - --DEBUG: RAISE WARNING '-----------------------------------------------------------------------------------'; - --DEBUG: RAISE WARNING 'place_insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,st_area(NEW.geometry); + {% if debug %} + RAISE WARNING '-----------------------------------------------------------------------------------'; + RAISE WARNING 'place_insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,st_area(NEW.geometry); + {% endif %} -- filter wrong tupels IF ST_IsEmpty(NEW.geometry) OR NOT ST_IsValid(NEW.geometry) OR ST_X(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') OR ST_Y(ST_Centroid(NEW.geometry))::text in ('NaN','Infinity','-Infinity') THEN INSERT INTO import_polygon_error (osm_type, osm_id, class, type, name, country_code, updated, errormessage, prevgeometry, newgeometry) @@ -97,8 +99,8 @@ BEGIN DELETE FROM place where osm_type = NEW.osm_type and osm_id = NEW.osm_id and class = NEW.class; END IF; - --DEBUG: RAISE WARNING 'Existing: %',existing.osm_id; - --DEBUG: RAISE WARNING 'Existing PlaceX: %',existingplacex.place_id; + {% if debug %}RAISE WARNING 'Existing: %',existing.osm_id;{% endif %} + {% if debug %}RAISE WARNING 'Existing PlaceX: %',existingplacex.place_id;{% endif %} -- Log and discard IF existing.geometry is not null AND st_isvalid(existing.geometry) @@ -122,14 +124,20 @@ BEGIN 
(existingplacex.type != NEW.type))) THEN + {% if config.get_bool('LIMIT_REINDEXING') %} IF existingplacex.osm_type IS NOT NULL THEN -- sanity check: ignore admin_level changes on places with too many active children -- or we end up reindexing entire countries because somebody accidentally deleted admin_level - --LIMIT INDEXING: SELECT count(*) FROM (SELECT 'a' FROM placex , place_addressline where address_place_id = existingplacex.place_id and placex.place_id = place_addressline.place_id and indexed_status = 0 and place_addressline.isaddress LIMIT 100001) sub INTO i; - --LIMIT INDEXING: IF i > 100000 THEN - --LIMIT INDEXING: RETURN null; - --LIMIT INDEXING: END IF; + SELECT count(*) INTO i FROM + (SELECT 'a' FROM placex, place_addressline + WHERE address_place_id = existingplacex.place_id + and placex.place_id = place_addressline.place_id + and indexed_status = 0 and place_addressline.isaddress LIMIT 100001) sub; + IF i > 100000 THEN + RETURN null; + END IF; END IF; + {% endif %} IF existing.osm_type IS NOT NULL THEN -- pathological case caused by the triggerless copy into place during initial import @@ -144,7 +152,7 @@ BEGIN values (NEW.osm_type, NEW.osm_id, NEW.class, NEW.type, NEW.name, NEW.admin_level, NEW.address, NEW.extratags, NEW.geometry); - --DEBUG: RAISE WARNING 'insert done % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,NEW.name; + {% if debug %}RAISE WARNING 'insert done % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,NEW.name;{% endif %} RETURN NEW; END IF; @@ -258,7 +266,7 @@ DECLARE has_rank BOOLEAN; BEGIN - --DEBUG: RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type; + {% if debug %}RAISE WARNING 'delete: % % % %',OLD.osm_type,OLD.osm_id,OLD.class,OLD.type;{% endif %} -- deleting large polygons can have a massive effect on the system - require manual intervention to let them through IF st_area(OLD.geometry) > 2 and st_isvalid(OLD.geometry) THEN diff --git a/lib-sql/functions/placex_triggers.sql b/lib-sql/functions/placex_triggers.sql index 6965fe14..086ba930 100644 --- a/lib-sql/functions/placex_triggers.sql +++ b/lib-sql/functions/placex_triggers.sql @@ -20,7 +20,7 @@ DECLARE location RECORD; parent RECORD; BEGIN - --DEBUG: RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id; + {% if debug %}RAISE WARNING 'finding street for % %', poi_osm_type, poi_osm_id;{% endif %} -- Is this object part of an associatedStreet relation? 
FOR location IN @@ -58,7 +58,7 @@ BEGIN and poi_osm_id = any(x.nodes) LIMIT 1 LOOP - --DEBUG: RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id; + {% if debug %}RAISE WARNING 'Get parent from interpolation: %', parent.parent_place_id;{% endif %} RETURN parent.parent_place_id; END LOOP; @@ -71,11 +71,11 @@ BEGIN and p.geometry && bbox and w.id = p.osm_id and poi_osm_id = any(w.nodes) LOOP - --DEBUG: RAISE WARNING 'Node is part of way % ', location.osm_id; + {% if debug %}RAISE WARNING 'Node is part of way % ', location.osm_id;{% endif %} -- Way IS a road then we are on it - that must be our road IF location.rank_search < 28 THEN - --DEBUG: RAISE WARNING 'node in way that is a street %',location; + {% if debug %}RAISE WARNING 'node in way that is a street %',location;{% endif %} return location.place_id; END IF; @@ -106,7 +106,7 @@ BEGIN ELSEIF ST_Area(bbox) < 0.005 THEN -- for smaller features get the nearest road SELECT getNearestRoadPlaceId(poi_partition, bbox) INTO parent_place_id; - --DEBUG: RAISE WARNING 'Checked for nearest way (%)', parent_place_id; + {% if debug %}RAISE WARNING 'Checked for nearest way (%)', parent_place_id;{% endif %} ELSE -- for larger features simply find the area with the largest rank that -- contains the bbox, only use addressable features @@ -146,21 +146,21 @@ BEGIN IF bnd.osm_type = 'R' THEN -- see if we have any special relation members SELECT members FROM planet_osm_rels WHERE id = bnd.osm_id INTO relation_members; - --DEBUG: RAISE WARNING 'Got relation members'; + {% if debug %}RAISE WARNING 'Got relation members';{% endif %} -- Search for relation members with role 'lable'. IF relation_members IS NOT NULL THEN FOR rel_member IN SELECT get_rel_node_members(relation_members, ARRAY['label']) as member LOOP - --DEBUG: RAISE WARNING 'Found label member %', rel_member.member; + {% if debug %}RAISE WARNING 'Found label member %', rel_member.member;{% endif %} FOR linked_placex IN SELECT * from placex WHERE osm_type = 'N' and osm_id = rel_member.member and class = 'place' LOOP - --DEBUG: RAISE WARNING 'Linked label member'; + {% if debug %}RAISE WARNING 'Linked label member';{% endif %} RETURN linked_placex; END LOOP; @@ -187,7 +187,7 @@ BEGIN AND placex.rank_search < 26 -- needed to select the right index AND _st_covers(bnd.geometry, placex.geometry) LOOP - --DEBUG: RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id; + {% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; END LOOP; END IF; @@ -203,14 +203,14 @@ BEGIN AND _st_covers(bnd.geometry, placex.geometry) ORDER BY make_standard_name(name->'name') = bnd_name desc LOOP - --DEBUG: RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id; + {% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %} RETURN linked_placex; END LOOP; END IF; -- Name searches can be done for ways as well as relations IF bnd_name is not null THEN - --DEBUG: RAISE WARNING 'Looking for nodes with matching names'; + {% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %} FOR linked_placex IN SELECT placex.* from placex WHERE make_standard_name(name->'name') = bnd_name @@ -225,7 +225,7 @@ BEGIN AND placex.rank_search < 26 -- needed to select the right index AND _st_covers(bnd.geometry, placex.geometry) LOOP - --DEBUG: RAISE WARNING 'Found matching place node %', linked_placex.osm_id; + {% if debug %}RAISE WARNING 'Found matching place node %', 
linked_placex.osm_id;{% endif %} RETURN linked_placex; END LOOP; END IF; @@ -285,10 +285,10 @@ BEGIN address, country) ORDER BY rank_address, distance, isguess desc LOOP - IF NOT %REVERSE-ONLY% THEN + {% if not db.reverse_only %} nameaddress_vector := array_merge(nameaddress_vector, location.keywords::int[]); - END IF; + {% endif %} IF location.place_id is not null THEN location_isaddress := not address_havelevel[location.rank_address]; @@ -362,10 +362,10 @@ BEGIN END IF; -- Add it to the list of search terms - IF NOT %REVERSE-ONLY% THEN + {% if not db.reverse_only %} nameaddress_vector := array_merge(nameaddress_vector, location.keywords::integer[]); - END IF; + {% endif %} INSERT INTO place_addressline (place_id, address_place_id, fromarea, isaddress, distance, cached_rank_address) @@ -388,7 +388,7 @@ DECLARE diameter FLOAT; classtable TEXT; BEGIN - --DEBUG: RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; + {% if debug %}RAISE WARNING '% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %} NEW.place_id := nextval('seq_place'); NEW.indexed_status := 1; --STATUS_NEW @@ -440,9 +440,10 @@ BEGIN END IF; - --DEBUG: RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; + {% if debug %}RAISE WARNING 'placex_insert:END: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;{% endif %} - RETURN NEW; -- %DIFFUPDATES% The following is not needed until doing diff updates, and slows the main index process down +{% if not disable_diff_updates %} + -- The following is not needed until doing diff updates, and slows the main index process down IF NEW.osm_type = 'N' and NEW.rank_search > 28 THEN -- might be part of an interpolation @@ -497,6 +498,8 @@ BEGIN USING NEW.place_id, ST_Centroid(NEW.geometry); END IF; +{% endif %} -- not disable_diff_updates + RETURN NEW; END; @@ -534,7 +537,7 @@ DECLARE BEGIN -- deferred delete IF OLD.indexed_status = 100 THEN - --DEBUG: RAISE WARNING 'placex_update delete % %',NEW.osm_type,NEW.osm_id; + {% if debug %}RAISE WARNING 'placex_update delete % %',NEW.osm_type,NEW.osm_id;{% endif %} delete from placex where place_id = OLD.place_id; RETURN NULL; END IF; @@ -543,13 +546,13 @@ BEGIN RETURN NEW; END IF; - --DEBUG: RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id; + {% if debug %}RAISE WARNING 'placex_update % % (%)',NEW.osm_type,NEW.osm_id,NEW.place_id;{% endif %} NEW.indexed_date = now(); - IF NOT %REVERSE-ONLY% THEN + {% if 'search_name' in db.tables %} DELETE from search_name WHERE place_id = NEW.place_id; - END IF; + {% endif %} result := deleteSearchName(NEW.partition, NEW.place_id); DELETE FROM place_addressline WHERE place_id = NEW.place_id; result := deleteRoad(NEW.partition, NEW.place_id); @@ -562,7 +565,7 @@ BEGIN NEW.address := NEW.address - '_unlisted_place'::TEXT; IF NEW.linked_place_id is not null THEN - --DEBUG: RAISE WARNING 'place already linked to %', NEW.linked_place_id; + {% if debug %}RAISE WARNING 'place already linked to %', NEW.linked_place_id;{% endif %} RETURN NEW; END IF; @@ -578,7 +581,7 @@ BEGIN -- Speed up searches - just use the centroid of the feature -- cheaper but less acurate NEW.centroid := ST_PointOnSurface(NEW.geometry); - --DEBUG: RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid); + {% if debug %}RAISE WARNING 'Computing preliminary centroid at %',ST_AsText(NEW.centroid);{% endif %} -- recompute the ranks, they might change when linking changes SELECT * INTO NEW.rank_search, NEW.rank_address @@ -658,7 +661,7 @@ BEGIN 
parent_address_level := 3; END IF; - --DEBUG: RAISE WARNING 'Copy over address tags'; + {% if debug %}RAISE WARNING 'Copy over address tags';{% endif %} -- housenumber is a computed field, so start with an empty value NEW.housenumber := NULL; IF NEW.address is not NULL THEN @@ -707,7 +710,7 @@ BEGIN END IF; NEW.partition := get_partition(NEW.country_code); END IF; - --DEBUG: RAISE WARNING 'Country updated: "%"', NEW.country_code; + {% if debug %}RAISE WARNING 'Country updated: "%"', NEW.country_code;{% endif %} -- waterway ways are linked when they are part of a relation and have the same class/type IF NEW.osm_type = 'R' and NEW.class = 'waterway' THEN @@ -715,21 +718,21 @@ BEGIN LOOP FOR i IN 1..array_upper(relation_members, 1) BY 2 LOOP IF relation_members[i+1] in ('', 'main_stream', 'side_stream') AND substring(relation_members[i],1,1) = 'w' THEN - --DEBUG: RAISE WARNING 'waterway parent %, child %/%', NEW.osm_id, i, relation_members[i]; + {% if debug %}RAISE WARNING 'waterway parent %, child %/%', NEW.osm_id, i, relation_members[i];{% endif %} FOR linked_node_id IN SELECT place_id FROM placex WHERE osm_type = 'W' and osm_id = substring(relation_members[i],2,200)::bigint and class = NEW.class and type in ('river', 'stream', 'canal', 'drain', 'ditch') and ( relation_members[i+1] != 'side_stream' or NEW.name->'name' = name->'name') LOOP UPDATE placex SET linked_place_id = NEW.place_id WHERE place_id = linked_node_id; - IF NOT %REVERSE-ONLY% THEN + {% if 'search_name' in db.tables %} DELETE FROM search_name WHERE place_id = linked_node_id; - END IF; + {% endif %} END LOOP; END IF; END LOOP; END LOOP; - --DEBUG: RAISE WARNING 'Waterway processed'; + {% if debug %}RAISE WARNING 'Waterway processed';{% endif %} END IF; NEW.importance := null; @@ -737,13 +740,13 @@ BEGIN FROM compute_importance(NEW.extratags, NEW.country_code, NEW.osm_type, NEW.osm_id) INTO NEW.wikipedia,NEW.importance; ---DEBUG: RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance; +{% if debug %}RAISE WARNING 'Importance computed from wikipedia: %', NEW.importance;{% endif %} -- --------------------------------------------------------------------------- -- For low level elements we inherit from our parent road IF NEW.rank_search > 27 THEN - --DEBUG: RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id; + {% if debug %}RAISE WARNING 'finding street for % %', NEW.osm_type, NEW.osm_id;{% endif %} NEW.parent_place_id := null; -- if we have a POI and there is no address information, @@ -791,7 +794,7 @@ BEGIN END IF; NEW.country_code := location.country_code; - --DEBUG: RAISE WARNING 'Got parent details from search name'; + {% if debug %}RAISE WARNING 'Got parent details from search name';{% endif %} -- determine postcode IF NEW.address is not null AND NEW.address ? 
'postcode' THEN @@ -812,13 +815,14 @@ BEGIN name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid); - --DEBUG: RAISE WARNING 'Place added to location table'; + {% if debug %}RAISE WARNING 'Place added to location table';{% endif %} END IF; END IF; - IF not %REVERSE-ONLY% AND (array_length(name_vector, 1) is not NULL - OR inherited_address is not NULL OR NEW.address is not NULL) + {% if not db.reverse_only %} + IF array_length(name_vector, 1) is not NULL + OR inherited_address is not NULL OR NEW.address is not NULL THEN SELECT * INTO name_vector, nameaddress_vector FROM create_poi_search_terms(NEW.place_id, @@ -834,9 +838,10 @@ BEGIN VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address, NEW.importance, NEW.country_code, name_vector, nameaddress_vector, NEW.centroid); - --DEBUG: RAISE WARNING 'Place added to search table'; + {% if debug %}RAISE WARNING 'Place added to search table';{% endif %} END IF; END IF; + {% endif %} RETURN NEW; END IF; @@ -845,10 +850,10 @@ BEGIN -- --------------------------------------------------------------------------- -- Full indexing - --DEBUG: RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id; + {% if debug %}RAISE WARNING 'Using full index mode for % %', NEW.osm_type, NEW.osm_id;{% endif %} SELECT * INTO location FROM find_linked_place(NEW); IF location.place_id is not null THEN - --DEBUG: RAISE WARNING 'Linked %', location; + {% if debug %}RAISE WARNING 'Linked %', location;{% endif %} -- Use the linked point as the centre point of the geometry, -- but only if it is within the area of the boundary. @@ -857,7 +862,7 @@ BEGIN NEW.centroid := geom; END IF; - --DEBUG: RAISE WARNING 'parent address: % rank address: %', parent_address_level, location.rank_address; + {% if debug %}RAISE WARNING 'parent address: % rank address: %', parent_address_level, location.rank_address;{% endif %} IF location.rank_address > parent_address_level and location.rank_address < 26 THEN @@ -878,9 +883,9 @@ BEGIN UPDATE placex set linked_place_id = NEW.place_id WHERE place_id = location.place_id; -- ensure that those places are not found anymore - IF NOT %REVERSE-ONLY% THEN + {% if 'search_name' in db.tables %} DELETE FROM search_name WHERE place_id = location.place_id; - END IF; + {% endif %} PERFORM deleteLocationArea(NEW.partition, location.place_id, NEW.rank_search); SELECT wikipedia, importance @@ -918,7 +923,7 @@ BEGIN AND NEW.country_code IS NOT NULL AND NEW.osm_type = 'R' THEN PERFORM create_country(NEW.name, lower(NEW.country_code)); - --DEBUG: RAISE WARNING 'Country names updated'; + {% if debug %}RAISE WARNING 'Country names updated';{% endif %} -- Also update the list of country names. 
Adding an additional sanity -- check here: make sure the country does overlap with the area where @@ -928,7 +933,7 @@ BEGIN WHERE ST_Covers(geometry, NEW.centroid) and country_code = NEW.country_code LIMIT 1 LOOP - --DEBUG: RAISE WARNING 'Updating names for country '%' with: %', NEW.country_code, NEW.name; + {% if debug %}RAISE WARNING 'Updating names for country '%' with: %', NEW.country_code, NEW.name;{% endif %} UPDATE country_name SET name = name || NEW.name WHERE country_code = NEW.country_code; END LOOP; END IF; @@ -960,7 +965,7 @@ BEGIN NEW.address, geom, NEW.country_code) INTO NEW.parent_place_id, NEW.postcode, nameaddress_vector; - --DEBUG: RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector; + {% if debug %}RAISE WARNING 'RETURN insert_addresslines: %, %, %', NEW.parent_place_id, NEW.postcode, nameaddress_vector;{% endif %} IF NEW.address is not null AND NEW.address ? 'postcode' AND NEW.address->'postcode' not similar to '%(,|;)%' THEN @@ -976,30 +981,30 @@ BEGIN IF NEW.rank_search <= 25 and NEW.rank_address > 0 THEN result := add_location(NEW.place_id, NEW.country_code, NEW.partition, name_vector, NEW.rank_search, NEW.rank_address, upper(trim(NEW.address->'postcode')), NEW.geometry, NEW.centroid); - --DEBUG: RAISE WARNING 'added to location (full)'; + {% if debug %}RAISE WARNING 'added to location (full)';{% endif %} END IF; IF NEW.rank_search between 26 and 27 and NEW.class = 'highway' THEN result := insertLocationRoad(NEW.partition, NEW.place_id, NEW.country_code, NEW.geometry); - --DEBUG: RAISE WARNING 'insert into road location table (full)'; + {% if debug %}RAISE WARNING 'insert into road location table (full)';{% endif %} END IF; result := insertSearchName(NEW.partition, NEW.place_id, name_vector, NEW.rank_search, NEW.rank_address, NEW.geometry); - --DEBUG: RAISE WARNING 'added to search name (full)'; + {% if debug %}RAISE WARNING 'added to search name (full)';{% endif %} - IF NOT %REVERSE-ONLY% THEN + {% if not db.reverse_only %} INSERT INTO search_name (place_id, search_rank, address_rank, importance, country_code, name_vector, nameaddress_vector, centroid) VALUES (NEW.place_id, NEW.rank_search, NEW.rank_address, NEW.importance, NEW.country_code, name_vector, nameaddress_vector, NEW.centroid); - END IF; + {% endif %} END IF; - --DEBUG: RAISE WARNING 'place update % % finsihed.', NEW.osm_type, NEW.osm_id; + {% if debug %}RAISE WARNING 'place update % % finished.', NEW.osm_type, NEW.osm_id;{% endif %} RETURN NEW; END; @@ -1018,9 +1023,9 @@ BEGIN IF OLD.linked_place_id is null THEN update placex set linked_place_id = null, indexed_status = 2 where linked_place_id = OLD.place_id and indexed_status = 0; - --DEBUG: RAISE WARNING 'placex_delete:01 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:01 % %',OLD.osm_type,OLD.osm_id;{% endif %} update placex set linked_place_id = null where linked_place_id = OLD.place_id; - --DEBUG: RAISE WARNING 'placex_delete:02 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:02 % %',OLD.osm_type,OLD.osm_id;{% endif %} ELSE update placex set indexed_status = 2 where place_id = OLD.linked_place_id and indexed_status = 0; END IF; @@ -1028,44 +1033,44 @@ BEGIN IF OLD.rank_address < 30 THEN -- mark everything linked to this place for re-indexing - --DEBUG: RAISE WARNING 'placex_delete:03 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:03 % %',OLD.osm_type,OLD.osm_id;{% endif %} UPDATE placex set indexed_status = 2 from
place_addressline where address_place_id = OLD.place_id and placex.place_id = place_addressline.place_id and indexed_status = 0 and place_addressline.isaddress; - --DEBUG: RAISE WARNING 'placex_delete:04 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:04 % %',OLD.osm_type,OLD.osm_id;{% endif %} DELETE FROM place_addressline where address_place_id = OLD.place_id; - --DEBUG: RAISE WARNING 'placex_delete:05 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:05 % %',OLD.osm_type,OLD.osm_id;{% endif %} b := deleteRoad(OLD.partition, OLD.place_id); - --DEBUG: RAISE WARNING 'placex_delete:06 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:06 % %',OLD.osm_type,OLD.osm_id;{% endif %} update placex set indexed_status = 2 where parent_place_id = OLD.place_id and indexed_status = 0; - --DEBUG: RAISE WARNING 'placex_delete:07 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:07 % %',OLD.osm_type,OLD.osm_id;{% endif %} -- reparenting also for OSM Interpolation Lines (and for Tiger?) update location_property_osmline set indexed_status = 2 where indexed_status = 0 and parent_place_id = OLD.place_id; END IF; - --DEBUG: RAISE WARNING 'placex_delete:08 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:08 % %',OLD.osm_type,OLD.osm_id;{% endif %} IF OLD.rank_address < 26 THEN b := deleteLocationArea(OLD.partition, OLD.place_id, OLD.rank_search); END IF; - --DEBUG: RAISE WARNING 'placex_delete:09 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:09 % %',OLD.osm_type,OLD.osm_id;{% endif %} IF OLD.name is not null THEN - IF NOT %REVERSE-ONLY% THEN + {% if 'search_name' in db.tables %} DELETE from search_name WHERE place_id = OLD.place_id; - END IF; + {% endif %} b := deleteSearchName(OLD.partition, OLD.place_id); END IF; - --DEBUG: RAISE WARNING 'placex_delete:10 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:10 % %',OLD.osm_type,OLD.osm_id;{% endif %} DELETE FROM place_addressline where place_id = OLD.place_id; - --DEBUG: RAISE WARNING 'placex_delete:11 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:11 % %',OLD.osm_type,OLD.osm_id;{% endif %} -- remove from tables for special search classtable := 'place_classtype_' || OLD.class || '_' || OLD.type; @@ -1074,7 +1079,7 @@ BEGIN EXECUTE 'DELETE FROM ' || classtable::regclass || ' WHERE place_id = $1' USING OLD.place_id; END IF; - --DEBUG: RAISE WARNING 'placex_delete:12 % %',OLD.osm_type,OLD.osm_id; + {% if debug %}RAISE WARNING 'placex_delete:12 % %',OLD.osm_type,OLD.osm_id;{% endif %} RETURN OLD; diff --git a/lib-sql/functions/utils.sql b/lib-sql/functions/utils.sql index 18d4211b..4868b828 100644 --- a/lib-sql/functions/utils.sql +++ b/lib-sql/functions/utils.sql @@ -237,7 +237,7 @@ BEGIN IF word_ids is not null THEN parent_place_id := getNearestNamedRoadPlaceId(partition, centroid, word_ids); IF parent_place_id is not null THEN - --DEBUG: RAISE WARNING 'Get parent form addr:street: %', parent_place_id; + {% if debug %}RAISE WARNING 'Get parent from addr:street: %', parent_place_id;{% endif %} RETURN parent_place_id; END IF; END IF; @@ -249,7 +249,7 @@ BEGIN IF word_ids is not null THEN parent_place_id := getNearestNamedPlacePlaceId(partition, centroid, word_ids); IF parent_place_id is not null THEN - --DEBUG: RAISE WARNING 'Get parent form addr:place: %', parent_place_id; + {% if debug %}RAISE WARNING 'Get parent from addr:place: %', parent_place_id;{%
endif %} RETURN parent_place_id; END IF; END IF; diff --git a/lib-sql/indices.sql b/lib-sql/indices.sql new file mode 100644 index 00000000..cb77e02b --- /dev/null +++ b/lib-sql/indices.sql @@ -0,0 +1,67 @@ +-- Indices used only during search and update. +-- These indices are created only after the indexing process is done. + +CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id + ON word USING BTREE (word_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id + ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_search + ON placex USING BTREE (rank_search) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_rank_address + ON placex USING BTREE (rank_address) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_parent_place_id + ON placex USING BTREE (parent_place_id) {{db.tablespace.search_index}} + WHERE parent_place_id IS NOT NULL; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_lookupPolygon + ON placex USING gist (geometry) {{db.tablespace.search_index}} + WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') + AND rank_address between 4 and 25 AND type != 'postcode' + AND name is not null AND indexed_status = 0 AND linked_place_id is null; + +CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode + ON placex USING gist (geometry) {{db.tablespace.search_index}} + WHERE osm_type = 'N' AND rank_search between 5 and 25 + AND class = 'place' AND type != 'postcode' + AND name is not null AND indexed_status = 0 AND linked_place_id is null; + +CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id + ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_osm_id + ON location_property_osmline USING BTREE (osm_id) {{db.tablespace.search_index}}; + +CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_postcode_id + ON location_postcode USING BTREE (place_id) {{db.tablespace.search_index}}; + +CREATE INDEX {{sql.if_index_not_exists}} idx_postcode_postcode + ON location_postcode USING BTREE (postcode) {{db.tablespace.search_index}}; + +-- Indices only needed for updating. + +{% if not drop %} + CREATE INDEX {{sql.if_index_not_exists}} idx_placex_pendingsector + ON placex USING BTREE (rank_address,geometry_sector) {{db.tablespace.address_index}} + WHERE indexed_status > 0; + + CREATE INDEX {{sql.if_index_not_exists}} idx_location_area_country_place_id + ON location_area_country USING BTREE (place_id) {{db.tablespace.address_index}}; + + CREATE UNIQUE INDEX {{sql.if_index_not_exists}} idx_place_osm_unique + ON place USING btree(osm_id, osm_type, class, type) {{db.tablespace.address_index}}; +{% endif %} + +-- Indices only needed for search. 
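[Editorial aside: the `{{ ... }}` and `{% ... %}` constructs in this new file (and in the search-only block that follows) are Jinja2 template syntax; the SQL sources are no longer preprocessed by string substitution of markers such as `{ts:search-index}`, `{www-user}`, `-partition-` and `--DEBUG:`, but rendered as Jinja2 templates before being sent to PostgreSQL. A minimal sketch of how such a template might be rendered from Python follows; the context values in it (the `if_index_not_exists` flag, the empty tablespace clauses, the table list) are illustrative assumptions, not the values Nominatim's own loader passes:

    import jinja2

    env = jinja2.Environment(loader=jinja2.FileSystemLoader('lib-sql'))
    template = env.get_template('indices.sql')

    sql = template.render(
        sql={'if_index_not_exists': 'IF NOT EXISTS'},       # assumed flag
        db={'tablespace': {'search_index': '', 'address_index': ''},
            'tables': ['search_name']},                     # assumed context
        drop=False)
    # 'sql' now holds plain CREATE INDEX statements that can be executed,
    # e.g. with a psycopg2 cursor.

Rendering with `drop=False` keeps the update-only indices; a reverse-only database would additionally skip the `search_name` block via `db.tables`.]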
+ +{% if 'search_name' in db.tables %} + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_nameaddress_vector + ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_name_vector + ON search_name USING GIN (name_vector) WITH (fastupdate = off) {{db.tablespace.search_index}}; + CREATE INDEX {{sql.if_index_not_exists}} idx_search_name_centroid + ON search_name USING GIST (centroid) {{db.tablespace.search_index}}; +{% endif %} diff --git a/lib-sql/indices.src.sql b/lib-sql/indices.src.sql deleted file mode 100644 index 7fcd965f..00000000 --- a/lib-sql/indices.src.sql +++ /dev/null @@ -1,30 +0,0 @@ --- Indices used only during search and update. --- These indices are created only after the indexing process is done. - -CREATE INDEX idx_word_word_id on word USING BTREE (word_id) {ts:search-index}; - -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) {ts:search-index}; - -DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) {ts:search-index}; -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) {ts:search-index}; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) {ts:search-index} where parent_place_id IS NOT NULL; - -CREATE INDEX idx_placex_geometry_reverse_lookupPolygon - ON placex USING gist (geometry) {ts:search-index} - WHERE St_GeometryType(geometry) in ('ST_Polygon', 'ST_MultiPolygon') - AND rank_address between 4 and 25 AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; -CREATE INDEX idx_placex_geometry_reverse_placeNode - ON placex USING gist (geometry) {ts:search-index} - WHERE osm_type = 'N' AND rank_search between 5 and 25 - AND class = 'place' AND type != 'postcode' - AND name is not null AND indexed_status = 0 AND linked_place_id is null; - -GRANT SELECT ON table country_osm_grid to "{www-user}"; - -CREATE INDEX idx_osmline_parent_place_id ON location_property_osmline USING BTREE (parent_place_id) {ts:search-index}; -CREATE INDEX idx_osmline_parent_osm_id ON location_property_osmline USING BTREE (osm_id) {ts:search-index}; - -CREATE UNIQUE INDEX idx_postcode_id ON location_postcode USING BTREE (place_id) {ts:search-index}; -CREATE INDEX idx_postcode_postcode ON location_postcode USING BTREE (postcode) {ts:search-index}; diff --git a/lib-sql/indices_search.src.sql b/lib-sql/indices_search.src.sql deleted file mode 100644 index d1363fc6..00000000 --- a/lib-sql/indices_search.src.sql +++ /dev/null @@ -1,6 +0,0 @@ --- Indices used for /search API. --- These indices are created only after the indexing process is done. - -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) {ts:search-index}; -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) {ts:search-index}; -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) {ts:search-index}; diff --git a/lib-sql/indices_updates.src.sql b/lib-sql/indices_updates.src.sql deleted file mode 100644 index 6d4c968e..00000000 --- a/lib-sql/indices_updates.src.sql +++ /dev/null @@ -1,9 +0,0 @@ --- Indices used only during search and update. --- These indices are created only after the indexing process is done. 
- -CREATE INDEX CONCURRENTLY idx_placex_pendingsector ON placex USING BTREE (rank_address,geometry_sector) {ts:address-index} where indexed_status > 0; - -CREATE INDEX CONCURRENTLY idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) {ts:address-index}; - -DROP INDEX CONCURRENTLY IF EXISTS place_id_idx; -CREATE UNIQUE INDEX CONCURRENTLY idx_place_osm_unique on place using btree(osm_id,osm_type,class,type) {ts:address-index}; diff --git a/lib-sql/partition-tables.src.sql b/lib-sql/partition-tables.src.sql index df2cac70..98ab9874 100644 --- a/lib-sql/partition-tables.src.sql +++ b/lib-sql/partition-tables.src.sql @@ -7,24 +7,24 @@ CREATE TABLE search_name_blank ( ); --- start -CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large) {ts:address-data}; -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) {ts:address-index}; -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) {ts:address-index}; +{% for partition in db.partitions %} + CREATE TABLE location_area_large_{{ partition }} () INHERITS (location_area_large) {{db.tablespace.address_data}}; + CREATE INDEX idx_location_area_large_{{ partition }}_place_id ON location_area_large_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; + CREATE INDEX idx_location_area_large_{{ partition }}_geometry ON location_area_large_{{ partition }} USING GIST (geometry) {{db.tablespace.address_index}}; -CREATE TABLE search_name_-partition- () INHERITS (search_name_blank) {ts:address-data}; -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) {ts:address-index}; -CREATE INDEX idx_search_name_-partition-_centroid_street ON search_name_-partition- USING GIST (centroid) {ts:address-index} where address_rank between 26 and 27; -CREATE INDEX idx_search_name_-partition-_centroid_place ON search_name_-partition- USING GIST (centroid) {ts:address-index} where address_rank between 2 and 25; + CREATE TABLE search_name_{{ partition }} () INHERITS (search_name_blank) {{db.tablespace.address_data}}; + CREATE INDEX idx_search_name_{{ partition }}_place_id ON search_name_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; + CREATE INDEX idx_search_name_{{ partition }}_centroid_street ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}} where address_rank between 26 and 27; + CREATE INDEX idx_search_name_{{ partition }}_centroid_place ON search_name_{{ partition }} USING GIST (centroid) {{db.tablespace.address_index}} where address_rank between 2 and 25; -DROP TABLE IF EXISTS location_road_-partition-; -CREATE TABLE location_road_-partition- ( - place_id BIGINT, - partition SMALLINT, - country_code VARCHAR(2), - geometry GEOMETRY(Geometry, 4326) - ) {ts:address-data}; -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) {ts:address-index}; -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) {ts:address-index}; + DROP TABLE IF EXISTS location_road_{{ partition }}; + CREATE TABLE location_road_{{ partition }} ( + place_id BIGINT, + partition SMALLINT, + country_code VARCHAR(2), + geometry GEOMETRY(Geometry, 4326) + ) {{db.tablespace.address_data}}; + CREATE INDEX idx_location_road_{{ partition }}_geometry ON location_road_{{ partition }} USING GIST (geometry) 
{{db.tablespace.address_index}}; + CREATE INDEX idx_location_road_{{ partition }}_place_id ON location_road_{{ partition }} USING BTREE (place_id) {{db.tablespace.address_index}}; --- end +{% endfor %} diff --git a/lib-sql/tables.sql b/lib-sql/tables.sql index d15e42c4..0895c6dd 100644 --- a/lib-sql/tables.sql +++ b/lib-sql/tables.sql @@ -4,7 +4,7 @@ CREATE TABLE import_status ( sequence_id integer, indexed boolean ); -GRANT SELECT ON import_status TO "{www-user}" ; +GRANT SELECT ON import_status TO "{{config.DATABASE_WEBUSER}}" ; drop table if exists import_osmosis_log; CREATE TABLE import_osmosis_log ( @@ -30,11 +30,18 @@ CREATE TABLE new_query_log ( secret text ); CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime); -GRANT INSERT ON new_query_log TO "{www-user}" ; -GRANT UPDATE ON new_query_log TO "{www-user}" ; -GRANT SELECT ON new_query_log TO "{www-user}" ; +GRANT INSERT ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ; +GRANT UPDATE ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT ON new_query_log TO "{{config.DATABASE_WEBUSER}}" ; -GRANT SELECT ON TABLE country_name TO "{www-user}"; +GRANT SELECT ON TABLE country_name TO "{{config.DATABASE_WEBUSER}}"; + +DROP TABLE IF EXISTS nominatim_properties; +CREATE TABLE nominatim_properties ( + property TEXT, + value TEXT +); +GRANT SELECT ON TABLE nominatim_properties TO "{{config.DATABASE_WEBUSER}}"; drop table IF EXISTS word; CREATE TABLE word ( @@ -46,9 +53,9 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ) {ts:search-data}; -CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {ts:search-index}; -GRANT SELECT ON word TO "{www-user}" ; + ) {{db.tablespace.search_data}}; +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) {{db.tablespace.search_index}}; +GRANT SELECT ON word TO "{{config.DATABASE_WEBUSER}}" ; DROP SEQUENCE IF EXISTS seq_word; CREATE SEQUENCE seq_word start 1; @@ -73,8 +80,8 @@ CREATE TABLE location_area_country ( place_id BIGINT, country_code varchar(2), geometry GEOMETRY(Geometry, 4326) - ) {ts:address-data}; -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {ts:address-index}; + ) {{db.tablespace.address_data}}; +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) {{db.tablespace.address_index}}; drop table IF EXISTS location_property CASCADE; @@ -91,7 +98,7 @@ CREATE TABLE location_property_aux () INHERITS (location_property); CREATE INDEX idx_location_property_aux_place_id ON location_property_aux USING BTREE (place_id); CREATE INDEX idx_location_property_aux_parent_place_id ON location_property_aux USING BTREE (parent_place_id); CREATE INDEX idx_location_property_aux_housenumber_parent_place_id ON location_property_aux USING BTREE (parent_place_id, housenumber); -GRANT SELECT ON location_property_aux TO "{www-user}"; +GRANT SELECT ON location_property_aux TO "{{config.DATABASE_WEBUSER}}"; CREATE TABLE location_property_tiger ( place_id BIGINT, @@ -102,7 +109,7 @@ CREATE TABLE location_property_tiger ( linegeo GEOMETRY, interpolationtype TEXT, postcode TEXT); -GRANT SELECT ON location_property_tiger TO "{www-user}"; +GRANT SELECT ON location_property_tiger TO "{{config.DATABASE_WEBUSER}}"; drop table if exists location_property_osmline; CREATE TABLE location_property_osmline ( @@ -120,13 +127,14 @@ CREATE TABLE location_property_osmline ( address HSTORE, postcode TEXT, country_code VARCHAR(2) - 
){ts:search-data}; -CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {ts:search-index}; -CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {ts:address-index}; -CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {ts:search-index}; -GRANT SELECT ON location_property_osmline TO "{www-user}"; + ){{db.tablespace.search_data}}; +CREATE UNIQUE INDEX idx_osmline_place_id ON location_property_osmline USING BTREE (place_id) {{db.tablespace.search_index}}; +CREATE INDEX idx_osmline_geometry_sector ON location_property_osmline USING BTREE (geometry_sector) {{db.tablespace.address_index}}; +CREATE INDEX idx_osmline_linegeo ON location_property_osmline USING GIST (linegeo) {{db.tablespace.search_index}}; +GRANT SELECT ON location_property_osmline TO "{{config.DATABASE_WEBUSER}}"; drop table IF EXISTS search_name; +{% if not db.reverse_only %} CREATE TABLE search_name ( place_id BIGINT, importance FLOAT, @@ -136,8 +144,10 @@ CREATE TABLE search_name ( nameaddress_vector integer[], country_code varchar(2), centroid GEOMETRY(Geometry, 4326) - ) {ts:search-data}; -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {ts:search-index}; + ) {{db.tablespace.search_data}}; +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) {{db.tablespace.search_index}}; +GRANT SELECT ON search_name to "{{config.DATABASE_WEBUSER}}" ; +{% endif %} drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -147,8 +157,8 @@ CREATE TABLE place_addressline ( cached_rank_address SMALLINT, fromarea boolean, isaddress boolean - ) {ts:search-data}; -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {ts:search-index}; + ) {{db.tablespace.search_data}}; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) {{db.tablespace.search_index}}; drop table if exists placex; CREATE TABLE placex ( @@ -168,24 +178,23 @@ CREATE TABLE placex ( housenumber TEXT, postcode TEXT, centroid GEOMETRY(Geometry, 4326) - ) {ts:search-data}; -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {ts:search-index}; -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {ts:search-index}; -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {ts:address-index} WHERE linked_place_id IS NOT NULL; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {ts:address-index}; -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {ts:search-index}; -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {ts:address-index} WHERE osm_type='N' and rank_search < 26; -CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {ts:address-index} WHERE extratags ? 
'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; + ) {{db.tablespace.search_data}}; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) {{db.tablespace.search_index}}; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) {{db.tablespace.search_index}}; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}}; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}}; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE osm_type='N' and rank_search < 26; +CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26; DROP SEQUENCE IF EXISTS seq_place; CREATE SEQUENCE seq_place start 1; -GRANT SELECT on placex to "{www-user}" ; -GRANT SELECT ON search_name to "{www-user}" ; -GRANT SELECT on place_addressline to "{www-user}" ; -GRANT SELECT ON seq_word to "{www-user}" ; -GRANT SELECT ON planet_osm_ways to "{www-user}" ; -GRANT SELECT ON planet_osm_rels to "{www-user}" ; -GRANT SELECT on location_area to "{www-user}" ; +GRANT SELECT on placex to "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT on place_addressline to "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT ON seq_word to "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT ON planet_osm_ways to "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT ON planet_osm_rels to "{{config.DATABASE_WEBUSER}}" ; +GRANT SELECT on location_area to "{{config.DATABASE_WEBUSER}}" ; -- Table for synthetic postcodes. DROP TABLE IF EXISTS location_postcode; @@ -200,8 +209,8 @@ CREATE TABLE location_postcode ( postcode TEXT, geometry GEOMETRY(Geometry, 4326) ); -CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {ts:address-index}; -GRANT SELECT ON location_postcode TO "{www-user}" ; +CREATE INDEX idx_postcode_geometry ON location_postcode USING GIST (geometry) {{db.tablespace.address_index}}; +GRANT SELECT ON location_postcode TO "{{config.DATABASE_WEBUSER}}" ; DROP TABLE IF EXISTS import_polygon_error; CREATE TABLE import_polygon_error ( @@ -217,7 +226,7 @@ CREATE TABLE import_polygon_error ( newgeometry GEOMETRY(Geometry, 4326) ); CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); -GRANT SELECT ON import_polygon_error TO "{www-user}"; +GRANT SELECT ON import_polygon_error TO "{{config.DATABASE_WEBUSER}}"; DROP TABLE IF EXISTS import_polygon_delete; CREATE TABLE import_polygon_delete ( @@ -227,7 +236,7 @@ CREATE TABLE import_polygon_delete ( type TEXT NOT NULL ); CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); -GRANT SELECT ON import_polygon_delete TO "{www-user}"; +GRANT SELECT ON import_polygon_delete TO "{{config.DATABASE_WEBUSER}}"; DROP SEQUENCE IF EXISTS file; CREATE SEQUENCE file start 1; @@ -261,3 +270,5 @@ ALTER TABLE ONLY wikipedia_redirect ADD CONSTRAINT wikipedia_redirect_pkey PRIMA -- osm2pgsql does not create indexes on the middle tables for Nominatim -- Add one for lookup of associated street relations. 
CREATE INDEX planet_osm_rels_parts_associated_idx ON planet_osm_rels USING gin(parts) WHERE tags @> ARRAY['associatedStreet']; + +GRANT SELECT ON table country_osm_grid to "{{config.DATABASE_WEBUSER}}"; diff --git a/nominatim/cli.py b/nominatim/cli.py index 8cb73a8e..7459711f 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -12,6 +12,7 @@ from .config import Configuration from .tools.exec_utils import run_legacy_script, run_php_server from .errors import UsageError from . import clicmd +from .clicmd.args import NominatimArgs LOG = logging.getLogger() @@ -62,7 +63,8 @@ class CommandlineParser: """ Parse the command line arguments of the program and execute the appropriate subcommand. """ - args = self.parser.parse_args(args=kwargs.get('cli_args')) + args = NominatimArgs() + self.parser.parse_args(args=kwargs.get('cli_args'), namespace=args) if args.subcommand is None: self.parser.print_help() @@ -73,12 +75,14 @@ class CommandlineParser: setattr(args, arg, Path(kwargs[arg])) args.project_dir = Path(args.project_dir).resolve() - logging.basicConfig(stream=sys.stderr, - format='%(asctime)s: %(message)s', - datefmt='%Y-%m-%d %H:%M:%S', - level=max(4 - args.verbose, 1) * 10) + if 'cli_args' not in kwargs: + logging.basicConfig(stream=sys.stderr, + format='%(asctime)s: %(message)s', + datefmt='%Y-%m-%d %H:%M:%S', + level=max(4 - args.verbose, 1) * 10) - args.config = Configuration(args.project_dir, args.config_dir) + args.config = Configuration(args.project_dir, args.config_dir, + environ=kwargs.get('environ', os.environ)) log = logging.getLogger() log.warning('Using project directory: %s', str(args.project_dir)) @@ -109,91 +113,6 @@ class CommandlineParser: # pylint: disable=E0012,C0415 -class SetupAll: - """\ - Create a new Nominatim database from an OSM file. 
- """ - - @staticmethod - def add_args(parser): - group_name = parser.add_argument_group('Required arguments') - group = group_name.add_mutually_exclusive_group(required=True) - group.add_argument('--osm-file', - help='OSM file to be imported.') - group.add_argument('--continue', dest='continue_at', - choices=['load-data', 'indexing', 'db-postprocess'], - help='Continue an import that was interrupted') - group = parser.add_argument_group('Optional arguments') - group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, - help='Size of cache to be used by osm2pgsql (in MB)') - group.add_argument('--reverse-only', action='store_true', - help='Do not create tables and indexes for searching') - group.add_argument('--enable-debug-statements', action='store_true', - help='Include debug warning statements in SQL code') - group.add_argument('--no-partitions', action='store_true', - help="""Do not partition search indices - (speeds up import of single country extracts)""") - group.add_argument('--no-updates', action='store_true', - help="""Do not keep tables that are only needed for - updating the database later""") - group = parser.add_argument_group('Expert options') - group.add_argument('--ignore-errors', action='store_true', - help='Continue import even when errors in SQL are present') - group.add_argument('--index-noanalyse', action='store_true', - help='Do not perform analyse operations during index') - - - @staticmethod - def run(args): - params = ['setup.php'] - if args.osm_file: - params.extend(('--all', '--osm-file', args.osm_file)) - else: - if args.continue_at == 'load-data': - params.append('--load-data') - if args.continue_at in ('load-data', 'indexing'): - params.append('--index') - params.extend(('--create-search-indices', '--create-country-names', - '--setup-website')) - if args.osm2pgsql_cache: - params.extend(('--osm2pgsql-cache', args.osm2pgsql_cache)) - if args.reverse_only: - params.append('--reverse-only') - if args.enable_debug_statements: - params.append('--enable-debug-statements') - if args.no_partitions: - params.append('--no-partitions') - if args.no_updates: - params.append('--drop') - if args.ignore_errors: - params.append('--ignore-errors') - if args.index_noanalyse: - params.append('--index-noanalyse') - - return run_legacy_script(*params, nominatim_env=args) - - -class SetupFreeze: - """\ - Make database read-only. - - About half of data in the Nominatim database is kept only to be able to - keep the data up-to-date with new changes made in OpenStreetMap. This - command drops all this data and only keeps the part needed for geocoding - itself. - - This command has the same effect as the `--no-updates` option for imports. - """ - - @staticmethod - def add_args(parser): - pass # No options - - @staticmethod - def run(args): - return run_legacy_script('setup.php', '--drop', nominatim_env=args) - - class SetupSpecialPhrases: """\ Maintain special phrases. @@ -351,8 +270,8 @@ def nominatim(**kwargs): """ parser = CommandlineParser('nominatim', nominatim.__doc__) - parser.add_subcommand('import', SetupAll) - parser.add_subcommand('freeze', SetupFreeze) + parser.add_subcommand('import', clicmd.SetupAll) + parser.add_subcommand('freeze', clicmd.SetupFreeze) parser.add_subcommand('replication', clicmd.UpdateReplication) parser.add_subcommand('special-phrases', SetupSpecialPhrases) @@ -375,4 +294,6 @@ def nominatim(**kwargs): else: parser.parser.epilog = 'php-cgi not found. Query commands not available.' 
+ parser.add_subcommand('transition', clicmd.AdminTransition) + return parser.run(**kwargs) diff --git a/nominatim/clicmd/__init__.py b/nominatim/clicmd/__init__.py index 9a686df2..9101e0c0 100644 --- a/nominatim/clicmd/__init__.py +++ b/nominatim/clicmd/__init__.py @@ -2,8 +2,11 @@ Subcommand definitions for the command-line tool. """ +from .setup import SetupAll from .replication import UpdateReplication from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus from .index import UpdateIndex from .refresh import UpdateRefresh from .admin import AdminFuncs +from .freeze import SetupFreeze +from .transition import AdminTransition diff --git a/nominatim/clicmd/admin.py b/nominatim/clicmd/admin.py index 8d34f386..fd9382eb 100644 --- a/nominatim/clicmd/admin.py +++ b/nominatim/clicmd/admin.py @@ -1,6 +1,8 @@ """ Implementation of the 'admin' subcommand. """ +import logging + from ..tools.exec_utils import run_legacy_script from ..db.connection import connect @@ -9,6 +11,8 @@ from ..db.connection import connect # Using non-top-level imports to avoid eventually unused imports. # pylint: disable=E0012,C0415 +LOG = logging.getLogger() + class AdminFuncs: """\ Analyse and maintain the database. @@ -39,23 +43,26 @@ class AdminFuncs: @staticmethod def run(args): - from ..tools import admin if args.warm: AdminFuncs._warm(args) if args.check_database: - run_legacy_script('check_import_finished.php', nominatim_env=args) + LOG.warning('Checking database') + from ..tools import check_database + return check_database.check_database(args.config) if args.analyse_indexing: - conn = connect(args.config.get_libpq_dsn()) - admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id) - conn.close() + LOG.warning('Analysing performance of indexing function') + from ..tools import admin + with connect(args.config.get_libpq_dsn()) as conn: + admin.analyse_indexing(conn, osm_id=args.osm_id, place_id=args.place_id) return 0 @staticmethod def _warm(args): + LOG.warning('Warming database caches') params = ['warm.php'] if args.target == 'reverse': params.append('--reverse-only') diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py new file mode 100644 index 00000000..47007579 --- /dev/null +++ b/nominatim/clicmd/args.py @@ -0,0 +1,27 @@ +""" +Provides custom functions over command-line arguments. +""" + + +class NominatimArgs: # pylint: disable=too-few-public-methods + """ Customized namespace class for the nominatim command line tool + to receive the command-line arguments. + """ + + def osm2pgsql_options(self, default_cache, default_threads): + """ Return the standard osm2pgsql options that can be derived + from the command line arguments. The resulting dict can be + further customized and then used in `run_osm2pgsql()`. + """ + return dict(osm2pgsql=self.config.OSM2PGSQL_BINARY or self.osm2pgsql_path, + osm2pgsql_cache=self.osm2pgsql_cache or default_cache, + osm2pgsql_style=self.config.get_import_style_file(), + threads=self.threads or default_threads, + dsn=self.config.get_libpq_dsn(), + flatnode_file=self.config.FLATNODE_FILE, + tablespaces=dict(slim_data=self.config.TABLESPACE_OSM_DATA, + slim_index=self.config.TABLESPACE_OSM_INDEX, + main_data=self.config.TABLESPACE_PLACE_DATA, + main_index=self.config.TABLESPACE_PLACE_INDEX + ) + ) diff --git a/nominatim/clicmd/freeze.py b/nominatim/clicmd/freeze.py new file mode 100644 index 00000000..1b311e97 --- /dev/null +++ b/nominatim/clicmd/freeze.py @@ -0,0 +1,36 @@ +""" +Implementation of the 'freeze' subcommand. 
+""" + +from ..db.connection import connect + +# Do not repeat documentation of subcommand classes. +# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. +# pylint: disable=E0012,C0415 + +class SetupFreeze: + """\ + Make database read-only. + + About half of data in the Nominatim database is kept only to be able to + keep the data up-to-date with new changes made in OpenStreetMap. This + command drops all this data and only keeps the part needed for geocoding + itself. + + This command has the same effect as the `--no-updates` option for imports. + """ + + @staticmethod + def add_args(parser): + pass # No options + + @staticmethod + def run(args): + from ..tools import freeze + + with connect(args.config.get_libpq_dsn()) as conn: + freeze.drop_update_tables(conn) + freeze.drop_flatnode_file(args.config.FLATNODE_FILE) + + return 0 diff --git a/nominatim/clicmd/index.py b/nominatim/clicmd/index.py index ca3f9dee..0225c5ed 100644 --- a/nominatim/clicmd/index.py +++ b/nominatim/clicmd/index.py @@ -1,7 +1,7 @@ """ Implementation of the 'index' subcommand. """ -import os +import psutil from ..db import status from ..db.connection import connect @@ -11,14 +11,6 @@ from ..db.connection import connect # Using non-top-level imports to avoid eventually unused imports. # pylint: disable=E0012,C0415 -def _num_system_cpus(): - try: - cpus = len(os.sched_getaffinity(0)) - except NotImplementedError: - cpus = None - - return cpus or os.cpu_count() - class UpdateIndex: """\ @@ -42,7 +34,7 @@ class UpdateIndex: from ..indexer.indexer import Indexer indexer = Indexer(args.config.get_libpq_dsn(), - args.threads or _num_system_cpus() or 1) + args.threads or psutil.cpu_count() or 1) if not args.no_boundaries: indexer.index_boundaries(args.minrank, args.maxrank) @@ -51,8 +43,7 @@ class UpdateIndex: if not args.no_boundaries and not args.boundaries_only \ and args.minrank == 0 and args.maxrank == 30: - conn = connect(args.config.get_libpq_dsn()) - status.set_indexed(conn, True) - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + status.set_indexed(conn, True) return 0 diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index 8e69caca..9dca4e42 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -5,7 +5,6 @@ import logging from pathlib import Path from ..db.connection import connect -from ..tools.exec_utils import run_legacy_script # Do not repeat documentation of subcommand classes. 
# pylint: disable=C0111 @@ -50,39 +49,42 @@ class UpdateRefresh: if args.postcodes: LOG.warning("Update postcodes centroid") - conn = connect(args.config.get_libpq_dsn()) - refresh.update_postcodes(conn, args.sqllib_dir) - conn.close() + refresh.update_postcodes(args.config.get_libpq_dsn(), args.sqllib_dir) if args.word_counts: LOG.warning('Recompute frequency of full-word search terms') - conn = connect(args.config.get_libpq_dsn()) - refresh.recompute_word_counts(conn, args.sqllib_dir) - conn.close() + refresh.recompute_word_counts(args.config.get_libpq_dsn(), args.sqllib_dir) if args.address_levels: cfg = Path(args.config.ADDRESS_LEVEL_CONFIG) LOG.warning('Updating address levels from %s', cfg) - conn = connect(args.config.get_libpq_dsn()) - refresh.load_address_levels_from_file(conn, cfg) - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + refresh.load_address_levels_from_file(conn, cfg) if args.functions: LOG.warning('Create functions') - conn = connect(args.config.get_libpq_dsn()) - refresh.create_functions(conn, args.config, args.sqllib_dir, - args.diffs, args.enable_debug_statements) - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + refresh.create_functions(conn, args.config, args.sqllib_dir, + args.diffs, args.enable_debug_statements) if args.wiki_data: - run_legacy_script('setup.php', '--import-wikipedia-articles', - nominatim_env=args, throw_on_fail=True) + data_path = Path(args.config.WIKIPEDIA_DATA_PATH + or args.project_dir) + LOG.warning('Import wikipedia article importance from %s', data_path) + if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), + data_path) > 0: + LOG.fatal('FATAL: Wikipedia importance dump file not found') + return 1 + # Attention: importance MUST come after wiki data import. if args.importance: - run_legacy_script('update.php', '--recompute-importance', - nominatim_env=args, throw_on_fail=True) + LOG.warning('Update importance values for database') + with connect(args.config.get_libpq_dsn()) as conn: + refresh.recompute_importance(conn) + if args.website: - run_legacy_script('setup.php', '--setup-website', - nominatim_env=args, throw_on_fail=True) + webdir = args.project_dir / 'website' + LOG.warning('Setting up website directory at %s', webdir) + refresh.setup_website(webdir, args.phplib_dir, args.config) return 0 diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index 2a19e6cd..fc18945e 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -17,17 +17,6 @@ LOG = logging.getLogger() # Using non-top-level imports to make pyosmium optional for replication only. # pylint: disable=E0012,C0415 -def _osm2pgsql_options_from_args(args, default_cache, default_threads): - """ Set up the standard osm2pgsql from the command line arguments. - """ - return dict(osm2pgsql=args.osm2pgsql_path, - osm2pgsql_cache=args.osm2pgsql_cache or default_cache, - osm2pgsql_style=args.config.get_import_style_file(), - threads=args.threads or default_threads, - dsn=args.config.get_libpq_dsn(), - flatnode_file=args.config.FLATNODE_FILE) - - class UpdateReplication: """\ Update the database using an online replication service.
@@ -62,13 +51,12 @@ class UpdateReplication: from ..tools import replication, refresh LOG.warning("Initialising replication updates") - conn = connect(args.config.get_libpq_dsn()) - replication.init_replication(conn, base_url=args.config.REPLICATION_URL) - if args.update_functions: - LOG.warning("Create functions") - refresh.create_functions(conn, args.config, args.sqllib_dir, - True, False) - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + replication.init_replication(conn, base_url=args.config.REPLICATION_URL) + if args.update_functions: + LOG.warning("Create functions") + refresh.create_functions(conn, args.config, args.sqllib_dir, + True, False) return 0 @@ -76,10 +64,8 @@ class UpdateReplication: def _check_for_updates(args): from ..tools import replication - conn = connect(args.config.get_libpq_dsn()) - ret = replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL) - conn.close() - return ret + with connect(args.config.get_libpq_dsn()) as conn: + return replication.check_for_updates(conn, base_url=args.config.REPLICATION_URL) @staticmethod def _report_update(batchdate, start_import, start_index): @@ -99,7 +85,7 @@ class UpdateReplication: from ..tools import replication from ..indexer.indexer import Indexer - params = _osm2pgsql_options_from_args(args, 2000, 1) + params = args.osm2pgsql_options(default_cache=2000, default_threads=1) params.update(base_url=args.config.REPLICATION_URL, update_interval=args.config.get_int('REPLICATION_UPDATE_INTERVAL'), import_file=args.project_dir / 'osmosischange.osc', @@ -122,13 +108,12 @@ class UpdateReplication: recheck_interval = args.config.get_int('REPLICATION_RECHECK_INTERVAL') while True: - conn = connect(args.config.get_libpq_dsn()) - start = dt.datetime.now(dt.timezone.utc) - state = replication.update(conn, params) - if state is not replication.UpdateState.NO_CHANGES: - status.log_status(conn, start, 'import') - batchdate, _, _ = status.get_status(conn) - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + start = dt.datetime.now(dt.timezone.utc) + state = replication.update(conn, params) + if state is not replication.UpdateState.NO_CHANGES: + status.log_status(conn, start, 'import') + batchdate, _, _ = status.get_status(conn) if state is not replication.UpdateState.NO_CHANGES and args.do_index: index_start = dt.datetime.now(dt.timezone.utc) @@ -137,10 +122,9 @@ class UpdateReplication: indexer.index_boundaries(0, 30) indexer.index_by_rank(0, 30) - conn = connect(args.config.get_libpq_dsn()) - status.set_indexed(conn, True) - status.log_status(conn, index_start, 'index') - conn.close() + with connect(args.config.get_libpq_dsn()) as conn: + status.set_indexed(conn, True) + status.log_status(conn, index_start, 'index') else: index_start = None diff --git a/nominatim/clicmd/setup.py b/nominatim/clicmd/setup.py new file mode 100644 index 00000000..71980739 --- /dev/null +++ b/nominatim/clicmd/setup.py @@ -0,0 +1,151 @@ +""" +Implementation of the 'import' subcommand. +""" +import logging +from pathlib import Path + +import psutil + +from ..tools.exec_utils import run_legacy_script +from ..db.connection import connect +from ..db import status, properties +from ..version import NOMINATIM_VERSION +from ..errors import UsageError + +# Do not repeat documentation of subcommand classes. +# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. 
+# pylint: disable=E0012,C0415 + +LOG = logging.getLogger() + +class SetupAll: + """\ + Create a new Nominatim database from an OSM file. + """ + + @staticmethod + def add_args(parser): + group_name = parser.add_argument_group('Required arguments') + group = group_name.add_mutually_exclusive_group(required=True) + group.add_argument('--osm-file', metavar='FILE', + help='OSM file to be imported.') + group.add_argument('--continue', dest='continue_at', + choices=['load-data', 'indexing', 'db-postprocess'], + help='Continue an import that was interrupted') + group = parser.add_argument_group('Optional arguments') + group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + help='Size of cache to be used by osm2pgsql (in MB)') + group.add_argument('--reverse-only', action='store_true', + help='Do not create tables and indexes for searching') + group.add_argument('--no-partitions', action='store_true', + help="""Do not partition search indices + (speeds up import of single country extracts)""") + group.add_argument('--no-updates', action='store_true', + help="""Do not keep tables that are only needed for + updating the database later""") + group = parser.add_argument_group('Expert options') + group.add_argument('--ignore-errors', action='store_true', + help='Continue import even when errors in SQL are present') + group.add_argument('--index-noanalyse', action='store_true', + help='Do not perform analyse operations during index') + + + @staticmethod + def run(args): # pylint: disable=too-many-statements + from ..tools import database_import + from ..tools import refresh + from ..indexer.indexer import Indexer + + if args.osm_file and not Path(args.osm_file).is_file(): + LOG.fatal("OSM file '%s' does not exist.", args.osm_file) + raise UsageError('Cannot access file.') + + if args.continue_at is None: + database_import.setup_database_skeleton(args.config.get_libpq_dsn(), + args.data_dir, + args.no_partitions, + rouser=args.config.DATABASE_WEBUSER) + + LOG.warning('Installing database module') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.install_module(args.module_dir, args.project_dir, + args.config.DATABASE_MODULE_PATH, + conn=conn) + + LOG.warning('Importing OSM data file') + database_import.import_osm_data(Path(args.osm_file), + args.osm2pgsql_options(0, 1), + drop=args.no_updates, + ignore_errors=args.ignore_errors) + + with connect(args.config.get_libpq_dsn()) as conn: + LOG.warning('Create functions (1st pass)') + refresh.create_functions(conn, args.config, args.sqllib_dir, + False, False) + LOG.warning('Create tables') + database_import.create_tables(conn, args.config, args.sqllib_dir, + reverse_only=args.reverse_only) + refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG)) + LOG.warning('Create functions (2nd pass)') + refresh.create_functions(conn, args.config, args.sqllib_dir, + False, False) + LOG.warning('Create table triggers') + database_import.create_table_triggers(conn, args.config, args.sqllib_dir) + LOG.warning('Create partition tables') + database_import.create_partition_tables(conn, args.config, args.sqllib_dir) + LOG.warning('Create functions (3rd pass)') + refresh.create_functions(conn, args.config, args.sqllib_dir, + False, False) + + LOG.warning('Importing wikipedia importance data') + data_path = Path(args.config.WIKIPEDIA_DATA_PATH or args.project_dir) + if refresh.import_wikipedia_articles(args.config.get_libpq_dsn(), + data_path) > 0: + LOG.error('Wikipedia importance dump file not found. 
' + 'Will be using default importances.') + + LOG.warning('Initialise tables') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) + + if args.continue_at is None or args.continue_at == 'load-data': + LOG.warning('Load data into placex table') + database_import.load_data(args.config.get_libpq_dsn(), + args.data_dir, + args.threads or psutil.cpu_count() or 1) + + LOG.warning('Calculate postcodes') + run_legacy_script('setup.php', '--calculate-postcodes', + nominatim_env=args, throw_on_fail=not args.ignore_errors) + + if args.continue_at is None or args.continue_at in ('load-data', 'indexing'): + LOG.warning('Indexing places') + indexer = Indexer(args.config.get_libpq_dsn(), + args.threads or psutil.cpu_count() or 1) + indexer.index_full(analyse=not args.index_noanalyse) + + LOG.warning('Post-process tables') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_search_indices(conn, args.config, + args.sqllib_dir, + drop=args.no_updates) + run_legacy_script('setup.php', '--create-country-names', + nominatim_env=args, throw_on_fail=not args.ignore_errors) + + webdir = args.project_dir / 'website' + LOG.warning('Setup website at %s', webdir) + refresh.setup_website(webdir, args.phplib_dir, args.config) + + with connect(args.config.get_libpq_dsn()) as conn: + try: + dbdate = status.compute_database_date(conn) + status.set_status(conn, dbdate) + LOG.info('Database is at %s.', dbdate) + except Exception as exc: # pylint: disable=broad-except + LOG.error('Cannot determine date of database: %s', exc) + + properties.set_property(conn, 'database_version', + '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) + + return 0 diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py new file mode 100644 index 00000000..b8db1a38 --- /dev/null +++ b/nominatim/clicmd/transition.py @@ -0,0 +1,129 @@ +""" +Implementation of the 'transition' subcommand. + +This subcommand provides standins for functions that were available +through the PHP scripts but are now no longer directly accessible. +This module will be removed as soon as the transition phase is over. +""" +import logging +from pathlib import Path + +from ..db.connection import connect +from ..db import status +from ..errors import UsageError + +# Do not repeat documentation of subcommand classes. +# pylint: disable=C0111 +# Using non-top-level imports to avoid eventually unused imports. +# pylint: disable=E0012,C0415 + +LOG = logging.getLogger() + +class AdminTransition: + """\ + Internal functions for code transition. Do not use. 
+ """ + + @staticmethod + def add_args(parser): + group = parser.add_argument_group('Sub-functions') + group.add_argument('--create-db', action='store_true', + help='Create nominatim db') + group.add_argument('--setup-db', action='store_true', + help='Build a blank nominatim db') + group.add_argument('--import-data', action='store_true', + help='Import a osm file') + group.add_argument('--load-data', action='store_true', + help='Copy data to live tables from import table') + group.add_argument('--create-tables', action='store_true', + help='Create main tables') + group.add_argument('--create-partition-tables', action='store_true', + help='Create required partition tables') + group.add_argument('--index', action='store_true', + help='Index the data') + group.add_argument('--create-search-indices', action='store_true', + help='Create additional indices required for search and update') + group = parser.add_argument_group('Options') + group.add_argument('--no-partitions', action='store_true', + help='Do not partition search indices') + group.add_argument('--osm-file', metavar='FILE', + help='File to import') + group.add_argument('--drop', action='store_true', + help='Drop tables needed for updates, making the database readonly') + group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, + help='Size of cache to be used by osm2pgsql (in MB)') + group.add_argument('--no-analyse', action='store_true', + help='Do not perform analyse operations during index') + group.add_argument('--ignore-errors', action='store_true', + help="Ignore certain erros on import.") + group.add_argument('--reverse-only', action='store_true', + help='Do not create search tables and indexes') + + @staticmethod + def run(args): + from ..tools import database_import + from ..tools import refresh + + if args.create_db: + LOG.warning('Create DB') + database_import.create_db(args.config.get_libpq_dsn()) + + if args.setup_db: + LOG.warning('Setup DB') + + with connect(args.config.get_libpq_dsn()) as conn: + database_import.setup_extensions(conn) + database_import.install_module(args.module_dir, args.project_dir, + args.config.DATABASE_MODULE_PATH, + conn=conn) + + database_import.import_base_data(args.config.get_libpq_dsn(), + args.data_dir, args.no_partitions) + + if args.import_data: + LOG.warning('Import data') + if not args.osm_file: + raise UsageError('Missing required --osm-file argument') + database_import.import_osm_data(Path(args.osm_file), + args.osm2pgsql_options(0, 1), + drop=args.drop, + ignore_errors=args.ignore_errors) + + if args.create_tables: + LOG.warning('Create Tables') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_tables(conn, args.config, args.sqllib_dir, args.reverse_only) + refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG)) + refresh.create_functions(conn, args.config, args.sqllib_dir, + enable_diff_updates=False) + database_import.create_table_triggers(conn, args.config, args.sqllib_dir) + + if args.create_partition_tables: + LOG.warning('Create Partition Tables') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_partition_tables(conn, args.config, args.sqllib_dir) + + if args.load_data: + LOG.warning('Load data') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) + database_import.load_data(args.config.get_libpq_dsn(), + args.data_dir, + args.threads or 1) + + with connect(args.config.get_libpq_dsn()) as conn: + try: + 
status.set_status(conn, status.compute_database_date(conn)) + except Exception as exc: # pylint: disable=broad-except + LOG.error('Cannot determine date of database: %s', exc) + + if args.index: + LOG.warning('Indexing') + from ..indexer.indexer import Indexer + indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1) + indexer.index_full() + + if args.create_search_indices: + LOG.warning('Create Search indices') + with connect(args.config.get_libpq_dsn()) as conn: + database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop) diff --git a/nominatim/config.py b/nominatim/config.py index 4de2052e..a22f90ab 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -17,7 +17,7 @@ class Configuration: Nominatim uses dotenv to configure the software. Configuration options are resolved in the following order: - * from the OS environment + * from the OS environment (or the dirctionary given in `environ` * from the .env file in the project directory of the installation * from the default installation in the configuration directory @@ -25,7 +25,8 @@ class Configuration: avoid conflicts with other environment variables. """ - def __init__(self, project_dir, config_dir): + def __init__(self, project_dir, config_dir, environ=None): + self.environ = environ or os.environ self.project_dir = project_dir self.config_dir = config_dir self._config = dotenv_values(str((config_dir / 'env.defaults').resolve())) @@ -42,7 +43,7 @@ class Configuration: def __getattr__(self, name): name = 'NOMINATIM_' + name - return os.environ.get(name) or self._config[name] + return self.environ.get(name) or self._config[name] def get_bool(self, name): """ Return the given configuration parameter as a boolean. @@ -100,6 +101,6 @@ class Configuration: merged in. """ env = dict(self._config) - env.update(os.environ) + env.update(self.environ) return env diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py index 45e83664..c5d6872b 100644 --- a/nominatim/db/async_connection.py +++ b/nominatim/db/async_connection.py @@ -9,8 +9,41 @@ import logging import psycopg2 from psycopg2.extras import wait_select +# psycopg2 emits different exceptions pre and post 2.8. Detect if the new error +# module is available and adapt the error handling accordingly. +try: + import psycopg2.errors # pylint: disable=no-name-in-module,import-error + __has_psycopg2_errors__ = True +except ModuleNotFoundError: + __has_psycopg2_errors__ = False + LOG = logging.getLogger() +class DeadlockHandler: + """ Context manager that catches deadlock exceptions and calls + the given handler function. All other exceptions are passed on + normally. + """ + + def __init__(self, handler): + self.handler = handler + + def __enter__(self): + pass + + def __exit__(self, exc_type, exc_value, traceback): + if __has_psycopg2_errors__: + if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101 + self.handler() + return True + else: + if exc_type == psycopg2.extensions.TransactionRollbackError: + if exc_value.pgcode == '40P01': + self.handler() + return True + return False + + class DBConnection: """ A single non-blocking database connection. """ @@ -24,15 +57,22 @@ class DBConnection: self.cursor = None self.connect() + def close(self): + """ Close all open connections. Does not wait for pending requests. + """ + if self.conn is not None: + self.cursor.close() + self.conn.close() + + self.conn = None + def connect(self): """ (Re)connect to the database. 
Creates an asynchronous connection with JIT and parallel processing disabled. If a connection was already open, it is closed and a new connection established. The caller must ensure that no query is pending before reconnecting. """ - if self.conn is not None: - self.cursor.close() - self.conn.close() + self.close() # Use a dict to hand in the parameters because async is a reserved # word in Python3. @@ -50,23 +90,18 @@ class DBConnection: WHERE name = 'max_parallel_workers_per_gather';""") self.wait() + def _deadlock_handler(self): + LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params)) + self.cursor.execute(self.current_query, self.current_params) + def wait(self): """ Block until any pending operation is done. """ while True: - try: + with DeadlockHandler(self._deadlock_handler): wait_select(self.conn) self.current_query = None return - except psycopg2.extensions.TransactionRollbackError as error: - if error.pgcode == '40P01': - LOG.info("Deadlock detected (params = %s), retry.", - str(self.current_params)) - self.cursor.execute(self.current_query, self.current_params) - else: - raise - except psycopg2.errors.DeadlockDetected: # pylint: disable=E1101 - self.cursor.execute(self.current_query, self.current_params) def perform(self, sql, args=None): """ Send SQL query to the server. Returns immediately without @@ -90,17 +125,9 @@ class DBConnection: if self.current_query is None: return True - try: + with DeadlockHandler(self._deadlock_handler): if self.conn.poll() == psycopg2.extensions.POLL_OK: self.current_query = None return True - except psycopg2.extensions.TransactionRollbackError as error: - if error.pgcode == '40P01': - LOG.info("Deadlock detected (params = %s), retry.", str(self.current_params)) - self.cursor.execute(self.current_query, self.current_params) - else: - raise - except psycopg2.errors.DeadlockDetected: # pylint: disable=E1101 - self.cursor.execute(self.current_query, self.current_params) return False diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index c7e22c98..5aa05ced 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -1,12 +1,18 @@ """ Specialised connection and cursor functions. """ +import contextlib import logging +import os import psycopg2 import psycopg2.extensions import psycopg2.extras +from ..errors import UsageError + +LOG = logging.getLogger() + class _Cursor(psycopg2.extras.DictCursor): """ A cursor returning dict-like objects and providing specialised execution functions. @@ -15,8 +21,7 @@ class _Cursor(psycopg2.extras.DictCursor): def execute(self, query, args=None): # pylint: disable=W0221 """ Query execution that logs the SQL query when debugging is enabled. """ - logger = logging.getLogger() - logger.debug(self.mogrify(query, args).decode('utf-8')) + LOG.debug(self.mogrify(query, args).decode('utf-8')) super().execute(query, args) @@ -42,26 +47,126 @@ class _Connection(psycopg2.extensions.connection): """ return super().cursor(cursor_factory=cursor_factory, **kwargs) + def table_exists(self, table): """ Check that a table with the given name exists in the database. """ with self.cursor() as cur: num = cur.scalar("""SELECT count(*) FROM pg_tables - WHERE tablename = %s""", (table, )) + WHERE tablename = %s and schemaname = 'public'""", (table, )) return num == 1 + + def index_exists(self, index, table=None): + """ Check that an index with the given name exists in the database. + If table is not None then the index must relate to the given + table. 
+ """ + with self.cursor() as cur: + cur.execute("""SELECT tablename FROM pg_indexes + WHERE indexname = %s and schemaname = 'public'""", (index, )) + if cur.rowcount == 0: + return False + + if table is not None: + row = cur.fetchone() + return row[0] == table + + return True + + + def drop_table(self, name, if_exists=True): + """ Drop the table with the given name. + Set `if_exists` to False if a non-existant table should raise + an exception instead of just being ignored. + """ + with self.cursor() as cur: + cur.execute("""DROP TABLE {} "{}" + """.format('IF EXISTS' if if_exists else '', name)) + self.commit() + + def server_version_tuple(self): """ Return the server version as a tuple of (major, minor). Converts correctly for pre-10 and post-10 PostgreSQL versions. """ version = self.server_version if version < 100000: - return (version / 10000, (version % 10000) / 100) + return (int(version / 10000), (version % 10000) / 100) + + return (int(version / 10000), version % 10000) + + + def postgis_version_tuple(self): + """ Return the postgis version installed in the database as a + tuple of (major, minor). Assumes that the PostGIS extension + has been installed already. + """ + with self.cursor() as cur: + version = cur.scalar('SELECT postgis_lib_version()') + + return tuple((int(x) for x in version.split('.')[:2])) - return (version / 10000, version % 10000) def connect(dsn): """ Open a connection to the database using the specialised connection - factory. + factory. The returned object may be used in conjunction with 'with'. + When used outside a context manager, use the `connection` attribute + to get the connection. """ - return psycopg2.connect(dsn, connection_factory=_Connection) + try: + conn = psycopg2.connect(dsn, connection_factory=_Connection) + ctxmgr = contextlib.closing(conn) + ctxmgr.connection = conn + return ctxmgr + except psycopg2.OperationalError as err: + raise UsageError("Cannot connect to database: {}".format(err)) from err + + +# Translation from PG connection string parameters to PG environment variables. +# Derived from https://www.postgresql.org/docs/current/libpq-envars.html. +_PG_CONNECTION_STRINGS = { + 'host': 'PGHOST', + 'hostaddr': 'PGHOSTADDR', + 'port': 'PGPORT', + 'dbname': 'PGDATABASE', + 'user': 'PGUSER', + 'password': 'PGPASSWORD', + 'passfile': 'PGPASSFILE', + 'channel_binding': 'PGCHANNELBINDING', + 'service': 'PGSERVICE', + 'options': 'PGOPTIONS', + 'application_name': 'PGAPPNAME', + 'sslmode': 'PGSSLMODE', + 'requiressl': 'PGREQUIRESSL', + 'sslcompression': 'PGSSLCOMPRESSION', + 'sslcert': 'PGSSLCERT', + 'sslkey': 'PGSSLKEY', + 'sslrootcert': 'PGSSLROOTCERT', + 'sslcrl': 'PGSSLCRL', + 'requirepeer': 'PGREQUIREPEER', + 'ssl_min_protocol_version': 'PGSSLMINPROTOCOLVERSION', + 'ssl_max_protocol_version': 'PGSSLMAXPROTOCOLVERSION', + 'gssencmode': 'PGGSSENCMODE', + 'krbsrvname': 'PGKRBSRVNAME', + 'gsslib': 'PGGSSLIB', + 'connect_timeout': 'PGCONNECT_TIMEOUT', + 'target_session_attrs': 'PGTARGETSESSIONATTRS', +} + + +def get_pg_env(dsn, base_env=None): + """ Return a copy of `base_env` with the environment variables for + PostgresSQL set up from the given database connection string. + If `base_env` is None, then the OS environment is used as a base + environment. 
+ """ + env = dict(base_env if base_env is not None else os.environ) + + for param, value in psycopg2.extensions.parse_dsn(dsn).items(): + if param in _PG_CONNECTION_STRINGS: + env[_PG_CONNECTION_STRINGS[param]] = value + else: + LOG.error("Unknown connection parameter '%s' ignored.", param) + + return env diff --git a/nominatim/db/properties.py b/nominatim/db/properties.py new file mode 100644 index 00000000..9cc371fe --- /dev/null +++ b/nominatim/db/properties.py @@ -0,0 +1,28 @@ +""" +Query and access functions for the in-database property table. +""" + +def set_property(conn, name, value): + """ Add or replace the propery with the given name. + """ + with conn.cursor() as cur: + cur.execute('SELECT value FROM nominatim_properties WHERE property = %s', + (name, )) + + if cur.rowcount == 0: + sql = 'INSERT INTO nominatim_properties (value, property) VALUES (%s, %s)' + else: + sql = 'UPDATE nominatim_properties SET value = %s WHERE property = %s' + + cur.execute(sql, (value, name)) + conn.commit() + +def get_property(conn, name): + """ Return the current value of the given propery or None if the property + is not set. + """ + with conn.cursor() as cur: + cur.execute('SELECT value FROM nominatim_properties WHERE property = %s', + (name, )) + + return cur.fetchone()[0] if cur.rowcount > 0 else None diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py new file mode 100644 index 00000000..85244752 --- /dev/null +++ b/nominatim/db/sql_preprocessor.py @@ -0,0 +1,94 @@ +""" +Preprocessing of SQL files. +""" +import jinja2 + + +def _get_partitions(conn): + """ Get the set of partitions currently in use. + """ + with conn.cursor() as cur: + cur.execute('SELECT DISTINCT partition FROM country_name') + partitions = set([0]) + for row in cur: + partitions.add(row[0]) + + return partitions + + +def _get_tables(conn): + """ Return the set of tables currently in use. + Only includes non-partitioned + """ + with conn.cursor() as cur: + cur.execute("SELECT tablename FROM pg_tables WHERE schemaname = 'public'") + + return set((row[0] for row in list(cur))) + + +def _setup_tablespace_sql(config): + """ Returns a dict with tablespace expressions for the different tablespace + kinds depending on whether a tablespace is configured or not. + """ + out = {} + for subset in ('ADDRESS', 'SEARCH', 'AUX'): + for kind in ('DATA', 'INDEX'): + tspace = getattr(config, 'TABLESPACE_{}_{}'.format(subset, kind)) + if tspace: + tspace = 'TABLESPACE "{}"'.format(tspace) + out['{}_{}'.format(subset.lower, kind.lower())] = tspace + + return out + + +def _setup_postgres_sql(conn): + """ Set up a dictionary with various Postgresql/Postgis SQL terms which + are dependent on the database version in use. + """ + out = {} + pg_version = conn.server_version_tuple() + # CREATE INDEX IF NOT EXISTS was introduced in PG9.5. + # Note that you need to ignore failures on older versions when + # unsing this construct. + out['if_index_not_exists'] = ' IF NOT EXISTS ' if pg_version >= (9, 5, 0) else '' + + return out + + +class SQLPreprocessor: # pylint: disable=too-few-public-methods + """ A environment for preprocessing SQL files from the + lib-sql directory. + + The preprocessor provides a number of default filters and variables. + The variables may be overwritten when rendering an SQL file. + + The preprocessing is currently based on the jinja2 templating library + and follows its syntax. 
+ """ + + def __init__(self, conn, config, sqllib_dir): + self.env = jinja2.Environment(autoescape=False, + loader=jinja2.FileSystemLoader(str(sqllib_dir))) + + db_info = {} + db_info['partitions'] = _get_partitions(conn) + db_info['tables'] = _get_tables(conn) + db_info['reverse_only'] = 'search_name' not in db_info['tables'] + db_info['tablespace'] = _setup_tablespace_sql(config) + + self.env.globals['config'] = config + self.env.globals['db'] = db_info + self.env.globals['sql'] = _setup_postgres_sql(conn) + self.env.globals['modulepath'] = config.DATABASE_MODULE_PATH or \ + str((config.project_dir / 'module').resolve()) + + + def run_sql_file(self, conn, name, **kwargs): + """ Execute the given SQL file on the connection. The keyword arguments + may supply additional parameters for preprocessing. + """ + sql = self.env.get_template(name).render(**kwargs) + + with conn.cursor() as cur: + cur.execute(sql) + conn.commit() diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index abd72519..0a2e2c06 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -1,12 +1,59 @@ """ Helper functions for handling DB accesses. """ +import subprocess +import logging +import gzip -def execute_file(conn, fname): - """ Read an SQL file and run its contents against the given connection. +from .connection import get_pg_env +from ..errors import UsageError + +LOG = logging.getLogger() + +def _pipe_to_proc(proc, fdesc): + chunk = fdesc.read(2048) + while chunk and proc.poll() is None: + try: + proc.stdin.write(chunk) + except BrokenPipeError as exc: + raise UsageError("Failed to execute SQL file.") from exc + chunk = fdesc.read(2048) + + return len(chunk) + +def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None): + """ Read an SQL file and run its contents against the given database + using psql. Use `pre_code` and `post_code` to run extra commands + before or after executing the file. The commands are run within the + same session, so they may be used to wrap the file execution in a + transaction. 
""" - with fname.open('r') as fdesc: - sql = fdesc.read() - with conn.cursor() as cur: - cur.execute(sql) - conn.commit() + cmd = ['psql'] + if not ignore_errors: + cmd.extend(('-v', 'ON_ERROR_STOP=1')) + if not LOG.isEnabledFor(logging.INFO): + cmd.append('--quiet') + proc = subprocess.Popen(cmd, env=get_pg_env(dsn), stdin=subprocess.PIPE) + + try: + if not LOG.isEnabledFor(logging.INFO): + proc.stdin.write('set client_min_messages to WARNING;'.encode('utf-8')) + + if pre_code: + proc.stdin.write((pre_code + ';').encode('utf-8')) + + if fname.suffix == '.gz': + with gzip.open(str(fname), 'rb') as fdesc: + remain = _pipe_to_proc(proc, fdesc) + else: + with fname.open('rb') as fdesc: + remain = _pipe_to_proc(proc, fdesc) + + if remain == 0 and post_code: + proc.stdin.write((';' + post_code).encode('utf-8')) + finally: + proc.stdin.close() + ret = proc.wait() + + if ret != 0 or remain > 0: + raise UsageError("Failed to execute SQL file.") diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 6e0ed60f..06c05e1d 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -61,8 +61,8 @@ class InterpolationRunner: @staticmethod def sql_index_place(ids): return """UPDATE location_property_osmline - SET indexed_status = 0 WHERE place_id IN ({})"""\ - .format(','.join((str(i) for i in ids))) + SET indexed_status = 0 WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) class BoundaryRunner: """ Returns SQL commands for indexing the administrative boundaries @@ -79,57 +79,172 @@ class BoundaryRunner: return """SELECT count(*) FROM placex WHERE indexed_status > 0 AND rank_search = {} - AND class = 'boundary' and type = 'administrative'""".format(self.rank) + AND class = 'boundary' and type = 'administrative' + """.format(self.rank) def sql_get_objects(self): return """SELECT place_id FROM placex WHERE indexed_status > 0 and rank_search = {} and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level""".format(self.rank) + ORDER BY partition, admin_level + """.format(self.rank) @staticmethod def sql_index_place(ids): return "UPDATE placex SET indexed_status = 0 WHERE place_id IN ({})"\ .format(','.join((str(i) for i in ids))) + +class PostcodeRunner: + """ Provides the SQL commands for indexing the location_postcode table. + """ + + @staticmethod + def name(): + return "postcodes (location_postcode)" + + @staticmethod + def sql_count_objects(): + return 'SELECT count(*) FROM location_postcode WHERE indexed_status > 0' + + @staticmethod + def sql_get_objects(): + return """SELECT place_id FROM location_postcode + WHERE indexed_status > 0 + ORDER BY country_code, postcode""" + + @staticmethod + def sql_index_place(ids): + return """UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({}) + """.format(','.join((str(i) for i in ids))) + + +def _analyse_db_if(conn, condition): + if condition: + with conn.cursor() as cur: + cur.execute('ANALYSE') + + class Indexer: """ Main indexing routine. 
""" def __init__(self, dsn, num_threads): - self.conn = psycopg2.connect(dsn) - self.threads = [DBConnection(dsn) for _ in range(num_threads)] + self.dsn = dsn + self.num_threads = num_threads + self.conn = None + self.threads = [] + + + def _setup_connections(self): + self.conn = psycopg2.connect(self.dsn) + self.threads = [DBConnection(self.dsn) for _ in range(self.num_threads)] + + + def _close_connections(self): + if self.conn: + self.conn.close() + self.conn = None + + for thread in self.threads: + thread.close() + self.threads = [] + + + def index_full(self, analyse=True): + """ Index the complete database. This will first index boudnaries + followed by all other objects. When `analyse` is True, then the + database will be analysed at the appropriate places to + ensure that database statistics are updated. + """ + conn = psycopg2.connect(self.dsn) + conn.autocommit = True + + try: + self.index_by_rank(0, 4) + _analyse_db_if(conn, analyse) + + self.index_boundaries(0, 30) + _analyse_db_if(conn, analyse) + + self.index_by_rank(5, 25) + _analyse_db_if(conn, analyse) + + self.index_by_rank(26, 30) + _analyse_db_if(conn, analyse) + + self.index_postcodes() + _analyse_db_if(conn, analyse) + finally: + conn.close() + def index_boundaries(self, minrank, maxrank): + """ Index only administrative boundaries within the given rank range. + """ LOG.warning("Starting indexing boundaries using %s threads", - len(self.threads)) + self.num_threads) + + self._setup_connections() - for rank in range(max(minrank, 4), min(maxrank, 26)): - self.index(BoundaryRunner(rank)) + try: + for rank in range(max(minrank, 4), min(maxrank, 26)): + self.index(BoundaryRunner(rank)) + finally: + self._close_connections() def index_by_rank(self, minrank, maxrank): - """ Run classic indexing by rank. + """ Index all entries of placex in the given rank range (inclusive) + in order of their address rank. + + When rank 30 is requested then also interpolations and + places with address rank 0 will be indexed. """ maxrank = min(maxrank, 30) LOG.warning("Starting indexing rank (%i to %i) using %i threads", - minrank, maxrank, len(self.threads)) + minrank, maxrank, self.num_threads) + + self._setup_connections() + + try: + for rank in range(max(1, minrank), maxrank): + self.index(RankRunner(rank)) + + if maxrank == 30: + self.index(RankRunner(0)) + self.index(InterpolationRunner(), 20) + self.index(RankRunner(30), 20) + else: + self.index(RankRunner(maxrank)) + finally: + self._close_connections() + + + def index_postcodes(self): + """Index the entries ofthe location_postcode table. + """ + LOG.warning("Starting indexing postcodes using %s threads", self.num_threads) - for rank in range(max(1, minrank), maxrank): - self.index(RankRunner(rank)) + self._setup_connections() - if maxrank == 30: - self.index(RankRunner(0)) - self.index(InterpolationRunner(), 20) - self.index(RankRunner(30), 20) - else: - self.index(RankRunner(maxrank)) + try: + self.index(PostcodeRunner(), 20) + finally: + self._close_connections() def update_status_table(self): """ Update the status in the status table to 'indexed'. """ - with self.conn.cursor() as cur: - cur.execute('UPDATE import_status SET indexed = true') - self.conn.commit() + conn = psycopg2.connect(self.dsn) + + try: + with conn.cursor() as cur: + cur.execute('UPDATE import_status SET indexed = true') + + conn.commit() + finally: + conn.close() def index(self, obj, batch=1): """ Index a single rank or table. 
`obj` describes the SQL to use diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py new file mode 100644 index 00000000..d8ab08cc --- /dev/null +++ b/nominatim/tools/check_database.py @@ -0,0 +1,269 @@ +""" +Collection of functions that check if the database is complete and functional. +""" +from enum import Enum +from textwrap import dedent + +import psycopg2 + +from ..db.connection import connect +from ..errors import UsageError + +CHECKLIST = [] + +class CheckState(Enum): + """ Possible states of a check. FATAL stops check execution entirely. + """ + OK = 0 + FAIL = 1 + FATAL = 2 + NOT_APPLICABLE = 3 + +def _check(hint=None): + """ Decorator for checks. It adds the function to the list of + checks to execute and adds the code for printing progress messages. + """ + def decorator(func): + title = func.__doc__.split('\n', 1)[0].strip() + def run_check(conn, config): + print(title, end=' ... ') + ret = func(conn, config) + if isinstance(ret, tuple): + ret, params = ret + else: + params = {} + if ret == CheckState.OK: + print('\033[92mOK\033[0m') + elif ret == CheckState.NOT_APPLICABLE: + print('not applicable') + else: + print('\x1B[31mFailed\033[0m') + if hint: + print(dedent(hint.format(**params))) + return ret + + CHECKLIST.append(run_check) + return run_check + + return decorator + +class _BadConnection: # pylint: disable=R0903 + + def __init__(self, msg): + self.msg = msg + + def close(self): + """ Dummy function to provide the implementation. + """ + +def check_database(config): + """ Run a number of checks on the database and return the status. + """ + try: + conn = connect(config.get_libpq_dsn()).connection + except UsageError as err: + conn = _BadConnection(str(err)) + + overall_result = 0 + for check in CHECKLIST: + ret = check(conn, config) + if ret == CheckState.FATAL: + conn.close() + return 1 + if ret in (CheckState.FATAL, CheckState.FAIL): + overall_result = 1 + + conn.close() + return overall_result + + +def _get_indexes(conn): + indexes = ['idx_word_word_id', + 'idx_place_addressline_address_place_id', + 'idx_placex_rank_search', + 'idx_placex_rank_address', + 'idx_placex_parent_place_id', + 'idx_placex_geometry_reverse_lookuppolygon', + 'idx_placex_geometry_reverse_placenode', + 'idx_osmline_parent_place_id', + 'idx_osmline_parent_osm_id', + 'idx_postcode_id', + 'idx_postcode_postcode' + ] + if conn.table_exists('search_name'): + indexes.extend(('idx_search_name_nameaddress_vector', + 'idx_search_name_name_vector', + 'idx_search_name_centroid')) + if conn.table_exists('place'): + indexes.extend(('idx_placex_pendingsector', + 'idx_location_area_country_place_id', + 'idx_place_osm_unique' + )) + + return indexes + + +### CHECK FUNCTIONS +# +# Functions are exectured in the order they appear here. + +@_check(hint="""\ + {error} + + Hints: + * Is the database server started? + * Check the NOMINATIM_DATABASE_DSN variable in your local .env + * Try connecting to the database with the same settings + + Project directory: {config.project_dir} + Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} + """) +def check_connection(conn, config): + """ Checking database connection + """ + if isinstance(conn, _BadConnection): + return CheckState.FATAL, dict(error=conn.msg, config=config) + + return CheckState.OK + +@_check(hint="""\ + placex table not found + + Hints: + * Are you connecting to the right database? + * Did the import process finish without errors? 
+ + Project directory: {config.project_dir} + Current setting of NOMINATIM_DATABASE_DSN: {config.DATABASE_DSN} + """) +def check_placex_table(conn, config): + """ Checking for placex table + """ + if conn.table_exists('placex'): + return CheckState.OK + + return CheckState.FATAL, dict(config=config) + + +@_check(hint="""placex table has no data. Did the import finish sucessfully?""") +def check_placex_size(conn, config): # pylint: disable=W0613 + """ Checking for placex content + """ + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM (SELECT * FROM placex LIMIT 100) x') + + return CheckState.OK if cnt > 0 else CheckState.FATAL + + +@_check(hint="""\ + The Postgresql extension nominatim.so was not correctly loaded. + + Error: {error} + + Hints: + * Check the output of the CMmake/make installation step + * Does nominatim.so exist? + * Does nominatim.so exist on the database server? + * Can nominatim.so be accessed by the database user? + """) +def check_module(conn, config): # pylint: disable=W0613 + """ Checking that nominatim.so module is installed + """ + with conn.cursor() as cur: + try: + out = cur.scalar("SELECT make_standard_name('a')") + except psycopg2.ProgrammingError as err: + return CheckState.FAIL, dict(error=str(err)) + + if out != 'a': + return CheckState.FAIL, dict(error='Unexpected result for make_standard_name()') + + return CheckState.OK + + +@_check(hint="""\ + The indexing didn't finish. {count} entries are not yet indexed. + + To index the remaining entries, run: {index_cmd} + """) +def check_indexing(conn, config): # pylint: disable=W0613 + """ Checking indexing status + """ + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM placex WHERE indexed_status > 0') + + if cnt == 0: + return CheckState.OK + + if conn.index_exists('idx_word_word_id'): + # Likely just an interrupted update. + index_cmd = 'nominatim index' + else: + # Looks like the import process got interrupted. + index_cmd = 'nominatim import --continue indexing' + + return CheckState.FAIL, dict(count=cnt, index_cmd=index_cmd) + + +@_check(hint="""\ + The following indexes are missing: + {indexes} + + Rerun the index creation with: nominatim import --continue db-postprocess + """) +def check_database_indexes(conn, config): # pylint: disable=W0613 + """ Checking that database indexes are complete + """ + missing = [] + for index in _get_indexes(conn): + if not conn.index_exists(index): + missing.append(index) + + if missing: + return CheckState.FAIL, dict(indexes='\n '.join(missing)) + + return CheckState.OK + + +@_check(hint="""\ + At least one index is invalid. That can happen, e.g. when index creation was + disrupted and later restarted. You should delete the affected indices + and recreate them. + + Invalid indexes: + {indexes} + """) +def check_database_index_valid(conn, config): # pylint: disable=W0613 + """ Checking that all database indexes are valid + """ + with conn.cursor() as cur: + cur.execute(""" SELECT relname FROM pg_class, pg_index + WHERE pg_index.indisvalid = false + AND pg_index.indexrelid = pg_class.oid""") + + broken = list(cur) + + if broken: + return CheckState.FAIL, dict(indexes='\n '.join(broken)) + + return CheckState.OK + + +@_check(hint="""\ + {error} + Run TIGER import again: nominatim add-data --tiger-data + """) +def check_tiger_table(conn, config): + """ Checking TIGER external data table. 
+ """ + if not config.get_bool('USE_US_TIGER_DATA'): + return CheckState.NOT_APPLICABLE + + if not conn.table_exists('location_property_tiger'): + return CheckState.FAIL, dict(error='TIGER data table not found.') + + with conn.cursor() as cur: + if cur.scalar('SELECT count(*) FROM location_property_tiger') == 0: + return CheckState.FAIL, dict(error='TIGER data table is empty.') + + return CheckState.OK diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py new file mode 100644 index 00000000..017c74b6 --- /dev/null +++ b/nominatim/tools/database_import.py @@ -0,0 +1,308 @@ +""" +Functions for setting up and importing a new Nominatim database. +""" +import logging +import os +import selectors +import subprocess +import shutil +from pathlib import Path + +import psutil +import psycopg2 + +from ..db.connection import connect, get_pg_env +from ..db import utils as db_utils +from ..db.async_connection import DBConnection +from ..db.sql_preprocessor import SQLPreprocessor +from .exec_utils import run_osm2pgsql +from ..errors import UsageError +from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION + +LOG = logging.getLogger() + +def setup_database_skeleton(dsn, data_dir, no_partitions, rouser=None): + """ Create a new database for Nominatim and populate it with the + essential extensions and data. + """ + LOG.warning('Creating database') + create_db(dsn, rouser) + + LOG.warning('Setting up database') + with connect(dsn) as conn: + setup_extensions(conn) + + LOG.warning('Loading basic data') + import_base_data(dsn, data_dir, no_partitions) + + +def create_db(dsn, rouser=None): + """ Create a new database for the given DSN. Fails when the database + already exists or the PostgreSQL version is too old. + Uses `createdb` to create the database. + + If 'rouser' is given, then the function also checks that the user + with that given name exists. + + Requires superuser rights by the caller. + """ + proc = subprocess.run(['createdb'], env=get_pg_env(dsn), check=False) + + if proc.returncode != 0: + raise UsageError('Creating new database failed.') + + with connect(dsn) as conn: + postgres_version = conn.server_version_tuple() + if postgres_version < POSTGRESQL_REQUIRED_VERSION: + LOG.fatal('Minimum supported version of Postgresql is %d.%d. ' + 'Found version %d.%d.', + POSTGRESQL_REQUIRED_VERSION[0], POSTGRESQL_REQUIRED_VERSION[1], + postgres_version[0], postgres_version[1]) + raise UsageError('PostgreSQL server is too old.') + + if rouser is not None: + with conn.cursor() as cur: + cnt = cur.scalar('SELECT count(*) FROM pg_user where usename = %s', + (rouser, )) + if cnt == 0: + LOG.fatal("Web user '%s' does not exists. Create it with:\n" + "\n createuser %s", rouser, rouser) + raise UsageError('Missing read-only user.') + + + +def setup_extensions(conn): + """ Set up all extensions needed for Nominatim. Also checks that the + versions of the extensions are sufficient. + """ + with conn.cursor() as cur: + cur.execute('CREATE EXTENSION IF NOT EXISTS hstore') + cur.execute('CREATE EXTENSION IF NOT EXISTS postgis') + conn.commit() + + postgis_version = conn.postgis_version_tuple() + if postgis_version < POSTGIS_REQUIRED_VERSION: + LOG.fatal('Minimum supported version of PostGIS is %d.%d. 
' + 'Found version %d.%d.', + POSTGIS_REQUIRED_VERSION[0], POSTGIS_REQUIRED_VERSION[1], + postgis_version[0], postgis_version[1]) + raise UsageError('PostGIS version is too old.') + + +def install_module(src_dir, project_dir, module_dir, conn=None): + """ Copy the normalization module from src_dir into the project + directory under the '/module' directory. If 'module_dir' is set, then + use the module from there instead and check that it is accessible + for Postgresql. + + The function detects when the installation is run from the + build directory. It doesn't touch the module in that case. + + If 'conn' is given, then the function also tests if the module + can be access via the given database. + """ + if not module_dir: + module_dir = project_dir / 'module' + + if not module_dir.exists() or not src_dir.samefile(module_dir): + + if not module_dir.exists(): + module_dir.mkdir() + + destfile = module_dir / 'nominatim.so' + shutil.copy(str(src_dir / 'nominatim.so'), str(destfile)) + destfile.chmod(0o755) + + LOG.info('Database module installed at %s', str(destfile)) + else: + LOG.info('Running from build directory. Leaving database module as is.') + else: + LOG.info("Using custom path for database module at '%s'", module_dir) + + if conn is not None: + with conn.cursor() as cur: + try: + cur.execute("""CREATE FUNCTION nominatim_test_import_func(text) + RETURNS text AS '{}/nominatim.so', 'transliteration' + LANGUAGE c IMMUTABLE STRICT; + DROP FUNCTION nominatim_test_import_func(text) + """.format(module_dir)) + except psycopg2.DatabaseError as err: + LOG.fatal("Error accessing database module: %s", err) + raise UsageError("Database module cannot be accessed.") from err + + +def import_base_data(dsn, sql_dir, ignore_partitions=False): + """ Create and populate the tables with basic static data that provides + the background for geocoding. Data is assumed to not yet exist. + """ + db_utils.execute_file(dsn, sql_dir / 'country_name.sql') + db_utils.execute_file(dsn, sql_dir / 'country_osm_grid.sql.gz') + + if ignore_partitions: + with connect(dsn) as conn: + with conn.cursor() as cur: + cur.execute('UPDATE country_name SET partition = 0') + conn.commit() + + +def import_osm_data(osm_file, options, drop=False, ignore_errors=False): + """ Import the given OSM file. 'options' contains the list of + default settings for osm2pgsql. + """ + options['import_file'] = osm_file + options['append'] = False + options['threads'] = 1 + + if not options['flatnode_file'] and options['osm2pgsql_cache'] == 0: + # Make some educated guesses about cache size based on the size + # of the import file and the available memory. + mem = psutil.virtual_memory() + fsize = os.stat(str(osm_file)).st_size + options['osm2pgsql_cache'] = int(min((mem.available + mem.cached) * 0.75, + fsize * 2) / 1024 / 1024) + 1 + + run_osm2pgsql(options) + + with connect(options['dsn']) as conn: + if not ignore_errors: + with conn.cursor() as cur: + cur.execute('SELECT * FROM place LIMIT 1') + if cur.rowcount == 0: + raise UsageError('No data imported by osm2pgsql.') + + if drop: + conn.drop_table('planet_osm_nodes') + + if drop: + if options['flatnode_file']: + Path(options['flatnode_file']).unlink() + + +def create_tables(conn, config, sqllib_dir, reverse_only=False): + """ Create the set of basic tables. + When `reverse_only` is True, then the main table for searching will + be skipped and only reverse search is possible. 
+ """ + sql = SQLPreprocessor(conn, config, sqllib_dir) + sql.env.globals['db']['reverse_only'] = reverse_only + + sql.run_sql_file(conn, 'tables.sql') + + +def create_table_triggers(conn, config, sqllib_dir): + """ Create the triggers for the tables. The trigger functions must already + have been imported with refresh.create_functions(). + """ + sql = SQLPreprocessor(conn, config, sqllib_dir) + sql.run_sql_file(conn, 'table-triggers.sql') + + +def create_partition_tables(conn, config, sqllib_dir): + """ Create tables that have explicit partitioning. + """ + sql = SQLPreprocessor(conn, config, sqllib_dir) + sql.run_sql_file(conn, 'partition-tables.src.sql') + + +def truncate_data_tables(conn, max_word_frequency=None): + """ Truncate all data tables to prepare for a fresh load. + """ + with conn.cursor() as cur: + cur.execute('TRUNCATE word') + cur.execute('TRUNCATE placex') + cur.execute('TRUNCATE place_addressline') + cur.execute('TRUNCATE location_area') + cur.execute('TRUNCATE location_area_country') + cur.execute('TRUNCATE location_property') + cur.execute('TRUNCATE location_property_tiger') + cur.execute('TRUNCATE location_property_osmline') + cur.execute('TRUNCATE location_postcode') + if conn.table_exists('search_name'): + cur.execute('TRUNCATE search_name') + cur.execute('DROP SEQUENCE IF EXISTS seq_place') + cur.execute('CREATE SEQUENCE seq_place start 100000') + + cur.execute("""SELECT tablename FROM pg_tables + WHERE tablename LIKE 'location_road_%'""") + + for table in [r[0] for r in list(cur)]: + cur.execute('TRUNCATE ' + table) + + if max_word_frequency is not None: + # Used by getorcreate_word_id to ignore frequent partial words. + cur.execute("""CREATE OR REPLACE FUNCTION get_maxwordfreq() + RETURNS integer AS $$ + SELECT {} as maxwordfreq; + $$ LANGUAGE SQL IMMUTABLE + """.format(max_word_frequency)) + conn.commit() + +_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' + +def load_data(dsn, data_dir, threads): + """ Copy data into the word and placex table. + """ + # Pre-calculate the most important terms in the word list. + db_utils.execute_file(dsn, data_dir / 'words.sql') + + sel = selectors.DefaultSelector() + # Then copy data from place to placex in chunks. + place_threads = max(1, threads - 1) + for imod in range(place_threads): + conn = DBConnection(dsn) + conn.connect() + conn.perform("""INSERT INTO placex ({0}) + SELECT {0} FROM place + WHERE osm_id % {1} = {2} + AND NOT (class='place' and type='houses') + AND ST_IsValid(geometry) + """.format(_COPY_COLUMNS, place_threads, imod)) + sel.register(conn, selectors.EVENT_READ, conn) + + # Address interpolations go into another table. + conn = DBConnection(dsn) + conn.connect() + conn.perform("""INSERT INTO location_property_osmline (osm_id, address, linegeo) + SELECT osm_id, address, geometry FROM place + WHERE class='place' and type='houses' and osm_type='W' + and ST_GeometryType(geometry) = 'ST_LineString' + """) + sel.register(conn, selectors.EVENT_READ, conn) + + # Now wait for all of them to finish. + todo = place_threads + 1 + while todo > 0: + for key, _ in sel.select(1): + conn = key.data + sel.unregister(conn) + conn.wait() + conn.close() + todo -= 1 + print('.', end='', flush=True) + print('\n') + + with connect(dsn) as conn: + with conn.cursor() as cur: + cur.execute('ANALYSE') + + +def create_search_indices(conn, config, sqllib_dir, drop=False): + """ Create tables that have explicit partitioning. 
+ """ + + # If index creation failed and left an index invalid, they need to be + # cleaned out first, so that the script recreates them. + with conn.cursor() as cur: + cur.execute("""SELECT relname FROM pg_class, pg_index + WHERE pg_index.indisvalid = false + AND pg_index.indexrelid = pg_class.oid""") + bad_indices = [row[0] for row in list(cur)] + for idx in bad_indices: + LOG.info("Drop invalid index %s.", idx) + cur.execute('DROP INDEX "{}"'.format(idx)) + conn.commit() + + sql = SQLPreprocessor(conn, config, sqllib_dir) + + sql.run_sql_file(conn, 'indices.sql', drop=drop) diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index f373f347..e6b9d8d4 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -2,14 +2,12 @@ Helper functions for executing external programs. """ import logging -import os import subprocess import urllib.request as urlrequest from urllib.parse import urlencode -from psycopg2.extensions import parse_dsn - from ..version import NOMINATIM_VERSION +from ..db.connection import get_pg_env LOG = logging.getLogger() @@ -100,7 +98,7 @@ def run_php_server(server_address, base_dir): def run_osm2pgsql(options): """ Run osm2pgsql with the given options. """ - env = os.environ + env = get_pg_env(options['dsn']) cmd = [options['osm2pgsql'], '--hstore', '--latlon', '--slim', '--with-forward-dependencies', 'false', @@ -112,20 +110,18 @@ def run_osm2pgsql(options): ] if options['append']: cmd.append('--append') + else: + cmd.append('--create') if options['flatnode_file']: cmd.extend(('--flat-nodes', options['flatnode_file'])) - dsn = parse_dsn(options['dsn']) - if 'password' in dsn: - env['PGPASSWORD'] = dsn['password'] - if 'dbname' in dsn: - cmd.extend(('-d', dsn['dbname'])) - if 'user' in dsn: - cmd.extend(('--username', dsn['user'])) - for param in ('host', 'port'): - if param in dsn: - cmd.extend(('--' + param, dsn[param])) + for key, param in (('slim_data', '--tablespace-slim-data'), + ('slim_index', '--tablespace-slim-index'), + ('main_data', '--tablespace-main-data'), + ('main_index', '--tablespace-main-index')): + if options['tablespaces'][key]: + cmd.extend((param, options['tablespaces'][key])) if options.get('disable_jit', False): env['PGOPTIONS'] = '-c jit=off -c max_parallel_workers_per_gather=0' @@ -138,7 +134,7 @@ def run_osm2pgsql(options): def get_url(url): """ Get the contents from the given URL and return it as a UTF-8 string. """ - headers = {"User-Agent" : "Nominatim/" + NOMINATIM_VERSION} + headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} try: with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py new file mode 100644 index 00000000..cc1bf97e --- /dev/null +++ b/nominatim/tools/freeze.py @@ -0,0 +1,43 @@ +""" +Functions for removing unnecessary data from the database. +""" +from pathlib import Path + +UPDATE_TABLES = [ + 'address_levels', + 'gb_postcode', + 'import_osmosis_log', + 'import_polygon_%', + 'location_area%', + 'location_road%', + 'place', + 'planet_osm_%', + 'search_name_%', + 'us_postcode', + 'wikipedia_%' +] + +def drop_update_tables(conn): + """ Drop all tables only necessary for updating the database from + OSM replication data. 
+ """ + + where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES]) + + with conn.cursor() as cur: + cur.execute("SELECT tablename FROM pg_tables WHERE " + where) + tables = [r[0] for r in cur] + + for table in tables: + cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table)) + + conn.commit() + + +def drop_flatnode_file(fname): + """ Remove the flatnode file if it exists. + """ + if fname: + fpath = Path(fname) + if fpath.exists(): + fpath.unlink() diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 1fcb1577..c00a2e10 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -2,23 +2,28 @@ Functions for bringing auxiliary data in the database up-to-date. """ import json -import re +import logging +from textwrap import dedent from psycopg2.extras import execute_values from ..db.utils import execute_file +from ..db.sql_preprocessor import SQLPreprocessor +from ..version import NOMINATIM_VERSION -def update_postcodes(conn, sql_dir): +LOG = logging.getLogger() + +def update_postcodes(dsn, sql_dir): """ Recalculate postcode centroids and add, remove and update entries in the location_postcode table. `conn` is an opne connection to the database. """ - execute_file(conn, sql_dir / 'update-postcodes.sql') + execute_file(dsn, sql_dir / 'update-postcodes.sql') -def recompute_word_counts(conn, sql_dir): +def recompute_word_counts(dsn, sql_dir): """ Compute the frequency of full-word search terms. """ - execute_file(conn, sql_dir / 'words_from_search_name.sql') + execute_file(dsn, sql_dir / 'words_from_search_name.sql') def _add_address_level_rows_from_entry(rows, entry): @@ -71,97 +76,124 @@ def load_address_levels_from_file(conn, config_file): with config_file.open('r') as fdesc: load_address_levels(conn, 'address_levels', json.load(fdesc)) -PLPGSQL_BASE_MODULES = ( - 'utils.sql', - 'normalization.sql', - 'ranking.sql', - 'importance.sql', - 'address_lookup.sql', - 'interpolation.sql' -) -PLPGSQL_TABLE_MODULES = ( - ('place', 'place_triggers.sql'), - ('placex', 'placex_triggers.sql'), - ('location_postcode', 'postcode_triggers.sql') -) - -def _get_standard_function_sql(conn, config, sql_dir, enable_diff_updates, enable_debug): - """ Read all applicable SQLs containing PL/pgSQL functions, replace - placefolders and execute them. +def create_functions(conn, config, sqllib_dir, + enable_diff_updates=True, enable_debug=False): + """ (Re)create the PL/pgSQL functions. """ - sql_func_dir = sql_dir / 'functions' - sql = '' + sql = SQLPreprocessor(conn, config, sqllib_dir) - # Get the basic set of functions that is always imported. - for sql_file in PLPGSQL_BASE_MODULES: - with (sql_func_dir / sql_file).open('r') as fdesc: - sql += fdesc.read() + sql.run_sql_file(conn, 'functions.sql', + disable_diff_update=not enable_diff_updates, + debug=enable_debug) - # Some files require the presence of a certain table - for table, fname in PLPGSQL_TABLE_MODULES: - if conn.table_exists(table): - with (sql_func_dir / fname).open('r') as fdesc: - sql += fdesc.read() - # Replace placeholders. 
- sql = sql.replace('{modulepath}', - config.DATABASE_MODULE_PATH or str((config.project_dir / 'module').resolve())) - if enable_diff_updates: - sql = sql.replace('RETURN NEW; -- %DIFFUPDATES%', '--') +WEBSITE_SCRIPTS = ( + 'deletable.php', + 'details.php', + 'lookup.php', + 'polygons.php', + 'reverse.php', + 'search.php', + 'status.php' +) - if enable_debug: - sql = sql.replace('--DEBUG:', '') +# constants needed by PHP scripts: PHP name, config name, type +PHP_CONST_DEFS = ( + ('Database_DSN', 'DATABASE_DSN', str), + ('Default_Language', 'DEFAULT_LANGUAGE', str), + ('Log_DB', 'LOG_DB', bool), + ('Log_File', 'LOG_FILE', str), + ('Max_Word_Frequency', 'MAX_WORD_FREQUENCY', int), + ('NoAccessControl', 'CORS_NOACCESSCONTROL', bool), + ('Places_Max_ID_count', 'LOOKUP_MAX_COUNT', int), + ('PolygonOutput_MaximumTypes', 'POLYGON_OUTPUT_MAX_TYPES', int), + ('Search_BatchMode', 'SEARCH_BATCH_MODE', bool), + ('Search_NameOnlySearchFrequencyThreshold', 'SEARCH_NAME_ONLY_THRESHOLD', str), + ('Term_Normalization_Rules', 'TERM_NORMALIZATION', str), + ('Use_Aux_Location_data', 'USE_AUX_LOCATION_DATA', bool), + ('Use_US_Tiger_Data', 'USE_US_TIGER_DATA', bool), + ('MapIcon_URL', 'MAPICON_URL', str), +) - if config.get_bool('LIMIT_REINDEXING'): - sql = sql.replace('--LIMIT INDEXING:', '') - if not config.get_bool('USE_US_TIGER_DATA'): - sql = sql.replace('-- %NOTIGERDATA% ', '') +def import_wikipedia_articles(dsn, data_path, ignore_errors=False): + """ Replaces the wikipedia importance tables with new data. + The import is run in a single transaction so that the new data + is replace seemlessly. + + Returns 0 if all was well and 1 if the importance file could not + be found. Throws an exception if there was an error reading the file. + """ + datafile = data_path / 'wikimedia-importance.sql.gz' - if not config.get_bool('USE_AUX_LOCATION_DATA'): - sql = sql.replace('-- %NOAUXDATA% ', '') + if not datafile.exists(): + return 1 - reverse_only = 'false' if conn.table_exists('search_name') else 'true' + pre_code = """BEGIN; + DROP TABLE IF EXISTS "wikipedia_article"; + DROP TABLE IF EXISTS "wikipedia_redirect" + """ + post_code = "COMMIT" + execute_file(dsn, datafile, ignore_errors=ignore_errors, + pre_code=pre_code, post_code=post_code) - return sql.replace('%REVERSE-ONLY%', reverse_only) + return 0 -def replace_partition_string(sql, partitions): - """ Replace a partition template with the actual partition code. +def recompute_importance(conn): + """ Recompute wikipedia links and importance for all entries in placex. + This is a long-running operations that must not be executed in + parallel with updates. """ - for match in re.findall('^-- start(.*?)^-- end', sql, re.M | re.S): - repl = '' - for part in partitions: - repl += match.replace('-partition-', str(part)) - sql = sql.replace(match, repl) + with conn.cursor() as cur: + cur.execute('ALTER TABLE placex DISABLE TRIGGER ALL') + cur.execute(""" + UPDATE placex SET (wikipedia, importance) = + (SELECT wikipedia, importance + FROM compute_importance(extratags, country_code, osm_type, osm_id)) + """) + cur.execute(""" + UPDATE placex s SET wikipedia = d.wikipedia, importance = d.importance + FROM placex d + WHERE s.place_id = d.linked_place_id and d.wikipedia is not null + and (s.wikipedia is null or s.importance < d.importance); + """) + + cur.execute('ALTER TABLE placex ENABLE TRIGGER ALL') + conn.commit() - return sql -def _get_partition_function_sql(conn, sql_dir): - """ Create functions that work on partition tables. 
+def setup_website(basedir, phplib_dir, config): + """ Create the website script stubs. """ - with conn.cursor() as cur: - cur.execute('SELECT distinct partition FROM country_name') - partitions = set([0]) - for row in cur: - partitions.add(row[0]) + if not basedir.exists(): + LOG.info('Creating website directory.') + basedir.mkdir() - with (sql_dir / 'partition-functions.src.sql').open('r') as fdesc: - sql = fdesc.read() + template = dedent("""\ + - Then a HTTP 400 is returned + Then a HTTP 404 is returned Examples: | object | diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index dd76dee3..168334b1 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -7,7 +7,9 @@ import psycopg2.extras sys.path.insert(1, str((Path(__file__) / '..' / '..' / '..' / '..').resolve())) +from nominatim import cli from nominatim.config import Configuration +from nominatim.tools import refresh from steps.utils import run_script class NominatimEnvironment: @@ -87,24 +89,25 @@ class NominatimEnvironment: self.test_env['NOMINATIM_FLATNODE_FILE'] = '' self.test_env['NOMINATIM_IMPORT_STYLE'] = 'full' self.test_env['NOMINATIM_USE_US_TIGER_DATA'] = 'yes' - self.test_env['NOMINATIM_DATADIR'] = self.src_dir / 'data' - self.test_env['NOMINATIM_SQLDIR'] = self.src_dir / 'lib-sql' - self.test_env['NOMINATIM_CONFIGDIR'] = self.src_dir / 'settings' - self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = self.build_dir / 'module' - self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = self.build_dir / 'osm2pgsql' / 'osm2pgsql' - self.test_env['NOMINATIM_NOMINATIM_TOOL'] = self.build_dir / 'nominatim' + self.test_env['NOMINATIM_DATADIR'] = str((self.src_dir / 'data').resolve()) + self.test_env['NOMINATIM_SQLDIR'] = str((self.src_dir / 'lib-sql').resolve()) + self.test_env['NOMINATIM_CONFIGDIR'] = str((self.src_dir / 'settings').resolve()) + self.test_env['NOMINATIM_DATABASE_MODULE_SRC_PATH'] = str((self.build_dir / 'module').resolve()) + self.test_env['NOMINATIM_OSM2PGSQL_BINARY'] = str((self.build_dir / 'osm2pgsql' / 'osm2pgsql').resolve()) + self.test_env['NOMINATIM_NOMINATIM_TOOL'] = str((self.build_dir / 'nominatim').resolve()) if self.server_module_path: self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.server_module_path else: # avoid module being copied into the temporary environment - self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = self.build_dir / 'module' + self.test_env['NOMINATIM_DATABASE_MODULE_PATH'] = str((self.build_dir / 'module').resolve()) if self.website_dir is not None: self.website_dir.cleanup() self.website_dir = tempfile.TemporaryDirectory() - self.run_setup_script('setup-website') + cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env) + refresh.setup_website(Path(self.website_dir.name) / 'website', self.src_dir / 'lib-php', cfg) def db_drop_database(self, name): @@ -180,8 +183,9 @@ class NominatimEnvironment: self.test_env['NOMINATIM_WIKIPEDIA_DATA_PATH'] = str(testdata.resolve()) try: - self.run_setup_script('all', osm_file=self.api_test_file) + self.run_nominatim('import', '--osm-file', str(self.api_test_file)) self.run_setup_script('import-tiger-data') + self.run_nominatim('freeze') phrase_file = str((testdata / 'specialphrases_testdb.sql').resolve()) run_script(['psql', '-d', self.api_test_db, '-f', phrase_file]) @@ -246,12 +250,25 @@ class NominatimEnvironment: """ with db.cursor() as cur: while True: - self.run_update_script('index') + self.run_nominatim('index') cur.execute("SELECT 
'a' FROM placex WHERE indexed_status != 0 LIMIT 1")
             if cur.rowcount == 0:
                 return
 
+    def run_nominatim(self, *cmdline):
+        """ Run the nominatim command-line tool via the library.
+        """
+        cli.nominatim(module_dir='',
+                      osm2pgsql_path=str(self.build_dir / 'osm2pgsql' / 'osm2pgsql'),
+                      phplib_dir=str(self.src_dir / 'lib-php'),
+                      sqllib_dir=str(self.src_dir / 'lib-sql'),
+                      data_dir=str(self.src_dir / 'data'),
+                      config_dir=str(self.src_dir / 'settings'),
+                      cli_args=cmdline,
+                      phpcgi_path='',
+                      environ=self.test_env)
+
     def run_setup_script(self, *args, **kwargs):
         """ Run the Nominatim setup script with the given arguments.
         """
@@ -282,7 +299,7 @@
         """ Copy data from place to the placex and location_property_osmline
             tables invoking the appropriate triggers.
         """
-        self.run_setup_script('create-functions', 'create-partition-functions')
+        self.run_nominatim('refresh', '--functions', '--no-diff-updates')
 
         with db.cursor() as cur:
             cur.execute("""INSERT INTO placex (osm_type, osm_id, class, type,
diff --git a/test/bdd/steps/steps_db_ops.py b/test/bdd/steps/steps_db_ops.py
index c549f3eb..9d443b43 100644
--- a/test/bdd/steps/steps_db_ops.py
+++ b/test/bdd/steps/steps_db_ops.py
@@ -5,6 +5,7 @@ import psycopg2.extras
 
 from place_inserter import PlaceColumn
 from table_compare import NominatimID, DBRow
+from nominatim.indexer.indexer import Indexer
 
 def check_database_integrity(context):
     """ Check some generic constraints on the tables.
@@ -85,7 +86,12 @@ def import_and_index_data_from_place_table(context):
     """ Import data previously set up in the place table.
     """
     context.nominatim.copy_from_place(context.db)
-    context.nominatim.run_setup_script('calculate-postcodes', 'index', 'index-noanalyse')
+    context.nominatim.run_setup_script('calculate-postcodes')
+
+    # Call directly as the refresh function does not include postcodes.
+    indexer = Indexer(context.nominatim.test_env['NOMINATIM_DATABASE_DSN'][6:], 1)
+    indexer.index_full(analyse=False)
+
     check_database_integrity(context)
 
 @when("updating places")
@@ -93,8 +99,7 @@ def update_place_table(context):
     """ Update the place table with the given data. Also runs all triggers
         related to updates and reindexes the new data.
     """
-    context.nominatim.run_setup_script(
-        'create-functions', 'create-partition-functions', 'enable-diff-updates')
+    context.nominatim.run_nominatim('refresh', '--functions')
     with context.db.cursor() as cur:
         for row in context.table:
             PlaceColumn(context).add_row(row, False).db_insert(cur)
@@ -106,7 +111,7 @@ def update_postcodes(context):
     """ Rerun the calculation of postcodes.
     """
-    context.nominatim.run_update_script('calculate-postcodes')
+    context.nominatim.run_nominatim('refresh', '--postcodes')
 
 @when("marking for delete (?P<oids>.*)")
 def delete_places(context, oids):
     """ Delete places listed in a list of OSM type/ids, separated by commas.
         Also runs all triggers related to updates and reindexes the new data.
""" - context.nominatim.run_setup_script( - 'create-functions', 'create-partition-functions', 'enable-diff-updates') + context.nominatim.run_nominatim('refresh', '--functions') with context.db.cursor() as cur: for oid in oids.split(','): NominatimID(oid).query_osm_id(cur, 'DELETE FROM place WHERE {}') diff --git a/test/bdd/steps/steps_osm_data.py b/test/bdd/steps/steps_osm_data.py index 3858198b..844fb274 100644 --- a/test/bdd/steps/steps_osm_data.py +++ b/test/bdd/steps/steps_osm_data.py @@ -75,9 +75,8 @@ def update_from_osm_file(context): The data is expected as attached text in OPL format. """ context.nominatim.copy_from_place(context.db) - context.nominatim.run_setup_script('index', 'index-noanalyse') - context.nominatim.run_setup_script('create-functions', 'create-partition-functions', - 'enable-diff-updates') + context.nominatim.run_nominatim('index') + context.nominatim.run_nominatim('refresh', '--functions') # create an OSM file and import it fname = write_opl_file(context.text, context.osm) diff --git a/test/php/Nominatim/DBTest.php b/test/php/Nominatim/DBTest.php index 8a3157a8..1a2ecc86 100644 --- a/test/php/Nominatim/DBTest.php +++ b/test/php/Nominatim/DBTest.php @@ -159,31 +159,11 @@ class DBTest extends \PHPUnit\Framework\TestCase # Tables, Indices { - $this->assertEmpty($oDB->getListOfTables()); $oDB->exec('CREATE TABLE table1 (id integer, city varchar, country varchar)'); - $oDB->exec('CREATE TABLE table2 (id integer, city varchar, country varchar)'); - $this->assertEquals( - array('table1', 'table2'), - $oDB->getListOfTables() - ); - $this->assertTrue($oDB->deleteTable('table2')); - $this->assertTrue($oDB->deleteTable('table99')); - $this->assertEquals( - array('table1'), - $oDB->getListOfTables() - ); $this->assertTrue($oDB->tableExists('table1')); $this->assertFalse($oDB->tableExists('table99')); $this->assertFalse($oDB->tableExists(null)); - - $this->assertEmpty($oDB->getListOfIndices()); - $oDB->exec('CREATE UNIQUE INDEX table1_index ON table1 (id)'); - $this->assertEquals( - array('table1_index'), - $oDB->getListOfIndices() - ); - $this->assertEmpty($oDB->getListOfIndices('table2')); } # select queries diff --git a/test/php/Nominatim/LibTest.php b/test/php/Nominatim/LibTest.php index 6e9038ee..5111b326 100644 --- a/test/php/Nominatim/LibTest.php +++ b/test/php/Nominatim/LibTest.php @@ -15,26 +15,6 @@ class LibTest extends \PHPUnit\Framework\TestCase $this->assertSame("''", addQuotes('')); } - - public function testCreatePointsAroundCenter() - { - // you might say we're creating a circle - $aPoints = createPointsAroundCenter(0, 0, 2); - - $this->assertEquals( - 101, - count($aPoints) - ); - $this->assertEquals( - array( - array('', 0, 2), - array('', 0.12558103905863, 1.9960534568565), - array('', 0.25066646712861, 1.984229402629) - ), - array_splice($aPoints, 0, 3) - ); - } - public function testParseLatLon() { // no coordinates expected @@ -132,12 +112,4 @@ class LibTest extends \PHPUnit\Framework\TestCase // start == end $this->closestHouseNumberEvenOddOther(50, 50, 0.5, array('even' => 50, 'odd' => 50, 'other' => 50)); } - - public function testGetSearchRankLabel() - { - $this->assertEquals('unknown', getSearchRankLabel(null)); - $this->assertEquals('continent', getSearchRankLabel(0)); - $this->assertEquals('continent', getSearchRankLabel(1)); - $this->assertEquals('other: 30', getSearchRankLabel(30)); - } } diff --git a/test/python/conftest.py b/test/python/conftest.py index ecd40d7c..d16dceff 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ 
-36,6 +36,19 @@ class _TestingCursor(psycopg2.extras.DictCursor): return set((tuple(row) for row in self)) + def table_exists(self, table): + """ Check that a table with the given name exists in the database. + """ + num = self.scalar("""SELECT count(*) FROM pg_tables + WHERE tablename = %s""", (table, )) + return num == 1 + + def table_rows(self, table): + """ Return the number of rows in the given table. + """ + return self.scalar('SELECT count(*) FROM ' + table) + + @pytest.fixture def temp_db(monkeypatch): """ Create an empty database for the test. The database name is also @@ -63,6 +76,12 @@ def temp_db(monkeypatch): conn.close() + +@pytest.fixture +def dsn(temp_db): + return 'dbname=' + temp_db + + @pytest.fixture def temp_db_with_extensions(temp_db): conn = psycopg2.connect(database=temp_db) @@ -77,9 +96,8 @@ def temp_db_with_extensions(temp_db): def temp_db_conn(temp_db): """ Connection to the test database. """ - conn = connection.connect('dbname=' + temp_db) - yield conn - conn.close() + with connection.connect('dbname=' + temp_db) as conn: + yield conn @pytest.fixture @@ -94,10 +112,25 @@ def temp_db_cursor(temp_db): conn.close() +@pytest.fixture +def table_factory(temp_db_cursor): + def mk_table(name, definition='id INT', content=None): + temp_db_cursor.execute('CREATE TABLE {} ({})'.format(name, definition)) + if content is not None: + if not isinstance(content, str): + content = '),('.join([str(x) for x in content]) + temp_db_cursor.execute("INSERT INTO {} VALUES ({})".format(name, content)) + + return mk_table + + @pytest.fixture def def_config(): return Configuration(None, SRC_DIR.resolve() / 'settings') +@pytest.fixture +def src_dir(): + return SRC_DIR.resolve() @pytest.fixture def status_table(temp_db_conn): @@ -150,7 +183,7 @@ def place_row(place_table, temp_db_cursor): temp_db_cursor.execute("INSERT INTO place VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s)", (osm_id or next(idseq), osm_type, cls, typ, names, admin_level, address, extratags, - geom or 'SRID=4326;POINT(0 0 )')) + geom or 'SRID=4326;POINT(0 0)')) return _insert @@ -160,7 +193,7 @@ def placex_table(temp_db_with_extensions, temp_db_conn): """ with temp_db_conn.cursor() as cur: cur.execute("""CREATE TABLE placex ( - place_id BIGINT NOT NULL, + place_id BIGINT, parent_place_id BIGINT, linked_place_id BIGINT, importance FLOAT, @@ -183,9 +216,53 @@ def placex_table(temp_db_with_extensions, temp_db_conn): country_code varchar(2), housenumber TEXT, postcode TEXT, - centroid GEOMETRY(Geometry, 4326)) - """) + centroid GEOMETRY(Geometry, 4326))""") + temp_db_conn.commit() + + +@pytest.fixture +def osmline_table(temp_db_with_extensions, temp_db_conn): + with temp_db_conn.cursor() as cur: + cur.execute("""CREATE TABLE location_property_osmline ( + place_id BIGINT, + osm_id BIGINT, + parent_place_id BIGINT, + geometry_sector INTEGER, + indexed_date TIMESTAMP, + startnumber INTEGER, + endnumber INTEGER, + partition SMALLINT, + indexed_status SMALLINT, + linegeo GEOMETRY, + interpolationtype TEXT, + address HSTORE, + postcode TEXT, + country_code VARCHAR(2))""") temp_db_conn.commit() +@pytest.fixture +def word_table(temp_db, temp_db_conn): + with temp_db_conn.cursor() as cur: + cur.execute("""CREATE TABLE word ( + word_id INTEGER, + word_token text, + word text, + class text, + type text, + country_code varchar(2), + search_name_count INTEGER, + operator TEXT)""") + temp_db_conn.commit() + +@pytest.fixture +def osm2pgsql_options(temp_db): + return dict(osm2pgsql='echo', + osm2pgsql_cache=10, + osm2pgsql_style='style.file', 
+                threads=1,
+                dsn='dbname=' + temp_db,
+                flatnode_file='',
+                tablespaces=dict(slim_data='', slim_index='',
+                                 main_data='', main_index=''))
diff --git a/test/python/mocks.py b/test/python/mocks.py
new file mode 100644
index 00000000..415e18b3
--- /dev/null
+++ b/test/python/mocks.py
@@ -0,0 +1,18 @@
+"""
+Custom mocks for testing.
+"""
+
+
+class MockParamCapture:
+    """ Mock that records the parameters with which a function was called
+        as well as the number of calls.
+    """
+    def __init__(self, retval=0):
+        self.called = 0
+        self.return_value = retval
+
+    def __call__(self, *args, **kwargs):
+        self.called += 1
+        self.last_args = args
+        self.last_kwargs = kwargs
+        return self.return_value
diff --git a/test/python/test_cli.py b/test/python/test_cli.py
index 0c0a689e..418f4bcf 100644
--- a/test/python/test_cli.py
+++ b/test/python/test_cli.py
@@ -5,44 +5,37 @@ These tests just check that the various command line parameters route to the
 correct functionality. They use a lot of monkeypatching to avoid executing
 the actual functions.
 """
-import datetime as dt
-import psycopg2
+from pathlib import Path
+
 import pytest
-import time
 
+import nominatim.db.properties
 import nominatim.cli
 import nominatim.clicmd.api
 import nominatim.clicmd.refresh
 import nominatim.clicmd.admin
+import nominatim.clicmd.setup
 import nominatim.indexer.indexer
+import nominatim.tools.admin
+import nominatim.tools.check_database
+import nominatim.tools.database_import
+import nominatim.tools.freeze
 import nominatim.tools.refresh
-import nominatim.tools.replication
-from nominatim.errors import UsageError
-from nominatim.db import status
+
+from mocks import MockParamCapture
+
+SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve()
 
 def call_nominatim(*args):
     return nominatim.cli.nominatim(module_dir='build/module',
                                    osm2pgsql_path='build/osm2pgsql/osm2pgsql',
-                                   phplib_dir='lib-php',
-                                   data_dir='.',
+                                   phplib_dir=str(SRC_DIR / 'lib-php'),
+                                   data_dir=str(SRC_DIR / 'data'),
                                    phpcgi_path='/usr/bin/php-cgi',
-                                   sqllib_dir='lib-sql',
-                                   config_dir='settings',
+                                   sqllib_dir=str(SRC_DIR / 'lib-sql'),
+                                   config_dir=str(SRC_DIR / 'settings'),
                                    cli_args=args)
 
-class MockParamCapture:
-    """ Mock that records the parameters with which a function was called
-        as well as the number of calls.
-    """
-    def __init__(self, retval=0):
-        self.called = 0
-        self.return_value = retval
-
-    def __call__(self, *args, **kwargs):
-        self.called += 1
-        self.last_args = args
-        self.last_kwargs = kwargs
-        return self.return_value
 
 @pytest.fixture
 def mock_run_legacy(monkeypatch):
@@ -51,6 +44,16 @@
     return mock
 
 
+@pytest.fixture
+def mock_func_factory(monkeypatch):
+    def get_mock(module, func):
+        mock = MockParamCapture()
+        monkeypatch.setattr(module, func, mock)
+        return mock
+
+    return get_mock
+
+
 def test_cli_help(capsys):
     """ Running nominatim tool without arguments prints help.
""" @@ -61,8 +64,6 @@ def test_cli_help(capsys): @pytest.mark.parametrize("command,script", [ - (('import', '--continue', 'load-data'), 'setup'), - (('freeze',), 'setup'), (('special-phrases',), 'specialphrases'), (('add-data', '--tiger-data', 'tiger'), 'setup'), (('add-data', '--file', 'foo.osm'), 'update'), @@ -75,26 +76,78 @@ def test_legacy_commands_simple(mock_run_legacy, command, script): assert mock_run_legacy.last_args[0] == script + '.php' +def test_import_missing_file(temp_db): + assert 1 == call_nominatim('import', '--osm-file', 'sfsafegweweggdgw.reh.erh') + + +def test_import_bad_file(temp_db): + assert 1 == call_nominatim('import', '--osm-file', '.') + + +def test_import_full(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'), + mock_func_factory(nominatim.tools.database_import, 'install_module'), + mock_func_factory(nominatim.tools.database_import, 'import_osm_data'), + mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'), + mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), + mock_func_factory(nominatim.tools.database_import, 'load_data'), + mock_func_factory(nominatim.tools.database_import, 'create_tables'), + mock_func_factory(nominatim.tools.database_import, 'create_table_triggers'), + mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'), + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') + mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script') + + assert 0 == call_nominatim('import', '--osm-file', __file__) + + assert cf_mock.called > 1 + + for mock in mocks: + assert mock.called == 1 + +def test_freeze_command(mock_func_factory, temp_db): + mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables') + mock_flatnode = mock_func_factory(nominatim.tools.freeze, 'drop_flatnode_file') + + assert 0 == call_nominatim('freeze') + + assert mock_drop.called == 1 + assert mock_flatnode.called == 1 + + @pytest.mark.parametrize("params", [('--warm', ), ('--warm', '--reverse-only'), - ('--warm', '--search-only'), - ('--check-database', )]) -def test_admin_command_legacy(monkeypatch, params): - mock_run_legacy = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.admin, 'run_legacy_script', mock_run_legacy) + ('--warm', '--search-only')]) +def test_admin_command_legacy(mock_func_factory, params): + mock_run_legacy = mock_func_factory(nominatim.clicmd.admin, 'run_legacy_script') assert 0 == call_nominatim('admin', *params) assert mock_run_legacy.called == 1 + @pytest.mark.parametrize("func, params", [('analyse_indexing', ('--analyse-indexing', ))]) -def test_admin_command_tool(temp_db, monkeypatch, func, params): - mock = MockParamCapture() - monkeypatch.setattr(nominatim.tools.admin, func, mock) +def test_admin_command_tool(temp_db, mock_func_factory, func, params): + mock = mock_func_factory(nominatim.tools.admin, func) assert 0 == call_nominatim('admin', *params) assert mock.called == 1 + +def test_admin_command_check_database(mock_func_factory): + mock = mock_func_factory(nominatim.tools.check_database, 'check_database') + + assert 0 == 
call_nominatim('admin', '--check-database') + assert mock.called == 1 + + @pytest.mark.parametrize("name,oid", [('file', 'foo.osm'), ('diff', 'foo.osc'), ('node', 12), ('way', 8), ('relation', 32)]) def test_add_data_command(mock_run_legacy, name, oid): @@ -109,12 +162,10 @@ def test_add_data_command(mock_run_legacy, name, oid): (['--boundaries-only'], 1, 0), (['--no-boundaries'], 0, 1), (['--boundaries-only', '--no-boundaries'], 0, 0)]) -def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): +def test_index_command(mock_func_factory, temp_db_cursor, params, do_bnds, do_ranks): temp_db_cursor.execute("CREATE TABLE import_status (indexed bool)") - bnd_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', bnd_mock) - rank_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', rank_mock) + bnd_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_boundaries') + rank_mock = mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_by_rank') assert 0 == call_nominatim('index', *params) @@ -122,123 +173,36 @@ def test_index_command(monkeypatch, temp_db_cursor, params, do_bnds, do_ranks): assert rank_mock.called == do_ranks -@pytest.mark.parametrize("command,params", [ - ('wiki-data', ('setup.php', '--import-wikipedia-articles')), - ('importance', ('update.php', '--recompute-importance')), - ('website', ('setup.php', '--setup-website')), - ]) -def test_refresh_legacy_command(monkeypatch, temp_db, command, params): - mock_run_legacy = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy) - - assert 0 == call_nominatim('refresh', '--' + command) - - assert mock_run_legacy.called == 1 - assert len(mock_run_legacy.last_args) >= len(params) - assert mock_run_legacy.last_args[:len(params)] == params - @pytest.mark.parametrize("command,func", [ ('postcodes', 'update_postcodes'), ('word-counts', 'recompute_word_counts'), ('address-levels', 'load_address_levels_from_file'), ('functions', 'create_functions'), + ('wiki-data', 'import_wikipedia_articles'), + ('importance', 'recompute_importance'), + ('website', 'setup_website'), ]) -def test_refresh_command(monkeypatch, temp_db, command, func): - func_mock = MockParamCapture() - monkeypatch.setattr(nominatim.tools.refresh, func, func_mock) +def test_refresh_command(mock_func_factory, temp_db, command, func): + func_mock = mock_func_factory(nominatim.tools.refresh, func) assert 0 == call_nominatim('refresh', '--' + command) assert func_mock.called == 1 def test_refresh_importance_computed_after_wiki_import(monkeypatch, temp_db): - mock_run_legacy = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.refresh, 'run_legacy_script', mock_run_legacy) + calls = [] + monkeypatch.setattr(nominatim.tools.refresh, 'import_wikipedia_articles', + lambda *args, **kwargs: calls.append('import') or 0) + monkeypatch.setattr(nominatim.tools.refresh, 'recompute_importance', + lambda *args, **kwargs: calls.append('update')) assert 0 == call_nominatim('refresh', '--importance', '--wiki-data') - assert mock_run_legacy.called == 2 - assert mock_run_legacy.last_args == ('update.php', '--recompute-importance') - - -@pytest.mark.parametrize("params,func", [ - (('--init', '--no-update-functions'), 'init_replication'), - (('--check-for-updates',), 'check_for_updates') - ]) -def test_replication_command(monkeypatch, temp_db, params, func): - func_mock = MockParamCapture() - 
monkeypatch.setattr(nominatim.tools.replication, func, func_mock) - - assert 0 == call_nominatim('replication', *params) - assert func_mock.called == 1 - - -def test_replication_update_bad_interval(monkeypatch, temp_db): - monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx') - - assert call_nominatim('replication') == 1 - - -def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db): - monkeypatch.setenv('NOMINATIM_REPLICATION_URL', - 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates') - - assert call_nominatim('replication') == 1 - - -@pytest.mark.parametrize("state", [nominatim.tools.replication.UpdateState.UP_TO_DATE, - nominatim.tools.replication.UpdateState.NO_CHANGES]) -def test_replication_update_once_no_index(monkeypatch, temp_db, temp_db_conn, - status_table, state): - status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) - func_mock = MockParamCapture(retval=state) - monkeypatch.setattr(nominatim.tools.replication, 'update', func_mock) - - assert 0 == call_nominatim('replication', '--once', '--no-index') - - -def test_replication_update_continuous(monkeypatch, temp_db_conn, status_table): - status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) - states = [nominatim.tools.replication.UpdateState.UP_TO_DATE, - nominatim.tools.replication.UpdateState.UP_TO_DATE] - monkeypatch.setattr(nominatim.tools.replication, 'update', - lambda *args, **kwargs: states.pop()) - - index_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock) - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock) - - with pytest.raises(IndexError): - call_nominatim('replication') - - assert index_mock.called == 4 - - -def test_replication_update_continuous_no_change(monkeypatch, temp_db_conn, status_table): - status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) - states = [nominatim.tools.replication.UpdateState.NO_CHANGES, - nominatim.tools.replication.UpdateState.UP_TO_DATE] - monkeypatch.setattr(nominatim.tools.replication, 'update', - lambda *args, **kwargs: states.pop()) - - index_mock = MockParamCapture() - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', index_mock) - monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', index_mock) - - sleep_mock = MockParamCapture() - monkeypatch.setattr(time, 'sleep', sleep_mock) - - with pytest.raises(IndexError): - call_nominatim('replication') - - assert index_mock.called == 2 - assert sleep_mock.called == 1 - assert sleep_mock.last_args[0] == 60 + assert calls == ['import', 'update'] -def test_serve_command(monkeypatch): - func = MockParamCapture() - monkeypatch.setattr(nominatim.cli, 'run_php_server', func) +def test_serve_command(mock_func_factory): + func = mock_func_factory(nominatim.cli, 'run_php_server') call_nominatim('serve') @@ -254,9 +218,8 @@ def test_serve_command(monkeypatch): ('details', '--place_id', '10001'), ('status',) ]) -def test_api_commands_simple(monkeypatch, params): - mock_run_api = MockParamCapture() - monkeypatch.setattr(nominatim.clicmd.api, 'run_api_script', mock_run_api) +def test_api_commands_simple(mock_func_factory, params): + mock_run_api = mock_func_factory(nominatim.clicmd.api, 'run_api_script') assert 0 == call_nominatim(*params) diff --git a/test/python/test_cli_replication.py b/test/python/test_cli_replication.py new file mode 100644 index 00000000..a62ad1a4 --- /dev/null 
+++ b/test/python/test_cli_replication.py @@ -0,0 +1,127 @@ +""" +Tests for replication command of command-line interface wrapper. +""" +import datetime as dt +import time +from pathlib import Path + +import pytest + +import nominatim.cli +import nominatim.indexer.indexer +import nominatim.tools.replication +from nominatim.db import status + +from mocks import MockParamCapture + +SRC_DIR = (Path(__file__) / '..' / '..' / '..').resolve() + +def call_nominatim(*args): + return nominatim.cli.nominatim(module_dir='build/module', + osm2pgsql_path='build/osm2pgsql/osm2pgsql', + phplib_dir=str(SRC_DIR / 'lib-php'), + data_dir=str(SRC_DIR / 'data'), + phpcgi_path='/usr/bin/php-cgi', + sqllib_dir=str(SRC_DIR / 'lib-sql'), + config_dir=str(SRC_DIR / 'settings'), + cli_args=['replication'] + list(args)) + +@pytest.fixture +def index_mock(monkeypatch): + mock = MockParamCapture() + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_boundaries', mock) + monkeypatch.setattr(nominatim.indexer.indexer.Indexer, 'index_by_rank', mock) + + return mock + + +@pytest.fixture +def mock_func_factory(monkeypatch): + def get_mock(module, func): + mock = MockParamCapture() + monkeypatch.setattr(module, func, mock) + return mock + + return get_mock + + +@pytest.fixture +def init_status(temp_db_conn, status_table): + status.set_status(temp_db_conn, date=dt.datetime.now(dt.timezone.utc), seq=1) + return 1 + + +@pytest.fixture +def update_mock(mock_func_factory, init_status): + return mock_func_factory(nominatim.tools.replication, 'update') + +@pytest.mark.parametrize("params,func", [ + (('--init', '--no-update-functions'), 'init_replication'), + (('--check-for-updates',), 'check_for_updates') + ]) +def test_replication_command(mock_func_factory, temp_db, params, func): + func_mock = mock_func_factory(nominatim.tools.replication, func) + + assert 0 == call_nominatim(*params) + assert func_mock.called == 1 + + +def test_replication_update_bad_interval(monkeypatch, temp_db): + monkeypatch.setenv('NOMINATIM_REPLICATION_UPDATE_INTERVAL', 'xx') + + assert call_nominatim() == 1 + + +def test_replication_update_bad_interval_for_geofabrik(monkeypatch, temp_db): + monkeypatch.setenv('NOMINATIM_REPLICATION_URL', + 'https://download.geofabrik.de/europe/ireland-and-northern-ireland-updates') + + assert call_nominatim() == 1 + + +def test_replication_update_once_no_index(update_mock): + assert 0 == call_nominatim('--once', '--no-index') + + assert str(update_mock.last_args[1]['osm2pgsql']) == 'build/osm2pgsql/osm2pgsql' + + +def test_replication_update_custom_osm2pgsql(monkeypatch, update_mock): + monkeypatch.setenv('NOMINATIM_OSM2PGSQL_BINARY', '/secret/osm2pgsql') + assert 0 == call_nominatim('--once', '--no-index') + + assert str(update_mock.last_args[1]['osm2pgsql']) == '/secret/osm2pgsql' + + +def test_replication_update_custom_threads(update_mock): + assert 0 == call_nominatim('--once', '--no-index', '--threads', '4') + + assert update_mock.last_args[1]['threads'] == 4 + + +def test_replication_update_continuous(monkeypatch, init_status, index_mock): + states = [nominatim.tools.replication.UpdateState.UP_TO_DATE, + nominatim.tools.replication.UpdateState.UP_TO_DATE] + monkeypatch.setattr(nominatim.tools.replication, 'update', + lambda *args, **kwargs: states.pop()) + + with pytest.raises(IndexError): + call_nominatim() + + assert index_mock.called == 4 + + +def test_replication_update_continuous_no_change(monkeypatch, init_status, index_mock): + states = [nominatim.tools.replication.UpdateState.NO_CHANGES, + 
nominatim.tools.replication.UpdateState.UP_TO_DATE] + monkeypatch.setattr(nominatim.tools.replication, 'update', + lambda *args, **kwargs: states.pop()) + + sleep_mock = MockParamCapture() + monkeypatch.setattr(time, 'sleep', sleep_mock) + + with pytest.raises(IndexError): + call_nominatim() + + assert index_mock.called == 2 + assert sleep_mock.called == 1 + assert sleep_mock.last_args[0] == 60 diff --git a/test/python/test_config.py b/test/python/test_config.py index 4578be13..d6911957 100644 --- a/test/python/test_config.py +++ b/test/python/test_config.py @@ -2,7 +2,6 @@ Test for loading dotenv configuration. """ from pathlib import Path -import tempfile import pytest @@ -17,26 +16,25 @@ def test_no_project_dir(): assert config.DATABASE_WEBUSER == 'www-data' -def test_prefer_project_setting_over_default(): - with tempfile.TemporaryDirectory() as project_dir: - with open(project_dir + '/.env', 'w') as envfile: - envfile.write('NOMINATIM_DATABASE_WEBUSER=apache\n') +@pytest.mark.parametrize("val", ('apache', '"apache"')) +def test_prefer_project_setting_over_default(val, tmp_path): + envfile = tmp_path / '.env' + envfile.write_text('NOMINATIM_DATABASE_WEBUSER={}\n'.format(val)) - config = Configuration(Path(project_dir), DEFCFG_DIR) + config = Configuration(Path(tmp_path), DEFCFG_DIR) - assert config.DATABASE_WEBUSER == 'apache' + assert config.DATABASE_WEBUSER == 'apache' -def test_prefer_os_environ_over_project_setting(monkeypatch): - with tempfile.TemporaryDirectory() as project_dir: - with open(project_dir + '/.env', 'w') as envfile: - envfile.write('NOMINATIM_DATABASE_WEBUSER=apache\n') +def test_prefer_os_environ_over_project_setting(monkeypatch, tmp_path): + envfile = tmp_path / '.env' + envfile.write_text('NOMINATIM_DATABASE_WEBUSER=apache\n') - monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody') + monkeypatch.setenv('NOMINATIM_DATABASE_WEBUSER', 'nobody') - config = Configuration(Path(project_dir), DEFCFG_DIR) + config = Configuration(Path(tmp_path), DEFCFG_DIR) - assert config.DATABASE_WEBUSER == 'nobody' + assert config.DATABASE_WEBUSER == 'nobody' def test_get_os_env_add_defaults(monkeypatch): diff --git a/test/python/test_db_async_connection.py b/test/python/test_db_async_connection.py new file mode 100644 index 00000000..b52f7053 --- /dev/null +++ b/test/python/test_db_async_connection.py @@ -0,0 +1,102 @@ +""" +Tests for function providing a non-blocking query interface towards PostgreSQL. 
+""" +from contextlib import closing +import concurrent.futures + +import pytest +import psycopg2 +from psycopg2.extras import wait_select + +from nominatim.db.async_connection import DBConnection, DeadlockHandler + + +@pytest.fixture +def conn(temp_db): + with closing(DBConnection('dbname=' + temp_db)) as c: + yield c + + +@pytest.fixture +def simple_conns(temp_db): + conn1 = psycopg2.connect('dbname=' + temp_db) + conn2 = psycopg2.connect('dbname=' + temp_db) + + yield conn1.cursor(), conn2.cursor() + + conn1.close() + conn2.close() + + +def test_simple_query(conn, temp_db_conn): + conn.connect() + + conn.perform('CREATE TABLE foo (id INT)') + conn.wait() + + temp_db_conn.table_exists('foo') + + +def test_wait_for_query(conn): + conn.connect() + + conn.perform('SELECT pg_sleep(1)') + + assert not conn.is_done() + + conn.wait() + + +def test_bad_query(conn): + conn.connect() + + conn.perform('SELECT efasfjsea') + + with pytest.raises(psycopg2.ProgrammingError): + conn.wait() + + +def exec_with_deadlock(cur, sql, detector): + with DeadlockHandler(lambda *args: detector.append(1)): + cur.execute(sql) + + +def test_deadlock(simple_conns): + print(psycopg2.__version__) + cur1, cur2 = simple_conns + + cur1.execute("""CREATE TABLE t1 (id INT PRIMARY KEY, t TEXT); + INSERT into t1 VALUES (1, 'a'), (2, 'b')""") + cur1.connection.commit() + + cur1.execute("UPDATE t1 SET t = 'x' WHERE id = 1") + cur2.execute("UPDATE t1 SET t = 'x' WHERE id = 2") + + # This is the tricky part of the test. The first SQL command runs into + # a lock and blocks, so we have to run it in a separate thread. When the + # second deadlocking SQL statement is issued, Postgresql will abort one of + # the two transactions that cause the deadlock. There is no way to tell + # which one of the two. Therefore wrap both in a DeadlockHandler and + # expect that exactly one of the two triggers. + with concurrent.futures.ThreadPoolExecutor(max_workers=1) as executor: + deadlock_check = [] + try: + future = executor.submit(exec_with_deadlock, cur2, + "UPDATE t1 SET t = 'y' WHERE id = 1", + deadlock_check) + + while not future.running(): + pass + + + exec_with_deadlock(cur1, "UPDATE t1 SET t = 'y' WHERE id = 2", + deadlock_check) + finally: + # Whatever happens, make sure the deadlock gets resolved. + cur1.connection.rollback() + + future.result() + + assert len(deadlock_check) == 1 + + diff --git a/test/python/test_db_connection.py b/test/python/test_db_connection.py index ef1ae741..5de68618 100644 --- a/test/python/test_db_connection.py +++ b/test/python/test_db_connection.py @@ -2,31 +2,106 @@ Tests for specialised conenction and cursor classes. 
""" import pytest +import psycopg2 -from nominatim.db.connection import connect +from nominatim.db.connection import connect, get_pg_env @pytest.fixture def db(temp_db): - conn = connect('dbname=' + temp_db) - yield conn - conn.close() + with connect('dbname=' + temp_db) as conn: + yield conn -def test_connection_table_exists(db, temp_db_cursor): +def test_connection_table_exists(db, table_factory): assert db.table_exists('foobar') == False - temp_db_cursor.execute('CREATE TABLE foobar (id INT)') + table_factory('foobar') assert db.table_exists('foobar') == True -def test_cursor_scalar(db, temp_db_cursor): - temp_db_cursor.execute('CREATE TABLE dummy (id INT)') +def test_connection_index_exists(db, temp_db_cursor): + assert db.index_exists('some_index') == False + + temp_db_cursor.execute('CREATE TABLE foobar (id INT)') + temp_db_cursor.execute('CREATE INDEX some_index ON foobar(id)') + + assert db.index_exists('some_index') == True + assert db.index_exists('some_index', table='foobar') == True + assert db.index_exists('some_index', table='bar') == False + + +def test_drop_table_existing(db, table_factory): + table_factory('dummy') + assert db.table_exists('dummy') + + db.drop_table('dummy') + assert not db.table_exists('dummy') + + +def test_drop_table_non_existsing(db): + db.drop_table('dfkjgjriogjigjgjrdghehtre') + + +def test_drop_table_non_existing_force(db): + with pytest.raises(psycopg2.ProgrammingError, match='.*does not exist.*'): + db.drop_table('dfkjgjriogjigjgjrdghehtre', if_exists=False) + +def test_connection_server_version_tuple(db): + ver = db.server_version_tuple() + + assert isinstance(ver, tuple) + assert len(ver) == 2 + assert ver[0] > 8 + + +def test_connection_postgis_version_tuple(db, temp_db_cursor): + temp_db_cursor.execute('CREATE EXTENSION postgis') + + ver = db.postgis_version_tuple() + + assert isinstance(ver, tuple) + assert len(ver) == 2 + assert ver[0] >= 2 + + +def test_cursor_scalar(db, table_factory): + table_factory('dummy') with db.cursor() as cur: assert cur.scalar('SELECT count(*) FROM dummy') == 0 + def test_cursor_scalar_many_rows(db): with db.cursor() as cur: with pytest.raises(RuntimeError): cur.scalar('SELECT * FROM pg_tables') + + +def test_cursor_scalar_no_rows(db, table_factory): + table_factory('dummy') + + with db.cursor() as cur: + with pytest.raises(RuntimeError): + cur.scalar('SELECT id FROM dummy') + + +def test_get_pg_env_add_variable(monkeypatch): + monkeypatch.delenv('PGPASSWORD', raising=False) + env = get_pg_env('user=fooF') + + assert env['PGUSER'] == 'fooF' + assert 'PGPASSWORD' not in env + + +def test_get_pg_env_overwrite_variable(monkeypatch): + monkeypatch.setenv('PGUSER', 'some default') + env = get_pg_env('user=overwriter') + + assert env['PGUSER'] == 'overwriter' + + +def test_get_pg_env_ignore_unknown(): + env = get_pg_env('tty=stuff', base_env={}) + + assert env == {} diff --git a/test/python/test_db_properties.py b/test/python/test_db_properties.py new file mode 100644 index 00000000..9621c68c --- /dev/null +++ b/test/python/test_db_properties.py @@ -0,0 +1,35 @@ +""" +Tests for property table manpulation. 
+""" +import pytest + +from nominatim.db import properties + +@pytest.fixture +def prop_table(table_factory): + table_factory('nominatim_properties', 'property TEXT, value TEXT') + + +def test_get_property_existing(prop_table, temp_db_conn, temp_db_cursor): + temp_db_cursor.execute("INSERT INTO nominatim_properties VALUES('foo', 'bar')") + + assert properties.get_property(temp_db_conn, 'foo') == 'bar' + + +def test_get_property_unknown(prop_table, temp_db_conn, temp_db_cursor): + temp_db_cursor.execute("INSERT INTO nominatim_properties VALUES('other', 'bar')") + + assert properties.get_property(temp_db_conn, 'foo') is None + + +@pytest.mark.parametrize("prefill", (True, False)) +def test_set_property_new(prop_table, temp_db_conn, temp_db_cursor, prefill): + if prefill: + temp_db_cursor.execute("INSERT INTO nominatim_properties VALUES('something', 'bar')") + + properties.set_property(temp_db_conn, 'something', 'else') + + assert temp_db_cursor.scalar("""SELECT value FROM nominatim_properties + WHERE property = 'something'""") == 'else' + + assert properties.get_property(temp_db_conn, 'something') == 'else' diff --git a/test/python/test_db_sql_preprocessor.py b/test/python/test_db_sql_preprocessor.py new file mode 100644 index 00000000..3c10000f --- /dev/null +++ b/test/python/test_db_sql_preprocessor.py @@ -0,0 +1,51 @@ +""" +Tests for SQL preprocessing. +""" +from pathlib import Path + +import pytest + +from nominatim.db.sql_preprocessor import SQLPreprocessor + +@pytest.fixture +def sql_factory(tmp_path): + def _mk_sql(sql_body): + (tmp_path / 'test.sql').write_text(""" + CREATE OR REPLACE FUNCTION test() RETURNS TEXT + AS $$ + BEGIN + {} + END; + $$ LANGUAGE plpgsql IMMUTABLE;""".format(sql_body)) + return 'test.sql' + + return _mk_sql + + +@pytest.fixture +def sql_preprocessor(temp_db_conn, tmp_path, def_config, monkeypatch, table_factory): + monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') + table_factory('country_name', 'partition INT', (0, 1, 2)) + return SQLPreprocessor(temp_db_conn, def_config, tmp_path) + +@pytest.mark.parametrize("expr,ret", [ + ("'a'", 'a'), + ("'{{db.partitions|join}}'", '012'), + ("{% if 'country_name' in db.tables %}'yes'{% else %}'no'{% endif %}", "yes"), + ("{% if 'xxx' in db.tables %}'yes'{% else %}'no'{% endif %}", "no"), + ("'{{config.DATABASE_MODULE_PATH}}'", '.') + ]) +def test_load_file_simple(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor, expr, ret): + sqlfile = sql_factory("RETURN {};".format(expr)) + + sql_preprocessor.run_sql_file(temp_db_conn, sqlfile) + + assert temp_db_cursor.scalar('SELECT test()') == ret + + +def test_load_file_with_params(sql_preprocessor, sql_factory, temp_db_conn, temp_db_cursor): + sqlfile = sql_factory("RETURN '{{ foo }} {{ bar }}';") + + sql_preprocessor.run_sql_file(temp_db_conn, sqlfile, bar='XX', foo='ZZ') + + assert temp_db_cursor.scalar('SELECT test()') == 'ZZ XX' diff --git a/test/python/test_db_utils.py b/test/python/test_db_utils.py index e756f2c4..b8a49ccf 100644 --- a/test/python/test_db_utils.py +++ b/test/python/test_db_utils.py @@ -5,26 +5,58 @@ import psycopg2 import pytest import nominatim.db.utils as db_utils +from nominatim.errors import UsageError -def test_execute_file_success(temp_db_conn, tmp_path): +def test_execute_file_success(dsn, temp_db_cursor, tmp_path): tmpfile = tmp_path / 'test.sql' tmpfile.write_text('CREATE TABLE test (id INT);\nINSERT INTO test VALUES(56);') - db_utils.execute_file(temp_db_conn, tmpfile) + db_utils.execute_file(dsn, tmpfile) - with 
temp_db_conn.cursor() as cur: - cur.execute('SELECT * FROM test') + temp_db_cursor.execute('SELECT * FROM test') - assert cur.rowcount == 1 - assert cur.fetchone()[0] == 56 + assert temp_db_cursor.rowcount == 1 + assert temp_db_cursor.fetchone()[0] == 56 -def test_execute_file_bad_file(temp_db_conn, tmp_path): +def test_execute_file_bad_file(dsn, tmp_path): with pytest.raises(FileNotFoundError): - db_utils.execute_file(temp_db_conn, tmp_path / 'test2.sql') + db_utils.execute_file(dsn, tmp_path / 'test2.sql') -def test_execute_file_bad_sql(temp_db_conn, tmp_path): + +def test_execute_file_bad_sql(dsn, tmp_path): tmpfile = tmp_path / 'test.sql' tmpfile.write_text('CREATE STABLE test (id INT)') - with pytest.raises(psycopg2.ProgrammingError): - db_utils.execute_file(temp_db_conn, tmpfile) + with pytest.raises(UsageError): + db_utils.execute_file(dsn, tmpfile) + + +def test_execute_file_bad_sql_ignore_errors(dsn, tmp_path): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('CREATE STABLE test (id INT)') + + db_utils.execute_file(dsn, tmpfile, ignore_errors=True) + + +def test_execute_file_with_pre_code(dsn, tmp_path, temp_db_cursor): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('INSERT INTO test VALUES(4)') + + db_utils.execute_file(dsn, tmpfile, pre_code='CREATE TABLE test (id INT)') + + temp_db_cursor.execute('SELECT * FROM test') + + assert temp_db_cursor.rowcount == 1 + assert temp_db_cursor.fetchone()[0] == 4 + + +def test_execute_file_with_post_code(dsn, tmp_path, temp_db_cursor): + tmpfile = tmp_path / 'test.sql' + tmpfile.write_text('CREATE TABLE test (id INT)') + + db_utils.execute_file(dsn, tmpfile, post_code='INSERT INTO test VALUES(23)') + + temp_db_cursor.execute('SELECT * FROM test') + + assert temp_db_cursor.rowcount == 1 + assert temp_db_cursor.fetchone()[0] == 23 diff --git a/test/python/test_indexing.py b/test/python/test_indexing.py index 6b52a65e..ee9c6c7e 100644 --- a/test/python/test_indexing.py +++ b/test/python/test_indexing.py @@ -12,6 +12,7 @@ class IndexerTestDB: def __init__(self, conn): self.placex_id = itertools.count(100000) self.osmline_id = itertools.count(500000) + self.postcode_id = itertools.count(700000) self.conn = conn self.conn.set_isolation_level(0) @@ -31,6 +32,12 @@ class IndexerTestDB: indexed_status SMALLINT, indexed_date TIMESTAMP, geometry_sector INTEGER)""") + cur.execute("""CREATE TABLE location_postcode ( + place_id BIGINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT)""") cur.execute("""CREATE OR REPLACE FUNCTION date_update() RETURNS TRIGGER AS $$ BEGIN @@ -39,10 +46,10 @@ class IndexerTestDB: END IF; RETURN NEW; END; $$ LANGUAGE plpgsql;""") - cur.execute("""CREATE TRIGGER placex_update BEFORE UPDATE ON placex - FOR EACH ROW EXECUTE PROCEDURE date_update()""") - cur.execute("""CREATE TRIGGER osmline_update BEFORE UPDATE ON location_property_osmline - FOR EACH ROW EXECUTE PROCEDURE date_update()""") + for table in ('placex', 'location_property_osmline', 'location_postcode'): + cur.execute("""CREATE TRIGGER {0}_update BEFORE UPDATE ON {0} + FOR EACH ROW EXECUTE PROCEDURE date_update() + """.format(table)) def scalar(self, query): with self.conn.cursor() as cur: @@ -74,6 +81,15 @@ class IndexerTestDB: (next_id, sector)) return next_id + def add_postcode(self, country, postcode): + next_id = next(self.postcode_id) + with self.conn.cursor() as cur: + cur.execute("""INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode) + VALUES (%s, 1, %s, %s)""", + 
(next_id, country, postcode)) + return next_id + def placex_unindexed(self): return self.scalar('SELECT count(*) from placex where indexed_status > 0') @@ -87,7 +103,7 @@ def test_db(temp_db_conn): @pytest.mark.parametrize("threads", [1, 15]) -def test_index_full(test_db, threads): +def test_index_all_by_rank(test_db, threads): for rank in range(31): test_db.add_place(rank_address=rank, rank_search=rank) test_db.add_osmline() @@ -184,3 +200,35 @@ def test_index_boundaries(test_db, threads): assert 0 == test_db.scalar(""" SELECT count(*) FROM placex WHERE indexed_status = 0 AND class != 'boundary'""") + + +@pytest.mark.parametrize("threads", [1, 15]) +def test_index_postcodes(test_db, threads): + for postcode in range(1000): + test_db.add_postcode('de', postcode) + for postcode in range(32000, 33000): + test_db.add_postcode('us', postcode) + + idx = Indexer('dbname=test_nominatim_python_unittest', threads) + idx.index_postcodes() + + assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode + WHERE indexed_status != 0""") + + +def test_index_full(test_db): + for rank in range(4, 10): + test_db.add_admin(rank_address=rank, rank_search=rank) + for rank in range(31): + test_db.add_place(rank_address=rank, rank_search=rank) + test_db.add_osmline() + for postcode in range(1000): + test_db.add_postcode('de', postcode) + + idx = Indexer('dbname=test_nominatim_python_unittest', 4) + idx.index_full() + + assert 0 == test_db.placex_unindexed() + assert 0 == test_db.osmline_unindexed() + assert 0 == test_db.scalar("""SELECT count(*) FROM location_postcode + WHERE indexed_status != 0""") diff --git a/test/python/test_tools_admin.py b/test/python/test_tools_admin.py index a40a17db..36c7d6ff 100644 --- a/test/python/test_tools_admin.py +++ b/test/python/test_tools_admin.py @@ -9,9 +9,8 @@ from nominatim.tools import admin @pytest.fixture def db(temp_db, placex_table): - conn = connect('dbname=' + temp_db) - yield conn - conn.close() + with connect('dbname=' + temp_db) as conn: + yield conn def test_analyse_indexing_no_objects(db): with pytest.raises(UsageError): diff --git a/test/python/test_tools_check_database.py b/test/python/test_tools_check_database.py new file mode 100644 index 00000000..68b376a7 --- /dev/null +++ b/test/python/test_tools_check_database.py @@ -0,0 +1,84 @@ +""" +Tests for database integrity checks. 
+""" +import pytest + +from nominatim.tools import check_database as chkdb + +def test_check_database_unknown_db(def_config, monkeypatch): + monkeypatch.setenv('NOMINATIM_DATABASE_DSN', 'pgsql:dbname=fjgkhughwgh2423gsags') + assert 1 == chkdb.check_database(def_config) + + +def test_check_database_fatal_test(def_config, temp_db): + assert 1 == chkdb.check_database(def_config) + + +def test_check_conection_good(temp_db_conn, def_config): + assert chkdb.check_connection(temp_db_conn, def_config) == chkdb.CheckState.OK + + +def test_check_conection_bad(def_config): + badconn = chkdb._BadConnection('Error') + assert chkdb.check_connection(badconn, def_config) == chkdb.CheckState.FATAL + + +def test_check_placex_table_good(temp_db_cursor, temp_db_conn, def_config): + temp_db_cursor.execute('CREATE TABLE placex (place_id int)') + assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.OK + + +def test_check_placex_table_bad(temp_db_conn, def_config): + assert chkdb.check_placex_table(temp_db_conn, def_config) == chkdb.CheckState.FATAL + + +def test_check_placex_table_size_good(temp_db_cursor, temp_db_conn, def_config): + temp_db_cursor.execute('CREATE TABLE placex (place_id int)') + temp_db_cursor.execute('INSERT INTO placex VALUES (1), (2)') + assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.OK + + +def test_check_placex_table_size_bad(temp_db_cursor, temp_db_conn, def_config): + temp_db_cursor.execute('CREATE TABLE placex (place_id int)') + assert chkdb.check_placex_size(temp_db_conn, def_config) == chkdb.CheckState.FATAL + + +def test_check_module_bad(temp_db_conn, def_config): + assert chkdb.check_module(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + +def test_check_indexing_good(temp_db_cursor, temp_db_conn, def_config): + temp_db_cursor.execute('CREATE TABLE placex (place_id int, indexed_status smallint)') + temp_db_cursor.execute('INSERT INTO placex VALUES (1, 0), (2, 0)') + assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.OK + + +def test_check_indexing_bad(temp_db_cursor, temp_db_conn, def_config): + temp_db_cursor.execute('CREATE TABLE placex (place_id int, indexed_status smallint)') + temp_db_cursor.execute('INSERT INTO placex VALUES (1, 0), (2, 2)') + assert chkdb.check_indexing(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + +def test_check_database_indexes_bad(temp_db_conn, def_config): + assert chkdb.check_database_indexes(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + +def test_check_database_indexes_valid(temp_db_conn, def_config): + assert chkdb.check_database_index_valid(temp_db_conn, def_config) == chkdb.CheckState.OK + + +def test_check_tiger_table_disabled(temp_db_conn, def_config, monkeypatch): + monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA' , 'no') + assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.NOT_APPLICABLE + + +def test_check_tiger_table_enabled(temp_db_cursor, temp_db_conn, def_config, monkeypatch): + monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA' , 'yes') + assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + temp_db_cursor.execute('CREATE TABLE location_property_tiger (place_id int)') + assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.FAIL + + temp_db_cursor.execute('INSERT INTO location_property_tiger VALUES (1), (2)') + assert chkdb.check_tiger_table(temp_db_conn, def_config) == chkdb.CheckState.OK + diff --git a/test/python/test_tools_database_import.py 
b/test/python/test_tools_database_import.py
new file mode 100644
index 00000000..45324834
--- /dev/null
+++ b/test/python/test_tools_database_import.py
@@ -0,0 +1,202 @@
+"""
+Tests for functions to import a new database.
+"""
+import pytest
+import psycopg2
+import sys
+from pathlib import Path
+
+from nominatim.tools import database_import
+from nominatim.errors import UsageError
+
+@pytest.fixture
+def nonexistent_db():
+    dbname = 'test_nominatim_python_unittest'
+
+    conn = psycopg2.connect(database='postgres')
+
+    conn.set_isolation_level(0)
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
+
+    yield dbname
+
+    with conn.cursor() as cur:
+        cur.execute('DROP DATABASE IF EXISTS {}'.format(dbname))
+
+@pytest.mark.parametrize("no_partitions", (True, False))
+def test_setup_skeleton(src_dir, nonexistent_db, no_partitions):
+    database_import.setup_database_skeleton('dbname=' + nonexistent_db,
+                                            src_dir / 'data', no_partitions)
+
+    conn = psycopg2.connect(database=nonexistent_db)
+
+    try:
+        with conn.cursor() as cur:
+            cur.execute("SELECT distinct partition FROM country_name")
+            partitions = set([r[0] for r in list(cur)])
+            if no_partitions:
+                assert partitions == set([0])
+            else:
+                assert len(partitions) > 10
+    finally:
+        conn.close()
+
+
+def test_create_db_success(nonexistent_db):
+    database_import.create_db('dbname=' + nonexistent_db, rouser='www-data')
+
+    conn = psycopg2.connect(database=nonexistent_db)
+    conn.close()
+
+
+def test_create_db_already_exists(temp_db):
+    with pytest.raises(UsageError):
+        database_import.create_db('dbname=' + temp_db)
+
+
+def test_create_db_unsupported_version(nonexistent_db, monkeypatch):
+    monkeypatch.setattr(database_import, 'POSTGRESQL_REQUIRED_VERSION', (100, 4))
+
+    with pytest.raises(UsageError, match='PostgreSQL server is too old.'):
+        database_import.create_db('dbname=' + nonexistent_db)
+
+
+def test_create_db_missing_ro_user(nonexistent_db):
+    with pytest.raises(UsageError, match='Missing read-only user.'):
+        database_import.create_db('dbname=' + nonexistent_db, rouser='sdfwkjkjgdugu2;jgsafkljas;')
+
+
+def test_setup_extensions(temp_db_conn, temp_db_cursor):
+    database_import.setup_extensions(temp_db_conn)
+
+    temp_db_cursor.execute('CREATE TABLE t (h HSTORE, geom GEOMETRY(Geometry, 4326))')
+
+
+def test_setup_extensions_old_postgis(temp_db_conn, monkeypatch):
+    monkeypatch.setattr(database_import, 'POSTGIS_REQUIRED_VERSION', (50, 50))
+
+    with pytest.raises(UsageError, match='PostGIS version is too old.'):
+        database_import.setup_extensions(temp_db_conn)
+
+
+def test_install_module(tmp_path):
+    src_dir = tmp_path / 'source'
+    src_dir.mkdir()
+    (src_dir / 'nominatim.so').write_text('TEST nominatim.so')
+
+    project_dir = tmp_path / 'project'
+    project_dir.mkdir()
+
+    database_import.install_module(src_dir, project_dir, '')
+
+    outfile = project_dir / 'module' / 'nominatim.so'
+
+    assert outfile.exists()
+    assert outfile.read_text() == 'TEST nominatim.so'
+    assert outfile.stat().st_mode == 33261
+
+
+def test_install_module_custom(tmp_path):
+    (tmp_path / 'nominatim.so').write_text('TEST nominatim.so')
+
+    database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
+
+    assert not (tmp_path / 'module').exists()
+
+
+def test_install_module_fail_access(temp_db_conn, tmp_path):
+    (tmp_path / 'nominatim.so').write_text('TEST nominatim.so')
+
+    with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
+        database_import.install_module(tmp_path, tmp_path, '',
+                                       conn=temp_db_conn)
+
+
+def test_import_base_data(src_dir, temp_db, temp_db_cursor): + temp_db_cursor.execute('CREATE EXTENSION hstore') + temp_db_cursor.execute('CREATE EXTENSION postgis') + database_import.import_base_data('dbname=' + temp_db, src_dir / 'data') + + assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0 + + +def test_import_base_data_ignore_partitions(src_dir, temp_db, temp_db_cursor): + temp_db_cursor.execute('CREATE EXTENSION hstore') + temp_db_cursor.execute('CREATE EXTENSION postgis') + database_import.import_base_data('dbname=' + temp_db, src_dir / 'data', + ignore_partitions=True) + + assert temp_db_cursor.scalar('SELECT count(*) FROM country_name') > 0 + assert temp_db_cursor.scalar('SELECT count(*) FROM country_name WHERE partition != 0') == 0 + + +def test_import_osm_data_simple(temp_db_cursor,osm2pgsql_options): + temp_db_cursor.execute('CREATE TABLE place (id INT)') + temp_db_cursor.execute('INSERT INTO place values (1)') + + database_import.import_osm_data('file.pdf', osm2pgsql_options) + + +def test_import_osm_data_simple_no_data(temp_db_cursor,osm2pgsql_options): + temp_db_cursor.execute('CREATE TABLE place (id INT)') + + with pytest.raises(UsageError, match='No data.*'): + database_import.import_osm_data('file.pdf', osm2pgsql_options) + + +def test_import_osm_data_drop(temp_db_conn, temp_db_cursor, tmp_path, osm2pgsql_options): + temp_db_cursor.execute('CREATE TABLE place (id INT)') + temp_db_cursor.execute('CREATE TABLE planet_osm_nodes (id INT)') + temp_db_cursor.execute('INSERT INTO place values (1)') + + flatfile = tmp_path / 'flatfile' + flatfile.write_text('touch') + + osm2pgsql_options['flatnode_file'] = str(flatfile.resolve()) + + database_import.import_osm_data('file.pdf', osm2pgsql_options, drop=True) + + assert not flatfile.exists() + assert not temp_db_conn.table_exists('planet_osm_nodes') + + +def test_import_osm_data_default_cache(temp_db_cursor,osm2pgsql_options): + temp_db_cursor.execute('CREATE TABLE place (id INT)') + temp_db_cursor.execute('INSERT INTO place values (1)') + + osm2pgsql_options['osm2pgsql_cache'] = 0 + + database_import.import_osm_data(Path(__file__), osm2pgsql_options) + + +def test_truncate_database_tables(temp_db_conn, temp_db_cursor, table_factory): + tables = ('word', 'placex', 'place_addressline', 'location_area', + 'location_area_country', 'location_property', + 'location_property_tiger', 'location_property_osmline', + 'location_postcode', 'search_name', 'location_road_23') + for table in tables: + table_factory(table, content=(1, 2, 3)) + + database_import.truncate_data_tables(temp_db_conn, max_word_frequency=23) + + for table in tables: + assert temp_db_cursor.table_rows(table) == 0 + + +@pytest.mark.parametrize("threads", (1, 5)) +def test_load_data(dsn, src_dir, place_row, placex_table, osmline_table, word_table, + temp_db_cursor, threads): + for func in ('make_keywords', 'getorcreate_housenumber_id', 'make_standard_name'): + temp_db_cursor.execute("""CREATE FUNCTION {} (src TEXT) + RETURNS TEXT AS $$ SELECT 'a'::TEXT $$ LANGUAGE SQL + """.format(func)) + for oid in range(100, 130): + place_row(osm_id=oid) + place_row(osm_type='W', osm_id=342, cls='place', typ='houses', + geom='SRID=4326;LINESTRING(0 0, 10 10)') + + database_import.load_data(dsn, src_dir / 'data', threads) + + assert temp_db_cursor.table_rows('placex') == 30 + assert temp_db_cursor.table_rows('location_property_osmline') == 1 diff --git a/test/python/test_tools_exec_utils.py b/test/python/test_tools_exec_utils.py index 283f486a..8f60ac74 100644 --- 
a/test/python/test_tools_exec_utils.py +++ b/test/python/test_tools_exec_utils.py @@ -105,8 +105,15 @@ def test_run_api_with_extra_env(tmp_project_dir): ### run_osm2pgsql -def test_run_osm2pgsql(): - exec_utils.run_osm2pgsql(dict(osm2pgsql='echo', append=False, flatnode_file=None, - dsn='dbname=foobar', threads=1, osm2pgsql_cache=500, - osm2pgsql_style='./my.style', - import_file='foo.bar')) +def test_run_osm2pgsql(osm2pgsql_options): + osm2pgsql_options['append'] = False + osm2pgsql_options['import_file'] = 'foo.bar' + osm2pgsql_options['tablespaces']['osm_data'] = 'extra' + exec_utils.run_osm2pgsql(osm2pgsql_options) + + +def test_run_osm2pgsql_disable_jit(osm2pgsql_options): + osm2pgsql_options['append'] = True + osm2pgsql_options['import_file'] = 'foo.bar' + osm2pgsql_options['disable_jit'] = True + exec_utils.run_osm2pgsql(osm2pgsql_options) diff --git a/test/python/test_tools_freeze.py b/test/python/test_tools_freeze.py new file mode 100644 index 00000000..fcdab23a --- /dev/null +++ b/test/python/test_tools_freeze.py @@ -0,0 +1,51 @@ +""" +Tests for freeze functions (removing unused database parts). +""" +import pytest + +from nominatim.tools import freeze + +NOMINATIM_RUNTIME_TABLES = [ + 'country_name', 'country_osm_grid', + 'location_postcode', 'location_property_osmline', 'location_property_tiger', + 'placex', 'place_adressline', + 'search_name', + 'word' +] + +NOMINATIM_DROP_TABLES = [ + 'address_levels', + 'location_area', 'location_area_country', 'location_area_large_100', + 'location_road_1', + 'place', 'planet_osm_nodes', 'planet_osm_rels', 'planet_osm_ways', + 'search_name_111', + 'wikipedia_article', 'wikipedia_redirect' +] + +def test_drop_tables(temp_db_conn, temp_db_cursor): + for table in NOMINATIM_RUNTIME_TABLES + NOMINATIM_DROP_TABLES: + temp_db_cursor.execute('CREATE TABLE {} (id int)'.format(table)) + + freeze.drop_update_tables(temp_db_conn) + + for table in NOMINATIM_RUNTIME_TABLES: + assert temp_db_cursor.table_exists(table) + + for table in NOMINATIM_DROP_TABLES: + assert not temp_db_cursor.table_exists(table) + +def test_drop_flatnode_file_no_file(): + freeze.drop_flatnode_file('') + + +def test_drop_flatnode_file_file_already_gone(tmp_path): + freeze.drop_flatnode_file(str(tmp_path / 'something.store')) + + +def test_drop_flatnode_file_delte(tmp_path): + flatfile = tmp_path / 'flatnode.store' + flatfile.write_text('Some content') + + freeze.drop_flatnode_file(str(flatfile)) + + assert not flatfile.exists() diff --git a/test/python/test_tools_refresh.py b/test/python/test_tools_refresh.py new file mode 100644 index 00000000..d6c46ad7 --- /dev/null +++ b/test/python/test_tools_refresh.py @@ -0,0 +1,26 @@ +""" +Test for various refresh functions. +""" +from pathlib import Path + +import pytest + +from nominatim.tools import refresh + +TEST_DIR = (Path(__file__) / '..' 
/ '..').resolve() + +def test_refresh_import_wikipedia_not_existing(dsn): + assert 1 == refresh.import_wikipedia_articles(dsn, Path('.')) + + +@pytest.mark.parametrize("replace", (True, False)) +def test_refresh_import_wikipedia(dsn, table_factory, temp_db_cursor, replace): + if replace: + table_factory('wikipedia_article') + table_factory('wikipedia_redirect') + + # use the small wikipedia file for the API testdb + assert 0 == refresh.import_wikipedia_articles(dsn, TEST_DIR / 'testdb') + + assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_article') > 0 + assert temp_db_cursor.scalar('SELECT count(*) FROM wikipedia_redirect') > 0 diff --git a/test/python/test_tools_refresh_address_levels.py b/test/python/test_tools_refresh_address_levels.py index 87e34c61..2bd91720 100644 --- a/test/python/test_tools_refresh_address_levels.py +++ b/test/python/test_tools_refresh_address_levels.py @@ -2,9 +2,10 @@ Tests for function for importing address ranks. """ import json -import pytest from pathlib import Path +import pytest + from nominatim.tools.refresh import load_address_levels, load_address_levels_from_file def test_load_ranks_def_config(temp_db_conn, temp_db_cursor, def_config): diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py index d219d748..40d4c81a 100644 --- a/test/python/test_tools_refresh_create_functions.py +++ b/test/python/test_tools_refresh_create_functions.py @@ -1,99 +1,47 @@ """ Tests for creating PL/pgSQL functions for Nominatim. """ -from pathlib import Path import pytest -from nominatim.db.connection import connect -from nominatim.tools.refresh import _get_standard_function_sql, _get_partition_function_sql - -SQL_DIR = (Path(__file__) / '..' / '..' / '..' / 'lib-sql').resolve() - -@pytest.fixture -def db(temp_db): - conn = connect('dbname=' + temp_db) - yield conn - conn.close() +from nominatim.tools.refresh import create_functions @pytest.fixture -def db_with_tables(db): - with db.cursor() as cur: - for table in ('place', 'placex', 'location_postcode'): - cur.execute('CREATE TABLE {} (place_id BIGINT)'.format(table)) - - return db - - -def test_standard_functions_replace_module_default(db, def_config): - def_config.project_dir = Path('.') - sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) - - assert sql - assert sql.find('{modulepath}') < 0 - assert sql.find("'{}'".format(Path('module/nominatim.so').resolve())) >= 0 - - -def test_standard_functions_replace_module_custom(monkeypatch, db, def_config): - monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', 'custom') - sql = _get_standard_function_sql(db, def_config, SQL_DIR, False, False) - - assert sql - assert sql.find('{modulepath}') < 0 - assert sql.find("'custom/nominatim.so'") >= 0 - - -@pytest.mark.parametrize("enabled", (True, False)) -def test_standard_functions_enable_diff(db_with_tables, def_config, enabled): - def_config.project_dir = Path('.') - sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, enabled, False) - - assert sql - assert (sql.find('%DIFFUPDATES%') < 0) == enabled - - -@pytest.mark.parametrize("enabled", (True, False)) -def test_standard_functions_enable_debug(db_with_tables, def_config, enabled): - def_config.project_dir = Path('.') - sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, enabled) - - assert sql - assert (sql.find('--DEBUG') < 0) == enabled - - -@pytest.mark.parametrize("enabled", (True, False)) -def 
test_standard_functions_enable_limit_reindexing(monkeypatch, db_with_tables, def_config, enabled): - def_config.project_dir = Path('.') - monkeypatch.setenv('NOMINATIM_LIMIT_REINDEXING', 'yes' if enabled else 'no') - sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) - - assert sql - assert (sql.find('--LIMIT INDEXING') < 0) == enabled - - -@pytest.mark.parametrize("enabled", (True, False)) -def test_standard_functions_enable_tiger(monkeypatch, db_with_tables, def_config, enabled): - def_config.project_dir = Path('.') - monkeypatch.setenv('NOMINATIM_USE_US_TIGER_DATA', 'yes' if enabled else 'no') - sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) - - assert sql - assert (sql.find('%NOTIGERDATA%') >= 0) == enabled - - -@pytest.mark.parametrize("enabled", (True, False)) -def test_standard_functions_enable_aux(monkeypatch, db_with_tables, def_config, enabled): - def_config.project_dir = Path('.') - monkeypatch.setenv('NOMINATIM_USE_AUX_LOCATION_DATA', 'yes' if enabled else 'no') - sql = _get_standard_function_sql(db_with_tables, def_config, SQL_DIR, False, False) - - assert sql - assert (sql.find('%NOAUXDATA%') >= 0) == enabled - - -def test_partition_function(temp_db_cursor, db, def_config): - temp_db_cursor.execute("CREATE TABLE country_name (partition SMALLINT)") - - sql = _get_partition_function_sql(db, SQL_DIR) - - assert sql - assert sql.find('-partition-') < 0 +def conn(temp_db_conn, table_factory, monkeypatch): + monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') + table_factory('country_name', 'partition INT', (0, 1, 2)) + return temp_db_conn + + +def test_create_functions(temp_db_cursor, conn, def_config, tmp_path): + sqlfile = tmp_path / 'functions.sql' + sqlfile.write_text("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER + AS $$ + BEGIN + RETURN 43; + END; + $$ LANGUAGE plpgsql IMMUTABLE; + """) + + create_functions(conn, def_config, tmp_path) + + assert temp_db_cursor.scalar('SELECT test()') == 43 + + +@pytest.mark.parametrize("dbg,ret", ((True, 43), (False, 22))) +def test_create_functions_with_template(temp_db_cursor, conn, def_config, tmp_path, dbg, ret): + sqlfile = tmp_path / 'functions.sql' + sqlfile.write_text("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER + AS $$ + BEGIN + {% if debug %} + RETURN 43; + {% else %} + RETURN 22; + {% endif %} + END; + $$ LANGUAGE plpgsql IMMUTABLE; + """) + + create_functions(conn, def_config, tmp_path, enable_debug=dbg) + + assert temp_db_cursor.scalar('SELECT test()') == ret diff --git a/test/python/test_tools_refresh_setup_website.py b/test/python/test_tools_refresh_setup_website.py new file mode 100644 index 00000000..126fc561 --- /dev/null +++ b/test/python/test_tools_refresh_setup_website.py @@ -0,0 +1,70 @@ +""" +Tests for setting up the website scripts. +""" +from pathlib import Path +import subprocess + +import pytest + +from nominatim.tools import refresh + +@pytest.fixture +def envdir(tmpdir): + (tmpdir / 'php').mkdir() + (tmpdir / 'php' / 'website').mkdir() + return tmpdir + + +@pytest.fixture +def test_script(envdir): + def _create_file(code): + outfile = envdir / 'php' / 'website' / 'search.php' + outfile.write_text('