From: Sarah Hoffmann Date: Mon, 19 Apr 2021 14:06:43 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~170 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/616789bbccd4b35cc643b20c5af3c9835eb22643?hp=0d567291d668dcb912375d0e8ec779525c7c0bba Merge remote-tracking branch 'upstream/master' --- diff --git a/CMakeLists.txt b/CMakeLists.txt index 33b6ae64..a2a98455 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -109,21 +109,6 @@ if (BUILD_IMPORTER) " wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz") endif() - set(CUSTOMSCRIPTS - check_import_finished.php - country_languages.php - export.php - query.php - setup.php - update.php - warm.php - ) - - foreach (script_source ${CUSTOMSCRIPTS}) - configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl - ${PROJECT_BINARY_DIR}/utils/${script_source}) - endforeach() - configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl ${PROJECT_BINARY_DIR}/nominatim) endif() diff --git a/cmake/script.tmpl b/cmake/script.tmpl deleted file mode 100755 index 3fbe535e..00000000 --- a/cmake/script.tmpl +++ /dev/null @@ -1,14 +0,0 @@ -#!@PHP_BIN@ -Cq -addParams('admin', '--check-database') - ->run(); diff --git a/lib-php/admin/setup.php b/lib-php/admin/setup.php deleted file mode 100644 index 7523527a..00000000 --- a/lib-php/admin/setup.php +++ /dev/null @@ -1,218 +0,0 @@ -addParams('--threads', $iInstances); - if ($aCMDResult['ignore-errors'] ?? false) { - $oCmd->addParams('--ignore-errors'); - } - if ($aCMDResult['quiet'] ?? false) { - $oCmd->addParams('--quiet'); - } - if ($aCMDResult['verbose'] ?? false) { - $oCmd->addParams('--verbose'); - } - $oCmd->run(true); -} - - -//******************************************************* -// Making some sanity check: -// Check if osm-file is set and points to a valid file -if ($aCMDResult['import-data'] || $aCMDResult['all']) { - // to remain in /lib/setup_functions.php function - checkInFile($aCMDResult['osm-file']); -} - -// ****************************************************** -// instantiate Setup class -$oSetup = new SetupFunctions($aCMDResult); - -// ******************************************************* -// go through complete process if 'all' is selected or start selected functions -if ($aCMDResult['create-db'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--create-db')); -} - -if ($aCMDResult['setup-db'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db'); - - if ($aCMDResult['no-partitions'] ?? false) { - $oCmd->addParams('--no-partitions'); - } - - run($oCmd); -} - -if ($aCMDResult['import-data'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd)) - ->addParams('transition', '--import-data') - ->addParams('--osm-file', $aCMDResult['osm-file']); - if ($aCMDResult['drop'] ?? false) { - $oCmd->addParams('--drop'); - } - - run($oCmd); -} - -if ($aCMDResult['create-functions'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->createSqlFunctions(); -} - -if ($aCMDResult['create-tables'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables'); - - if ($aCMDResult['reverse-only'] ?? false) { - $oCmd->addParams('--reverse-only'); - } - - run($oCmd); -} - -if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables')); -} - -if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->createSqlFunctions(); // also create partition functions -} - -if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) { - $bDidSomething = true; - // ignore errors! - (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run(); -} - -if ($aCMDResult['load-data'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('transition', '--load-data')); -} - -if ($aCMDResult['import-tiger-data']) { - $bDidSomething = true; - $sTigerPath = getSetting('TIGER_DATA_PATH', CONST_InstallDir.'/tiger'); - run((clone($oNominatimCmd))->addParams('transition', '--tiger-data', $sTigerPath)); -} - -if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) { - $bDidSomething = true; - $oSetup->calculatePostcodes($aCMDResult['all']); -} - -if ($aCMDResult['index'] || $aCMDResult['all']) { - $bDidSomething = true; - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--index'); - if ($aCMDResult['index-noanalyse'] ?? false) { - $oCmd->addParams('--no-analyse'); - } - - run($oCmd); -} - -if ($aCMDResult['drop']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('freeze')); -} - -if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) { - $bDidSomething = true; - - $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices'); - - if ($aCMDResult['drop'] ?? false) { - $oCmd->addParams('--drop'); - } - - run($oCmd); -} - -if ($aCMDResult['create-country-names'] || $aCMDResult['all']) { - $bDidSomething = true; - run(clone($oNominatimCmd))->addParams('transition', '--create-country-names'); -} - -if ($aCMDResult['setup-website'] || $aCMDResult['all']) { - $bDidSomething = true; - run((clone($oNominatimCmd))->addParams('refresh', '--website')); -} - -// ****************************************************** -// If we did something, repeat the warnings -if (!$bDidSomething) { - showUsage($aCMDOptions, true); -} else { - echo "Summary of warnings:\n\n"; - repeatWarnings(); - echo "\n"; - info('Setup finished.'); -} diff --git a/lib-php/admin/specialphrases.php b/lib-php/admin/specialphrases.php deleted file mode 100644 index 84bcfb5c..00000000 --- a/lib-php/admin/specialphrases.php +++ /dev/null @@ -1,11 +0,0 @@ - -addParams('special-phrases', '--import-from-wiki') - ->run(); diff --git a/lib-php/admin/update.php b/lib-php/admin/update.php index fae4a783..ea58f37c 100644 --- a/lib-php/admin/update.php +++ b/lib-php/admin/update.php @@ -3,12 +3,9 @@ require_once(CONST_LibDir.'/init-cmd.php'); require_once(CONST_LibDir.'/setup_functions.php'); -require_once(CONST_LibDir.'/setup/SetupClass.php'); ini_set('memory_limit', '800M'); -use Nominatim\Setup\SetupFunctions as SetupFunctions; - // (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help) $aCMDOptions = array( @@ -17,13 +14,6 @@ $aCMDOptions array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'), array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'), - array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'), - array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'), - array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'), - array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'), - array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'), - array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'), - array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'), array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'), @@ -35,14 +25,6 @@ $aCMDOptions array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'), array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'), - array('index', '', 0, 1, 0, 0, 'bool', 'Index'), - array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'), - array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'), - - array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'), - array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'), - array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'), - array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'), ); @@ -51,10 +33,6 @@ getCmdOpt($_SERVER['argv'], $aCMDOptions, $aResult, true, true); loadSettings($aCMDResult['project-dir'] ?? getcwd()); setupHTTPProxy(); -if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; - -if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; - date_default_timezone_set('Etc/UTC'); $oDB = new Nominatim\DB(); @@ -104,35 +82,6 @@ if ($fPostgresVersion >= 11.0) { ); } -$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); - -function run($oCmd) -{ - global $aCMDResult; - if ($aCMDResult['quiet'] ?? false) { - $oCmd->addParams('--quiet'); - } - if ($aCMDResult['verbose'] ?? false) { - $oCmd->addParams('--verbose'); - } - $oCmd->run(true); -} - - -if ($aResult['init-updates']) { - $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init'); - - if ($aResult['no-update-functions']) { - $oCmd->addParams('--no-update-functions'); - } - - run($oCmd); -} - -if ($aResult['check-for-updates']) { - exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run()); -} - if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // import diffs and files directly (e.g. from osmosis --rri) $sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file']; @@ -153,10 +102,6 @@ if (isset($aResult['import-diff']) || isset($aResult['import-file'])) { // Don't update the import status - we don't know what this file contains } -if ($aResult['calculate-postcodes']) { - run((clone($oNominatimCmd))->addParams('refresh', '--postcodes')); -} - $sTemporaryFile = CONST_InstallDir.'/osmosischange.osc'; $bHaveDiff = false; $bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api']; @@ -201,37 +146,3 @@ if ($bHaveDiff) { fail("osm2pgsql exited with error level $iRet\n"); } } - -if ($aResult['recompute-word-counts']) { - run((clone($oNominatimCmd))->addParams('refresh', '--word-counts')); -} - -if ($aResult['index']) { - run((clone $oNominatimCmd) - ->addParams('index', '--minrank', $aResult['index-rank']) - ->addParams('--threads', $aResult['index-instances'])); -} - -if ($aResult['update-address-levels']) { - run((clone($oNominatimCmd))->addParams('refresh', '--address-levels')); -} - -if ($aResult['recompute-importance']) { - run((clone($oNominatimCmd))->addParams('refresh', '--importance')); -} - -if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) { - $oCmd = (clone($oNominatimCmd)) - ->addParams('replication') - ->addParams('--threads', $aResult['index-instances']); - - if (!$aResult['import-osmosis-all']) { - $oCmd->addParams('--once'); - } - - if ($aResult['no-index']) { - $oCmd->addParams('--no-index'); - } - - run($oCmd); -} diff --git a/lib-php/cmd.php b/lib-php/cmd.php index 5a12f99a..9c971e5f 100644 --- a/lib-php/cmd.php +++ b/lib-php/cmd.php @@ -144,58 +144,6 @@ function repeatWarnings() } -function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false) -{ - // Convert database DSN to psql parameters - $aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); - if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; - - $oCmd = new \Nominatim\Shell('psql'); - $oCmd->addParams('--port', $aDSNInfo['port']); - $oCmd->addParams('--dbname', $aDSNInfo['database']); - if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) { - $oCmd->addParams('--host', $aDSNInfo['hostspec']); - } - if (isset($aDSNInfo['username']) && $aDSNInfo['username']) { - $oCmd->addParams('--username', $aDSNInfo['username']); - } - if (isset($aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']); - } - if (!$bVerbose) { - $oCmd->addParams('--quiet'); - } - if ($bfatal && !$bIgnoreErrors) { - $oCmd->addParams('-v', 'ON_ERROR_STOP=1'); - } - - $aDescriptors = array( - 0 => array('pipe', 'r'), - 1 => STDOUT, - 2 => STDERR - ); - $ahPipes = null; - $hProcess = @proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv); - if (!is_resource($hProcess)) { - fail('unable to start pgsql'); - } - - if (!$bVerbose) { - fwrite($ahPipes[0], 'set client_min_messages to WARNING;'); - } - - while (strlen($sScript)) { - $iWritten = fwrite($ahPipes[0], $sScript); - if ($iWritten <= 0) break; - $sScript = substr($sScript, $iWritten); - } - fclose($ahPipes[0]); - $iReturn = proc_close($hProcess); - if ($bfatal && $iReturn > 0) { - fail("pgsql returned with error code ($iReturn)"); - } -} - function setupHTTPProxy() { if (!getSettingBool('HTTP_PROXY')) { diff --git a/lib-php/setup/SetupClass.php b/lib-php/setup/SetupClass.php deleted file mode 100755 index d07adce7..00000000 --- a/lib-php/setup/SetupClass.php +++ /dev/null @@ -1,261 +0,0 @@ -iInstances = isset($aCMDResult['threads']) - ? $aCMDResult['threads'] - : (min(16, getProcessorCount()) - 1); - - if ($this->iInstances < 1) { - $this->iInstances = 1; - warn('resetting threads to '.$this->iInstances); - } - - // parse database string - $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); - if (!isset($this->aDSNInfo['port'])) { - $this->aDSNInfo['port'] = 5432; - } - - // setting member variables based on command line options stored in $aCMDResult - $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet']; - $this->bVerbose = $aCMDResult['verbose']; - - //setting default values which are not set by the update.php array - if (isset($aCMDResult['ignore-errors'])) { - $this->sIgnoreErrors = $aCMDResult['ignore-errors']; - } else { - $this->sIgnoreErrors = false; - } - if (isset($aCMDResult['enable-debug-statements'])) { - $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements']; - } else { - $this->bEnableDebugStatements = false; - } - if (isset($aCMDResult['enable-diff-updates'])) { - $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates']; - } else { - $this->bEnableDiffUpdates = false; - } - - $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop']; - - $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')); - if ($this->bQuiet) { - $this->oNominatimCmd->addParams('--quiet'); - } - if ($this->bVerbose) { - $this->oNominatimCmd->addParams('--verbose'); - } - } - - public function calculatePostcodes($bCMDResultAll) - { - info('Calculate Postcodes'); - $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql'); - - $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz'; - if (file_exists($sPostcodeFilename)) { - $this->pgsqlRunScriptFile($sPostcodeFilename); - } else { - warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.'); - } - - $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz'; - if (file_exists($sPostcodeFilename)) { - $this->pgsqlRunScriptFile($sPostcodeFilename); - } else { - warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.'); - } - - - $this->db()->exec('TRUNCATE location_postcode'); - - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, country_code,"; - $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,"; - $sSQL .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))'; - $sSQL .= ' FROM placex'; - $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'"; - $sSQL .= ' AND geometry IS NOT null'; - $sSQL .= ' GROUP BY country_code, pc'; - $this->db()->exec($sSQL); - - // only add postcodes that are not yet available in OSM - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,"; - $sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)'; - $sSQL .= ' FROM us_postcode WHERE postcode NOT IN'; - $sSQL .= ' (SELECT postcode FROM location_postcode'; - $sSQL .= " WHERE country_code = 'us')"; - $this->db()->exec($sSQL); - - // add missing postcodes for GB (if available) - $sSQL = 'INSERT INTO location_postcode'; - $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) '; - $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry"; - $sSQL .= ' FROM gb_postcode WHERE postcode NOT IN'; - $sSQL .= ' (SELECT postcode FROM location_postcode'; - $sSQL .= " WHERE country_code = 'gb')"; - $this->db()->exec($sSQL); - - if (!$bCMDResultAll) { - $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'"; - $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)'; - $this->db()->exec($sSQL); - } - - $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM '; - $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p'; - $this->db()->exec($sSQL); - } - - /** - * Return the connection to the database. - * - * @return Database object. - * - * Creates a new connection if none exists yet. Otherwise reuses the - * already established connection. - */ - private function db() - { - if (is_null($this->oDB)) { - $this->oDB = new \Nominatim\DB(); - $this->oDB->connect(); - } - - return $this->oDB; - } - - private function pgsqlRunScript($sScript, $bfatal = true) - { - runSQLScript( - $sScript, - $bfatal, - $this->bVerbose, - $this->sIgnoreErrors - ); - } - - public function createSqlFunctions() - { - $oCmd = (clone($this->oNominatimCmd)) - ->addParams('refresh', '--functions'); - - if (!$this->bEnableDiffUpdates) { - $oCmd->addParams('--no-diff-updates'); - } - - if ($this->bEnableDebugStatements) { - $oCmd->addParams('--enable-debug-statements'); - } - - $oCmd->run(!$this->sIgnoreErrors); - } - - private function pgsqlRunScriptFile($sFilename) - { - if (!file_exists($sFilename)) fail('unable to find '.$sFilename); - - $oCmd = (new \Nominatim\Shell('psql')) - ->addParams('--port', $this->aDSNInfo['port']) - ->addParams('--dbname', $this->aDSNInfo['database']); - - if (!$this->bVerbose) { - $oCmd->addParams('--quiet'); - } - if (isset($this->aDSNInfo['hostspec'])) { - $oCmd->addParams('--host', $this->aDSNInfo['hostspec']); - } - if (isset($this->aDSNInfo['username'])) { - $oCmd->addParams('--username', $this->aDSNInfo['username']); - } - if (isset($this->aDSNInfo['password'])) { - $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']); - } - $ahGzipPipes = null; - if (preg_match('/\\.gz$/', $sFilename)) { - $aDescriptors = array( - 0 => array('pipe', 'r'), - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') - ); - $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename); - - $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes); - if (!is_resource($hGzipProcess)) fail('unable to start zcat'); - $aReadPipe = $ahGzipPipes[1]; - fclose($ahGzipPipes[0]); - } else { - $oCmd->addParams('--file', $sFilename); - $aReadPipe = array('pipe', 'r'); - } - $aDescriptors = array( - 0 => $aReadPipe, - 1 => array('pipe', 'w'), - 2 => array('file', '/dev/null', 'a') - ); - $ahPipes = null; - - $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv); - if (!is_resource($hProcess)) fail('unable to start pgsql'); - // TODO: error checking - while (!feof($ahPipes[1])) { - echo fread($ahPipes[1], 4096); - } - fclose($ahPipes[1]); - $iReturn = proc_close($hProcess); - if ($iReturn > 0) { - fail("pgsql returned with error code ($iReturn)"); - } - if ($ahGzipPipes) { - fclose($ahGzipPipes[1]); - proc_close($hGzipProcess); - } - } - - private function replaceSqlPatterns($sSql) - { - $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql); - - $aPatterns = array( - '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'), - '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'), - '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'), - '{ts:search-index}' => getSetting('TABLESPACE_SEARCH_INDEX'), - '{ts:aux-data}' => getSetting('TABLESPACE_AUX_DATA'), - '{ts:aux-index}' => getSetting('TABLESPACE_AUX_INDEX') - ); - - foreach ($aPatterns as $sPattern => $sTablespace) { - if ($sTablespace) { - $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql); - } else { - $sSql = str_replace($sPattern, '', $sSql); - } - } - - return $sSql; - } -} diff --git a/lib-php/setup_functions.php b/lib-php/setup_functions.php index c89db534..179b0159 100755 --- a/lib-php/setup_functions.php +++ b/lib-php/setup_functions.php @@ -1,20 +1,5 @@ 0 + """.format(tablespace)) + conn.commit() diff --git a/nominatim/clicmd/transition.py b/nominatim/clicmd/transition.py deleted file mode 100644 index c9341f49..00000000 --- a/nominatim/clicmd/transition.py +++ /dev/null @@ -1,146 +0,0 @@ -""" -Implementation of the 'transition' subcommand. - -This subcommand provides standins for functions that were available -through the PHP scripts but are now no longer directly accessible. -This module will be removed as soon as the transition phase is over. -""" -import logging -from pathlib import Path - -from ..db.connection import connect -from ..db import status -from ..errors import UsageError - -# Do not repeat documentation of subcommand classes. -# pylint: disable=C0111 -# Using non-top-level imports to avoid eventually unused imports. -# pylint: disable=E0012,C0415 - -LOG = logging.getLogger() - -class AdminTransition: - """\ - Internal functions for code transition. Do not use. - """ - - @staticmethod - def add_args(parser): - group = parser.add_argument_group('Sub-functions') - group.add_argument('--create-db', action='store_true', - help='Create nominatim db') - group.add_argument('--setup-db', action='store_true', - help='Build a blank nominatim db') - group.add_argument('--import-data', action='store_true', - help='Import a osm file') - group.add_argument('--load-data', action='store_true', - help='Copy data to live tables from import table') - group.add_argument('--create-tables', action='store_true', - help='Create main tables') - group.add_argument('--create-partition-tables', action='store_true', - help='Create required partition tables') - group.add_argument('--index', action='store_true', - help='Index the data') - group.add_argument('--create-search-indices', action='store_true', - help='Create additional indices required for search and update') - group.add_argument('--create-country-names', action='store_true', - help='Create search index for default country names.') - group = parser.add_argument_group('Options') - group.add_argument('--no-partitions', action='store_true', - help='Do not partition search indices') - group.add_argument('--osm-file', metavar='FILE', - help='File to import') - group.add_argument('--drop', action='store_true', - help='Drop tables needed for updates, making the database readonly') - group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int, - help='Size of cache to be used by osm2pgsql (in MB)') - group.add_argument('--no-analyse', action='store_true', - help='Do not perform analyse operations during index') - group.add_argument('--ignore-errors', action='store_true', - help="Ignore certain erros on import.") - group.add_argument('--reverse-only', action='store_true', - help='Do not create search tables and indexes') - group.add_argument('--tiger-data', metavar='FILE', - help='File to import') - - @staticmethod - def run(args): # pylint: disable=too-many-statements - from ..tools import database_import, tiger_data - from ..tools import refresh - - if args.create_db: - LOG.warning('Create DB') - database_import.create_db(args.config.get_libpq_dsn()) - - if args.setup_db: - LOG.warning('Setup DB') - - with connect(args.config.get_libpq_dsn()) as conn: - database_import.setup_extensions(conn) - database_import.install_module(args.module_dir, args.project_dir, - args.config.DATABASE_MODULE_PATH, - conn=conn) - - database_import.import_base_data(args.config.get_libpq_dsn(), - args.data_dir, args.no_partitions) - - if args.import_data: - LOG.warning('Import data') - if not args.osm_file: - raise UsageError('Missing required --osm-file argument') - database_import.import_osm_data(Path(args.osm_file), - args.osm2pgsql_options(0, 1), - drop=args.drop, - ignore_errors=args.ignore_errors) - - if args.create_tables: - LOG.warning('Create Tables') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_tables(conn, args.config, args.sqllib_dir, args.reverse_only) - refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG)) - refresh.create_functions(conn, args.config, args.sqllib_dir, - enable_diff_updates=False) - database_import.create_table_triggers(conn, args.config, args.sqllib_dir) - - if args.create_partition_tables: - LOG.warning('Create Partition Tables') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_partition_tables(conn, args.config, args.sqllib_dir) - - if args.load_data: - LOG.warning('Load data') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY) - database_import.load_data(args.config.get_libpq_dsn(), - args.data_dir, - args.threads or 1) - - with connect(args.config.get_libpq_dsn()) as conn: - try: - status.set_status(conn, status.compute_database_date(conn)) - except Exception as exc: # pylint: disable=broad-except - LOG.error('Cannot determine date of database: %s', exc) - - if args.index: - LOG.warning('Indexing') - from ..indexer.indexer import Indexer - indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1) - indexer.index_full() - - if args.create_search_indices: - LOG.warning('Create Search indices') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop) - - if args.tiger_data: - LOG.warning('Tiger data') - tiger_data.add_tiger_data(args.config.get_libpq_dsn(), - args.tiger_data, - args.threads or 1, - args.config, - args.sqllib_dir) - - if args.create_country_names: - LOG.warning('Create search index for default country names.') - with connect(args.config.get_libpq_dsn()) as conn: - database_import.create_country_names(conn, args.config) diff --git a/nominatim/config.py b/nominatim/config.py index a22f90ab..d1df17b7 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -7,7 +7,7 @@ from pathlib import Path from dotenv import dotenv_values -from .errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() @@ -39,6 +39,16 @@ class Configuration: self._config['NOMINATIM_ADDRESS_LEVEL_CONFIG'] = \ str(config_dir / 'address-levels.json') + class _LibDirs: # pylint: disable=too-few-public-methods + pass + + self.lib_dir = _LibDirs() + + def set_libdirs(self, **kwargs): + """ Set paths to library functions and data. + """ + for key, value in kwargs.items(): + setattr(self.lib_dir, key, Path(value).resolve()) def __getattr__(self, name): name = 'NOMINATIM_' + name diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index 5aa05ced..ac8d7c85 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -9,7 +9,7 @@ import psycopg2 import psycopg2.extensions import psycopg2.extras -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index 7ffe8881..c7009b34 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -75,9 +75,9 @@ class SQLPreprocessor: # pylint: disable=too-few-public-methods and follows its syntax. """ - def __init__(self, conn, config, sqllib_dir): + def __init__(self, conn, config): self.env = jinja2.Environment(autoescape=False, - loader=jinja2.FileSystemLoader(str(sqllib_dir))) + loader=jinja2.FileSystemLoader(str(config.lib_dir.sql))) db_info = {} db_info['partitions'] = _get_partitions(conn) diff --git a/nominatim/db/status.py b/nominatim/db/status.py index 225638f4..e63a40f9 100644 --- a/nominatim/db/status.py +++ b/nominatim/db/status.py @@ -5,8 +5,8 @@ import datetime as dt import logging import re -from ..tools.exec_utils import get_url -from ..errors import UsageError +from nominatim.tools.exec_utils import get_url +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 0a2e2c06..b376940d 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -5,8 +5,8 @@ import subprocess import logging import gzip -from .connection import get_pg_env -from ..errors import UsageError +from nominatim.db.connection import get_pg_env +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 06c05e1d..4f4de218 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -7,8 +7,8 @@ import select import psycopg2 -from .progress import ProgressLogger -from ..db.async_connection import DBConnection +from nominatim.indexer.progress import ProgressLogger +from nominatim.db.async_connection import DBConnection LOG = logging.getLogger() diff --git a/nominatim/tools/admin.py b/nominatim/tools/admin.py index 119adf37..2e18cb6f 100644 --- a/nominatim/tools/admin.py +++ b/nominatim/tools/admin.py @@ -3,7 +3,7 @@ Functions for database analysis and maintenance. """ import logging -from ..errors import UsageError +from nominatim.errors import UsageError LOG = logging.getLogger() diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index 5b39085d..265f8666 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -6,8 +6,8 @@ from textwrap import dedent import psycopg2 -from ..db.connection import connect -from ..errors import UsageError +from nominatim.db.connection import connect +from nominatim.errors import UsageError CHECKLIST = [] diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index 433cd8af..25efedb9 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -11,13 +11,13 @@ from pathlib import Path import psutil import psycopg2 -from ..db.connection import connect, get_pg_env -from ..db import utils as db_utils -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor -from .exec_utils import run_osm2pgsql -from ..errors import UsageError -from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION +from nominatim.db.connection import connect, get_pg_env +from nominatim.db import utils as db_utils +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError +from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION LOG = logging.getLogger() @@ -179,29 +179,29 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False): Path(options['flatnode_file']).unlink() -def create_tables(conn, config, sqllib_dir, reverse_only=False): +def create_tables(conn, config, reverse_only=False): """ Create the set of basic tables. When `reverse_only` is True, then the main table for searching will be skipped and only reverse search is possible. """ - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.env.globals['db']['reverse_only'] = reverse_only sql.run_sql_file(conn, 'tables.sql') -def create_table_triggers(conn, config, sqllib_dir): +def create_table_triggers(conn, config): """ Create the triggers for the tables. The trigger functions must already have been imported with refresh.create_functions(). """ - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'table-triggers.sql') -def create_partition_tables(conn, config, sqllib_dir): +def create_partition_tables(conn, config): """ Create tables that have explicit partitioning. """ - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'partition-tables.src.sql') @@ -287,7 +287,7 @@ def load_data(dsn, data_dir, threads): cur.execute('ANALYSE') -def create_search_indices(conn, config, sqllib_dir, drop=False): +def create_search_indices(conn, config, drop=False): """ Create tables that have explicit partitioning. """ @@ -303,7 +303,7 @@ def create_search_indices(conn, config, sqllib_dir, drop=False): cur.execute('DROP INDEX "{}"'.format(idx)) conn.commit() - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'indices.sql', drop=drop) diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index e6b9d8d4..96679d27 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -6,8 +6,8 @@ import subprocess import urllib.request as urlrequest from urllib.parse import urlencode -from ..version import NOMINATIM_VERSION -from ..db.connection import get_pg_env +from nominatim.version import NOMINATIM_VERSION +from nominatim.db.connection import get_pg_env LOG = logging.getLogger() diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index 54848341..4af5cb48 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -3,11 +3,11 @@ Functions for database migration to newer software versions. """ import logging -from ..db import properties -from ..db.connection import connect -from ..version import NOMINATIM_VERSION -from . import refresh, database_import -from ..errors import UsageError +from nominatim.db import properties +from nominatim.db.connection import connect +from nominatim.version import NOMINATIM_VERSION +from nominatim.tools import refresh, database_import +from nominatim.errors import UsageError LOG = logging.getLogger() @@ -47,7 +47,7 @@ def migrate(config, paths): if has_run_migration: LOG.warning('Updating SQL functions.') - refresh.create_functions(conn, config, paths.sqllib_dir) + refresh.create_functions(conn, config) properties.set_property(conn, 'database_version', '{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION)) diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py new file mode 100644 index 00000000..0a568cba --- /dev/null +++ b/nominatim/tools/postcodes.py @@ -0,0 +1,80 @@ +""" +Functions for importing, updating and otherwise maintaining the table +of artificial postcode centroids. +""" + +from nominatim.db.utils import execute_file +from nominatim.db.connection import connect + +def import_postcodes(dsn, project_dir): + """ Set up the initial list of postcodes. + """ + + with connect(dsn) as conn: + conn.drop_table('gb_postcode') + conn.drop_table('us_postcode') + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE gb_postcode ( + id integer, + postcode character varying(9), + geometry GEOMETRY(Point, 4326))""") + + with conn.cursor() as cur: + cur.execute("""CREATE TABLE us_postcode ( + postcode text, + x double precision, + y double precision)""") + conn.commit() + + gb_postcodes = project_dir / 'gb_postcode_data.sql.gz' + if gb_postcodes.is_file(): + execute_file(dsn, gb_postcodes) + + us_postcodes = project_dir / 'us_postcode_data.sql.gz' + if us_postcodes.is_file(): + execute_file(dsn, us_postcodes) + + with conn.cursor() as cur: + cur.execute("TRUNCATE location_postcode") + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, country_code, + upper(trim (both ' ' from address->'postcode')) as pc, + ST_Centroid(ST_Collect(ST_Centroid(geometry))) + FROM placex + WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%' + AND geometry IS NOT null + GROUP BY country_code, pc + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'us', postcode, + ST_SetSRID(ST_Point(x,y),4326) + FROM us_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'us') + """) + + cur.execute(""" + INSERT INTO location_postcode + (place_id, indexed_status, country_code, postcode, geometry) + SELECT nextval('seq_place'), 1, 'gb', postcode, geometry + FROM gb_postcode WHERE postcode NOT IN + (SELECT postcode FROM location_postcode + WHERE country_code = 'gb') + """) + + cur.execute(""" + DELETE FROM word WHERE class='place' and type='postcode' + and word NOT IN (SELECT postcode FROM location_postcode) + """) + + cur.execute(""" + SELECT count(getorcreate_postcode_id(v)) FROM + (SELECT distinct(postcode) as v FROM location_postcode) p + """) + conn.commit() diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 581c69e8..d38cb216 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -7,9 +7,9 @@ from textwrap import dedent from psycopg2.extras import execute_values -from ..db.utils import execute_file -from ..db.sql_preprocessor import SQLPreprocessor -from ..version import NOMINATIM_VERSION +from nominatim.db.utils import execute_file +from nominatim.db.sql_preprocessor import SQLPreprocessor +from nominatim.version import NOMINATIM_VERSION LOG = logging.getLogger() @@ -77,11 +77,10 @@ def load_address_levels_from_file(conn, config_file): load_address_levels(conn, 'address_levels', json.load(fdesc)) -def create_functions(conn, config, sqllib_dir, - enable_diff_updates=True, enable_debug=False): +def create_functions(conn, config, enable_diff_updates=True, enable_debug=False): """ (Re)create the PL/pgSQL functions. """ - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'functions.sql', disable_diff_updates=not enable_diff_updates, @@ -165,7 +164,7 @@ def recompute_importance(conn): conn.commit() -def setup_website(basedir, phplib_dir, config): +def setup_website(basedir, config): """ Create the website script stubs. """ if not basedir.exists(): @@ -179,7 +178,7 @@ def setup_website(basedir, phplib_dir, config): @define('CONST_LibDir', '{0}'); @define('CONST_NominatimVersion', '{1[0]}.{1[1]}.{1[2]}-{1[3]}'); - """.format(phplib_dir, NOMINATIM_VERSION)) + """.format(config.lib_dir.php, NOMINATIM_VERSION)) for php_name, conf_name, var_type in PHP_CONST_DEFS: if var_type == bool: @@ -193,7 +192,7 @@ def setup_website(basedir, phplib_dir, config): template += "@define('CONST_{}', {});\n".format(php_name, varout) - template += "\nrequire_once('{}/website/{{}}');\n".format(phplib_dir) + template += "\nrequire_once('{}/website/{{}}');\n".format(config.lib_dir.php) for script in WEBSITE_SCRIPTS: (basedir / script).write_text(template.format(script), 'utf-8') diff --git a/nominatim/tools/replication.py b/nominatim/tools/replication.py index a0a741e8..d6e80891 100644 --- a/nominatim/tools/replication.py +++ b/nominatim/tools/replication.py @@ -6,9 +6,9 @@ from enum import Enum import logging import time -from ..db import status -from .exec_utils import run_osm2pgsql -from ..errors import UsageError +from nominatim.db import status +from nominatim.tools.exec_utils import run_osm2pgsql +from nominatim.errors import UsageError try: from osmium.replication.server import ReplicationServer diff --git a/nominatim/tools/special_phrases.py b/nominatim/tools/special_phrases.py index 0c1258fe..9d0259dc 100644 --- a/nominatim/tools/special_phrases.py +++ b/nominatim/tools/special_phrases.py @@ -27,7 +27,7 @@ class SpecialPhrasesImporter(): self.black_list, self.white_list = self._load_white_and_black_lists() #Compile the regex here to increase performances. self.occurence_pattern = re.compile( - r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])' + r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) self.sanity_check_pattern = re.compile(r'^\w+$') self.transliterator = Transliterator.createFromRules("special-phrases normalizer", @@ -153,8 +153,10 @@ class SpecialPhrasesImporter(): class_matchs = self.sanity_check_pattern.findall(phrase_class) if len(class_matchs) < 1 or len(type_matchs) < 1: - raise UsageError("Bad class/type for language {}: {}={}".format( - lang, phrase_class, phrase_type)) + LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported", + lang, phrase_class, phrase_type) + return False + return True def _process_xml_content(self, xml_content, lang): """ @@ -205,7 +207,8 @@ class SpecialPhrasesImporter(): continue #sanity check, in case somebody added garbage in the wiki - self._check_sanity(lang, phrase_class, phrase_type) + if not self._check_sanity(lang, phrase_class, phrase_type): + continue class_type_pairs.add((phrase_class, phrase_type)) diff --git a/nominatim/tools/tiger_data.py b/nominatim/tools/tiger_data.py index c655f91d..07772c70 100644 --- a/nominatim/tools/tiger_data.py +++ b/nominatim/tools/tiger_data.py @@ -6,9 +6,9 @@ import os import tarfile import selectors -from ..db.connection import connect -from ..db.async_connection import DBConnection -from ..db.sql_preprocessor import SQLPreprocessor +from nominatim.db.connection import connect +from nominatim.db.async_connection import DBConnection +from nominatim.db.sql_preprocessor import SQLPreprocessor LOG = logging.getLogger() @@ -76,17 +76,17 @@ def handle_unregister_connection_pool(sel, place_threads): conn.close() place_threads -= 1 -def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir): - """ Import tiger data from directory or tar file +def add_tiger_data(data_dir, config, threads): + """ Import tiger data from directory or tar file `data dir`. """ - + dsn = config.get_libpq_dsn() sql_files, tar = handle_tarfile_or_directory(data_dir) if not sql_files: return with connect(dsn) as conn: - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'tiger_import_start.sql') # Reading sql_files and then for each file line handling @@ -116,5 +116,5 @@ def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir): print('\n') LOG.warning("Creating indexes on Tiger data") with connect(dsn) as conn: - sql = SQLPreprocessor(conn, config, sqllib_dir) + sql = SQLPreprocessor(conn, config) sql.run_sql_file(conn, 'tiger_import_finish.sql') diff --git a/osm2pgsql b/osm2pgsql index 497476d5..7869a4e1 160000 --- a/osm2pgsql +++ b/osm2pgsql @@ -1 +1 @@ -Subproject commit 497476d56f7c1fcbbdb95b363293de6ce0feac00 +Subproject commit 7869a4e1255a7657bc8582a58adcf5aa2a3f3c70 diff --git a/test/bdd/steps/nominatim_environment.py b/test/bdd/steps/nominatim_environment.py index 170dd639..6381e4b4 100644 --- a/test/bdd/steps/nominatim_environment.py +++ b/test/bdd/steps/nominatim_environment.py @@ -107,7 +107,8 @@ class NominatimEnvironment: self.website_dir = tempfile.TemporaryDirectory() cfg = Configuration(None, self.src_dir / 'settings', environ=self.test_env) - refresh.setup_website(Path(self.website_dir.name) / 'website', self.src_dir / 'lib-php', cfg) + cfg.lib_dir.php = self.src_dir / 'lib-php' + refresh.setup_website(Path(self.website_dir.name) / 'website', cfg) def get_libpq_dsn(self): dsn = self.test_env['NOMINATIM_DATABASE_DSN'] diff --git a/test/python/conftest.py b/test/python/conftest.py index 871365d9..4b9749c0 100644 --- a/test/python/conftest.py +++ b/test/python/conftest.py @@ -33,8 +33,6 @@ class _TestingCursor(psycopg2.extras.DictCursor): """ Execute a query and return the result as a set of tuples. """ self.execute(sql, params) - if self.rowcount == 1: - return set(tuple(self.fetchone())) return set((tuple(row) for row in self)) @@ -128,7 +126,12 @@ def table_factory(temp_db_cursor): @pytest.fixture def def_config(): - return Configuration(None, SRC_DIR.resolve() / 'settings') + cfg = Configuration(None, SRC_DIR.resolve() / 'settings') + cfg.set_libdirs(module='.', osm2pgsql='.', + php=SRC_DIR / 'lib-php', + sql=SRC_DIR / 'lib-sql', + data=SRC_DIR / 'data') + return cfg @pytest.fixture def src_dir(): @@ -277,7 +280,11 @@ def osm2pgsql_options(temp_db): main_data='', main_index='')) @pytest.fixture -def sql_preprocessor(temp_db_conn, tmp_path, def_config, monkeypatch, table_factory): +def sql_preprocessor(temp_db_conn, tmp_path, monkeypatch, table_factory): monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') table_factory('country_name', 'partition INT', (0, 1, 2)) - return SQLPreprocessor(temp_db_conn, def_config, tmp_path) + cfg = Configuration(None, SRC_DIR.resolve() / 'settings') + cfg.set_libdirs(module='.', osm2pgsql='.', php=SRC_DIR / 'lib-php', + sql=tmp_path, data=SRC_DIR / 'data') + + return SQLPreprocessor(temp_db_conn, cfg) diff --git a/test/python/test_cli.py b/test/python/test_cli.py index eb0ee584..afa01e57 100644 --- a/test/python/test_cli.py +++ b/test/python/test_cli.py @@ -21,6 +21,7 @@ import nominatim.tools.check_database import nominatim.tools.database_import import nominatim.tools.freeze import nominatim.tools.refresh +import nominatim.tools.postcodes from mocks import MockParamCapture @@ -48,6 +49,7 @@ def mock_run_legacy(monkeypatch): def mock_func_factory(monkeypatch): def get_mock(module, func): mock = MockParamCapture() + mock.func_name = func monkeypatch.setattr(module, func, mock) return mock @@ -96,20 +98,74 @@ def test_import_full(temp_db, mock_func_factory): mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), mock_func_factory(nominatim.tools.database_import, 'create_country_names'), mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), mock_func_factory(nominatim.tools.refresh, 'setup_website'), mock_func_factory(nominatim.db.properties, 'set_property') ] cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions') - mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script') assert 0 == call_nominatim('import', '--osm-file', __file__) assert cf_mock.called > 1 for mock in mocks: - assert mock.called == 1 + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_load_data(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'), + mock_func_factory(nominatim.tools.database_import, 'load_data'), + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'load-data') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + +def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'indexing') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + # Calling it again still works for the index + assert 0 == call_nominatim('import', '--continue', 'indexing') + assert temp_db_conn.index_exists('idx_placex_pendingsector') + + +def test_import_continue_postprocess(temp_db, mock_func_factory): + mocks = [ + mock_func_factory(nominatim.tools.database_import, 'create_search_indices'), + mock_func_factory(nominatim.tools.database_import, 'create_country_names'), + mock_func_factory(nominatim.tools.refresh, 'setup_website'), + mock_func_factory(nominatim.db.properties, 'set_property') + ] + + assert 0 == call_nominatim('import', '--continue', 'db-postprocess') + + for mock in mocks: + assert mock.called == 1, "Mock '{}' not called".format(mock.func_name) + def test_freeze_command(mock_func_factory, temp_db): mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables') diff --git a/test/python/test_tools_import_special_phrases.py b/test/python/test_tools_import_special_phrases.py index 9ca6bbeb..4890e0b2 100644 --- a/test/python/test_tools_import_special_phrases.py +++ b/test/python/test_tools_import_special_phrases.py @@ -13,66 +13,46 @@ from nominatim.tools.special_phrases import SpecialPhrasesImporter TEST_BASE_DIR = Path(__file__) / '..' / '..' def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table, - temp_db_conn): + temp_db_cursor): """ Check for the fetch_existing_words_phrases() method. It should return special phrase term added to the word table. """ - with temp_db_conn.cursor() as temp_db_cursor: - query =""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'class', 'type', null, 0, 'near'); - """ - temp_db_cursor.execute(query) + query =""" + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', + 'class', 'type', null, 0, 'near'); + """ + temp_db_cursor.execute(query) assert not special_phrases_importer.words_phrases_to_delete special_phrases_importer._fetch_existing_words_phrases() contained_phrase = special_phrases_importer.words_phrases_to_delete.pop() assert contained_phrase == ('normalized_word', 'class', 'type', 'near') -def test_fetch_existing_words_phrases_housenumber(special_phrases_importer, word_table, - temp_db_conn): +@pytest.mark.parametrize("house_type", ['house', 'postcode']) +def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table, + house_type, temp_db_cursor): """ Check for the fetch_existing_words_phrases() method. - It should return nothing as the term added correspond - to a housenumber term. - """ - with temp_db_conn.cursor() as temp_db_cursor: - query =""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'place', 'house', null, 0, 'near'); - """ - temp_db_cursor.execute(query) - - special_phrases_importer._fetch_existing_words_phrases() - assert not special_phrases_importer.words_phrases_to_delete - -def test_fetch_existing_words_phrases_postcode(special_phrases_importer, word_table, - temp_db_conn): + It should return nothing as the terms added correspond + to a housenumber and postcode term. """ - Check for the fetch_existing_words_phrases() method. - It should return nothing as the term added correspond - to a postcode term. + query =""" + INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', + 'place', %s, null, 0, 'near'); """ - with temp_db_conn.cursor() as temp_db_cursor: - query =""" - INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word', - 'place', 'postcode', null, 0, 'near'); - """ - temp_db_cursor.execute(query) + temp_db_cursor.execute(query, (house_type,)) special_phrases_importer._fetch_existing_words_phrases() assert not special_phrases_importer.words_phrases_to_delete -def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_conn): +def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor): """ Check for the fetch_existing_place_classtype_tables() method. It should return the table just created. """ - with temp_db_conn.cursor() as temp_db_cursor: - query = 'CREATE TABLE place_classtype_testclasstypetable()' - temp_db_cursor.execute(query) + temp_db_cursor.execute('CREATE TABLE place_classtype_testclasstypetable()') special_phrases_importer._fetch_existing_place_classtype_tables() contained_table = special_phrases_importer.table_phrases_to_delete.pop() @@ -84,13 +64,11 @@ def test_check_sanity_class(special_phrases_importer): If a wrong class or type is given, an UsageError should raise. If a good class and type are given, nothing special happens. """ - with pytest.raises(UsageError): - special_phrases_importer._check_sanity('en', '', 'type') - with pytest.raises(UsageError): - special_phrases_importer._check_sanity('en', 'class', '') + assert not special_phrases_importer._check_sanity('en', '', 'type') + assert not special_phrases_importer._check_sanity('en', 'class', '') - special_phrases_importer._check_sanity('en', 'class', 'type') + assert special_phrases_importer._check_sanity('en', 'class', 'type') def test_load_white_and_black_lists(special_phrases_importer): """ @@ -147,7 +125,7 @@ def test_convert_settings_giving_json(special_phrases_importer): assert returned == json_file def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs, - temp_db_conn): + temp_db_conn, word_table): """ Test that _process_amenity() execute well the getorcreate_amenityoperator() SQL function and that @@ -157,13 +135,13 @@ def test_process_amenity_with_operator(special_phrases_importer, getorcreate_ame special_phrases_importer._process_amenity('', '', '', '', 'in') with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'") + temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'") results = temp_db_cursor.fetchall() assert len(results) == 2 def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs, - temp_db_conn): + temp_db_conn, word_table): """ Test that _process_amenity() execute well the getorcreate_amenity() SQL function. @@ -171,7 +149,7 @@ def test_process_amenity_without_operator(special_phrases_importer, getorcreate_ special_phrases_importer._process_amenity('', '', '', '', '') with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'") + temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'") result = temp_db_cursor.fetchone() assert result @@ -221,8 +199,8 @@ def test_grant_access_to_web_user(temp_db_conn, def_config, special_phrases_impo assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type) def test_create_place_classtype_table_and_indexes( - temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs, - getorcreate_amenityoperator_funcs, special_phrases_importer): + temp_db_conn, def_config, placex_table, + special_phrases_importer): """ Test that _create_place_classtype_table_and_indexes() create the right place_classtype tables and place_id indexes @@ -238,7 +216,7 @@ def test_create_place_classtype_table_and_indexes( assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1]) assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1]) -def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, +def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table, getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs): """ Test that _process_xml_content() process the given xml content right @@ -310,17 +288,22 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases Check that the main import_from_wiki() method is well executed. It should create the place_classtype table, the place_id and centroid indexes, grand access to the web user and executing the SQL functions for amenities. + It should also update the database well by deleting or preserving existing entries + of the database. """ - mock_fetch_existing_words_phrases = MockParamCapture() - mock_fetch_existing_place_classtype_tables = MockParamCapture() - mock_remove_non_existent_phrases_from_db = MockParamCapture() + #Add some data to the database before execution in order to test + #what is deleted and what is preserved. + with temp_db_conn.cursor() as temp_db_cursor: + temp_db_cursor.execute(""" + INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter', + 'amenity', 'animal_shelter', null, 0, null); + + INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word', + 'wrong_class', 'wrong_type', null, 0, 'near'); + + CREATE TABLE place_classtype_amenity_animal_shelter(); + CREATE TABLE place_classtype_wrongclass_wrongtype();""") - monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_words_phrases', - mock_fetch_existing_words_phrases) - monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._fetch_existing_place_classtype_tables', - mock_fetch_existing_place_classtype_tables) - monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._remove_non_existent_phrases_from_db', - mock_remove_non_existent_phrases_from_db) monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content) special_phrases_importer.import_from_wiki(['en']) @@ -332,9 +315,45 @@ def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test) assert check_amenities_with_op(temp_db_conn) assert check_amenities_without_op(temp_db_conn) - assert mock_fetch_existing_words_phrases.called == 1 - assert mock_fetch_existing_place_classtype_tables.called == 1 - assert mock_remove_non_existent_phrases_from_db.called == 1 + assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter') + assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type') + + #Format (query, should_return_something_bool) use to easily execute all asserts + queries_tests = set() + + #Used to check that the correct phrase already in the word table before is still there. + query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'" + queries_tests.add((query_correct_word, True)) + + #Used to check if wrong phrase was deleted from the word table of the database. + query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'" + queries_tests.add((query_wrong_word, False)) + + #Used to check that correct place_classtype table already in the datase before is still there. + query_existing_table = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name = 'place_classtype_amenity_animal_shelter'; + """ + queries_tests.add((query_existing_table, True)) + + #Used to check that wrong place_classtype table was deleted from the database. + query_wrong_table = """ + SELECT table_name + FROM information_schema.tables + WHERE table_schema='public' + AND table_name = 'place_classtype_wrongclass_wrongtype'; + """ + queries_tests.add((query_wrong_table, False)) + + with temp_db_conn.cursor() as temp_db_cursor: + for query in queries_tests: + temp_db_cursor.execute(query[0]) + if (query[1] == True): + assert temp_db_cursor.fetchone() + else: + assert not temp_db_cursor.fetchone() def mock_get_wiki_content(lang): """ @@ -400,7 +419,7 @@ def check_amenities_with_op(temp_db_conn): contains more than one value (so that the SQL function was call more than one time). """ with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_with_operator") + temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'") return len(temp_db_cursor.fetchall()) > 1 def check_amenities_without_op(temp_db_conn): @@ -409,7 +428,7 @@ def check_amenities_without_op(temp_db_conn): contains more than one value (so that the SQL function was call more than one time). """ with temp_db_conn.cursor() as temp_db_cursor: - temp_db_cursor.execute("SELECT * FROM temp_without_operator") + temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'") return len(temp_db_cursor.fetchall()) > 1 @pytest.fixture @@ -458,13 +477,12 @@ def make_strandard_name_func(temp_db_cursor): @pytest.fixture def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func): temp_db_cursor.execute(""" - CREATE TABLE temp_without_operator(op TEXT); - CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT, lookup_class text, lookup_type text) RETURNS void as $$ BEGIN - INSERT INTO temp_without_operator VALUES('no_operator'); + INSERT INTO word VALUES(null, lookup_word, normalized_word, + lookup_class, lookup_type, null, 0, 'no_operator'); END; $$ LANGUAGE plpgsql""") @@ -477,6 +495,7 @@ def getorcreate_amenityoperator_funcs(temp_db_cursor, make_strandard_name_func): lookup_class text, lookup_type text, op text) RETURNS void as $$ BEGIN - INSERT INTO temp_with_operator VALUES(op); + INSERT INTO word VALUES(null, lookup_word, normalized_word, + lookup_class, lookup_type, null, 0, op); END; $$ LANGUAGE plpgsql""") \ No newline at end of file diff --git a/test/python/test_tools_postcodes.py b/test/python/test_tools_postcodes.py new file mode 100644 index 00000000..1fc060b0 --- /dev/null +++ b/test/python/test_tools_postcodes.py @@ -0,0 +1,50 @@ +""" +Tests for functions to maintain the artificial postcode table. +""" + +import pytest + +from nominatim.tools import postcodes + +@pytest.fixture +def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory, + placex_table, word_table): + table_factory('location_postcode', + """ place_id BIGINT, + parent_place_id BIGINT, + rank_search SMALLINT, + rank_address SMALLINT, + indexed_status SMALLINT, + indexed_date TIMESTAMP, + country_code varchar(2), + postcode TEXT, + geometry GEOMETRY(Geometry, 4326)""") + temp_db_cursor.execute('CREATE SEQUENCE seq_place') + temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT) + RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql; + """) + + +def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path): + postcodes.import_postcodes(dsn, tmp_path) + + assert temp_db_cursor.table_exists('gb_postcode') + assert temp_db_cursor.table_exists('us_postcode') + assert temp_db_cursor.table_rows('location_postcode') == 0 + + +def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path): + temp_db_cursor.execute(""" + INSERT INTO placex (place_id, country_code, address, geometry) + VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)') + """) + + postcodes.import_postcodes(dsn, tmp_path) + + rows = temp_db_cursor.row_set(""" SELECT postcode, country_code, + ST_X(geometry), ST_Y(geometry) + FROM location_postcode""") + print(rows) + assert len(rows) == 1 + assert rows == set((('9486', 'xx', 10, 12), )) + diff --git a/test/python/test_tools_refresh_create_functions.py b/test/python/test_tools_refresh_create_functions.py index 40d4c81a..53ea2b52 100644 --- a/test/python/test_tools_refresh_create_functions.py +++ b/test/python/test_tools_refresh_create_functions.py @@ -5,6 +5,11 @@ import pytest from nominatim.tools.refresh import create_functions +@pytest.fixture +def sql_tmp_path(tmp_path, def_config): + def_config.lib_dir.sql = tmp_path + return tmp_path + @pytest.fixture def conn(temp_db_conn, table_factory, monkeypatch): monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', '.') @@ -12,8 +17,8 @@ def conn(temp_db_conn, table_factory, monkeypatch): return temp_db_conn -def test_create_functions(temp_db_cursor, conn, def_config, tmp_path): - sqlfile = tmp_path / 'functions.sql' +def test_create_functions(temp_db_cursor, conn, def_config, sql_tmp_path): + sqlfile = sql_tmp_path / 'functions.sql' sqlfile.write_text("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER AS $$ BEGIN @@ -22,14 +27,14 @@ def test_create_functions(temp_db_cursor, conn, def_config, tmp_path): $$ LANGUAGE plpgsql IMMUTABLE; """) - create_functions(conn, def_config, tmp_path) + create_functions(conn, def_config) assert temp_db_cursor.scalar('SELECT test()') == 43 @pytest.mark.parametrize("dbg,ret", ((True, 43), (False, 22))) -def test_create_functions_with_template(temp_db_cursor, conn, def_config, tmp_path, dbg, ret): - sqlfile = tmp_path / 'functions.sql' +def test_create_functions_with_template(temp_db_cursor, conn, def_config, sql_tmp_path, dbg, ret): + sqlfile = sql_tmp_path / 'functions.sql' sqlfile.write_text("""CREATE OR REPLACE FUNCTION test() RETURNS INTEGER AS $$ BEGIN @@ -42,6 +47,6 @@ def test_create_functions_with_template(temp_db_cursor, conn, def_config, tmp_pa $$ LANGUAGE plpgsql IMMUTABLE; """) - create_functions(conn, def_config, tmp_path, enable_debug=dbg) + create_functions(conn, def_config, enable_debug=dbg) assert temp_db_cursor.scalar('SELECT test()') == ret diff --git a/test/python/test_tools_refresh_setup_website.py b/test/python/test_tools_refresh_setup_website.py index 126fc561..18b146fc 100644 --- a/test/python/test_tools_refresh_setup_website.py +++ b/test/python/test_tools_refresh_setup_website.py @@ -25,7 +25,8 @@ def test_script(envdir): def run_website_script(envdir, config): - refresh.setup_website(envdir, envdir / 'php', config) + config.lib_dir.php = envdir / 'php' + refresh.setup_website(envdir, config) proc = subprocess.run(['/usr/bin/env', 'php', '-Cq', envdir / 'search.php'], check=False) diff --git a/test/python/test_tools_tiger_data.py b/test/python/test_tools_tiger_data.py index 1366fe3e..1ed89782 100644 --- a/test/python/test_tools_tiger_data.py +++ b/test/python/test_tools_tiger_data.py @@ -10,57 +10,52 @@ from nominatim.tools import tiger_data, database_import @pytest.mark.parametrize("threads", (1, 5)) -def test_add_tiger_data(dsn, src_dir, def_config, tmp_path, sql_preprocessor, - temp_db_cursor, threads, temp_db): - temp_db_cursor.execute('CREATE EXTENSION hstore') - temp_db_cursor.execute('CREATE EXTENSION postgis') +def test_add_tiger_data(def_config, tmp_path, sql_preprocessor, + temp_db_cursor, threads, temp_db_with_extensions): temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.sql' sqlfile.write_text("""INSERT INTO place values (1); INSERT INTO non_existant_table values (1);""") - tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql') + tiger_data.add_tiger_data(str(tmp_path), def_config, threads) assert temp_db_cursor.table_rows('place') == 1 + @pytest.mark.parametrize("threads", (1, 5)) -def test_add_tiger_data_bad_file(dsn, src_dir, def_config, tmp_path, sql_preprocessor, - temp_db_cursor, threads, temp_db): - temp_db_cursor.execute('CREATE EXTENSION hstore') - temp_db_cursor.execute('CREATE EXTENSION postgis') +def test_add_tiger_data_bad_file(def_config, tmp_path, sql_preprocessor, + temp_db_cursor, threads, temp_db_with_extensions): temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.txt' sqlfile.write_text("""Random text""") - tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql') + tiger_data.add_tiger_data(str(tmp_path), def_config, threads) assert temp_db_cursor.table_rows('place') == 0 + @pytest.mark.parametrize("threads", (1, 5)) -def test_add_tiger_data_tarfile(dsn, src_dir, def_config, tmp_path, - temp_db_cursor, threads, temp_db, sql_preprocessor): - temp_db_cursor.execute('CREATE EXTENSION hstore') - temp_db_cursor.execute('CREATE EXTENSION postgis') +def test_add_tiger_data_tarfile(def_config, tmp_path, temp_db_cursor, + threads, temp_db_with_extensions, sql_preprocessor): temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.sql' sqlfile.write_text("""INSERT INTO place values (1); INSERT INTO non_existant_table values (1);""") - tar = tarfile.open("sample.tar.gz", "w:gz") + tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz") tar.add(sqlfile) tar.close() - tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql') - + tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads) + assert temp_db_cursor.table_rows('place') == 1 + @pytest.mark.parametrize("threads", (1, 5)) -def test_add_tiger_data_bad_tarfile(dsn, src_dir, def_config, tmp_path, - temp_db_cursor, threads, temp_db, sql_preprocessor): - temp_db_cursor.execute('CREATE EXTENSION hstore') - temp_db_cursor.execute('CREATE EXTENSION postgis') +def test_add_tiger_data_bad_tarfile(def_config, tmp_path, temp_db_cursor, threads, + temp_db_with_extensions, sql_preprocessor): temp_db_cursor.execute('CREATE TABLE place (id INT)') sqlfile = tmp_path / '1010.txt' sqlfile.write_text("""Random text""") - tar = tarfile.open("sample.tar.gz", "w:gz") + tar = tarfile.open(str(tmp_path / 'sample.tar.gz'), "w:gz") tar.add(sqlfile) tar.close() - tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql') - - assert temp_db_cursor.table_rows('place') == 0 \ No newline at end of file + tiger_data.add_tiger_data(str(tmp_path / 'sample.tar.gz'), def_config, threads) + + assert temp_db_cursor.table_rows('place') == 0 diff --git a/test/testdata/special_phrases_test_content.txt b/test/testdata/special_phrases_test_content.txt index bc8c65d0..739ded0d 100644 --- a/test/testdata/special_phrases_test_content.txt +++ b/test/testdata/special_phrases_test_content.txt @@ -3,7 +3,7 @@ OpenStreetMap Wiki wiki https://wiki.openstreetmap.org/wiki/Main_Page -MediaWiki 1.35.1 +MediaWiki 1.35.2 first-letter Media @@ -70,7 +70,7 @@ wikitext text/x-wiki -== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]] +== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || near|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || in || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]] cst5x7tt58izti1pxzgljf27tx8qjcj