steps:
- name: Remove existing PostgreSQL
run: |
- sudo apt-get update -qq
sudo apt-get purge -yq postgresql*
+ sudo sh -c 'echo "deb http://apt.postgresql.org/pub/repos/apt $(lsb_release -cs)-pgdg main" > /etc/apt/sources.list.d/pgdg.list'
+ sudo apt-get update -qq
+
shell: bash
- name: Install PostgreSQL
uses: shivammathur/setup-php@v2
with:
php-version: '7.4'
- tools: phpunit, phpcs
+ tools: phpunit, phpcs, composer
- name: Get Date
id: get-date
- uses: ./Nominatim/.github/actions/build-nominatim
- name: Install test prerequsites
- run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave
+ run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage php-xdebug
- name: PHP linting
run: phpcs --report-width=120 .
working-directory: Nominatim
- name: PHP unit tests
- run: phpunit ./
+ run: phpunit --coverage-clover ../../coverage-php.xml ./
working-directory: Nominatim/test/php
- name: Python unit tests
- run: py.test-3 test/python
+ run: py.test-3 --cov=nominatim --cov-report=xml test/python
working-directory: Nominatim
- name: BDD tests
- run: behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3
+ run: |
+ behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov
+ composer require phpunit/phpcov:7.0.2
+ vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov
working-directory: Nominatim/test/bdd
+ - name: Upload coverage to Codecov
+ uses: codecov/codecov-action@v1
+ with:
+ files: ./Nominatim/coverage*.xml
+ directory: ./
+ name: codecov-umbrella
+ fail_ci_if_error: true
+ path_to_write_report: ./coverage/codecov_report.txt
+ verbose: true
+
import:
runs-on: ubuntu-20.04
" wget -O ${PROJECT_SOURCE_DIR}/data/country_osm_grid.sql.gz https://www.nominatim.org/data/country_grid.sql.gz")
endif()
- set(CUSTOMSCRIPTS
- check_import_finished.php
- country_languages.php
- export.php
- query.php
- setup.php
- update.php
- warm.php
- )
-
- foreach (script_source ${CUSTOMSCRIPTS})
- configure_file(${PROJECT_SOURCE_DIR}/cmake/script.tmpl
- ${PROJECT_BINARY_DIR}/utils/${script_source})
- endforeach()
-
configure_file(${PROJECT_SOURCE_DIR}/cmake/tool.tmpl
${PROJECT_BINARY_DIR}/nominatim)
endif()
[![Build Status](https://github.com/osm-search/Nominatim/workflows/CI%20Tests/badge.svg)](https://github.com/osm-search/Nominatim/actions?query=workflow%3A%22CI+Tests%22)
+[![codecov](https://codecov.io/gh/osm-search/Nominatim/branch/master/graph/badge.svg?token=8P1LXrhCMy)](https://codecov.io/gh/osm-search/Nominatim)
Nominatim
=========
+++ /dev/null
-#!@PHP_BIN@ -Cq
-<?php
-require('@CMAKE_SOURCE_DIR@/lib-php/dotenv_loader.php');
-
-@define('CONST_Default_ModulePath', '@CMAKE_BINARY_DIR@/module');
-@define('CONST_Default_Osm2pgsql', '@CMAKE_BINARY_DIR@/osm2pgsql/osm2pgsql');
-@define('CONST_DataDir', '@CMAKE_SOURCE_DIR@/data');
-@define('CONST_SqlDir', '@CMAKE_SOURCE_DIR@/lib-sql');
-@define('CONST_ConfigDir', '@CMAKE_SOURCE_DIR@/settings');
-
-loadDotEnv();
-$_SERVER['NOMINATIM_NOMINATIM_TOOL'] = '@CMAKE_BINARY_DIR@/nominatim';
-
-require_once('@CMAKE_SOURCE_DIR@/lib-php/admin/@script_source@');
--- /dev/null
+codecov:
+ require_ci_to_pass: yes
+
+coverage:
+ status:
+ project: off
+ patch: off
+
+comment:
+ require_changes: true
+ after_n_builds: 2
+
+fixes:
+ - "Nominatim/::"
\ No newline at end of file
## 3.6.0 -> 3.7.0
+### New format and name of configuration file
+
+The configuration for an import is now saved in a `.env` file in the project
+directory. This file follows the dotenv format. For more information, see
+the [installation chapter](Import.md#configuration-setup-in-env).
+
+To migrate to the new system, create a new project directory, add the `.env`
+file and port your custom configuration from `settings/local.php`. Most
+settings are named similarly and have only received a `NOMINATIM_` prefix.
+Use the default settings in `settings/env.defaults` as a reference.
+
### New location for data files
External data files for Wikipedia importance, postcodes etc. are no longer
protected $aLangPrefOrder = array();
protected $aExcludePlaceIDs = array();
- protected $bReverseInPlan = true;
protected $iLimit = 20;
protected $iFinalLimit = 10;
return $this->oNormalizer->transliterate($sTerm);
}
- public function setReverseInPlan($bReverse)
- {
- $this->bReverseInPlan = $bReverse;
- }
-
public function setLanguagePreference($aLangPref)
{
$this->aLangPrefOrder = $aLangPref;
$oParams->getString('country'),
$oParams->getString('postalcode')
);
- $this->setReverseInPlan(false);
} else {
$this->setQuery($sQuery);
}
return false;
}
- public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
+ public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens)
{
/*
Calculate all searches using oValidTokens i.e.
*/
foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
- $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
+ $sPhraseType = $oPhrase->getPhraseType();
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
$sToken,
$oSearchTerm,
- $bIsStructured,
+ (bool) $sPhraseType,
$iPhrase,
$oValidTokens->get(' '.$sToken)
);
// Commas are used to reduce the search space by indicating where phrases split
if ($this->aStructuredQuery) {
$aInPhrases = $this->aStructuredQuery;
- $bStructuredPhrases = true;
} else {
$aInPhrases = explode(',', $sQuery);
- $bStructuredPhrases = false;
}
Debug::printDebugArray('Search context', $oCtx);
Debug::newSection('Search candidates');
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
- if ($this->bReverseInPlan) {
+ if (!$this->aStructuredQuery) {
// Reverse phrase array and also reverse the order of the wordsets in
// the first and final phrase. Don't bother about phrases in the middle
// because order in the address doesn't matter.
if (count($aPhrases) > 1) {
$aPhrases[count($aPhrases)-1]->invertWordSets();
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
'Structured query' => $this->aStructuredQuery,
'Name keys' => Debug::fmtArrayVals($this->aLangPrefOrder),
'Excluded place IDs' => Debug::fmtArrayVals($this->aExcludePlaceIDs),
- 'Try reversed query'=> $this->bReverseInPlan,
'Limit (for searches)' => $this->iLimit,
'Limit (for results)'=> $this->iFinalLimit,
'Country codes' => Debug::fmtArrayVals($this->aCountryCodes),
+++ /dev/null
-<?php
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-
-loadSettings(getcwd());
-
-(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
- ->addParams('admin', '--check-database')
- ->run();
$oGeocode = new Nominatim\Geocode($oDB);
$oGeocode->setLanguagePreference($oParams->getPreferredLanguages(false));
-$oGeocode->setReverseInPlan(true);
$oGeocode->loadParamArray($oParams);
if ($oParams->getBool('search')) {
+++ /dev/null
-<?php
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-require_once(CONST_LibDir.'/setup/SetupClass.php');
-require_once(CONST_LibDir.'/setup_functions.php');
-ini_set('memory_limit', '800M');
-
-use Nominatim\Setup\SetupFunctions as SetupFunctions;
-
-// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
-$aCMDOptions
-= array(
- 'Create and setup nominatim search system',
- array('help', 'h', 0, 1, 0, 0, false, 'Show Help'),
- array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
- array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
-
- array('osm-file', '', 0, 1, 1, 1, 'realpath', 'File to import'),
- array('threads', '', 0, 1, 1, 1, 'int', 'Number of threads (where possible)'),
-
- array('all', '', 0, 1, 0, 0, 'bool', 'Do the complete process'),
-
- array('create-db', '', 0, 1, 0, 0, 'bool', 'Create nominatim db'),
- array('setup-db', '', 0, 1, 0, 0, 'bool', 'Build a blank nominatim db'),
- array('import-data', '', 0, 1, 0, 0, 'bool', 'Import a osm file'),
- array('osm2pgsql-cache', '', 0, 1, 1, 1, 'int', 'Cache size used by osm2pgsql'),
- array('reverse-only', '', 0, 1, 0, 0, 'bool', 'Do not create search tables and indexes'),
- array('create-functions', '', 0, 1, 0, 0, 'bool', 'Create functions'),
- array('enable-diff-updates', '', 0, 1, 0, 0, 'bool', 'Turn on the code required to make diff updates work'),
- array('enable-debug-statements', '', 0, 1, 0, 0, 'bool', 'Include debug warning statements in pgsql commands'),
- array('ignore-errors', '', 0, 1, 0, 0, 'bool', 'Continue import even when errors in SQL are present (EXPERT)'),
- array('create-tables', '', 0, 1, 0, 0, 'bool', 'Create main tables'),
- array('create-partition-tables', '', 0, 1, 0, 0, 'bool', 'Create required partition tables'),
- array('create-partition-functions', '', 0, 1, 0, 0, 'bool', 'Create required partition triggers'),
- array('no-partitions', '', 0, 1, 0, 0, 'bool', 'Do not partition search indices (speeds up import of single country extracts)'),
- array('import-wikipedia-articles', '', 0, 1, 0, 0, 'bool', 'Import wikipedia article dump'),
- array('load-data', '', 0, 1, 0, 0, 'bool', 'Copy data to live tables from import table'),
- array('disable-token-precalc', '', 0, 1, 0, 0, 'bool', 'Disable name precalculation (EXPERT)'),
- array('import-tiger-data', '', 0, 1, 0, 0, 'bool', 'Import tiger data (not included in \'all\')'),
- array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Calculate postcode centroids'),
- array('index', '', 0, 1, 0, 0, 'bool', 'Index the data'),
- array('index-noanalyse', '', 0, 1, 0, 0, 'bool', 'Do not perform analyse operations during index (EXPERT)'),
- array('create-search-indices', '', 0, 1, 0, 0, 'bool', 'Create additional indices required for search and update'),
- array('create-country-names', '', 0, 1, 0, 0, 'bool', 'Create default list of searchable country names'),
- array('drop', '', 0, 1, 0, 0, 'bool', 'Drop tables needed for updates, making the database readonly (EXPERIMENTAL)'),
- array('setup-website', '', 0, 1, 0, 0, 'bool', 'Used to compile environment variables for the website'),
- array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
- );
-
-// $aCMDOptions passed to getCmdOpt by reference
-getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
-
-loadSettings($aCMDResult['project-dir'] ?? getcwd());
-setupHTTPProxy();
-
-$bDidSomething = false;
-
-$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
-
-// by default, use all but one processor, but never more than 15.
-$iInstances = max(1, $aCMDResult['threads'] ?? (min(16, getProcessorCount()) - 1));
-
-function run($oCmd)
-{
- global $iInstances;
- global $aCMDResult;
- $oCmd->addParams('--threads', $iInstances);
- if ($aCMDResult['ignore-errors'] ?? false) {
- $oCmd->addParams('--ignore-errors');
- }
- if ($aCMDResult['quiet'] ?? false) {
- $oCmd->addParams('--quiet');
- }
- if ($aCMDResult['verbose'] ?? false) {
- $oCmd->addParams('--verbose');
- }
- $oCmd->run(true);
-}
-
-
-//*******************************************************
-// Making some sanity check:
-// Check if osm-file is set and points to a valid file
-if ($aCMDResult['import-data'] || $aCMDResult['all']) {
- // to remain in /lib/setup_functions.php function
- checkInFile($aCMDResult['osm-file']);
-}
-
-// ******************************************************
-// instantiate Setup class
-$oSetup = new SetupFunctions($aCMDResult);
-
-// *******************************************************
-// go through complete process if 'all' is selected or start selected functions
-if ($aCMDResult['create-db'] || $aCMDResult['all']) {
- $bDidSomething = true;
- run((clone($oNominatimCmd))->addParams('transition', '--create-db'));
-}
-
-if ($aCMDResult['setup-db'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oCmd = (clone($oNominatimCmd))->addParams('transition', '--setup-db');
-
- if ($aCMDResult['no-partitions'] ?? false) {
- $oCmd->addParams('--no-partitions');
- }
-
- run($oCmd);
-}
-
-if ($aCMDResult['import-data'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oCmd = (clone($oNominatimCmd))
- ->addParams('transition', '--import-data')
- ->addParams('--osm-file', $aCMDResult['osm-file']);
- if ($aCMDResult['drop'] ?? false) {
- $oCmd->addParams('--drop');
- }
-
- run($oCmd);
-}
-
-if ($aCMDResult['create-functions'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oSetup->createSqlFunctions();
-}
-
-if ($aCMDResult['create-tables'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-tables');
-
- if ($aCMDResult['reverse-only'] ?? false) {
- $oCmd->addParams('--reverse-only');
- }
-
- run($oCmd);
-}
-
-if ($aCMDResult['create-partition-tables'] || $aCMDResult['all']) {
- $bDidSomething = true;
- run((clone($oNominatimCmd))->addParams('transition', '--create-partition-tables'));
-}
-
-if ($aCMDResult['create-partition-functions'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oSetup->createSqlFunctions(); // also create partition functions
-}
-
-if ($aCMDResult['import-wikipedia-articles'] || $aCMDResult['all']) {
- $bDidSomething = true;
- // ignore errors!
- (clone($oNominatimCmd))->addParams('refresh', '--wiki-data')->run();
-}
-
-if ($aCMDResult['load-data'] || $aCMDResult['all']) {
- $bDidSomething = true;
- run((clone($oNominatimCmd))->addParams('transition', '--load-data'));
-}
-
-if ($aCMDResult['import-tiger-data']) {
- $bDidSomething = true;
- $sTigerPath = getSetting('TIGER_DATA_PATH', CONST_InstallDir.'/tiger');
- run((clone($oNominatimCmd))->addParams('transition', '--tiger-data', $sTigerPath));
-}
-
-if ($aCMDResult['calculate-postcodes'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oSetup->calculatePostcodes($aCMDResult['all']);
-}
-
-if ($aCMDResult['index'] || $aCMDResult['all']) {
- $bDidSomething = true;
- $oCmd = (clone($oNominatimCmd))->addParams('transition', '--index');
- if ($aCMDResult['index-noanalyse'] ?? false) {
- $oCmd->addParams('--no-analyse');
- }
-
- run($oCmd);
-}
-
-if ($aCMDResult['drop']) {
- $bDidSomething = true;
- run((clone($oNominatimCmd))->addParams('freeze'));
-}
-
-if ($aCMDResult['create-search-indices'] || $aCMDResult['all']) {
- $bDidSomething = true;
-
- $oCmd = (clone($oNominatimCmd))->addParams('transition', '--create-search-indices');
-
- if ($aCMDResult['drop'] ?? false) {
- $oCmd->addParams('--drop');
- }
-
- run($oCmd);
-}
-
-if ($aCMDResult['create-country-names'] || $aCMDResult['all']) {
- $bDidSomething = true;
- run(clone($oNominatimCmd))->addParams('transition', '--create-country-names');
-}
-
-if ($aCMDResult['setup-website'] || $aCMDResult['all']) {
- $bDidSomething = true;
- run((clone($oNominatimCmd))->addParams('refresh', '--website'));
-}
-
-// ******************************************************
-// If we did something, repeat the warnings
-if (!$bDidSomething) {
- showUsage($aCMDOptions, true);
-} else {
- echo "Summary of warnings:\n\n";
- repeatWarnings();
- echo "\n";
- info('Setup finished.');
-}
+++ /dev/null
-
-<?php
-@define('CONST_LibDir', dirname(dirname(__FILE__)));
-
-require_once(CONST_LibDir.'/init-cmd.php');
-
-loadSettings(getcwd());
-
-(new \Nominatim\Shell(getSetting('NOMINATIM_TOOL')))
- ->addParams('special-phrases', '--import-from-wiki')
- ->run();
require_once(CONST_LibDir.'/init-cmd.php');
require_once(CONST_LibDir.'/setup_functions.php');
-require_once(CONST_LibDir.'/setup/SetupClass.php');
ini_set('memory_limit', '800M');
-use Nominatim\Setup\SetupFunctions as SetupFunctions;
-
// (long-opt, short-opt, min-occurs, max-occurs, num-arguments, num-arguments, type, help)
$aCMDOptions
= array(
array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output'),
array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output'),
- array('init-updates', '', 0, 1, 0, 0, 'bool', 'Set up database for updating'),
- array('check-for-updates', '', 0, 1, 0, 0, 'bool', 'Check if new updates are available'),
- array('no-update-functions', '', 0, 1, 0, 0, 'bool', 'Do not update trigger functions to support differential updates (assuming the diff update logic is already present)'),
- array('import-osmosis', '', 0, 1, 0, 0, 'bool', 'Import updates once'),
- array('import-osmosis-all', '', 0, 1, 0, 0, 'bool', 'Import updates forever'),
- array('no-index', '', 0, 1, 0, 0, 'bool', 'Do not index the new data'),
-
array('calculate-postcodes', '', 0, 1, 0, 0, 'bool', 'Update postcode centroid table'),
array('import-file', '', 0, 1, 1, 1, 'realpath', 'Re-import data from an OSM file'),
array('import-relation', '', 0, 1, 1, 1, 'int', 'Re-import relation'),
array('import-from-main-api', '', 0, 1, 0, 0, 'bool', 'Use OSM API instead of Overpass to download objects'),
- array('index', '', 0, 1, 0, 0, 'bool', 'Index'),
- array('index-rank', '', 0, 1, 1, 1, 'int', 'Rank to start indexing from'),
- array('index-instances', '', 0, 1, 1, 1, 'int', 'Number of indexing instances (threads)'),
-
- array('recompute-word-counts', '', 0, 1, 0, 0, 'bool', 'Compute frequency of full-word search terms'),
- array('update-address-levels', '', 0, 1, 0, 0, 'bool', 'Reimport address level configuration (EXPERT)'),
- array('recompute-importance', '', 0, 1, 0, 0, 'bool', 'Recompute place importances'),
-
array('project-dir', '', 0, 1, 1, 1, 'realpath', 'Base directory of the Nominatim installation (default: .)'),
);
loadSettings($aCMDResult['project-dir'] ?? getcwd());
setupHTTPProxy();
-if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
-if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
-
date_default_timezone_set('Etc/UTC');
$oDB = new Nominatim\DB();
);
}
-$oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
-
-function run($oCmd)
-{
- global $aCMDResult;
- if ($aCMDResult['quiet'] ?? false) {
- $oCmd->addParams('--quiet');
- }
- if ($aCMDResult['verbose'] ?? false) {
- $oCmd->addParams('--verbose');
- }
- $oCmd->run(true);
-}
-
-
-if ($aResult['init-updates']) {
- $oCmd = (clone($oNominatimCmd))->addParams('replication', '--init');
-
- if ($aResult['no-update-functions']) {
- $oCmd->addParams('--no-update-functions');
- }
-
- run($oCmd);
-}
-
-if ($aResult['check-for-updates']) {
- exit((clone($oNominatimCmd))->addParams('replication', '--check-for-updates')->run());
-}
-
if (isset($aResult['import-diff']) || isset($aResult['import-file'])) {
// import diffs and files directly (e.g. from osmosis --rri)
$sNextFile = isset($aResult['import-diff']) ? $aResult['import-diff'] : $aResult['import-file'];
// Don't update the import status - we don't know what this file contains
}
-if ($aResult['calculate-postcodes']) {
- run((clone($oNominatimCmd))->addParams('refresh', '--postcodes'));
-}
-
$sTemporaryFile = CONST_InstallDir.'/osmosischange.osc';
$bHaveDiff = false;
$bUseOSMApi = isset($aResult['import-from-main-api']) && $aResult['import-from-main-api'];
fail("osm2pgsql exited with error level $iRet\n");
}
}
-
-if ($aResult['recompute-word-counts']) {
- run((clone($oNominatimCmd))->addParams('refresh', '--word-counts'));
-}
-
-if ($aResult['index']) {
- run((clone $oNominatimCmd)
- ->addParams('index', '--minrank', $aResult['index-rank'])
- ->addParams('--threads', $aResult['index-instances']));
-}
-
-if ($aResult['update-address-levels']) {
- run((clone($oNominatimCmd))->addParams('refresh', '--address-levels'));
-}
-
-if ($aResult['recompute-importance']) {
- run((clone($oNominatimCmd))->addParams('refresh', '--importance'));
-}
-
-if ($aResult['import-osmosis'] || $aResult['import-osmosis-all']) {
- $oCmd = (clone($oNominatimCmd))
- ->addParams('replication')
- ->addParams('--threads', $aResult['index-instances']);
-
- if (!$aResult['import-osmosis-all']) {
- $oCmd->addParams('--once');
- }
-
- if ($aResult['no-index']) {
- $oCmd->addParams('--no-index');
- }
-
- run($oCmd);
-}
}
-function runSQLScript($sScript, $bfatal = true, $bVerbose = false, $bIgnoreErrors = false)
-{
- // Convert database DSN to psql parameters
- $aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
- if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432;
-
- $oCmd = new \Nominatim\Shell('psql');
- $oCmd->addParams('--port', $aDSNInfo['port']);
- $oCmd->addParams('--dbname', $aDSNInfo['database']);
- if (isset($aDSNInfo['hostspec']) && $aDSNInfo['hostspec']) {
- $oCmd->addParams('--host', $aDSNInfo['hostspec']);
- }
- if (isset($aDSNInfo['username']) && $aDSNInfo['username']) {
- $oCmd->addParams('--username', $aDSNInfo['username']);
- }
- if (isset($aDSNInfo['password'])) {
- $oCmd->addEnvPair('PGPASSWORD', $aDSNInfo['password']);
- }
- if (!$bVerbose) {
- $oCmd->addParams('--quiet');
- }
- if ($bfatal && !$bIgnoreErrors) {
- $oCmd->addParams('-v', 'ON_ERROR_STOP=1');
- }
-
- $aDescriptors = array(
- 0 => array('pipe', 'r'),
- 1 => STDOUT,
- 2 => STDERR
- );
- $ahPipes = null;
- $hProcess = @proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
- if (!is_resource($hProcess)) {
- fail('unable to start pgsql');
- }
-
- if (!$bVerbose) {
- fwrite($ahPipes[0], 'set client_min_messages to WARNING;');
- }
-
- while (strlen($sScript)) {
- $iWritten = fwrite($ahPipes[0], $sScript);
- if ($iWritten <= 0) break;
- $sScript = substr($sScript, $iWritten);
- }
- fclose($ahPipes[0]);
- $iReturn = proc_close($hProcess);
- if ($bfatal && $iReturn > 0) {
- fail("pgsql returned with error code ($iReturn)");
- }
-}
-
function setupHTTPProxy()
{
if (!getSettingBool('HTTP_PROXY')) {
+++ /dev/null
-<?php
-
-namespace Nominatim\Setup;
-
-require_once(CONST_LibDir.'/Shell.php');
-
-class SetupFunctions
-{
- protected $iInstances;
- protected $aDSNInfo;
- protected $bQuiet;
- protected $bVerbose;
- protected $sIgnoreErrors;
- protected $bEnableDiffUpdates;
- protected $bEnableDebugStatements;
- protected $bDrop;
- protected $oDB = null;
- protected $oNominatimCmd;
-
- public function __construct(array $aCMDResult)
- {
- // by default, use all but one processor, but never more than 15.
- $this->iInstances = isset($aCMDResult['threads'])
- ? $aCMDResult['threads']
- : (min(16, getProcessorCount()) - 1);
-
- if ($this->iInstances < 1) {
- $this->iInstances = 1;
- warn('resetting threads to '.$this->iInstances);
- }
-
- // parse database string
- $this->aDSNInfo = \Nominatim\DB::parseDSN(getSetting('DATABASE_DSN'));
- if (!isset($this->aDSNInfo['port'])) {
- $this->aDSNInfo['port'] = 5432;
- }
-
- // setting member variables based on command line options stored in $aCMDResult
- $this->bQuiet = isset($aCMDResult['quiet']) && $aCMDResult['quiet'];
- $this->bVerbose = $aCMDResult['verbose'];
-
- //setting default values which are not set by the update.php array
- if (isset($aCMDResult['ignore-errors'])) {
- $this->sIgnoreErrors = $aCMDResult['ignore-errors'];
- } else {
- $this->sIgnoreErrors = false;
- }
- if (isset($aCMDResult['enable-debug-statements'])) {
- $this->bEnableDebugStatements = $aCMDResult['enable-debug-statements'];
- } else {
- $this->bEnableDebugStatements = false;
- }
- if (isset($aCMDResult['enable-diff-updates'])) {
- $this->bEnableDiffUpdates = $aCMDResult['enable-diff-updates'];
- } else {
- $this->bEnableDiffUpdates = false;
- }
-
- $this->bDrop = isset($aCMDResult['drop']) && $aCMDResult['drop'];
-
- $this->oNominatimCmd = new \Nominatim\Shell(getSetting('NOMINATIM_TOOL'));
- if ($this->bQuiet) {
- $this->oNominatimCmd->addParams('--quiet');
- }
- if ($this->bVerbose) {
- $this->oNominatimCmd->addParams('--verbose');
- }
- }
-
- public function calculatePostcodes($bCMDResultAll)
- {
- info('Calculate Postcodes');
- $this->pgsqlRunScriptFile(CONST_SqlDir.'/postcode_tables.sql');
-
- $sPostcodeFilename = CONST_InstallDir.'/gb_postcode_data.sql.gz';
- if (file_exists($sPostcodeFilename)) {
- $this->pgsqlRunScriptFile($sPostcodeFilename);
- } else {
- warn('optional external GB postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
- }
-
- $sPostcodeFilename = CONST_InstallDir.'/us_postcode_data.sql.gz';
- if (file_exists($sPostcodeFilename)) {
- $this->pgsqlRunScriptFile($sPostcodeFilename);
- } else {
- warn('optional external US postcode table file ('.$sPostcodeFilename.') not found. Skipping.');
- }
-
-
- $this->db()->exec('TRUNCATE location_postcode');
-
- $sSQL = 'INSERT INTO location_postcode';
- $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
- $sSQL .= "SELECT nextval('seq_place'), 1, country_code,";
- $sSQL .= " upper(trim (both ' ' from address->'postcode')) as pc,";
- $sSQL .= ' ST_Centroid(ST_Collect(ST_Centroid(geometry)))';
- $sSQL .= ' FROM placex';
- $sSQL .= " WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'";
- $sSQL .= ' AND geometry IS NOT null';
- $sSQL .= ' GROUP BY country_code, pc';
- $this->db()->exec($sSQL);
-
- // only add postcodes that are not yet available in OSM
- $sSQL = 'INSERT INTO location_postcode';
- $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
- $sSQL .= "SELECT nextval('seq_place'), 1, 'us', postcode,";
- $sSQL .= ' ST_SetSRID(ST_Point(x,y),4326)';
- $sSQL .= ' FROM us_postcode WHERE postcode NOT IN';
- $sSQL .= ' (SELECT postcode FROM location_postcode';
- $sSQL .= " WHERE country_code = 'us')";
- $this->db()->exec($sSQL);
-
- // add missing postcodes for GB (if available)
- $sSQL = 'INSERT INTO location_postcode';
- $sSQL .= ' (place_id, indexed_status, country_code, postcode, geometry) ';
- $sSQL .= "SELECT nextval('seq_place'), 1, 'gb', postcode, geometry";
- $sSQL .= ' FROM gb_postcode WHERE postcode NOT IN';
- $sSQL .= ' (SELECT postcode FROM location_postcode';
- $sSQL .= " WHERE country_code = 'gb')";
- $this->db()->exec($sSQL);
-
- if (!$bCMDResultAll) {
- $sSQL = "DELETE FROM word WHERE class='place' and type='postcode'";
- $sSQL .= 'and word NOT IN (SELECT postcode FROM location_postcode)';
- $this->db()->exec($sSQL);
- }
-
- $sSQL = 'SELECT count(getorcreate_postcode_id(v)) FROM ';
- $sSQL .= '(SELECT distinct(postcode) as v FROM location_postcode) p';
- $this->db()->exec($sSQL);
- }
-
- /**
- * Return the connection to the database.
- *
- * @return Database object.
- *
- * Creates a new connection if none exists yet. Otherwise reuses the
- * already established connection.
- */
- private function db()
- {
- if (is_null($this->oDB)) {
- $this->oDB = new \Nominatim\DB();
- $this->oDB->connect();
- }
-
- return $this->oDB;
- }
-
- private function pgsqlRunScript($sScript, $bfatal = true)
- {
- runSQLScript(
- $sScript,
- $bfatal,
- $this->bVerbose,
- $this->sIgnoreErrors
- );
- }
-
- public function createSqlFunctions()
- {
- $oCmd = (clone($this->oNominatimCmd))
- ->addParams('refresh', '--functions');
-
- if (!$this->bEnableDiffUpdates) {
- $oCmd->addParams('--no-diff-updates');
- }
-
- if ($this->bEnableDebugStatements) {
- $oCmd->addParams('--enable-debug-statements');
- }
-
- $oCmd->run(!$this->sIgnoreErrors);
- }
-
- private function pgsqlRunScriptFile($sFilename)
- {
- if (!file_exists($sFilename)) fail('unable to find '.$sFilename);
-
- $oCmd = (new \Nominatim\Shell('psql'))
- ->addParams('--port', $this->aDSNInfo['port'])
- ->addParams('--dbname', $this->aDSNInfo['database']);
-
- if (!$this->bVerbose) {
- $oCmd->addParams('--quiet');
- }
- if (isset($this->aDSNInfo['hostspec'])) {
- $oCmd->addParams('--host', $this->aDSNInfo['hostspec']);
- }
- if (isset($this->aDSNInfo['username'])) {
- $oCmd->addParams('--username', $this->aDSNInfo['username']);
- }
- if (isset($this->aDSNInfo['password'])) {
- $oCmd->addEnvPair('PGPASSWORD', $this->aDSNInfo['password']);
- }
- $ahGzipPipes = null;
- if (preg_match('/\\.gz$/', $sFilename)) {
- $aDescriptors = array(
- 0 => array('pipe', 'r'),
- 1 => array('pipe', 'w'),
- 2 => array('file', '/dev/null', 'a')
- );
- $oZcatCmd = new \Nominatim\Shell('zcat', $sFilename);
-
- $hGzipProcess = proc_open($oZcatCmd->escapedCmd(), $aDescriptors, $ahGzipPipes);
- if (!is_resource($hGzipProcess)) fail('unable to start zcat');
- $aReadPipe = $ahGzipPipes[1];
- fclose($ahGzipPipes[0]);
- } else {
- $oCmd->addParams('--file', $sFilename);
- $aReadPipe = array('pipe', 'r');
- }
- $aDescriptors = array(
- 0 => $aReadPipe,
- 1 => array('pipe', 'w'),
- 2 => array('file', '/dev/null', 'a')
- );
- $ahPipes = null;
-
- $hProcess = proc_open($oCmd->escapedCmd(), $aDescriptors, $ahPipes, null, $oCmd->aEnv);
- if (!is_resource($hProcess)) fail('unable to start pgsql');
- // TODO: error checking
- while (!feof($ahPipes[1])) {
- echo fread($ahPipes[1], 4096);
- }
- fclose($ahPipes[1]);
- $iReturn = proc_close($hProcess);
- if ($iReturn > 0) {
- fail("pgsql returned with error code ($iReturn)");
- }
- if ($ahGzipPipes) {
- fclose($ahGzipPipes[1]);
- proc_close($hGzipProcess);
- }
- }
-
- private function replaceSqlPatterns($sSql)
- {
- $sSql = str_replace('{www-user}', getSetting('DATABASE_WEBUSER'), $sSql);
-
- $aPatterns = array(
- '{ts:address-data}' => getSetting('TABLESPACE_ADDRESS_DATA'),
- '{ts:address-index}' => getSetting('TABLESPACE_ADDRESS_INDEX'),
- '{ts:search-data}' => getSetting('TABLESPACE_SEARCH_DATA'),
- '{ts:search-index}' => getSetting('TABLESPACE_SEARCH_INDEX'),
- '{ts:aux-data}' => getSetting('TABLESPACE_AUX_DATA'),
- '{ts:aux-index}' => getSetting('TABLESPACE_AUX_INDEX')
- );
-
- foreach ($aPatterns as $sPattern => $sTablespace) {
- if ($sTablespace) {
- $sSql = str_replace($sPattern, 'TABLESPACE "'.$sTablespace.'"', $sSql);
- } else {
- $sSql = str_replace($sPattern, '', $sSql);
- }
- }
-
- return $sSql;
- }
-}
<?php
-function checkInFile($sOSMFile)
-{
- if (!isset($sOSMFile)) {
- fail('missing --osm-file for data import');
- }
-
- if (!file_exists($sOSMFile)) {
- fail('the path supplied to --osm-file does not exist');
- }
-
- if (!is_readable($sOSMFile)) {
- fail('osm-file "' . $aCMDResult['osm-file'] . '" not readable');
- }
-}
-
function getOsm2pgsqlBinary()
{
$sBinary = getSetting('OSM2PGSQL_BINARY');
END IF;
IF bnd.name ? 'name' THEN
- bnd_name := make_standard_name(bnd.name->'name');
+ bnd_name := lower(bnd.name->'name');
IF bnd_name = '' THEN
bnd_name := NULL;
END IF;
IF bnd.extratags ? 'place' and bnd_name is not null THEN
FOR linked_placex IN
SELECT * FROM placex
- WHERE make_standard_name(name->'name') = bnd_name
+ WHERE (position(lower(name->'name') in bnd_name) > 0
+ OR position(bnd_name in lower(name->'name')) > 0)
AND placex.class = 'place' AND placex.type = bnd.extratags->'place'
AND placex.osm_type = 'N'
AND placex.linked_place_id is null
AND placex.rank_search < 26 -- needed to select the right index
- AND _st_covers(bnd.geometry, placex.geometry)
+ AND placex.type != 'postcode'
+ AND ST_Covers(bnd.geometry, placex.geometry)
LOOP
{% if debug %}RAISE WARNING 'Found type-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
AND placex.linked_place_id is null
AND placex.rank_search < 26
AND _st_covers(bnd.geometry, placex.geometry)
- ORDER BY make_standard_name(name->'name') = bnd_name desc
+ ORDER BY lower(name->'name') = bnd_name desc
LOOP
{% if debug %}RAISE WARNING 'Found wikidata-matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
{% if debug %}RAISE WARNING 'Looking for nodes with matching names';{% endif %}
FOR linked_placex IN
SELECT placex.* from placex
- WHERE make_standard_name(name->'name') = bnd_name
+ WHERE lower(name->'name') = bnd_name
AND ((bnd.rank_address > 0
and bnd.rank_address = (compute_place_rank(placex.country_code,
'N', placex.class,
false, placex.postcode)).address_rank)
OR (bnd.rank_address = 0 and placex.rank_search = bnd.rank_search))
AND placex.osm_type = 'N'
+ AND placex.class = 'place'
AND placex.linked_place_id is null
AND placex.rank_search < 26 -- needed to select the right index
- AND _st_covers(bnd.geometry, placex.geometry)
+ AND placex.type != 'postcode'
+ AND ST_Covers(bnd.geometry, placex.geometry)
LOOP
{% if debug %}RAISE WARNING 'Found matching place node %', linked_placex.osm_id;{% endif %}
RETURN linked_placex;
AND rank_address between 4 and 25 AND type != 'postcode'
AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-CREATE INDEX {{sql.if_index_not_exists}} idx_placex_geometry_reverse_placeNode
- ON placex USING gist (geometry) {{db.tablespace.search_index}}
- WHERE osm_type = 'N' AND rank_search between 5 and 25
- AND class = 'place' AND type != 'postcode'
- AND name is not null AND indexed_status = 0 AND linked_place_id is null;
-
CREATE INDEX {{sql.if_index_not_exists}} idx_osmline_parent_place_id
ON location_property_osmline USING BTREE (parent_place_id) {{db.tablespace.search_index}};
CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) {{db.tablespace.address_index}} WHERE linked_place_id IS NOT NULL;
CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) {{db.tablespace.address_index}};
CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) {{db.tablespace.search_index}};
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name')) {{db.tablespace.address_index}} WHERE osm_type='N' and rank_search < 26;
+CREATE INDEX idx_placex_geometry_placenode ON placex
+ USING GIST (geometry) {{db.tablespace.search_index}}
+ WHERE osm_type = 'N' and rank_search < 26
+ and class = 'place' and type != 'postcode' and linked_place_id is null;
CREATE INDEX idx_placex_wikidata on placex USING BTREE ((extratags -> 'wikidata')) {{db.tablespace.address_index}} WHERE extratags ? 'wikidata' and class = 'place' and osm_type = 'N' and rank_search < 26;
DROP SEQUENCE IF EXISTS seq_place;
import argparse
from pathlib import Path
-from .config import Configuration
-from .tools.exec_utils import run_legacy_script, run_php_server
-from .errors import UsageError
-from . import clicmd
-from .clicmd.args import NominatimArgs
-from .tools import tiger_data
+from nominatim.config import Configuration
+from nominatim.tools.exec_utils import run_legacy_script, run_php_server
+from nominatim.errors import UsageError
+from nominatim import clicmd
+from nominatim.clicmd.args import NominatimArgs
+from nominatim.tools import tiger_data
LOG = logging.getLogger()
else:
parser.parser.epilog = 'php-cgi not found. Query commands not available.'
- parser.add_subcommand('transition', clicmd.AdminTransition)
-
return parser
Subcommand definitions for the command-line tool.
"""
-from .setup import SetupAll
-from .replication import UpdateReplication
-from .api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
-from .index import UpdateIndex
-from .refresh import UpdateRefresh
-from .admin import AdminFuncs
-from .freeze import SetupFreeze
-from .transition import AdminTransition
-from .special_phrases import ImportSpecialPhrases
+from nominatim.clicmd.setup import SetupAll
+from nominatim.clicmd.replication import UpdateReplication
+from nominatim.clicmd.api import APISearch, APIReverse, APILookup, APIDetails, APIStatus
+from nominatim.clicmd.index import UpdateIndex
+from nominatim.clicmd.refresh import UpdateRefresh
+from nominatim.clicmd.admin import AdminFuncs
+from nominatim.clicmd.freeze import SetupFreeze
+from nominatim.clicmd.special_phrases import ImportSpecialPhrases
"""
import logging
-from ..tools.exec_utils import run_legacy_script
-from ..db.connection import connect
+from nominatim.tools.exec_utils import run_legacy_script
+from nominatim.db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
"""
import logging
-from ..tools.exec_utils import run_api_script
+from nominatim.tools.exec_utils import run_api_script
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
Implementation of the 'freeze' subcommand.
"""
-from ..db.connection import connect
+from nominatim.db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
"""
import psutil
-from ..db import status
-from ..db.connection import connect
+from nominatim.db import status
+from nominatim.db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
import logging
from pathlib import Path
-from ..db.connection import connect
+from nominatim.db.connection import connect
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
import socket
import time
-from ..db import status
-from ..db.connection import connect
-from ..errors import UsageError
+from nominatim.db import status
+from nominatim.db.connection import connect
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import psutil
-from ..tools.exec_utils import run_legacy_script
-from ..db.connection import connect
-from ..db import status, properties
-from ..version import NOMINATIM_VERSION
-from ..errors import UsageError
+from nominatim.db.connection import connect
+from nominatim.db import status, properties
+from nominatim.version import NOMINATIM_VERSION
+from nominatim.errors import UsageError
# Do not repeat documentation of subcommand classes.
# pylint: disable=C0111
from ..tools import database_import
from ..tools import refresh
from ..indexer.indexer import Indexer
+ from ..tools import postcodes
if args.osm_file and not Path(args.osm_file).is_file():
LOG.fatal("OSM file '%s' does not exist.", args.osm_file)
LOG.error('Wikipedia importance dump file not found. '
'Will be using default importances.')
+ if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Initialise tables')
with connect(args.config.get_libpq_dsn()) as conn:
database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
- if args.continue_at is None or args.continue_at == 'load-data':
LOG.warning('Load data into placex table')
database_import.load_data(args.config.get_libpq_dsn(),
args.data_dir,
args.threads or psutil.cpu_count() or 1)
LOG.warning('Calculate postcodes')
- run_legacy_script('setup.php', '--calculate-postcodes',
- nominatim_env=args, throw_on_fail=not args.ignore_errors)
+ postcodes.import_postcodes(args.config.get_libpq_dsn(), args.project_dir)
if args.continue_at is None or args.continue_at in ('load-data', 'indexing'):
+ if args.continue_at is not None and args.continue_at != 'load-data':
+ with connect(args.config.get_libpq_dsn()) as conn:
+ SetupAll._create_pending_index(conn, args.config.TABLESPACE_ADDRESS_INDEX)
LOG.warning('Indexing places')
indexer = Indexer(args.config.get_libpq_dsn(),
args.threads or psutil.cpu_count() or 1)
'{0[0]}.{0[1]}.{0[2]}-{0[3]}'.format(NOMINATIM_VERSION))
return 0
+
+
+ @staticmethod
+ def _create_pending_index(conn, tablespace):
+ """ Add a supporting index for finding places still to be indexed.
+
+ This index is normally created at the end of the import process
+ for later updates. When indexing was partially done, then this
+ index can greatly improve speed going through already indexed data.
+ """
+ if conn.index_exists('idx_placex_pendingsector'):
+ return
+
+ with conn.cursor() as cur:
+ LOG.warning('Creating support index')
+ if tablespace:
+ tablespace = 'TABLESPACE ' + tablespace
+ cur.execute("""CREATE INDEX idx_placex_pendingsector
+ ON placex USING BTREE (rank_address,geometry_sector)
+ {} WHERE indexed_status > 0
+ """.format(tablespace))
+ conn.commit()
+++ /dev/null
-"""
-Implementation of the 'transition' subcommand.
-
-This subcommand provides standins for functions that were available
-through the PHP scripts but are now no longer directly accessible.
-This module will be removed as soon as the transition phase is over.
-"""
-import logging
-from pathlib import Path
-
-from ..db.connection import connect
-from ..db import status
-from ..errors import UsageError
-
-# Do not repeat documentation of subcommand classes.
-# pylint: disable=C0111
-# Using non-top-level imports to avoid eventually unused imports.
-# pylint: disable=E0012,C0415
-
-LOG = logging.getLogger()
-
-class AdminTransition:
- """\
- Internal functions for code transition. Do not use.
- """
-
- @staticmethod
- def add_args(parser):
- group = parser.add_argument_group('Sub-functions')
- group.add_argument('--create-db', action='store_true',
- help='Create nominatim db')
- group.add_argument('--setup-db', action='store_true',
- help='Build a blank nominatim db')
- group.add_argument('--import-data', action='store_true',
- help='Import a osm file')
- group.add_argument('--load-data', action='store_true',
- help='Copy data to live tables from import table')
- group.add_argument('--create-tables', action='store_true',
- help='Create main tables')
- group.add_argument('--create-partition-tables', action='store_true',
- help='Create required partition tables')
- group.add_argument('--index', action='store_true',
- help='Index the data')
- group.add_argument('--create-search-indices', action='store_true',
- help='Create additional indices required for search and update')
- group.add_argument('--create-country-names', action='store_true',
- help='Create search index for default country names.')
- group = parser.add_argument_group('Options')
- group.add_argument('--no-partitions', action='store_true',
- help='Do not partition search indices')
- group.add_argument('--osm-file', metavar='FILE',
- help='File to import')
- group.add_argument('--drop', action='store_true',
- help='Drop tables needed for updates, making the database readonly')
- group.add_argument('--osm2pgsql-cache', metavar='SIZE', type=int,
- help='Size of cache to be used by osm2pgsql (in MB)')
- group.add_argument('--no-analyse', action='store_true',
- help='Do not perform analyse operations during index')
- group.add_argument('--ignore-errors', action='store_true',
- help="Ignore certain erros on import.")
- group.add_argument('--reverse-only', action='store_true',
- help='Do not create search tables and indexes')
- group.add_argument('--tiger-data', metavar='FILE',
- help='File to import')
-
- @staticmethod
- def run(args): # pylint: disable=too-many-statements
- from ..tools import database_import, tiger_data
- from ..tools import refresh
-
- if args.create_db:
- LOG.warning('Create DB')
- database_import.create_db(args.config.get_libpq_dsn())
-
- if args.setup_db:
- LOG.warning('Setup DB')
-
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.setup_extensions(conn)
- database_import.install_module(args.module_dir, args.project_dir,
- args.config.DATABASE_MODULE_PATH,
- conn=conn)
-
- database_import.import_base_data(args.config.get_libpq_dsn(),
- args.data_dir, args.no_partitions)
-
- if args.import_data:
- LOG.warning('Import data')
- if not args.osm_file:
- raise UsageError('Missing required --osm-file argument')
- database_import.import_osm_data(Path(args.osm_file),
- args.osm2pgsql_options(0, 1),
- drop=args.drop,
- ignore_errors=args.ignore_errors)
-
- if args.create_tables:
- LOG.warning('Create Tables')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.create_tables(conn, args.config, args.sqllib_dir, args.reverse_only)
- refresh.load_address_levels_from_file(conn, Path(args.config.ADDRESS_LEVEL_CONFIG))
- refresh.create_functions(conn, args.config, args.sqllib_dir,
- enable_diff_updates=False)
- database_import.create_table_triggers(conn, args.config, args.sqllib_dir)
-
- if args.create_partition_tables:
- LOG.warning('Create Partition Tables')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.create_partition_tables(conn, args.config, args.sqllib_dir)
-
- if args.load_data:
- LOG.warning('Load data')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.truncate_data_tables(conn, args.config.MAX_WORD_FREQUENCY)
- database_import.load_data(args.config.get_libpq_dsn(),
- args.data_dir,
- args.threads or 1)
-
- with connect(args.config.get_libpq_dsn()) as conn:
- try:
- status.set_status(conn, status.compute_database_date(conn))
- except Exception as exc: # pylint: disable=broad-except
- LOG.error('Cannot determine date of database: %s', exc)
-
- if args.index:
- LOG.warning('Indexing')
- from ..indexer.indexer import Indexer
- indexer = Indexer(args.config.get_libpq_dsn(), args.threads or 1)
- indexer.index_full()
-
- if args.create_search_indices:
- LOG.warning('Create Search indices')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.create_search_indices(conn, args.config, args.sqllib_dir, args.drop)
-
- if args.tiger_data:
- LOG.warning('Tiger data')
- tiger_data.add_tiger_data(args.config.get_libpq_dsn(),
- args.tiger_data,
- args.threads or 1,
- args.config,
- args.sqllib_dir)
-
- if args.create_country_names:
- LOG.warning('Create search index for default country names.')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.create_country_names(conn, args.config)
from dotenv import dotenv_values
-from .errors import UsageError
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import psycopg2.extensions
import psycopg2.extras
-from ..errors import UsageError
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import logging
import re
-from ..tools.exec_utils import get_url
-from ..errors import UsageError
+from nominatim.tools.exec_utils import get_url
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import logging
import gzip
-from .connection import get_pg_env
-from ..errors import UsageError
+from nominatim.db.connection import get_pg_env
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import psycopg2
-from .progress import ProgressLogger
-from ..db.async_connection import DBConnection
+from nominatim.indexer.progress import ProgressLogger
+from nominatim.db.async_connection import DBConnection
LOG = logging.getLogger()
"""
import logging
-from ..errors import UsageError
+from nominatim.errors import UsageError
LOG = logging.getLogger()
import psycopg2
-from ..db.connection import connect
-from ..errors import UsageError
+from nominatim.db.connection import connect
+from nominatim.errors import UsageError
CHECKLIST = []
'idx_placex_rank_address',
'idx_placex_parent_place_id',
'idx_placex_geometry_reverse_lookuppolygon',
- 'idx_placex_geometry_reverse_placenode',
+ 'idx_placex_geometry_placenode',
+ 'idx_placex_housenumber',
'idx_osmline_parent_place_id',
'idx_osmline_parent_osm_id',
'idx_postcode_id',
import psutil
import psycopg2
-from ..db.connection import connect, get_pg_env
-from ..db import utils as db_utils
-from ..db.async_connection import DBConnection
-from ..db.sql_preprocessor import SQLPreprocessor
-from .exec_utils import run_osm2pgsql
-from ..errors import UsageError
-from ..version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
+from nominatim.db.connection import connect, get_pg_env
+from nominatim.db import utils as db_utils
+from nominatim.db.async_connection import DBConnection
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.tools.exec_utils import run_osm2pgsql
+from nominatim.errors import UsageError
+from nominatim.version import POSTGRESQL_REQUIRED_VERSION, POSTGIS_REQUIRED_VERSION
LOG = logging.getLogger()
import urllib.request as urlrequest
from urllib.parse import urlencode
-from ..version import NOMINATIM_VERSION
-from ..db.connection import get_pg_env
+from nominatim.version import NOMINATIM_VERSION
+from nominatim.db.connection import get_pg_env
LOG = logging.getLogger()
"""
import logging
-from ..db import properties
-from ..db.connection import connect
-from ..version import NOMINATIM_VERSION
-from . import refresh, database_import
-from ..errors import UsageError
+from nominatim.db import properties
+from nominatim.db.connection import connect
+from nominatim.version import NOMINATIM_VERSION
+from nominatim.tools import refresh, database_import
+from nominatim.errors import UsageError
LOG = logging.getLogger()
cur.execute("""UPDATE placex
SET housenumber = create_housenumber_id(housenumber)
WHERE housenumber is not null""")
+
+
+@_migration(3, 7, 0, 0)
+def switch_placenode_geometry_index(conn, **_):
+ """ Replace idx_placex_geometry_reverse_placeNode index.
+
+ Make the index slightly more permissive, so that it can also be used
+    when matching up boundaries and place nodes. It makes the
+    idx_placex_adminname index unnecessary.
+ """
+ with conn.cursor() as cur:
+ cur.execute(""" CREATE INDEX IF NOT EXISTS idx_placex_geometry_placenode ON placex
+ USING GIST (geometry)
+ WHERE osm_type = 'N' and rank_search < 26
+ and class = 'place' and type != 'postcode'
+ and linked_place_id is null""")
+ cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
--- /dev/null
+"""
+Functions for importing, updating and otherwise maintaining the table
+of artificial postcode centroids.
+"""
+
+from nominatim.db.utils import execute_file
+from nominatim.db.connection import connect
+
+def import_postcodes(dsn, project_dir):
+ """ Set up the initial list of postcodes.
+ """
+
+ with connect(dsn) as conn:
+ conn.drop_table('gb_postcode')
+ conn.drop_table('us_postcode')
+
+ with conn.cursor() as cur:
+ cur.execute("""CREATE TABLE gb_postcode (
+ id integer,
+ postcode character varying(9),
+ geometry GEOMETRY(Point, 4326))""")
+
+ with conn.cursor() as cur:
+ cur.execute("""CREATE TABLE us_postcode (
+ postcode text,
+ x double precision,
+ y double precision)""")
+ conn.commit()
+
+ gb_postcodes = project_dir / 'gb_postcode_data.sql.gz'
+ if gb_postcodes.is_file():
+ execute_file(dsn, gb_postcodes)
+
+ us_postcodes = project_dir / 'us_postcode_data.sql.gz'
+ if us_postcodes.is_file():
+ execute_file(dsn, us_postcodes)
+
+ with conn.cursor() as cur:
+ cur.execute("TRUNCATE location_postcode")
+ cur.execute("""
+ INSERT INTO location_postcode
+ (place_id, indexed_status, country_code, postcode, geometry)
+ SELECT nextval('seq_place'), 1, country_code,
+ upper(trim (both ' ' from address->'postcode')) as pc,
+ ST_Centroid(ST_Collect(ST_Centroid(geometry)))
+ FROM placex
+ WHERE address ? 'postcode' AND address->'postcode' NOT SIMILAR TO '%(,|;)%'
+ AND geometry IS NOT null
+ GROUP BY country_code, pc
+ """)
+
+ cur.execute("""
+ INSERT INTO location_postcode
+ (place_id, indexed_status, country_code, postcode, geometry)
+ SELECT nextval('seq_place'), 1, 'us', postcode,
+ ST_SetSRID(ST_Point(x,y),4326)
+ FROM us_postcode WHERE postcode NOT IN
+ (SELECT postcode FROM location_postcode
+ WHERE country_code = 'us')
+ """)
+
+ cur.execute("""
+ INSERT INTO location_postcode
+ (place_id, indexed_status, country_code, postcode, geometry)
+ SELECT nextval('seq_place'), 1, 'gb', postcode, geometry
+ FROM gb_postcode WHERE postcode NOT IN
+ (SELECT postcode FROM location_postcode
+ WHERE country_code = 'gb')
+ """)
+
+ cur.execute("""
+ DELETE FROM word WHERE class='place' and type='postcode'
+ and word NOT IN (SELECT postcode FROM location_postcode)
+ """)
+
+ cur.execute("""
+ SELECT count(getorcreate_postcode_id(v)) FROM
+ (SELECT distinct(postcode) as v FROM location_postcode) p
+ """)
+ conn.commit()
from psycopg2.extras import execute_values
-from ..db.utils import execute_file
-from ..db.sql_preprocessor import SQLPreprocessor
-from ..version import NOMINATIM_VERSION
+from nominatim.db.utils import execute_file
+from nominatim.db.sql_preprocessor import SQLPreprocessor
+from nominatim.version import NOMINATIM_VERSION
LOG = logging.getLogger()
import logging
import time
-from ..db import status
-from .exec_utils import run_osm2pgsql
-from ..errors import UsageError
+from nominatim.db import status
+from nominatim.tools.exec_utils import run_osm2pgsql
+from nominatim.errors import UsageError
try:
from osmium.replication.server import ReplicationServer
self.black_list, self.white_list = self._load_white_and_black_lists()
#Compile the regex here to increase performances.
self.occurence_pattern = re.compile(
- r'\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([^\|]+) \|\| ([\-YN])'
+ r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])'
)
self.sanity_check_pattern = re.compile(r'^\w+$')
self.transliterator = Transliterator.createFromRules("special-phrases normalizer",
self.config.TERM_NORMALIZATION)
+ #This set will contain all existing phrases from the word table which
+ #no longer exist on the wiki.
+        #It contains tuples with the following format: (normalized_word, class, type, operator)
+ self.words_phrases_to_delete = set()
+ #This set will contain the phrases which still exist from the wiki.
+ #It is used to prevent duplicates on the wiki by removing them from
+        #the words_phrases_to_delete only at the end.
+ self.words_phrases_still_exist = set()
+        #This set will contain all existing place_classtype tables which don't match any
+ #special phrases class/type on the wiki.
+ self.table_phrases_to_delete = set()
def import_from_wiki(self, languages=None):
"""
if languages is not None and not isinstance(languages, list):
raise TypeError('The \'languages\' argument should be of type list.')
+ self._fetch_existing_words_phrases()
+ self._fetch_existing_place_classtype_tables()
+
#Get all languages to process.
languages = self._load_languages() if not languages else languages
class_type_pairs.update(self._process_xml_content(wiki_page_xml_content, lang))
self._create_place_classtype_table_and_indexes(class_type_pairs)
+ self._remove_non_existent_phrases_from_db()
self.db_connection.commit()
LOG.warning('Import done.')
+ def _fetch_existing_words_phrases(self):
+ """
+ Fetch existing special phrases from the word table.
+        Fill the words_phrases_to_delete set of the class.
+ """
+ #Only extract special phrases terms:
+ #If class=place and type=house then it is a housenumber term.
+ #If class=place and type=postcode then it is a postcode term.
+ word_query = """
+ SELECT word, class, type, operator FROM word
+ WHERE class != 'place' OR (type != 'house' AND type != 'postcode')
+ """
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(word_query))
+ for row in db_cursor:
+ row[3] = '-' if row[3] is None else row[3]
+ self.words_phrases_to_delete.add(
+ (row[0], row[1], row[2], row[3])
+ )
+
+ def _fetch_existing_place_classtype_tables(self):
+ """
+ Fetch existing place_classtype tables.
+ Fill the table_phrases_to_delete set of the class.
+ """
+ query = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name like 'place_classtype_%';
+ """
+ with self.db_connection.cursor() as db_cursor:
+ db_cursor.execute(SQL(query))
+ for row in db_cursor:
+ self.table_phrases_to_delete.add(row[0])
+
def _load_white_and_black_lists(self):
"""
Load white and black lists from phrases-settings.json.
'et', 'eu', 'fa', 'fi', 'fr', 'gl', 'hr', 'hu',
'ia', 'is', 'it', 'ja', 'mk', 'nl', 'no', 'pl',
'ps', 'pt', 'ru', 'sk', 'sl', 'sv', 'uk', 'vi']
- return self.config.LANGUAGES or default_languages
+ return self.config.LANGUAGES.split(',') if self.config.LANGUAGES else default_languages
@staticmethod
def _get_wiki_content(lang):
class_matchs = self.sanity_check_pattern.findall(phrase_class)
if len(class_matchs) < 1 or len(type_matchs) < 1:
- raise UsageError("Bad class/type for language {}: {}={}".format(
- lang, phrase_class, phrase_type))
+ LOG.warning("Bad class/type for language %s: %s=%s. It will not be imported",
+ lang, phrase_class, phrase_type)
+ return False
+ return True
def _process_xml_content(self, xml_content, lang):
"""
phrase_class = match[1].strip()
phrase_type = match[2].strip()
phrase_operator = match[3].strip()
+            #Needed if some operators in the wiki are not written in English.
+ phrase_operator = '-' if phrase_operator not in ('near', 'in') else phrase_operator
#hack around a bug where building=yes was imported with quotes into the wiki
phrase_type = re.sub(r'\"|"', '', phrase_type)
- #sanity check, in case somebody added garbage in the wiki
- self._check_sanity(lang, phrase_class, phrase_type)
-
#blacklisting: disallow certain class/type combinations
if (
phrase_class in self.black_list.keys() and
):
continue
- #add class/type to the pairs dict
+ #Check if the phrase already exists in the database.
+ if (
+ (normalized_label, phrase_class, phrase_type, phrase_operator)
+ in self.words_phrases_to_delete
+ ):
+ #Remove this phrase from the ones to delete as it still exist on the wiki.
+ self.words_phrases_still_exist.add(
+ (normalized_label, phrase_class, phrase_type, phrase_operator)
+ )
+ class_type_pairs.add((phrase_class, phrase_type))
+                #Don't need to add this phrase as it already exists in the word table.
+ continue
+
+ #sanity check, in case somebody added garbage in the wiki
+ if not self._check_sanity(lang, phrase_class, phrase_type):
+ continue
+
class_type_pairs.add((phrase_class, phrase_type))
self._process_amenity(
phrase_class = pair[0]
phrase_type = pair[1]
+ table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type)
+
+ if table_name in self.table_phrases_to_delete:
+                #Remove this table from the ones to delete as it matches a class/type
+ #still existing on the special phrases of the wiki.
+ self.table_phrases_to_delete.remove(table_name)
+                #So we don't need to create the table and indexes.
+ continue
+
#Table creation
self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type)
.format(Identifier(table_name),
Identifier(self.config.DATABASE_WEBUSER)))
+ def _remove_non_existent_phrases_from_db(self):
+ """
+        Remove special phrases which don't exist on the wiki anymore.
+ Delete from the word table and delete the place_classtype tables.
+ """
+ LOG.warning('Cleaning database...')
+ self.words_phrases_to_delete = self.words_phrases_to_delete - self.words_phrases_still_exist
+ #Array containing all queries to execute. Contain tuples of format (query, parameters)
+ queries_parameters = []
+
+ #Delete phrases from the word table which are not on the wiki anymore.
+ for phrase_to_delete in self.words_phrases_to_delete:
+ if phrase_to_delete[3] == '-':
+ query = """
+ DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator IS null
+ """
+ parameters = (phrase_to_delete[0], phrase_to_delete[1], phrase_to_delete[2], )
+ queries_parameters.append((query, parameters))
+ else:
+ query = """
+ DELETE FROM word WHERE word = %s AND class = %s AND type = %s AND operator = %s
+ """
+ parameters = (phrase_to_delete[0], phrase_to_delete[1],
+ phrase_to_delete[2], phrase_to_delete[3], )
+ queries_parameters.append((query, parameters))
+
+ #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore
+ for table in self.table_phrases_to_delete:
+ query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table))
+ queries_parameters.append((query, ()))
+
+ with self.db_connection.cursor() as db_cursor:
+ for query, parameters in queries_parameters:
+ db_cursor.execute(query, parameters)
+
def _convert_php_settings_if_needed(self, file_path):
"""
Convert php settings file of special phrases to json file if it is still in php format.
import tarfile
import selectors
-from ..db.connection import connect
-from ..db.async_connection import DBConnection
-from ..db.sql_preprocessor import SQLPreprocessor
+from nominatim.db.connection import connect
+from nominatim.db.async_connection import DBConnection
+from nominatim.db.sql_preprocessor import SQLPreprocessor
LOG = logging.getLogger()
except Exception as exc: # pylint: disable=broad-except
LOG.info('Wrong SQL statement: %s', exc)
+def handle_unregister_connection_pool(sel, place_threads):
+ """ Handles unregistering pool of connections
+ """
+
+ while place_threads > 0:
+ for key, _ in sel.select(1):
+ conn = key.data
+ sel.unregister(conn)
+ try:
+ conn.wait()
+ except Exception as exc: # pylint: disable=broad-except
+ LOG.info('Wrong SQL statement: %s', exc)
+ conn.close()
+ place_threads -= 1
def add_tiger_data(dsn, data_dir, threads, config, sqllib_dir):
""" Import tiger data from directory or tar file
handle_threaded_sql_statements(sel, file)
# Unregistering pool of database connections
- while place_threads > 0:
- for key, _ in sel.select(1):
- conn = key.data
- sel.unregister(conn)
- conn.wait()
- conn.close()
- place_threads -= 1
+ handle_unregister_connection_pool(sel, place_threads)
if tar:
tar.close()
# and must always be increased when there is a change to the database or code
# that requires a migration.
# Released versions always have a database patch level of 0.
-NOMINATIM_VERSION = (3, 7, 0, 0)
+NOMINATIM_VERSION = (3, 7, 0, 1)
POSTGRESQL_REQUIRED_VERSION = (9, 3)
POSTGIS_REQUIRED_VERSION = (2, 2)
-Subproject commit 497476d56f7c1fcbbdb95b363293de6ce0feac00
+Subproject commit 7869a4e1255a7657bc8582a58adcf5aa2a3f3c70
""" Execute a query and return the result as a set of tuples.
"""
self.execute(sql, params)
- if self.rowcount == 1:
- return set(tuple(self.fetchone()))
return set((tuple(row) for row in self))
import nominatim.tools.database_import
import nominatim.tools.freeze
import nominatim.tools.refresh
+import nominatim.tools.postcodes
from mocks import MockParamCapture
def mock_func_factory(monkeypatch):
def get_mock(module, func):
mock = MockParamCapture()
+ mock.func_name = func
monkeypatch.setattr(module, func, mock)
return mock
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
+ mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property')
]
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
- mock_func_factory(nominatim.clicmd.setup, 'run_legacy_script')
assert 0 == call_nominatim('import', '--osm-file', __file__)
assert cf_mock.called > 1
for mock in mocks:
- assert mock.called == 1
+ assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
+
+
+def test_import_continue_load_data(temp_db, mock_func_factory):
+ mocks = [
+ mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
+ mock_func_factory(nominatim.tools.database_import, 'load_data'),
+ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
+ mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
+ mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
+ mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
+ mock_func_factory(nominatim.tools.refresh, 'setup_website'),
+ mock_func_factory(nominatim.db.properties, 'set_property')
+ ]
+
+ assert 0 == call_nominatim('import', '--continue', 'load-data')
+
+ for mock in mocks:
+ assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
+
+
+def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
+ mocks = [
+ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
+ mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
+ mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
+ mock_func_factory(nominatim.tools.refresh, 'setup_website'),
+ mock_func_factory(nominatim.db.properties, 'set_property')
+ ]
+
+ assert 0 == call_nominatim('import', '--continue', 'indexing')
+
+ for mock in mocks:
+ assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
+
+ assert temp_db_conn.index_exists('idx_placex_pendingsector')
+
+ # Calling it again still works for the index
+ assert 0 == call_nominatim('import', '--continue', 'indexing')
+ assert temp_db_conn.index_exists('idx_placex_pendingsector')
+
+
+def test_import_continue_postprocess(temp_db, mock_func_factory):
+ mocks = [
+ mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
+ mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
+ mock_func_factory(nominatim.tools.refresh, 'setup_website'),
+ mock_func_factory(nominatim.db.properties, 'set_property')
+ ]
+
+ assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
+
+ for mock in mocks:
+ assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
+
def test_freeze_command(mock_func_factory, temp_db):
mock_drop = mock_func_factory(nominatim.tools.freeze, 'drop_update_tables')
Tests for import special phrases methods
of the class SpecialPhrasesImporter.
"""
+from mocks import MockParamCapture
from nominatim.errors import UsageError
from pathlib import Path
import tempfile
TEST_BASE_DIR = Path(__file__) / '..' / '..'
+def test_fetch_existing_words_phrases_basic(special_phrases_importer, word_table,
+ temp_db_cursor):
+ """
+ Check for the fetch_existing_words_phrases() method.
+    It should return the special phrase term added to the word
+ table.
+ """
+ query ="""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'class', 'type', null, 0, 'near');
+ """
+ temp_db_cursor.execute(query)
+
+ assert not special_phrases_importer.words_phrases_to_delete
+ special_phrases_importer._fetch_existing_words_phrases()
+ contained_phrase = special_phrases_importer.words_phrases_to_delete.pop()
+ assert contained_phrase == ('normalized_word', 'class', 'type', 'near')
+
+@pytest.mark.parametrize("house_type", ['house', 'postcode'])
+def test_fetch_existing_words_phrases_special_cases(special_phrases_importer, word_table,
+ house_type, temp_db_cursor):
+ """
+ Check for the fetch_existing_words_phrases() method.
+ It should return nothing as the terms added correspond
+ to a housenumber and postcode term.
+ """
+ query ="""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'place', %s, null, 0, 'near');
+ """
+ temp_db_cursor.execute(query, (house_type,))
+
+ special_phrases_importer._fetch_existing_words_phrases()
+ assert not special_phrases_importer.words_phrases_to_delete
+
+def test_fetch_existing_place_classtype_tables(special_phrases_importer, temp_db_cursor):
+ """
+ Check for the fetch_existing_place_classtype_tables() method.
+ It should return the table just created.
+ """
+ temp_db_cursor.execute('CREATE TABLE place_classtype_testclasstypetable()')
+
+ special_phrases_importer._fetch_existing_place_classtype_tables()
+ contained_table = special_phrases_importer.table_phrases_to_delete.pop()
+ assert contained_table == 'place_classtype_testclasstypetable'
+
def test_check_sanity_class(special_phrases_importer):
"""
Check for _check_sanity() method.
If a wrong class or type is given, an UsageError should raise.
If a good class and type are given, nothing special happens.
"""
- with pytest.raises(UsageError):
- special_phrases_importer._check_sanity('en', '', 'type')
- with pytest.raises(UsageError):
- special_phrases_importer._check_sanity('en', 'class', '')
+ assert not special_phrases_importer._check_sanity('en', '', 'type')
+ assert not special_phrases_importer._check_sanity('en', 'class', '')
- special_phrases_importer._check_sanity('en', 'class', 'type')
+ assert special_phrases_importer._check_sanity('en', 'class', 'type')
def test_load_white_and_black_lists(special_phrases_importer):
"""
assert returned == json_file
def test_process_amenity_with_operator(special_phrases_importer, getorcreate_amenityoperator_funcs,
- word_table, temp_db_conn):
+ temp_db_conn, word_table):
"""
Test that _process_amenity() execute well the
getorcreate_amenityoperator() SQL function and that
special_phrases_importer._process_amenity('', '', '', '', 'in')
with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM temp_with_operator WHERE op='near' OR op='in'")
+ temp_db_cursor.execute("SELECT * FROM word WHERE operator='near' OR operator='in'")
results = temp_db_cursor.fetchall()
assert len(results) == 2
def test_process_amenity_without_operator(special_phrases_importer, getorcreate_amenity_funcs,
- temp_db_conn):
+ temp_db_conn, word_table):
"""
Test that _process_amenity() execute well the
getorcreate_amenity() SQL function.
special_phrases_importer._process_amenity('', '', '', '', '')
with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM temp_without_operator WHERE op='no_operator'")
+ temp_db_cursor.execute("SELECT * FROM word WHERE operator='no_operator'")
result = temp_db_cursor.fetchone()
assert result
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, phrase_class, phrase_type)
def test_create_place_classtype_table_and_indexes(
- temp_db_conn, def_config, placex_table, getorcreate_amenity_funcs,
- getorcreate_amenityoperator_funcs, special_phrases_importer):
+ temp_db_conn, def_config, placex_table,
+ special_phrases_importer):
"""
Test that _create_place_classtype_table_and_indexes()
create the right place_classtype tables and place_id indexes
assert check_placeid_and_centroid_indexes(temp_db_conn, pair[0], pair[1])
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, pair[0], pair[1])
-def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer,
+def test_process_xml_content(temp_db_conn, def_config, special_phrases_importer, word_table,
getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
"""
Test that _process_xml_content() process the given xml content right
assert check_amenities_without_op(temp_db_conn)
assert results[class_test] and type_test in results.values()
+def test_remove_non_existent_phrases_from_db(special_phrases_importer, default_phrases,
+ temp_db_conn):
+ """
+ Check for the remove_non_existent_phrases_from_db() method.
+
+    It should remove entries from the word table which are contained
+ in the words_phrases_to_delete set and not those also contained
+ in the words_phrases_still_exist set.
+
+ place_classtype tables contained in table_phrases_to_delete should
+ be deleted.
+ """
+ with temp_db_conn.cursor() as temp_db_cursor:
+ to_delete_phrase_tuple = ('normalized_word', 'class', 'type', 'near')
+ to_keep_phrase_tuple = (
+ 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
+ )
+ special_phrases_importer.words_phrases_to_delete = {
+ to_delete_phrase_tuple,
+ to_keep_phrase_tuple
+ }
+ special_phrases_importer.words_phrases_still_exist = {
+ to_keep_phrase_tuple
+ }
+ special_phrases_importer.table_phrases_to_delete = {
+ 'place_classtype_testclasstypetable_to_delete'
+ }
+
+ query_words = 'SELECT word, class, type, operator FROM word;'
+ query_tables = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name like 'place_classtype_%';
+ """
+
+ special_phrases_importer._remove_non_existent_phrases_from_db()
+
+ temp_db_cursor.execute(query_words)
+ words_result = temp_db_cursor.fetchall()
+ temp_db_cursor.execute(query_tables)
+ tables_result = temp_db_cursor.fetchall()
+ assert len(words_result) == 1 and words_result[0] == [
+ 'normalized_word_exists', 'class_exists', 'type_exists', 'near'
+ ]
+ assert (len(tables_result) == 1 and
+ tables_result[0][0] == 'place_classtype_testclasstypetable_to_keep'
+ )
+
def test_import_from_wiki(monkeypatch, temp_db_conn, def_config, special_phrases_importer, placex_table,
- getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs):
+ getorcreate_amenity_funcs, getorcreate_amenityoperator_funcs, word_table):
"""
Check that the main import_from_wiki() method is well executed.
It should create the place_classtype table, the place_id and centroid indexes,
grand access to the web user and executing the SQL functions for amenities.
+    It should also update the database correctly by deleting or preserving existing entries
+ of the database.
"""
+ #Add some data to the database before execution in order to test
+ #what is deleted and what is preserved.
+ with temp_db_conn.cursor() as temp_db_cursor:
+ temp_db_cursor.execute("""
+ INSERT INTO word VALUES(99999, ' animal shelter', 'animal shelter',
+ 'amenity', 'animal_shelter', null, 0, null);
+
+ INSERT INTO word VALUES(99999, ' wrong_lookup_token', 'wrong_normalized_word',
+ 'wrong_class', 'wrong_type', null, 0, 'near');
+
+ CREATE TABLE place_classtype_amenity_animal_shelter();
+ CREATE TABLE place_classtype_wrongclass_wrongtype();""")
+
monkeypatch.setattr('nominatim.tools.special_phrases.SpecialPhrasesImporter._get_wiki_content', mock_get_wiki_content)
special_phrases_importer.import_from_wiki(['en'])
assert check_grant_access(temp_db_conn, def_config.DATABASE_WEBUSER, class_test, type_test)
assert check_amenities_with_op(temp_db_conn)
assert check_amenities_without_op(temp_db_conn)
+ assert check_table_exist(temp_db_conn, 'amenity', 'animal_shelter')
+ assert not check_table_exist(temp_db_conn, 'wrong_class', 'wrong_type')
+
+    #Format (query, should_return_something_bool) used to easily execute all asserts
+ queries_tests = set()
+
+ #Used to check that the correct phrase already in the word table before is still there.
+ query_correct_word = "SELECT * FROM word WHERE word = 'animal shelter'"
+ queries_tests.add((query_correct_word, True))
+
+ #Used to check if wrong phrase was deleted from the word table of the database.
+ query_wrong_word = "SELECT word FROM word WHERE word = 'wrong_normalized_word'"
+ queries_tests.add((query_wrong_word, False))
+
+    #Used to check that the correct place_classtype table already in the database before is still there.
+ query_existing_table = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name = 'place_classtype_amenity_animal_shelter';
+ """
+ queries_tests.add((query_existing_table, True))
+
+ #Used to check that wrong place_classtype table was deleted from the database.
+ query_wrong_table = """
+ SELECT table_name
+ FROM information_schema.tables
+ WHERE table_schema='public'
+ AND table_name = 'place_classtype_wrongclass_wrongtype';
+ """
+ queries_tests.add((query_wrong_table, False))
+
+ with temp_db_conn.cursor() as temp_db_cursor:
+ for query in queries_tests:
+ temp_db_cursor.execute(query[0])
+ if (query[1] == True):
+ assert temp_db_cursor.fetchone()
+ else:
+ assert not temp_db_cursor.fetchone()
def mock_get_wiki_content(lang):
"""
contains more than one value (so that the SQL function was call more than one time).
"""
with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM temp_with_operator")
+ temp_db_cursor.execute("SELECT * FROM word WHERE operator != 'no_operator'")
return len(temp_db_cursor.fetchall()) > 1
def check_amenities_without_op(temp_db_conn):
contains more than one value (so that the SQL function was call more than one time).
"""
with temp_db_conn.cursor() as temp_db_cursor:
- temp_db_cursor.execute("SELECT * FROM temp_without_operator")
+ temp_db_cursor.execute("SELECT * FROM word WHERE operator = 'no_operator'")
return len(temp_db_cursor.fetchall()) > 1
@pytest.fixture
yield Path(phpdir)
+@pytest.fixture
+def default_phrases(word_table, temp_db_cursor):
+ temp_db_cursor.execute("""
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word',
+ 'class', 'type', null, 0, 'near');
+
+ INSERT INTO word VALUES(99999, 'lookup_token', 'normalized_word_exists',
+ 'class_exists', 'type_exists', null, 0, 'near');
+
+ CREATE TABLE place_classtype_testclasstypetable_to_delete();
+ CREATE TABLE place_classtype_testclasstypetable_to_keep();""")
+
@pytest.fixture
def make_strandard_name_func(temp_db_cursor):
temp_db_cursor.execute("""
@pytest.fixture
def getorcreate_amenity_funcs(temp_db_cursor, make_strandard_name_func):
temp_db_cursor.execute("""
- CREATE TABLE temp_without_operator(op TEXT);
-
CREATE OR REPLACE FUNCTION getorcreate_amenity(lookup_word TEXT, normalized_word TEXT,
lookup_class text, lookup_type text)
RETURNS void as $$
BEGIN
- INSERT INTO temp_without_operator VALUES('no_operator');
+ INSERT INTO word VALUES(null, lookup_word, normalized_word,
+ lookup_class, lookup_type, null, 0, 'no_operator');
END;
$$ LANGUAGE plpgsql""")
lookup_class text, lookup_type text, op text)
RETURNS void as $$
BEGIN
- INSERT INTO temp_with_operator VALUES(op);
+ INSERT INTO word VALUES(null, lookup_word, normalized_word,
+ lookup_class, lookup_type, null, 0, op);
END;
$$ LANGUAGE plpgsql""")
\ No newline at end of file
--- /dev/null
+"""
+Tests for functions to maintain the artificial postcode table.
+"""
+
+import pytest
+
+from nominatim.tools import postcodes
+
+@pytest.fixture
+def postcode_table(temp_db_with_extensions, temp_db_cursor, table_factory,
+ placex_table, word_table):
+ table_factory('location_postcode',
+ """ place_id BIGINT,
+ parent_place_id BIGINT,
+ rank_search SMALLINT,
+ rank_address SMALLINT,
+ indexed_status SMALLINT,
+ indexed_date TIMESTAMP,
+ country_code varchar(2),
+ postcode TEXT,
+ geometry GEOMETRY(Geometry, 4326)""")
+ temp_db_cursor.execute('CREATE SEQUENCE seq_place')
+ temp_db_cursor.execute("""CREATE OR REPLACE FUNCTION getorcreate_postcode_id(postcode TEXT)
+ RETURNS INTEGER AS $$ BEGIN RETURN 1; END; $$ LANGUAGE plpgsql;
+ """)
+
+
+def test_import_postcodes_empty(dsn, temp_db_cursor, postcode_table, tmp_path):
+ postcodes.import_postcodes(dsn, tmp_path)
+
+ assert temp_db_cursor.table_exists('gb_postcode')
+ assert temp_db_cursor.table_exists('us_postcode')
+ assert temp_db_cursor.table_rows('location_postcode') == 0
+
+
+def test_import_postcodes_from_placex(dsn, temp_db_cursor, postcode_table, tmp_path):
+ temp_db_cursor.execute("""
+ INSERT INTO placex (place_id, country_code, address, geometry)
+ VALUES (1, 'xx', '"postcode"=>"9486"', 'SRID=4326;POINT(10 12)')
+ """)
+
+ postcodes.import_postcodes(dsn, tmp_path)
+
+ rows = temp_db_cursor.row_set(""" SELECT postcode, country_code,
+ ST_X(geometry), ST_Y(geometry)
+ FROM location_postcode""")
+ print(rows)
+ assert len(rows) == 1
+ assert rows == set((('9486', 'xx', 10, 12), ))
+
temp_db_cursor.execute('CREATE EXTENSION postgis')
temp_db_cursor.execute('CREATE TABLE place (id INT)')
sqlfile = tmp_path / '1010.sql'
- sqlfile.write_text("""INSERT INTO place values (1)""")
+ sqlfile.write_text("""INSERT INTO place values (1);
+ INSERT INTO non_existant_table values (1);""")
tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql')
assert temp_db_cursor.table_rows('place') == 1
+@pytest.mark.parametrize("threads", (1, 5))
+def test_add_tiger_data_bad_file(dsn, src_dir, def_config, tmp_path, sql_preprocessor,
+ temp_db_cursor, threads, temp_db):
+ temp_db_cursor.execute('CREATE EXTENSION hstore')
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ sqlfile = tmp_path / '1010.txt'
+ sqlfile.write_text("""Random text""")
+ tiger_data.add_tiger_data(dsn, str(tmp_path), threads, def_config, src_dir / 'lib-sql')
+
+ assert temp_db_cursor.table_rows('place') == 0
+
@pytest.mark.parametrize("threads", (1, 5))
def test_add_tiger_data_tarfile(dsn, src_dir, def_config, tmp_path,
temp_db_cursor, threads, temp_db, sql_preprocessor):
temp_db_cursor.execute('CREATE EXTENSION postgis')
temp_db_cursor.execute('CREATE TABLE place (id INT)')
sqlfile = tmp_path / '1010.sql'
- sqlfile.write_text("""INSERT INTO place values (1)""")
+ sqlfile.write_text("""INSERT INTO place values (1);
+ INSERT INTO non_existant_table values (1);""")
+ tar = tarfile.open("sample.tar.gz", "w:gz")
+ tar.add(sqlfile)
+ tar.close()
+ tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql')
+
+ assert temp_db_cursor.table_rows('place') == 1
+
+@pytest.mark.parametrize("threads", (1, 5))
+def test_add_tiger_data_bad_tarfile(dsn, src_dir, def_config, tmp_path,
+ temp_db_cursor, threads, temp_db, sql_preprocessor):
+ temp_db_cursor.execute('CREATE EXTENSION hstore')
+ temp_db_cursor.execute('CREATE EXTENSION postgis')
+ temp_db_cursor.execute('CREATE TABLE place (id INT)')
+ sqlfile = tmp_path / '1010.txt'
+ sqlfile.write_text("""Random text""")
tar = tarfile.open("sample.tar.gz", "w:gz")
tar.add(sqlfile)
tar.close()
tiger_data.add_tiger_data(dsn, str(src_dir / 'sample.tar.gz'), threads, def_config, src_dir / 'lib-sql')
- assert temp_db_cursor.table_rows('place') == 1
\ No newline at end of file
+ assert temp_db_cursor.table_rows('place') == 0
\ No newline at end of file
<sitename>OpenStreetMap Wiki</sitename>
<dbname>wiki</dbname>
<base>https://wiki.openstreetmap.org/wiki/Main_Page</base>
-<generator>MediaWiki 1.35.1</generator>
+<generator>MediaWiki 1.35.2</generator>
<case>first-letter</case>
<namespaces>
<namespace key="-2" case="first-letter">Media</namespace>
<model>wikitext</model>
<format>text/x-wiki</format>
<text bytes="158218" sha1="cst5x7tt58izti1pxzgljf27tx8qjcj" xml:space="preserve">
-== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Zip Lines near || aerialway || zip_line || near || Y |- | Zip Wire || aerialway || zip_line || - || N |- | Zip Wires || aerialway || zip_line || - || Y |- | Zip Wire in || aerialway || zip_line || in || N |- | Zip Wires in || aerialway || zip_line || in || Y |- | Zip Wire near || aerialway || zip_line || near || N |} [[Category:Word list]]
+== en == {| class="wikitable sortable" |- ! Word / Phrase !! Key !! Value !! Operator !! Plural |- | Zip Line || aerialway || zip_line || - || N |- | Zip Lines || aerialway || zip_line || - || Y |- | Zip Line in || aerialway || zip_line || in || N |- | Zip Lines in || aerialway || zip_line || in || Y |- | Zip Line near || aerialway || zip_line || near || N |- | Animal shelter || amenity || animal_shelter || - || N |- | Animal shelters || amenity || animal_shelter || - || Y |- | Animal shelter in || amenity || animal_shelter || in || N |- | Animal shelters in || amenity || animal_shelter || in || Y |- | Animal shelter near || amenity || animal_shelter || near|| N |- | Animal shelters near || amenity || animal_shelter || near|| Y |- | Drinking Water near || amenity || drinking_water || near || N |- | Water || amenity || drinking_water || - || N |- | Water in || amenity || drinking_water || in || N |- | Water near || amenity || drinking_water || near || N |- | Embassy || amenity || embassy || - || N |- | Embassys || amenity || embassy || - || Y |- | Embassies || amenity || embassy || - || Y |- |Coworkings near |amenity |coworking_space |near |Y |} [[Category:Word list]]
</text>
<sha1>cst5x7tt58izti1pxzgljf27tx8qjcj</sha1>
</revision>