From: Sarah Hoffmann Date: Mon, 19 Jul 2021 07:43:04 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~159 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/73953cbac95847740339d7a5ee3c0e5f142d93c5?hp=9025b0ec78bd770e74a1bed85c8d2e43ce5f571a Merge remote-tracking branch 'upstream/master' --- diff --git a/.github/actions/build-nominatim/action.yml b/.github/actions/build-nominatim/action.yml index a3915616..757decd4 100644 --- a/.github/actions/build-nominatim/action.yml +++ b/.github/actions/build-nominatim/action.yml @@ -14,9 +14,9 @@ runs: run: | sudo apt-get install -y -qq libboost-system-dev libboost-filesystem-dev libexpat1-dev zlib1g-dev libbz2-dev libpq-dev libproj-dev libicu-dev if [ "x$UBUNTUVER" == "x18" ]; then - pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium + pip3 install python-dotenv psycopg2==2.7.7 jinja2==2.8 psutil==5.4.2 pyicu osmium PyYAML==5.1 datrie else - sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv + sudo apt-get install -y -qq python3-icu python3-datrie python3-pyosmium python3-jinja2 python3-psutil python3-psycopg2 python3-dotenv python3-yaml fi shell: bash env: diff --git a/.github/workflows/ci-tests.yml b/.github/workflows/ci-tests.yml index 8f0ea80d..cea27091 100644 --- a/.github/workflows/ci-tests.yml +++ b/.github/workflows/ci-tests.yml @@ -4,16 +4,22 @@ on: [ push, pull_request ] jobs: tests: - runs-on: ubuntu-20.04 - strategy: matrix: - postgresql: [9.5, 13] + ubuntu: [18, 20] include: - - postgresql: 9.5 + - ubuntu: 18 + postgresql: 9.5 postgis: 2.5 - - postgresql: 13 + pytest: pytest + php: 7.2 + - ubuntu: 20 + postgresql: 13 postgis: 3 + pytest: py.test-3 + php: 7.4 + + runs-on: ubuntu-${{ matrix.ubuntu }}.04 steps: - uses: actions/checkout@v2 @@ -24,9 +30,15 @@ jobs: - name: Setup PHP uses: shivammathur/setup-php@v2 with: - php-version: '7.4' + php-version: ${{ matrix.php }} + coverage: xdebug tools: phpunit, phpcs, composer + - uses: actions/setup-python@v2 + with: + python-version: 3.6 + if: matrix.ubuntu == 18 + - name: Get Date id: get-date run: | @@ -43,33 +55,53 @@ jobs: with: postgresql-version: ${{ matrix.postgresql }} postgis-version: ${{ matrix.postgis }} + - uses: ./Nominatim/.github/actions/build-nominatim + with: + ubuntu: ${{ matrix.ubuntu }} - name: Install test prerequsites - run: sudo apt-get install -y -qq php-codesniffer pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage php-xdebug + run: sudo apt-get install -y -qq pylint python3-pytest python3-behave python3-pytest-cov php-codecoverage + if: matrix.ubuntu == 20 + + - name: Install test prerequsites + run: | + pip3 install pylint==2.6.0 pytest pytest-cov behave==1.2.6 + if: matrix.ubuntu == 18 - name: PHP linting run: phpcs --report-width=120 . working-directory: Nominatim - name: Python linting - run: pylint --extension-pkg-whitelist=osmium nominatim + run: pylint nominatim working-directory: Nominatim - name: PHP unit tests run: phpunit --coverage-clover ../../coverage-php.xml ./ working-directory: Nominatim/test/php + if: matrix.ubuntu == 20 - name: Python unit tests - run: py.test-3 --cov=nominatim --cov-report=xml test/python + run: $PYTEST --cov=nominatim --cov-report=xml test/python working-directory: Nominatim + env: + PYTEST: ${{ matrix.pytest }} - name: BDD tests run: | + mkdir cov behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 -DPHPCOV=./cov composer require phpunit/phpcov:7.0.2 vendor/bin/phpcov merge --clover ../../coverage-bdd.xml ./cov working-directory: Nominatim/test/bdd + if: matrix.ubuntu == 20 + + - name: BDD tests + run: | + behave -DREMOVE_TEMPLATE=1 -DBUILDDIR=$GITHUB_WORKSPACE/build --format=progress3 + working-directory: Nominatim/test/bdd + if: matrix.ubuntu == 18 - name: BDD tests (legacy_icu tokenizer) run: | @@ -85,6 +117,7 @@ jobs: fail_ci_if_error: false path_to_write_report: ./coverage/codecov_report.txt verbose: true + if: matrix.ubuntu == 20 import: strategy: diff --git a/SECURITY.md b/SECURITY.md new file mode 100644 index 00000000..41a6f2ef --- /dev/null +++ b/SECURITY.md @@ -0,0 +1,39 @@ +# Security Policy + +## Supported Versions + +All Nominatim releases receive security updates for two years. + +The following table lists the end of support for all currently supported +versions. + +| Version | End of support for security updates | +| ------- | ----------------------------------- | +| 3.7.x | 2023-04-05 | +| 3.6.x | 2022-12-12 | +| 3.5.x | 2022-06-05 | +| 3.4.x | 2021-10-24 | + +## Reporting a Vulnerability + +If you believe, you have found an issue in Nominatim that has implications on +security, please send a description of the issue to **security@nominatim.org**. +You will receive an acknowledgement of your mail within 3 work days where we +also notify you of the next steps. + +## How we Disclose Security Issues + +** The following section only applies to security issues found in released +versions. Issues that concern the master development branch only will be +fixed immediately on the branch with the corresponding PR containing the +description of the nature and severity of the issue. ** + +Patches for identified security issues are applied to all affected versions and +new minor versions are released. At the same time we release a statement at +the [Nominatim blog](https://nominatim.org/blog/) describing the nature of the +incident. Announcements will also be published at the +[geocoding mailinglist](https://lists.openstreetmap.org/listinfo/geocoding). + +## List of Previous Incidents + +* 2020-05-04 - [SQL injection issue on /details endpoint](https://lists.openstreetmap.org/pipermail/geocoding/2020-May/002012.html) diff --git a/docs/admin/Installation.md b/docs/admin/Installation.md index cc1edf59..76af39c6 100644 --- a/docs/admin/Installation.md +++ b/docs/admin/Installation.md @@ -45,6 +45,7 @@ For running Nominatim: * [psutil](https://github.com/giampaolo/psutil) * [Jinja2](https://palletsprojects.com/p/jinja/) * [PyICU](https://pypi.org/project/PyICU/) + * [PyYaml](https://pyyaml.org/) (5.1+) * [datrie](https://github.com/pytries/datrie) * [PHP](https://php.net) (7.0 or later) * PHP-pgsql diff --git a/docs/develop/Development-Environment.md b/docs/develop/Development-Environment.md index 43598b9a..eea69c70 100644 --- a/docs/develop/Development-Environment.md +++ b/docs/develop/Development-Environment.md @@ -29,7 +29,7 @@ The Nominatim test suite consists of behavioural tests (using behave) and unit tests (using PHPUnit for PHP code and pytest for Python code). It has the following additional requirements: -* [behave test framework](https://behave.readthedocs.io) >= 1.2.5 +* [behave test framework](https://behave.readthedocs.io) >= 1.2.6 * [phpunit](https://phpunit.de) >= 7.3 * [PHP CodeSniffer](https://github.com/squizlabs/PHP_CodeSniffer) * [Pylint](https://pylint.org/) (2.6.0 is used for the CI) diff --git a/lib-php/AddressDetails.php b/lib-php/AddressDetails.php index bf8defc2..91e3d89f 100644 --- a/lib-php/AddressDetails.php +++ b/lib-php/AddressDetails.php @@ -61,7 +61,7 @@ class AddressDetails return join(', ', $aParts); } - public function getAddressNames($sCountry = null) + public function getAddressNames() { $aAddress = array(); @@ -79,12 +79,11 @@ class AddressDetails $sName = $aLine['housenumber']; } - if (isset($sName)) { - if (!isset($aAddress[$sTypeLabel]) - || $aLine['class'] == 'place' - ) { - $aAddress[$sTypeLabel] = $sName; - } + if (isset($sName) + && (!isset($aAddress[$sTypeLabel]) + || $aLine['class'] == 'place') + ) { + $aAddress[$sTypeLabel] = $sName; } } diff --git a/lib-php/DB.php b/lib-php/DB.php index abd23179..03ee6f1b 100644 --- a/lib-php/DB.php +++ b/lib-php/DB.php @@ -39,7 +39,9 @@ class DB $conn->exec("SET DateStyle TO 'sql,european'"); $conn->exec("SET client_encoding TO 'utf-8'"); $iMaxExecution = ini_get('max_execution_time'); - if ($iMaxExecution > 0) $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds + if ($iMaxExecution > 0) { + $conn->setAttribute(\PDO::ATTR_TIMEOUT, $iMaxExecution); // seconds + } $this->connection = $conn; return true; @@ -95,7 +97,9 @@ class DB try { $stmt = $this->getQueryStatement($sSQL, $aInputVars, $sErrMessage); $row = $stmt->fetch(\PDO::FETCH_NUM); - if ($row === false) return false; + if ($row === false) { + return false; + } } catch (\PDOException $e) { throw new \Nominatim\DatabaseError($sErrMessage, 500, null, $e, $sSQL); } @@ -306,9 +310,13 @@ class DB if (preg_match('/^pgsql:(.+)$/', $sDSN, $aMatches)) { foreach (explode(';', $aMatches[1]) as $sKeyVal) { list($sKey, $sVal) = explode('=', $sKeyVal, 2); - if ($sKey == 'host') $sKey = 'hostspec'; - if ($sKey == 'dbname') $sKey = 'database'; - if ($sKey == 'user') $sKey = 'username'; + if ($sKey == 'host') { + $sKey = 'hostspec'; + } elseif ($sKey == 'dbname') { + $sKey = 'database'; + } elseif ($sKey == 'user') { + $sKey = 'username'; + } $aInfo[$sKey] = $sVal; } } diff --git a/lib-php/DatabaseError.php b/lib-php/DatabaseError.php index 3a53bc8f..ec428de1 100644 --- a/lib-php/DatabaseError.php +++ b/lib-php/DatabaseError.php @@ -5,7 +5,7 @@ namespace Nominatim; class DatabaseError extends \Exception { - public function __construct($message, $code = 500, Exception $previous = null, $oPDOErr, $sSql = null) + public function __construct($message, $code, $previous, $oPDOErr, $sSql = null) { parent::__construct($message, $code, $previous); // https://secure.php.net/manual/en/class.pdoexception.php diff --git a/lib-php/DebugHtml.php b/lib-php/DebugHtml.php index 98da8794..19221b43 100644 --- a/lib-php/DebugHtml.php +++ b/lib-php/DebugHtml.php @@ -78,7 +78,7 @@ class Debug echo 'Address TokensAddress Not'; echo 'countryoperator'; echo 'classtypepostcodehousenumber'; - foreach ($aSearches as $iRank => $aRankedSet) { + foreach ($aSearches as $aRankedSet) { foreach ($aRankedSet as $aRow) { $aRow->dumpAsHtmlTableRow($aWordsIDs); } diff --git a/lib-php/Geocode.php b/lib-php/Geocode.php index a3883b25..52b92c99 100644 --- a/lib-php/Geocode.php +++ b/lib-php/Geocode.php @@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php'); require_once(CONST_LibDir.'/ReverseGeocode.php'); require_once(CONST_LibDir.'/SearchDescription.php'); require_once(CONST_LibDir.'/SearchContext.php'); +require_once(CONST_LibDir.'/SearchPosition.php'); require_once(CONST_LibDir.'/TokenList.php'); require_once(CONST_TokenizerDir.'/tokenizer.php'); @@ -70,7 +71,9 @@ class Geocode $aParams['exclude_place_ids'] = implode(',', $this->aExcludePlaceIDs); } - if ($this->bBoundedSearch) $aParams['bounded'] = '1'; + if ($this->bBoundedSearch) { + $aParams['bounded'] = '1'; + } if ($this->aCountryCodes) { $aParams['countrycodes'] = implode(',', $this->aCountryCodes); @@ -85,8 +88,11 @@ class Geocode public function setLimit($iLimit = 10) { - if ($iLimit > 50) $iLimit = 50; - if ($iLimit < 1) $iLimit = 1; + if ($iLimit > 50) { + $iLimit = 50; + } elseif ($iLimit < 1) { + $iLimit = 1; + } $this->iFinalLimit = $iLimit; $this->iLimit = $iLimit + min($iLimit, 10); @@ -181,18 +187,24 @@ class Geocode if ($sExcluded) { foreach ($sExcluded as $iExcludedPlaceID) { $iExcludedPlaceID = (int)$iExcludedPlaceID; - if ($iExcludedPlaceID) + if ($iExcludedPlaceID) { $aExcludePlaceIDs[$iExcludedPlaceID] = $iExcludedPlaceID; + } } - if (isset($aExcludePlaceIDs)) + if (isset($aExcludePlaceIDs)) { $this->aExcludePlaceIDs = $aExcludePlaceIDs; + } } // Only certain ranks of feature $sFeatureType = $oParams->getString('featureType'); - if (!$sFeatureType) $sFeatureType = $oParams->getString('featuretype'); - if ($sFeatureType) $this->setFeatureType($sFeatureType); + if (!$sFeatureType) { + $sFeatureType = $oParams->getString('featuretype'); + } + if ($sFeatureType) { + $this->setFeatureType($sFeatureType); + } // Country code list $sCountries = $oParams->getStringList('countrycodes'); @@ -202,8 +214,9 @@ class Geocode $aCountries[] = strtolower($sCountryCode); } } - if (isset($aCountries)) + if (isset($aCountries)) { $this->aCountryCodes = $aCountries; + } } $aViewbox = $oParams->getStringList('viewboxlbrt'); @@ -255,13 +268,17 @@ class Geocode public function loadStructuredAddressElement($sValue, $sKey, $iNewMinAddressRank, $iNewMaxAddressRank, $aItemListValues) { $sValue = trim($sValue); - if (!$sValue) return false; + if (!$sValue) { + return false; + } $this->aStructuredQuery[$sKey] = $sValue; if ($this->iMinAddressRank == 0 && $this->iMaxAddressRank == 30) { $this->iMinAddressRank = $iNewMinAddressRank; $this->iMaxAddressRank = $iNewMaxAddressRank; } - if ($aItemListValues) $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + if ($aItemListValues) { + $this->aAddressRankList = array_merge($this->aAddressRankList, $aItemListValues); + } return true; } @@ -295,11 +312,11 @@ class Geocode public function fallbackStructuredQuery() { - if (!$this->aStructuredQuery) return false; - $aParams = $this->aStructuredQuery; - if (count($aParams) == 1) return false; + if (!$aParams || count($aParams) == 1) { + return false; + } $aOrderToFallback = array('postalcode', 'street', 'city', 'county', 'state'); @@ -329,50 +346,26 @@ class Geocode */ foreach ($aPhrases as $iPhrase => $oPhrase) { $aNewPhraseSearches = array(); - $sPhraseType = $oPhrase->getPhraseType(); + $oPosition = new SearchPosition( + $oPhrase->getPhraseType(), + $iPhrase, + count($aPhrases) + ); foreach ($oPhrase->getWordSets() as $aWordset) { $aWordsetSearches = $aSearches; // Add all words from this wordset foreach ($aWordset as $iToken => $sToken) { - //echo "
$sToken"; $aNewWordsetSearches = array(); + $oPosition->setTokenPosition($iToken, count($aWordset)); foreach ($aWordsetSearches as $oCurrentSearch) { - //echo ""; - //var_dump($oCurrentSearch); - //echo ""; - - // Tokens with full name matches. - foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $oSearchTerm, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } - } - } - // Look for partial matches. - // Note that there is no point in adding country terms here - // because country is omitted in the address. - if ($sPhraseType != 'country') { - // Allow searching for a word - but at extra cost - foreach ($oValidTokens->get($sToken) as $oSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $sToken, - $oSearchTerm, - (bool) $sPhraseType, - $iPhrase, - $oValidTokens->get(' '.$sToken) + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { + if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) { + $aNewSearches = $oSearchTerm->extendSearch( + $oCurrentSearch, + $oPosition ); foreach ($aNewSearches as $oSearch) { @@ -387,7 +380,6 @@ class Geocode usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank')); $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50); } - //var_Dump('
',count($aWordsetSearches)); exit; $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); usort($aNewPhraseSearches, array('Nominatim\SearchDescription', 'bySearchRank')); @@ -395,8 +387,11 @@ class Geocode $aSearchHash = array(); foreach ($aNewPhraseSearches as $iSearch => $aSearch) { $sHash = serialize($aSearch); - if (isset($aSearchHash[$sHash])) unset($aNewPhraseSearches[$iSearch]); - else $aSearchHash[$sHash] = 1; + if (isset($aSearchHash[$sHash])) { + unset($aNewPhraseSearches[$iSearch]); + } else { + $aSearchHash[$sHash] = 1; + } } $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); @@ -417,10 +412,12 @@ class Geocode $iSearchCount = 0; $aSearches = array(); - foreach ($aGroupedSearches as $iScore => $aNewSearches) { + foreach ($aGroupedSearches as $aNewSearches) { $iSearchCount += count($aNewSearches); $aSearches = array_merge($aSearches, $aNewSearches); - if ($iSearchCount > 50) break; + if ($iSearchCount > 50) { + break; + } } } @@ -477,7 +474,9 @@ class Geocode public function lookup() { Debug::newFunction('Geocode::lookup'); - if (!$this->sQuery && !$this->aStructuredQuery) return array(); + if (!$this->sQuery && !$this->aStructuredQuery) { + return array(); + } Debug::printDebugArray('Geocode', $this); @@ -503,10 +502,6 @@ class Geocode Debug::newSection('Query Preprocessing'); - $sLanguagePrefArraySQL = $this->oDB->getArraySQL( - $this->oDB->getDBQuotedList($this->aLangPrefOrder) - ); - $sQuery = $this->sQuery; if (!preg_match('//u', $sQuery)) { userError('Query string is not UTF-8 encoded.'); @@ -560,15 +555,15 @@ class Geocode if (!empty($aTokens)) { $aNewSearches = array(); + $oPosition = new SearchPosition('', 0, 1); + $oPosition->setTokenPosition(0, 1); + foreach ($aSearches as $oSearch) { foreach ($aTokens as $oToken) { - $oNewSearch = clone $oSearch; - $oNewSearch->setPoiSearch( - $oToken->iOperator, - $oToken->sClass, - $oToken->sType + $aNewSearches = array_merge( + $aNewSearches, + $oToken->extendSearch($oSearch, $oPosition) ); - $aNewSearches[] = $oNewSearch; } } $aSearches = $aNewSearches; @@ -639,7 +634,9 @@ class Geocode $aGroupedSearches = array(); foreach ($aSearches as $aSearch) { if ($aSearch->getRank() < $this->iMaxRank) { - if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array(); + if (!isset($aGroupedSearches[$aSearch->getRank()])) { + $aGroupedSearches[$aSearch->getRank()] = array(); + } $aGroupedSearches[$aSearch->getRank()][] = $aSearch; } } @@ -653,7 +650,9 @@ class Geocode $sHash = serialize($aSearch); if (isset($aSearchHash[$sHash])) { unset($aGroupedSearches[$iGroup][$iSearch]); - if (empty($aGroupedSearches[$iGroup])) unset($aGroupedSearches[$iGroup]); + if (empty($aGroupedSearches[$iGroup])) { + unset($aGroupedSearches[$iGroup]); + } } else { $aSearchHash[$sHash] = 1; } @@ -697,7 +696,9 @@ class Geocode } } - if ($iQueryLoop > 20) break; + if ($iQueryLoop > 20) { + break; + } } if (!empty($aResults)) { @@ -772,9 +773,9 @@ class Geocode $aResults = $tempIDs; } - if (!empty($aResults)) break; - if ($iGroupLoop > 4) break; - if ($iQueryLoop > 30) break; + if (!empty($aResults) || $iGroupLoop > 4 || $iQueryLoop > 30) { + break; + } } } else { // Just interpret as a reverse geocode @@ -792,10 +793,8 @@ class Geocode // No results? Done if (empty($aResults)) { - if ($this->bFallback) { - if ($this->fallbackStructuredQuery()) { - return $this->lookup(); - } + if ($this->bFallback && $this->fallbackStructuredQuery()) { + return $this->lookup(); } return array(); @@ -814,7 +813,9 @@ class Geocode $aRecheckWords = preg_split('/\b[\s,\\-]*/u', $sQuery); foreach ($aRecheckWords as $i => $sWord) { - if (!preg_match('/[\pL\pN]/', $sWord)) unset($aRecheckWords[$i]); + if (!preg_match('/[\pL\pN]/', $sWord)) { + unset($aRecheckWords[$i]); + } } Debug::printVar('Recheck words', $aRecheckWords); @@ -874,7 +875,9 @@ class Geocode foreach ($aRecheckWords as $i => $sWord) { if (stripos($sAddress, $sWord)!==false) { $iCountWords++; - if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) $iCountWords += 0.1; + if (preg_match('/(^|,)\s*'.preg_quote($sWord, '/').'\s*(,|$)/', $sAddress)) { + $iCountWords += 0.1; + } } } @@ -891,15 +894,8 @@ class Geocode $aToFilter = $aSearchResults; $aSearchResults = array(); - $bFirst = true; foreach ($aToFilter as $aResult) { $this->aExcludePlaceIDs[$aResult['place_id']] = $aResult['place_id']; - if ($bFirst) { - $fLat = $aResult['lat']; - $fLon = $aResult['lon']; - if (isset($aResult['zoom'])) $iZoom = $aResult['zoom']; - $bFirst = false; - } if (!$this->oPlaceLookup->doDeDupe() || (!isset($aOSMIDDone[$aResult['osm_type'].$aResult['osm_id']]) && !isset($aClassTypeNameDone[$aResult['osm_type'].$aResult['class'].$aResult['type'].$aResult['name'].$aResult['admin_level']])) ) { @@ -909,7 +905,9 @@ class Geocode } // Absolute limit on number of results - if (count($aSearchResults) >= $this->iFinalLimit) break; + if (count($aSearchResults) >= $this->iFinalLimit) { + break; + } } Debug::printVar('Post-filter results', $aSearchResults); diff --git a/lib-php/ParameterParser.php b/lib-php/ParameterParser.php index 32a848b9..d4068aa3 100644 --- a/lib-php/ParameterParser.php +++ b/lib-php/ParameterParser.php @@ -90,14 +90,16 @@ class ParameterParser $aLanguages = array(); $sLangString = $this->getString('accept-language', $sFallback); - if ($sLangString) { - if (preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER)) { - foreach ($aLanguagesParse as $iLang => $aLanguage) { - $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100); - if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10; + if ($sLangString + && preg_match_all('/(([a-z]{1,8})([-_][a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $sLangString, $aLanguagesParse, PREG_SET_ORDER) + ) { + foreach ($aLanguagesParse as $iLang => $aLanguage) { + $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100); + if (!isset($aLanguages[$aLanguage[2]])) { + $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10; } - arsort($aLanguages); } + arsort($aLanguages); } if (empty($aLanguages) && CONST_Default_Language) { $aLanguages[CONST_Default_Language] = 1; diff --git a/lib-php/PlaceLookup.php b/lib-php/PlaceLookup.php index b9fa3b1c..7e78d536 100644 --- a/lib-php/PlaceLookup.php +++ b/lib-php/PlaceLookup.php @@ -89,20 +89,36 @@ class PlaceLookup { $aParams = array(); - if ($this->bAddressDetails) $aParams['addressdetails'] = '1'; - if ($this->bExtraTags) $aParams['extratags'] = '1'; - if ($this->bNameDetails) $aParams['namedetails'] = '1'; + if ($this->bAddressDetails) { + $aParams['addressdetails'] = '1'; + } + if ($this->bExtraTags) { + $aParams['extratags'] = '1'; + } + if ($this->bNameDetails) { + $aParams['namedetails'] = '1'; + } - if ($this->bIncludePolygonAsText) $aParams['polygon_text'] = '1'; - if ($this->bIncludePolygonAsGeoJSON) $aParams['polygon_geojson'] = '1'; - if ($this->bIncludePolygonAsKML) $aParams['polygon_kml'] = '1'; - if ($this->bIncludePolygonAsSVG) $aParams['polygon_svg'] = '1'; + if ($this->bIncludePolygonAsText) { + $aParams['polygon_text'] = '1'; + } + if ($this->bIncludePolygonAsGeoJSON) { + $aParams['polygon_geojson'] = '1'; + } + if ($this->bIncludePolygonAsKML) { + $aParams['polygon_kml'] = '1'; + } + if ($this->bIncludePolygonAsSVG) { + $aParams['polygon_svg'] = '1'; + } if ($this->fPolygonSimplificationThreshold > 0.0) { $aParams['polygon_threshold'] = $this->fPolygonSimplificationThreshold; } - if (!$this->bDeDupe) $aParams['dedupe'] = '0'; + if (!$this->bDeDupe) { + $aParams['dedupe'] = '0'; + } return $aParams; } @@ -147,8 +163,9 @@ class PlaceLookup private function langAddressSql($sHousenumber) { - if ($this->bAddressDetails) + if ($this->bAddressDetails) { return ''; // langaddress will be computed from address details + } return 'get_address_by_language(place_id,'.$sHousenumber.','.$this->aLangPrefOrderSql.') AS langaddress,'; } @@ -234,12 +251,20 @@ class PlaceLookup $sSQL .= ' housenumber,'; $sSQL .= ' country_code, '; $sSQL .= ' importance, '; - if (!$this->bDeDupe) $sSQL .= 'place_id,'; - if (!$this->bAddressDetails) $sSQL .= 'langaddress, '; + if (!$this->bDeDupe) { + $sSQL .= 'place_id,'; + } + if (!$this->bAddressDetails) { + $sSQL .= 'langaddress, '; + } $sSQL .= ' placename, '; $sSQL .= ' ref, '; - if ($this->bExtraTags) $sSQL .= 'extratags, '; - if ($this->bNameDetails) $sSQL .= 'name, '; + if ($this->bExtraTags) { + $sSQL .= 'extratags, '; + } + if ($this->bNameDetails) { + $sSQL .= 'name, '; + } $sSQL .= ' extra_place '; $aSubSelects[] = $sSQL; @@ -260,8 +285,12 @@ class PlaceLookup $sSQL .= $this->langAddressSql('-1'); $sSQL .= ' postcode as placename,'; $sSQL .= ' postcode as ref,'; - if ($this->bExtraTags) $sSQL .= 'null::text AS extra,'; - if ($this->bNameDetails) $sSQL .= 'null::text AS names,'; + if ($this->bExtraTags) { + $sSQL .= 'null::text AS extra,'; + } + if ($this->bNameDetails) { + $sSQL .= 'null::text AS names,'; + } $sSQL .= ' ST_x(geometry) AS lon, ST_y(geometry) AS lat,'; $sSQL .= ' (0.75-(rank_search::float/40)) AS importance, '; $sSQL .= $this->addressImportanceSql('geometry', 'lp.parent_place_id'); @@ -298,8 +327,12 @@ class PlaceLookup $sSQL .= $this->langAddressSql('housenumber_for_place'); $sSQL .= ' null::text AS placename, '; $sSQL .= ' null::text AS ref, '; - if ($this->bExtraTags) $sSQL .= 'null::text AS extra,'; - if ($this->bNameDetails) $sSQL .= 'null::text AS names,'; + if ($this->bExtraTags) { + $sSQL .= 'null::text AS extra,'; + } + if ($this->bNameDetails) { + $sSQL .= 'null::text AS names,'; + } $sSQL .= ' st_x(centroid) AS lon, '; $sSQL .= ' st_y(centroid) AS lat,'; $sSQL .= ' -1.15 AS importance, '; @@ -344,8 +377,12 @@ class PlaceLookup $sSQL .= $this->langAddressSql('housenumber_for_place'); $sSQL .= ' null::text AS placename, '; $sSQL .= ' null::text AS ref, '; - if ($this->bExtraTags) $sSQL .= 'null::text AS extra, '; - if ($this->bNameDetails) $sSQL .= 'null::text AS names, '; + if ($this->bExtraTags) { + $sSQL .= 'null::text AS extra, '; + } + if ($this->bNameDetails) { + $sSQL .= 'null::text AS names, '; + } $sSQL .= ' st_x(centroid) AS lon, '; $sSQL .= ' st_y(centroid) AS lat, '; // slightly smaller than the importance for normal houses @@ -448,7 +485,9 @@ class PlaceLookup { $aOutlineResult = array(); - if (!$iPlaceID) return $aOutlineResult; + if (!$iPlaceID) { + return $aOutlineResult; + } // Get the bounding box and outline polygon $sSQL = 'select place_id,0 as numfeatures,st_area(geometry) as area,'; @@ -460,10 +499,18 @@ class PlaceLookup } $sSQL .= ' ST_YMin(geometry) as minlat,ST_YMax(geometry) as maxlat,'; $sSQL .= ' ST_XMin(geometry) as minlon,ST_XMax(geometry) as maxlon'; - if ($this->bIncludePolygonAsGeoJSON) $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson'; - if ($this->bIncludePolygonAsKML) $sSQL .= ',ST_AsKML(geometry) as askml'; - if ($this->bIncludePolygonAsSVG) $sSQL .= ',ST_AsSVG(geometry) as assvg'; - if ($this->bIncludePolygonAsText) $sSQL .= ',ST_AsText(geometry) as astext'; + if ($this->bIncludePolygonAsGeoJSON) { + $sSQL .= ',ST_AsGeoJSON(geometry) as asgeojson'; + } + if ($this->bIncludePolygonAsKML) { + $sSQL .= ',ST_AsKML(geometry) as askml'; + } + if ($this->bIncludePolygonAsSVG) { + $sSQL .= ',ST_AsSVG(geometry) as assvg'; + } + if ($this->bIncludePolygonAsText) { + $sSQL .= ',ST_AsText(geometry) as astext'; + } if ($fLonReverse != null && $fLatReverse != null) { $sFrom = ' from (SELECT * , CASE WHEN (class = \'highway\') AND (ST_GeometryType(geometry) = \'ST_LineString\') THEN '; $sFrom .=' ST_ClosestPoint(geometry, ST_SetSRID(ST_Point('.$fLatReverse.','.$fLonReverse.'),4326))'; @@ -486,10 +533,18 @@ class PlaceLookup $aOutlineResult['lon'] = $aPointPolygon['centrelon']; } - if ($this->bIncludePolygonAsGeoJSON) $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson']; - if ($this->bIncludePolygonAsKML) $aOutlineResult['askml'] = $aPointPolygon['askml']; - if ($this->bIncludePolygonAsSVG) $aOutlineResult['assvg'] = $aPointPolygon['assvg']; - if ($this->bIncludePolygonAsText) $aOutlineResult['astext'] = $aPointPolygon['astext']; + if ($this->bIncludePolygonAsGeoJSON) { + $aOutlineResult['asgeojson'] = $aPointPolygon['asgeojson']; + } + if ($this->bIncludePolygonAsKML) { + $aOutlineResult['askml'] = $aPointPolygon['askml']; + } + if ($this->bIncludePolygonAsSVG) { + $aOutlineResult['assvg'] = $aPointPolygon['assvg']; + } + if ($this->bIncludePolygonAsText) { + $aOutlineResult['astext'] = $aPointPolygon['astext']; + } if (abs($aPointPolygon['minlat'] - $aPointPolygon['maxlat']) < 0.0000001) { $aPointPolygon['minlat'] = $aPointPolygon['minlat'] - $fRadius; diff --git a/lib-php/ReverseGeocode.php b/lib-php/ReverseGeocode.php index cf396b7a..47e931ef 100644 --- a/lib-php/ReverseGeocode.php +++ b/lib-php/ReverseGeocode.php @@ -74,8 +74,6 @@ class ReverseGeocode protected function lookupLargeArea($sPointSQL, $iMaxRank) { - $oResult = null; - if ($iMaxRank > 4) { $aPlace = $this->lookupPolygon($sPointSQL, $iMaxRank); if ($aPlace) { @@ -167,9 +165,13 @@ class ReverseGeocode { Debug::newFunction('lookupPolygon'); // polygon search begins at suburb-level - if ($iMaxRank > 25) $iMaxRank = 25; + if ($iMaxRank > 25) { + $iMaxRank = 25; + } // no polygon search over country-level - if ($iMaxRank < 5) $iMaxRank = 5; + if ($iMaxRank < 5) { + $iMaxRank = 5; + } // search for polygon $sSQL = 'SELECT place_id, parent_place_id, rank_address, rank_search FROM'; $sSQL .= '(select place_id, parent_place_id, rank_address, rank_search, country_code, geometry'; @@ -190,7 +192,6 @@ class ReverseGeocode if ($aPoly) { // if a polygon is found, search for placenodes begins ... - $iParentPlaceID = $aPoly['parent_place_id']; $iRankAddress = $aPoly['rank_address']; $iRankSearch = $aPoly['rank_search']; $iPlaceID = $aPoly['place_id']; @@ -242,26 +243,24 @@ class ReverseGeocode public function lookupPoint($sPointSQL, $bDoInterpolation = true) { Debug::newFunction('lookupPoint'); - // starts if the search is on POI or street level, - // searches for the nearest POI or street, - // if a street is found and a POI is searched for, - // the nearest POI which the found street is a parent of is choosen. - $iMaxRank = $this->iMaxRank; - // Find the nearest point $fSearchDiam = 0.006; $oResult = null; $aPlace = null; // for POI or street level - if ($iMaxRank >= 26) { + if ($this->iMaxRank >= 26) { + // starts if the search is on POI or street level, + // searches for the nearest POI or street, + // if a street is found and a POI is searched for, + // the nearest POI which the found street is a parent of is choosen. $sSQL = 'select place_id,parent_place_id,rank_address,country_code,'; $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance'; $sSQL .= ' FROM '; $sSQL .= ' placex'; $sSQL .= ' WHERE ST_DWithin('.$sPointSQL.', geometry, '.$fSearchDiam.')'; $sSQL .= ' AND'; - $sSQL .= ' rank_address between 26 and '.$iMaxRank; + $sSQL .= ' rank_address between 26 and '.$this->iMaxRank; $sSQL .= ' and (name is not null or housenumber is not null'; $sSQL .= ' or rank_address between 26 and 27)'; $sSQL .= ' and (rank_address between 26 and 27'; @@ -284,7 +283,7 @@ class ReverseGeocode if ($aPlace) { // if street and maxrank > streetlevel - if ($iRankAddress <= 27 && $iMaxRank > 27) { + if ($iRankAddress <= 27 && $this->iMaxRank > 27) { // find the closest object (up to a certain radius) of which the street is a parent of $sSQL = ' select place_id,'; $sSQL .= ' ST_distance('.$sPointSQL.', geometry) as distance'; @@ -338,7 +337,7 @@ class ReverseGeocode } } - if ($bDoInterpolation && $iMaxRank >= 30) { + if ($bDoInterpolation && $this->iMaxRank >= 30) { $fDistance = $fSearchDiam; if ($aPlace) { // We can't reliably go from the closest street to an @@ -356,7 +355,6 @@ class ReverseGeocode $oResult = new Result($aHouse['place_id'], Result::TABLE_OSMLINE); $oResult->iHouseNumber = closestHouseNumber($aHouse); $aPlace = $aHouse; - $iRankAddress = 30; } } @@ -366,7 +364,7 @@ class ReverseGeocode } } else { // lower than street level ($iMaxRank < 26 ) - $oResult = $this->lookupLargeArea($sPointSQL, $iMaxRank); + $oResult = $this->lookupLargeArea($sPointSQL, $this->iMaxRank); } Debug::printVar('Final result', $oResult); diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 3c572f2f..4d944bfb 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -67,35 +67,6 @@ class SearchDescription return $this->iSearchRank; } - /** - * Make this search a POI search. - * - * In a POI search, objects are not (only) searched by their name - * but also by the primary OSM key/value pair (class and type in Nominatim). - * - * @param integer $iOperator Type of POI search - * @param string $sClass Class (or OSM tag key) of POI. - * @param string $sType Type (or OSM tag value) of POI. - * - * @return void - */ - public function setPoiSearch($iOperator, $sClass, $sType) - { - $this->iOperator = $iOperator; - $this->sClass = $sClass; - $this->sType = $sType; - } - - /** - * Check if any operator is set. - * - * @return bool True, if this is a special search operation. - */ - public function hasOperator() - { - return $this->iOperator != Operator::NONE; - } - /** * Extract key/value pairs from a query. * @@ -148,253 +119,234 @@ class SearchDescription /////////// Search building functions - /** - * Derive new searches by adding a full term to the existing search. + * Create a copy of this search description adding to search rank. * - * @param object $oSearchTerm Description of the token. - * @param string $sPhraseType Type of phrase the token is contained in. - * @param bool $bFirstToken True if the token is at the beginning of the - * query. - * @param bool $bFirstPhrase True if the token is in the first phrase of - * the query. - * @param bool $bLastToken True if the token is at the end of the query. + * @param integer $iTermCost Cost to add to the current search rank. * - * @return SearchDescription[] List of derived search descriptions. + * @return object Cloned search description. */ - public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) + public function clone($iTermCost) { - $aNewSearches = array(); + $oSearch = clone $this; + $oSearch->iSearchRank += $iTermCost; - if (($sPhraseType == '' || $sPhraseType == 'country') - && is_a($oSearchTerm, '\Nominatim\Token\Country') - ) { - if (!$this->sCountryCode) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->sCountryCode = $oSearchTerm->sCountryCode; - // Country is almost always at the end of the string - // - increase score for finding it anywhere else (optimisation) - if (!$bLastToken) { - $oSearch->iSearchRank += 5; - $oSearch->iNamePhrase = -1; - } - $aNewSearches[] = $oSearch; - } - } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') - && is_a($oSearchTerm, '\Nominatim\Token\Postcode') - ) { - if (!$this->sPostcode) { - // If we have structured search or this is the first term, - // make the postcode the primary search element. - if ($this->iOperator == Operator::NONE && $bFirstToken) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iOperator = Operator::POSTCODE; - $oSearch->aAddress = array_merge($this->aAddress, $this->aName); - $oSearch->aName = - array($oSearchTerm->iId => $oSearchTerm->sPostcode); - $aNewSearches[] = $oSearch; - } + return $oSearch; + } - // If we have a structured search or this is not the first term, - // add the postcode as an addendum. - if ($this->iOperator != Operator::POSTCODE - && ($sPhraseType == 'postalcode' || !empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iNamePhrase = -1; - if (strlen($oSearchTerm->sPostcode) < 4) { - $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); - } - $oSearch->sPostcode = $oSearchTerm->sPostcode; - $aNewSearches[] = $oSearch; - } - } - } elseif (($sPhraseType == '' || $sPhraseType == 'street') - && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') - ) { - if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { - // sanity check: if the housenumber is not mainly made - // up of numbers, add a penalty - $iSearchCost = 1; - if (preg_match('/\\d/', $oSearchTerm->sToken) === 0 - || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) { - $iSearchCost++; - } - if ($this->iOperator != Operator::NONE) { - $iSearchCost++; - } - if (empty($oSearchTerm->iId)) { - $iSearchCost++; - } - // also must not appear in the middle of the address - if (!empty($this->aAddress) - || (!empty($this->aAddressNonSearch)) - || $this->sPostcode - ) { - $iSearchCost++; - } + /** + * Check if the search currently includes a name. + * + * @param bool bIncludeNonNames If true stop-word tokens are taken into + * account, too. + * + * @return bool True, if search has a name. + */ + public function hasName($bIncludeNonNames = false) + { + return !empty($this->aName) + || (!empty($this->aNameNonSearch) && $bIncludeNonNames); + } - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->iNamePhrase = -1; - $oSearch->sHouseNumber = $oSearchTerm->sToken; - $aNewSearches[] = $oSearch; - - // Housenumbers may appear in the name when the place has its own - // address terms. - if ($oSearchTerm->iId !== null - && ($this->iNamePhrase >= 0 || empty($this->aName)) - && empty($this->aAddress) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->aAddress = $this->aName; - $oSearch->bRareName = false; - $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId); - $aNewSearches[] = $oSearch; - } - } - } elseif ($sPhraseType == '' - && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') - ) { - if ($this->iOperator == Operator::NONE) { - $oSearch = clone $this; - $oSearch->iSearchRank += 2; - $oSearch->iNamePhrase = -1; - - $iOp = $oSearchTerm->iOperator; - if ($iOp == Operator::NONE) { - if (!empty($this->aName) || $this->oContext->isBoundedSearch()) { - $iOp = Operator::NAME; - } else { - $iOp = Operator::NEAR; - } - $oSearch->iSearchRank += 2; - } elseif (!$bFirstToken && !$bLastToken) { - $oSearch->iSearchRank += 2; - } - if ($this->sHouseNumber) { - $oSearch->iSearchRank++; - } + /** + * Check if the search currently includes an address term. + * + * @return bool True, if any address term is included, including stop-word + * terms. + */ + public function hasAddress() + { + return !empty($this->aAddress) || !empty($this->aAddressNonSearch); + } - $oSearch->setPoiSearch( - $iOp, - $oSearchTerm->sClass, - $oSearchTerm->sType - ); - $aNewSearches[] = $oSearch; - } - } elseif ($sPhraseType != 'country' - && is_a($oSearchTerm, '\Nominatim\Token\Word') - ) { - $iWordID = $oSearchTerm->iId; - // Full words can only be a name if they appear at the beginning - // of the phrase. In structured search the name must forcably in - // the first phrase. In unstructured search it may be in a later - // phrase when the first phrase is a house number. - if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { - if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) { - $oSearch = clone $this; - $oSearch->iNamePhrase = -1; - $oSearch->iSearchRank += 1; - $oSearch->aAddress[$iWordID] = $iWordID; - $aNewSearches[] = $oSearch; - } - } elseif (empty($this->aNameNonSearch)) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->aName = array($iWordID => $iWordID); - if (CONST_Search_NameOnlySearchFrequencyThreshold) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } - $aNewSearches[] = $oSearch; - } - } + /** + * Check if a country restriction is currently included in the search. + * + * @return bool True, if a country restriction is set. + */ + public function hasCountry() + { + return $this->sCountryCode !== ''; + } - return $aNewSearches; + /** + * Check if a postcode is currently included in the search. + * + * @return bool True, if a postcode is set. + */ + public function hasPostcode() + { + return $this->sPostcode !== ''; } /** - * Derive new searches by adding a partial term to the existing search. + * Check if a house number is set for the search. * - * @param string $sToken Term for the token. - * @param object $oSearchTerm Description of the token. - * @param bool $bStructuredPhrases True if the search is structured. - * @param integer $iPhrase Number of the phrase the token is in. - * @param array[] $aFullTokens List of full term tokens with the - * same name. + * @return bool True, if a house number is set. + */ + public function hasHousenumber() + { + return $this->sHouseNumber !== ''; + } + + /** + * Check if a special type of place is requested. * - * @return SearchDescription[] List of derived search descriptions. + * param integer iOperator When set, check for the particular + * operator used for the special type. + * + * @return bool True, if speial type is requested or, if requested, + * a special type with the given operator. */ - public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) + public function hasOperator($iOperator = null) { - // Only allow name terms. - if (!(is_a($oSearchTerm, '\Nominatim\Token\Word')) - || strpos($sToken, ' ') !== false - ) { - return array(); + return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator; + } + + /** + * Add the given token to the list of terms to search for in the address. + * + * @param integer iID ID of term to add. + * @param bool bSearchable Term should be used to search for result + * (i.e. term is not a stop word). + */ + public function addAddressToken($iId, $bSearchable = true) + { + if ($bSearchable) { + $this->aAddress[$iId] = $iId; + } else { + $this->aAddressNonSearch[$iId] = $iId; } + } - $aNewSearches = array(); - $iWordID = $oSearchTerm->iId; + /** + * Add the given full-word token to the list of terms to search for in the + * name. + * + * @param interger iId ID of term to add. + * @param bool bRareName True if the term is infrequent enough to not + * require other constraints for efficient search. + */ + public function addNameToken($iId, $bRareName) + { + $this->aName[$iId] = $iId; + $this->bRareName = $bRareName; + } - if ((!$bStructuredPhrases || $iPhrase > 0) - && (!empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (preg_match('#^[0-9 ]+$#', $sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - $oSearch->aAddress[$iWordID] = $iWordID; - } else { - $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (!empty($aFullTokens)) { - $oSearch->iSearchRank++; - } - } - $aNewSearches[] = $oSearch; + /** + * Add the given partial token to the list of terms to search for in + * the name. + * + * @param integer iID ID of term to add. + * @param bool bSearchable Term should be used to search for result + * (i.e. term is not a stop word). + * @param integer iPhraseNumber Index of phrase, where the partial term + * appears. + */ + public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber) + { + if ($bSearchable) { + $this->aName[$iId] = $iId; + } else { + $this->aNameNonSearch[$iId] = $iId; } + $this->iNamePhrase = $iPhraseNumber; + } - if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (empty($this->aName) && empty($this->aNameNonSearch)) { - $oSearch->iSearchRank++; - } - if (preg_match('#^[0-9 ]+$#', $sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - if (empty($this->aName) - && CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } else { - $oSearch->bRareName = false; - } - $oSearch->aName[$iWordID] = $iWordID; - } else { - if (!empty($aFullTokens)) { - $oSearch->iSearchRank++; - } - $oSearch->aNameNonSearch[$iWordID] = $iWordID; - } - $oSearch->iNamePhrase = $iPhrase; - $aNewSearches[] = $oSearch; - } + /** + * Set country restriction for the search. + * + * @param string sCountryCode Country code of country to restrict search to. + */ + public function setCountry($sCountryCode) + { + $this->sCountryCode = $sCountryCode; + $this->iNamePhrase = -1; + } + + /** + * Set postcode search constraint. + * + * @param string sPostcode Postcode the result should have. + */ + public function setPostcode($sPostcode) + { + $this->sPostcode = $sPostcode; + $this->iNamePhrase = -1; + } + + /** + * Make this search a search for a postcode object. + * + * @param integer iId Token Id for the postcode. + * @param string sPostcode Postcode to look for. + */ + public function setPostcodeAsName($iId, $sPostcode) + { + $this->iOperator = Operator::POSTCODE; + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->aName = array($iId => $sPostcode); + $this->bRareName = true; + $this->iNamePhrase = -1; + } + + /** + * Set house number search cnstraint. + * + * @param string sNumber House number the result should have. + */ + public function setHousenumber($sNumber) + { + $this->sHouseNumber = $sNumber; + $this->iNamePhrase = -1; + } + + /** + * Make this search a search for a house number. + * + * @param integer iId Token Id for the house number. + */ + public function setHousenumberAsName($iId) + { + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->bRareName = false; + $this->aName = array($iId => $iId); + $this->iNamePhrase = -1; + } + + /** + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). + * + * @param integer $iOperator Type of POI search + * @param string $sClass Class (or OSM tag key) of POI. + * @param string $sType Type (or OSM tag value) of POI. + * + * @return void + */ + public function setPoiSearch($iOperator, $sClass, $sType) + { + $this->iOperator = $iOperator; + $this->sClass = $sClass; + $this->sType = $sType; + $this->iNamePhrase = -1; + } + + public function getNamePhrase() + { + return $this->iNamePhrase; + } - return $aNewSearches; + /** + * Get the global search context. + * + * @return object Objects of global search constraints. + */ + public function getContext() + { + return $this->oContext; } /////////// Query functions @@ -415,7 +367,6 @@ class SearchDescription public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit) { $aResults = array(); - $iHousenumber = -1; if ($this->sCountryCode && empty($this->aName) diff --git a/lib-php/SearchPosition.php b/lib-php/SearchPosition.php new file mode 100644 index 00000000..e4260bf2 --- /dev/null +++ b/lib-php/SearchPosition.php @@ -0,0 +1,87 @@ +sPhraseType = $sPhraseType; + $this->iPhrase = $iPhrase; + $this->iNumPhrases = $iNumPhrases; + } + + public function setTokenPosition($iToken, $iNumTokens) + { + $this->iToken = $iToken; + $this->iNumTokens = $iNumTokens; + } + + /** + * Check if the phrase can be of the given type. + * + * @param string $sType Type of phrse requested. + * + * @return True if the phrase is untyped or of the given type. + */ + public function maybePhrase($sType) + { + return $this->sPhraseType == '' || $this->sPhraseType == $sType; + } + + /** + * Check if the phrase is exactly of the given type. + * + * @param string $sType Type of phrse requested. + * + * @return True if the phrase of the given type. + */ + public function isPhrase($sType) + { + return $this->sPhraseType == $sType; + } + + /** + * Return true if the token is the very first in the query. + */ + public function isFirstToken() + { + return $this->iPhrase == 0 && $this->iToken == 0; + } + + /** + * Check if the token is the final one in the query. + */ + public function isLastToken() + { + return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases; + } + + /** + * Check if the current token is part of the first phrase in the query. + */ + public function isFirstPhrase() + { + return $this->iPhrase == 0; + } + + /** + * Get the phrase position in the query. + */ + public function getPhrase() + { + return $this->iPhrase; + } +} diff --git a/lib-php/Shell.php b/lib-php/Shell.php index b43db135..4bec20e9 100644 --- a/lib-php/Shell.php +++ b/lib-php/Shell.php @@ -33,7 +33,9 @@ class Shell public function addEnvPair($sKey, $sVal) { if (isset($sKey) && $sKey && isset($sVal)) { - if (!isset($this->aEnv)) $this->aEnv = $_ENV; + if (!isset($this->aEnv)) { + $this->aEnv = $_ENV; + } $this->aEnv = array_merge($this->aEnv, array($sKey => $sVal), $_ENV); } return $this; @@ -75,11 +77,8 @@ class Shell return $iStat; } - - private function escapeParam($sParam) { - if (preg_match('/^-*\w+$/', $sParam)) return $sParam; - return escapeshellarg($sParam); + return (preg_match('/^-*\w+$/', $sParam)) ? $sParam : escapeshellarg($sParam); } } diff --git a/lib-php/TokenCountry.php b/lib-php/TokenCountry.php index 518c0a31..c9b7b6af 100644 --- a/lib-php/TokenCountry.php +++ b/lib-php/TokenCountry.php @@ -8,9 +8,9 @@ namespace Nominatim\Token; class Country { /// Database word id, if available. - public $iId; + private $iId; /// Two-letter country code (lower-cased). - public $sCountryCode; + private $sCountryCode; public function __construct($iId, $sCountryCode) { @@ -18,6 +18,44 @@ class Country $this->sCountryCode = $sCountryCode; } + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oSearch->hasCountry() && $oPosition->maybePhrase('country'); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6); + $oNewSearch->setCountry($this->sCountryCode); + + return array($oNewSearch); + } + public function debugInfo() { return array( @@ -26,4 +64,9 @@ class Country 'Info' => $this->sCountryCode ); } + + public function debugCode() + { + return 'C'; + } } diff --git a/lib-php/TokenHousenumber.php b/lib-php/TokenHousenumber.php index 5c7c6e9b..cd60d3ca 100644 --- a/lib-php/TokenHousenumber.php +++ b/lib-php/TokenHousenumber.php @@ -8,9 +8,9 @@ namespace Nominatim\Token; class HouseNumber { /// Database word id, if available. - public $iId; + private $iId; /// Normalized house number. - public $sToken; + private $sToken; public function __construct($iId, $sToken) { @@ -18,6 +18,80 @@ class HouseNumber $this->sToken = $sToken; } + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oSearch->hasHousenumber() + && !$oSearch->hasOperator(\Nominatim\Operator::POSTCODE) + && $oPosition->maybePhrase('street'); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $aNewSearches = array(); + + // sanity check: if the housenumber is not mainly made + // up of numbers, add a penalty + $iSearchCost = 1; + if (preg_match('/\\d/', $this->sToken) === 0 + || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) { + $iSearchCost++; + } + if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) { + $iSearchCost++; + } + if (empty($this->iId)) { + $iSearchCost++; + } + // also must not appear in the middle of the address + if ($oSearch->hasAddress() || $oSearch->hasPostcode()) { + $iSearchCost++; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setHousenumber($this->sToken); + $aNewSearches[] = $oNewSearch; + + // Housenumbers may appear in the name when the place has its own + // address terms. + if ($this->iId !== null + && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName()) + && !$oSearch->hasAddress() + ) { + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setHousenumberAsName($this->iId); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + + public function debugInfo() { return array( @@ -26,4 +100,9 @@ class HouseNumber 'Info' => array('nr' => $this->sToken) ); } + + public function debugCode() + { + return 'H'; + } } diff --git a/lib-php/TokenList.php b/lib-php/TokenList.php index 2df9fe05..a599648c 100644 --- a/lib-php/TokenList.php +++ b/lib-php/TokenList.php @@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/TokenHousenumber.php'); require_once(CONST_LibDir.'/TokenPostcode.php'); require_once(CONST_LibDir.'/TokenSpecialTerm.php'); require_once(CONST_LibDir.'/TokenWord.php'); +require_once(CONST_LibDir.'/TokenPartial.php'); require_once(CONST_LibDir.'/SpecialSearchOperator.php'); /** @@ -17,15 +18,6 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php'); * tokens do not have a common base class. All tokens need to have a field * with the word id that points to an entry in the `word` database table * but otherwise the information saved about a token can be very different. - * - * There are two different kinds of token words: full words and partial terms. - * - * Full words start with a space. They represent a complete name of a place. - * All special tokens are normally full words. - * - * Partial terms have no space at the beginning. They may represent a part of - * a name of a place (e.g. in the name 'World Trade Center' a partial term - * would be 'Trade' or 'Trade Center'). They are only used in TokenWord. */ class TokenList { @@ -64,7 +56,7 @@ class TokenList */ public function containsAny($sWord) { - return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]); + return isset($this->aTokens[$sWord]); } /** @@ -86,8 +78,8 @@ class TokenList foreach ($this->aTokens as $aTokenList) { foreach ($aTokenList as $oToken) { - if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) { - $ids[$oToken->iId] = $oToken->iId; + if (is_a($oToken, '\Nominatim\Token\Word')) { + $ids[$oToken->getId()] = $oToken->getId(); } } } @@ -117,9 +109,9 @@ class TokenList $aWordsIDs = array(); foreach ($this->aTokens as $sToken => $aWords) { foreach ($aWords as $aToken) { - if ($aToken->iId !== null) { - $aWordsIDs[$aToken->iId] = - '#'.$sToken.'('.$aToken->iId.')#'; + $iId = $aToken->getId(); + if ($iId !== null) { + $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#'; } } } diff --git a/lib-php/TokenPartial.php b/lib-php/TokenPartial.php new file mode 100644 index 00000000..131bb2a3 --- /dev/null +++ b/lib-php/TokenPartial.php @@ -0,0 +1,118 @@ +iId = $iId; + $this->bNumberToken = (bool) preg_match('#^[0-9 ]+$#', $sToken); + $this->iSearchNameCount = $iSearchNameCount; + } + + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oPosition->isPhrase('country'); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $aNewSearches = array(); + + // Partial token in Address. + if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) + && $oSearch->hasName() + ) { + $iSearchCost = $this->bNumberToken ? 2 : 1; + if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) { + $iSearchCost += 1; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->addAddressToken( + $this->iId, + $this->iSearchNameCount < CONST_Max_Word_Frequency + ); + + $aNewSearches[] = $oNewSearch; + } + + // Partial token in Name. + if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress()) + && (!$oSearch->hasName(true) + || $oSearch->getNamePhrase() == $oPosition->getPhrase()) + ) { + $iSearchCost = 1; + if (!$oSearch->hasName(true)) { + $iSearchCost += 1; + } + if ($this->bNumberToken) { + $iSearchCost += 1; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->addPartialNameToken( + $this->iId, + $this->iSearchNameCount < CONST_Max_Word_Frequency, + $oPosition->getPhrase() + ); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + + + public function debugInfo() + { + return array( + 'ID' => $this->iId, + 'Type' => 'partial', + 'Info' => array( + 'count' => $this->iSearchNameCount + ) + ); + } + + public function debugCode() + { + return 'w'; + } +} diff --git a/lib-php/TokenPostcode.php b/lib-php/TokenPostcode.php index 8fa2ae80..c0b42fad 100644 --- a/lib-php/TokenPostcode.php +++ b/lib-php/TokenPostcode.php @@ -8,11 +8,11 @@ namespace Nominatim\Token; class Postcode { /// Database word id, if available. - public $iId; + private $iId; /// Full nomralized postcode (upper cased). - public $sPostcode; + private $sPostcode; // Optional country code the postcode belongs to (currently unused). - public $sCountryCode; + private $sCountryCode; public function __construct($iId, $sPostcode, $sCountryCode = '') { @@ -21,6 +21,67 @@ class Postcode $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode; } + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oSearch->hasPostcode() && $oPosition->maybePhrase('postalcode'); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $aNewSearches = array(); + + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($oSearch->hasOperator(\Nominatim\Operator::NONE) && $oPosition->isFirstToken()) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->setPostcodeAsName($this->iId, $this->sPostcode); + + $aNewSearches[] = $oNewSearch; + } + + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if (!$oSearch->hasOperator(\Nominatim\Operator::POSTCODE) + && ($oPosition->isPhrase('postalcode') || $oSearch->hasName()) + ) { + $iPenalty = 1; + if (strlen($this->sPostcode) < 4) { + $iPenalty += 4 - strlen($this->sPostcode); + } + $oNewSearch = $oSearch->clone($iPenalty); + $oNewSearch->setPostcode($this->sPostcode); + + $aNewSearches[] = $oNewSearch; + } + + return $aNewSearches; + } + public function debugInfo() { return array( @@ -29,4 +90,9 @@ class Postcode 'Info' => $this->sPostcode.'('.$this->sCountryCode.')' ); } + + public function debugCode() + { + return 'P'; + } } diff --git a/lib-php/TokenSpecialTerm.php b/lib-php/TokenSpecialTerm.php index b2c312ec..5b2d4c70 100644 --- a/lib-php/TokenSpecialTerm.php +++ b/lib-php/TokenSpecialTerm.php @@ -10,13 +10,13 @@ require_once(CONST_LibDir.'/SpecialSearchOperator.php'); class SpecialTerm { /// Database word id, if applicable. - public $iId; + private $iId; /// Class (or OSM tag key) of the place to look for. - public $sClass; + private $sClass; /// Type (or OSM tag value) of the place to look for. - public $sType; + private $sType; /// Relationship of the operator to the object (see Operator class). - public $iOperator; + private $iOperator; public function __construct($iID, $sClass, $sType, $iOperator) { @@ -26,6 +26,62 @@ class SpecialTerm $this->iOperator = $iOperator; } + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oSearch->hasOperator() && $oPosition->isPhrase(''); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + $iSearchCost = 2; + + $iOp = $this->iOperator; + if ($iOp == \Nominatim\Operator::NONE) { + if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) { + $iOp = \Nominatim\Operator::NAME; + } else { + $iOp = \Nominatim\Operator::NEAR; + } + $iSearchCost += 2; + } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { + $iSearchCost += 2; + } + if ($oSearch->hasHousenumber()) { + $iSearchCost ++; + } + + $oNewSearch = $oSearch->clone($iSearchCost); + $oNewSearch->setPoiSearch($iOp, $this->sClass, $this->sType); + + return array($oNewSearch); + } + + public function debugInfo() { return array( @@ -38,4 +94,9 @@ class SpecialTerm ) ); } + + public function debugCode() + { + return 'S'; + } } diff --git a/lib-php/TokenWord.php b/lib-php/TokenWord.php index fc28535d..59456e35 100644 --- a/lib-php/TokenWord.php +++ b/lib-php/TokenWord.php @@ -8,31 +8,95 @@ namespace Nominatim\Token; class Word { /// Database word id, if applicable. - public $iId; - /// If true, the word may represent only part of a place name. - public $bPartial; + private $iId; /// Number of appearances in the database. - public $iSearchNameCount; + private $iSearchNameCount; /// Number of terms in the word. - public $iTermCount; + private $iTermCount; - public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount) + public function __construct($iId, $iSearchNameCount, $iTermCount) { $this->iId = $iId; - $this->bPartial = $bPartial; $this->iSearchNameCount = $iSearchNameCount; $this->iTermCount = $iTermCount; } + public function getId() + { + return $this->iId; + } + + /** + * Check if the token can be added to the given search. + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return True if the token is compatible with the search configuration + * given the position. + */ + public function isExtendable($oSearch, $oPosition) + { + return !$oPosition->isPhrase('country'); + } + + /** + * Derive new searches by adding this token to an existing search. + * + * @param object $oSearch Partial search description derived so far. + * @param object $oPosition Description of the token position within + the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendSearch($oSearch, $oPosition) + { + // Full words can only be a name if they appear at the beginning + // of the phrase. In structured search the name must forcably in + // the first phrase. In unstructured search it may be in a later + // phrase when the first phrase is a house number. + if ($oSearch->hasName() + || !($oPosition->isFirstPhrase() || $oPosition->isPhrase('')) + ) { + if ($this->iTermCount > 1 + && ($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) + ) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->addAddressToken($this->iId); + + return array($oNewSearch); + } + } elseif (!$oSearch->hasName(true)) { + $oNewSearch = $oSearch->clone(1); + $oNewSearch->addNameToken( + $this->iId, + CONST_Search_NameOnlySearchFrequencyThreshold + && $this->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold + ); + + return array($oNewSearch); + } + + return array(); + } + public function debugInfo() { return array( 'ID' => $this->iId, 'Type' => 'word', 'Info' => array( - 'partial' => $this->bPartial, - 'count' => $this->iSearchNameCount + 'count' => $this->iSearchNameCount, + 'terms' => $this->iTermCount ) ); } + + public function debugCode() + { + return 'W'; + } } diff --git a/lib-php/admin/export.php b/lib-php/admin/export.php index b038cf2a..9f205de7 100644 --- a/lib-php/admin/export.php +++ b/lib-php/admin/export.php @@ -49,7 +49,9 @@ $oDB->connect(); if (isset($aCMDResult['output-type'])) { - if (!isset($aRankmap[$aCMDResult['output-type']])) fail('unknown output-type: '.$aCMDResult['output-type']); + if (!isset($aRankmap[$aCMDResult['output-type']])) { + fail('unknown output-type: '.$aCMDResult['output-type']); + } $iOutputRank = $aRankmap[$aCMDResult['output-type']]; } else { $iOutputRank = $aRankmap['street']; @@ -58,14 +60,18 @@ // Preferred language $oParams = new Nominatim\ParameterParser(); - if (!isset($aCMDResult['language'])) $aCMDResult['language'] = 'xx'; + if (!isset($aCMDResult['language'])) { + $aCMDResult['language'] = 'xx'; + } $aLangPrefOrder = $oParams->getPreferredLanguages($aCMDResult['language']); $sLanguagePrefArraySQL = $oDB->getArraySQL($oDB->getDBQuotedList($aLangPrefOrder)); // output formatting: build up a lookup table that maps address ranks to columns $aColumnMapping = array(); $iNumCol = 0; - if (!isset($aCMDResult['output-format'])) $aCMDResult['output-format'] = 'street;suburb;city;county;state;country'; + if (!isset($aCMDResult['output-format'])) { + $aCMDResult['output-format'] = 'street;suburb;city;county;state;country'; + } foreach (preg_split('/\s*;\s*/', $aCMDResult['output-format']) as $sColumn) { $bHasData = false; foreach (preg_split('/\s*,\s*/', $sColumn) as $sRank) { @@ -80,7 +86,9 @@ } } } - if ($bHasData) $iNumCol++; + if ($bHasData) { + $iNumCol++; + } } // build the query for objects @@ -122,7 +130,9 @@ if ($sOsmType) { $sSQL = 'select place_id from placex where osm_type = :osm_type and osm_id = :osm_id'; $sParentId = $oDB->getOne($sSQL, array('osm_type' => $sOsmType, 'osm_id' => $sOsmId)); - if (!$sParentId) fail('Could not find place '.$sOsmType.' '.$sOsmId); + if (!$sParentId) { + fail('Could not find place '.$sOsmType.' '.$sOsmId); + } } if ($sParentId) { $sPlacexSQL .= ' and place_id in (select place_id from place_addressline where address_place_id = '.$sParentId.' and isaddress)'; @@ -136,7 +146,6 @@ $oResults = $oDB->getQueryStatement($sPlacexSQL); $fOutstream = fopen('php://output', 'w'); while ($aRow = $oResults->fetch()) { - //var_dump($aRow); $iPlaceID = $aRow['place_id']; $sSQL = "select rank_address,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata(:place_id, -1)"; $sSQL .= ' WHERE isaddress'; diff --git a/lib-php/admin/update.php b/lib-php/admin/update.php index ea58f37c..3075070a 100644 --- a/lib-php/admin/update.php +++ b/lib-php/admin/update.php @@ -40,7 +40,9 @@ $oDB->connect(); $fPostgresVersion = $oDB->getPostgresVersion(); $aDSNInfo = Nominatim\DB::parseDSN(getSetting('DATABASE_DSN')); -if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) $aDSNInfo['port'] = 5432; +if (!isset($aDSNInfo['port']) || !$aDSNInfo['port']) { + $aDSNInfo['port'] = 5432; +} // cache memory to be used by osm2pgsql, should not be more than the available memory $iCacheMemory = (isset($aResult['osm2pgsql-cache'])?$aResult['osm2pgsql-cache']:2000); diff --git a/lib-php/admin/warm.php b/lib-php/admin/warm.php index d6aa3d9b..39a37506 100644 --- a/lib-php/admin/warm.php +++ b/lib-php/admin/warm.php @@ -62,11 +62,15 @@ if (!$aResult['search-only']) { $oPlaceLookup->setLanguagePreference(array('en')); echo 'Warm reverse: '; - if ($bVerbose) echo "\n"; + if ($bVerbose) { + echo "\n"; + } for ($i = 0; $i < 1000; $i++) { $fLat = rand(-9000, 9000) / 100; $fLon = rand(-18000, 18000) / 100; - if ($bVerbose) echo "$fLat, $fLon = "; + if ($bVerbose) { + echo "$fLat, $fLon = "; + } $oLookup = $oReverseGeocode->lookup($fLat, $fLon); $aSearchResults = $oLookup ? $oPlaceLookup->lookup(array($oLookup->iId => $oLookup)) : null; @@ -79,10 +83,14 @@ if (!$aResult['reverse-only']) { $oGeocode = new Nominatim\Geocode($oDB); echo 'Warm search: '; - if ($bVerbose) echo "\n"; + if ($bVerbose) { + echo "\n"; + } $sSQL = 'SELECT word FROM word WHERE word is not null ORDER BY search_name_count DESC LIMIT 1000'; foreach ($oDB->getCol($sSQL) as $sWord) { - if ($bVerbose) echo "$sWord = "; + if ($bVerbose) { + echo "$sWord = "; + } $oGeocode->setLanguagePreference(array('en')); $oGeocode->setQuery($sWord); diff --git a/lib-php/cmd.php b/lib-php/cmd.php index 9c971e5f..a52e8fce 100644 --- a/lib-php/cmd.php +++ b/lib-php/cmd.php @@ -9,8 +9,12 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn foreach ($aSpec as $aLine) { if (is_array($aLine)) { - if ($aLine[0]) $aQuick['--'.$aLine[0]] = $aLine; - if ($aLine[1]) $aQuick['-'.$aLine[1]] = $aLine; + if ($aLine[0]) { + $aQuick['--'.$aLine[0]] = $aLine; + } + if ($aLine[1]) { + $aQuick['-'.$aLine[1]] = $aLine; + } $aCounts[$aLine[0]] = 0; } } @@ -28,7 +32,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn $xVal = array(); for ($n = $aLine[4]; $i < $iSize && $n; $n--) { $i++; - if ($i >= $iSize || $aArg[$i][0] == '-') showUsage($aSpec, $bExitOnError, 'Parameter of \''.$aLine[0].'\' is missing'); + if ($i >= $iSize || $aArg[$i][0] == '-') { + showUsage($aSpec, $bExitOnError, 'Parameter of \''.$aLine[0].'\' is missing'); + } switch ($aLine[6]) { case 'realpath': @@ -56,7 +62,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn break; } } - if ($aLine[4] == 1) $xVal = $xVal[0]; + if ($aLine[4] == 1) { + $xVal = $xVal[0]; + } } else { $xVal = true; } @@ -65,7 +73,9 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn } if ($aLine[3] > 1) { - if (!array_key_exists($aLine[0], $aResult)) $aResult[$aLine[0]] = array(); + if (!array_key_exists($aLine[0], $aResult)) { + $aResult[$aLine[0]] = array(); + } $aResult[$aLine[0]][] = $xVal; } else { $aResult[$aLine[0]] = $xVal; @@ -75,18 +85,23 @@ function getCmdOpt($aArg, $aSpec, &$aResult, $bExitOnError = false, $bExitOnUnkn } } - if (array_key_exists('help', $aResult)) showUsage($aSpec); - if ($bUnknown && $bExitOnUnknown) showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\''); + if (array_key_exists('help', $aResult)) { + showUsage($aSpec); + } + if ($bUnknown && $bExitOnUnknown) { + showUsage($aSpec, $bExitOnError, 'Unknown option \''.$bUnknown.'\''); + } foreach ($aSpec as $aLine) { if (is_array($aLine)) { - if ($aCounts[$aLine[0]] < $aLine[2]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing'); - if ($aCounts[$aLine[0]] > $aLine[3]) showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times'); - switch ($aLine[6]) { - case 'bool': - if (!array_key_exists($aLine[0], $aResult)) - $aResult[$aLine[0]] = false; - break; + if ($aCounts[$aLine[0]] < $aLine[2]) { + showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is missing'); + } + if ($aCounts[$aLine[0]] > $aLine[3]) { + showUsage($aSpec, $bExitOnError, 'Option \''.$aLine[0].'\' is pressent too many times'); + } + if ($aLine[6] == 'bool' && !array_key_exists($aLine[0], $aResult)) { + $aResult[$aLine[0]] = false; } } } @@ -109,8 +124,12 @@ function showUsage($aSpec, $bExit = false, $sError = false) echo "\n"; } $aNames = array(); - if ($aLine[1]) $aNames[] = '-'.$aLine[1]; - if ($aLine[0]) $aNames[] = '--'.$aLine[0]; + if ($aLine[1]) { + $aNames[] = '-'.$aLine[1]; + } + if ($aLine[0]) { + $aNames[] = '--'.$aLine[0]; + } $sName = join(', ', $aNames); echo ' '.$sName.str_repeat(' ', 30-strlen($sName)).$aLine[7]."\n"; } else { diff --git a/lib-php/init-website.php b/lib-php/init-website.php index f2d52980..d6cc8a24 100644 --- a/lib-php/init-website.php +++ b/lib-php/init-website.php @@ -81,6 +81,10 @@ if (CONST_NoAccessControl) { header('Access-Control-Allow-Headers: '.$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']); } } -if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit; +if (isset($_SERVER['REQUEST_METHOD']) && $_SERVER['REQUEST_METHOD'] == 'OPTIONS') { + exit; +} -if (CONST_Debug) header('Content-type: text/html; charset=utf-8'); +if (CONST_Debug) { + header('Content-type: text/html; charset=utf-8'); +} diff --git a/lib-php/lib.php b/lib-php/lib.php index 8d82c5b3..d95ad4ee 100644 --- a/lib-php/lib.php +++ b/lib-php/lib.php @@ -6,10 +6,7 @@ function loadSettings($sProjectDir) // Temporary hack to set the direcory via environment instead of // the installed scripts. Neither setting is part of the official // set of settings. - defined('CONST_DataDir') or define('CONST_DataDir', $_SERVER['NOMINATIM_DATADIR']); - defined('CONST_SqlDir') or define('CONST_SqlDir', $_SERVER['NOMINATIM_SQLDIR']); defined('CONST_ConfigDir') or define('CONST_ConfigDir', $_SERVER['NOMINATIM_CONFIGDIR']); - defined('CONST_Default_ModulePath') or define('CONST_Default_ModulePath', $_SERVER['NOMINATIM_DATABASE_MODULE_SRC_PATH']); } function getSetting($sConfName, $sDefault = null) @@ -32,22 +29,14 @@ function getSettingBool($sConfName) || strcmp($sVal, '1') == 0; } -function getSettingConfig($sConfName, $sSystemConfig) -{ - $sValue = $_SERVER['NOMINATIM_'.$sConfName]; - - if (!$sValue) { - return CONST_ConfigDir.'/'.$sSystemConfig; - } - - return $sValue; -} - function fail($sError, $sUserError = false) { - if (!$sUserError) $sUserError = $sError; + if (!$sUserError) { + $sUserError = $sError; + } error_log('ERROR: '.$sError); - var_dump($sUserError)."\n"; + var_dump($sUserError); + echo "\n"; exit(-1); } @@ -95,8 +84,9 @@ function getDatabaseDate(&$oDB) function byImportance($a, $b) { - if ($a['importance'] != $b['importance']) + if ($a['importance'] != $b['importance']) { return ($a['importance'] > $b['importance']?-1:1); + } return $a['foundorder'] <=> $b['foundorder']; } @@ -231,6 +221,8 @@ function closestHouseNumber($aRow) if (!function_exists('array_key_last')) { function array_key_last(array $array) { - if (!empty($array)) return key(array_slice($array, -1, 1, true)); + if (!empty($array)) { + return key(array_slice($array, -1, 1, true)); + } } } diff --git a/lib-php/log.php b/lib-php/log.php index 25ed75cb..d7e14932 100644 --- a/lib-php/log.php +++ b/lib-php/log.php @@ -5,15 +5,23 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array()) { $fStartTime = microtime(true); $aStartTime = explode('.', $fStartTime); - if (!isset($aStartTime[1])) $aStartTime[1] = '0'; + if (!isset($aStartTime[1])) { + $aStartTime[1] = '0'; + } $sOutputFormat = ''; - if (isset($_GET['format'])) $sOutputFormat = $_GET['format']; + if (isset($_GET['format'])) { + $sOutputFormat = $_GET['format']; + } if ($sType == 'reverse') { $sOutQuery = (isset($_GET['lat'])?$_GET['lat']:'').'/'; - if (isset($_GET['lon'])) $sOutQuery .= $_GET['lon']; - if (isset($_GET['zoom'])) $sOutQuery .= '/'.$_GET['zoom']; + if (isset($_GET['lon'])) { + $sOutQuery .= $_GET['lon']; + } + if (isset($_GET['zoom'])) { + $sOutQuery .= '/'.$_GET['zoom']; + } } else { $sOutQuery = $sQuery; } @@ -28,13 +36,15 @@ function logStart(&$oDB, $sType = '', $sQuery = '', $aLanguageList = array()) ); if (CONST_Log_DB) { - if (isset($_GET['email'])) + if (isset($_GET['email'])) { $sUserAgent = $_GET['email']; - elseif (isset($_SERVER['HTTP_REFERER'])) + } elseif (isset($_SERVER['HTTP_REFERER'])) { $sUserAgent = $_SERVER['HTTP_REFERER']; - elseif (isset($_SERVER['HTTP_USER_AGENT'])) + } elseif (isset($_SERVER['HTTP_USER_AGENT'])) { $sUserAgent = $_SERVER['HTTP_USER_AGENT']; - else $sUserAgent = ''; + } else { + $sUserAgent = ''; + } $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('; $sSQL .= join(',', $oDB->getDBQuotedList(array( @@ -60,7 +70,9 @@ function logEnd(&$oDB, $hLog, $iNumResults) if (CONST_Log_DB) { $aEndTime = explode('.', $fEndTime); - if (!$aEndTime[1]) $aEndTime[1] = '0'; + if (!$aEndTime[1]) { + $aEndTime[1] = '0'; + } $sEndTime = date('Y-m-d H:i:s', $aEndTime[0]).'.'.$aEndTime[1]; $sSQL = 'update new_query_log set endtime = '.$oDB->getDBQuoted($sEndTime).', results = '.$iNumResults; diff --git a/lib-php/migration/PhraseSettingsToJson.php b/lib-php/migration/PhraseSettingsToJson.php index 15c49f0a..ac6e6213 100644 --- a/lib-php/migration/PhraseSettingsToJson.php +++ b/lib-php/migration/PhraseSettingsToJson.php @@ -8,10 +8,12 @@ if (file_exists($phpPhraseSettingsFile) && !file_exists($jsonPhraseSettingsFile) $data = array(); - if (isset($aTagsBlacklist)) + if (isset($aTagsBlacklist)) { $data['blackList'] = $aTagsBlacklist; - if (isset($aTagsWhitelist)) + } + if (isset($aTagsWhitelist)) { $data['whiteList'] = $aTagsWhitelist; + } $jsonFile = fopen($jsonPhraseSettingsFile, 'w'); fwrite($jsonFile, json_encode($data)); diff --git a/lib-php/output.php b/lib-php/output.php index 8de81576..ee1db44c 100644 --- a/lib-php/output.php +++ b/lib-php/output.php @@ -3,14 +3,26 @@ function formatOSMType($sType, $bIncludeExternal = true) { - if ($sType == 'N') return 'node'; - if ($sType == 'W') return 'way'; - if ($sType == 'R') return 'relation'; + if ($sType == 'N') { + return 'node'; + } + if ($sType == 'W') { + return 'way'; + } + if ($sType == 'R') { + return 'relation'; + } - if (!$bIncludeExternal) return ''; + if (!$bIncludeExternal) { + return ''; + } - if ($sType == 'T') return 'way'; - if ($sType == 'I') return 'way'; + if ($sType == 'T') { + return 'way'; + } + if ($sType == 'I') { + return 'way'; + } // not handled: P, L diff --git a/lib-php/template/address-geocodejson.php b/lib-php/template/address-geocodejson.php index 0066e80e..927f3861 100644 --- a/lib-php/template/address-geocodejson.php +++ b/lib-php/template/address-geocodejson.php @@ -5,9 +5,11 @@ $aFilteredPlaces = array(); if (empty($aPlace)) { - if (isset($sError)) + if (isset($sError)) { $aFilteredPlaces['error'] = $sError; - else $aFilteredPlaces['error'] = 'Unable to geocode'; + } else { + $aFilteredPlaces['error'] = 'Unable to geocode'; + } javascript_renderData($aFilteredPlaces); } else { $aFilteredPlaces = array( @@ -17,7 +19,9 @@ if (empty($aPlace)) { ) ); - if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id']; + if (isset($aPlace['place_id'])) { + $aFilteredPlaces['properties']['geocoding']['place_id'] = $aPlace['place_id']; + } $sOSMType = formatOSMType($aPlace['osm_type']); if ($sOSMType) { $aFilteredPlaces['properties']['geocoding']['osm_type'] = $sOSMType; diff --git a/lib-php/template/address-geojson.php b/lib-php/template/address-geojson.php index 089a86b6..0dd96f21 100644 --- a/lib-php/template/address-geojson.php +++ b/lib-php/template/address-geojson.php @@ -3,9 +3,11 @@ $aFilteredPlaces = array(); if (empty($aPlace)) { - if (isset($sError)) + if (isset($sError)) { $aFilteredPlaces['error'] = $sError; - else $aFilteredPlaces['error'] = 'Unable to geocode'; + } else { + $aFilteredPlaces['error'] = 'Unable to geocode'; + } javascript_renderData($aFilteredPlaces); } else { $aFilteredPlaces = array( @@ -13,7 +15,9 @@ if (empty($aPlace)) { 'properties' => array() ); - if (isset($aPlace['place_id'])) $aFilteredPlaces['properties']['place_id'] = $aPlace['place_id']; + if (isset($aPlace['place_id'])) { + $aFilteredPlaces['properties']['place_id'] = $aPlace['place_id']; + } $sOSMType = formatOSMType($aPlace['osm_type']); if ($sOSMType) { $aFilteredPlaces['properties']['osm_type'] = $sOSMType; @@ -36,8 +40,12 @@ if (empty($aPlace)) { if (isset($aPlace['address'])) { $aFilteredPlaces['properties']['address'] = $aPlace['address']->getAddressNames(); } - if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags']; - if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails']; + if (isset($aPlace['sExtraTags'])) { + $aFilteredPlaces['properties']['extratags'] = $aPlace['sExtraTags']; + } + if (isset($aPlace['sNameDetails'])) { + $aFilteredPlaces['properties']['namedetails'] = $aPlace['sNameDetails']; + } if (isset($aPlace['aBoundingBox'])) { $aFilteredPlaces['bbox'] = array( diff --git a/lib-php/template/address-json.php b/lib-php/template/address-json.php index 691d6a74..513d312e 100644 --- a/lib-php/template/address-json.php +++ b/lib-php/template/address-json.php @@ -3,19 +3,27 @@ $aFilteredPlaces = array(); if (empty($aPlace)) { - if (isset($sError)) + if (isset($sError)) { $aFilteredPlaces['error'] = $sError; - else $aFilteredPlaces['error'] = 'Unable to geocode'; + } else { + $aFilteredPlaces['error'] = 'Unable to geocode'; + } } else { - if (isset($aPlace['place_id'])) $aFilteredPlaces['place_id'] = $aPlace['place_id']; + if (isset($aPlace['place_id'])) { + $aFilteredPlaces['place_id'] = $aPlace['place_id']; + } $aFilteredPlaces['licence'] = 'Data © OpenStreetMap contributors, ODbL 1.0. https://osm.org/copyright'; $sOSMType = formatOSMType($aPlace['osm_type']); if ($sOSMType) { $aFilteredPlaces['osm_type'] = $sOSMType; $aFilteredPlaces['osm_id'] = $aPlace['osm_id']; } - if (isset($aPlace['lat'])) $aFilteredPlaces['lat'] = $aPlace['lat']; - if (isset($aPlace['lon'])) $aFilteredPlaces['lon'] = $aPlace['lon']; + if (isset($aPlace['lat'])) { + $aFilteredPlaces['lat'] = $aPlace['lat']; + } + if (isset($aPlace['lon'])) { + $aFilteredPlaces['lon'] = $aPlace['lon']; + } if ($sOutputFormat == 'jsonv2' || $sOutputFormat == 'geojson') { $aFilteredPlaces['place_rank'] = $aPlace['rank_search']; @@ -35,8 +43,12 @@ if (empty($aPlace)) { if (isset($aPlace['address'])) { $aFilteredPlaces['address'] = $aPlace['address']->getAddressNames(); } - if (isset($aPlace['sExtraTags'])) $aFilteredPlaces['extratags'] = $aPlace['sExtraTags']; - if (isset($aPlace['sNameDetails'])) $aFilteredPlaces['namedetails'] = $aPlace['sNameDetails']; + if (isset($aPlace['sExtraTags'])) { + $aFilteredPlaces['extratags'] = $aPlace['sExtraTags']; + } + if (isset($aPlace['sNameDetails'])) { + $aFilteredPlaces['namedetails'] = $aPlace['sNameDetails']; + } if (isset($aPlace['aBoundingBox'])) { $aFilteredPlaces['boundingbox'] = $aPlace['aBoundingBox']; diff --git a/lib-php/template/address-xml.php b/lib-php/template/address-xml.php index ab0bc72c..cf045ab9 100644 --- a/lib-php/template/address-xml.php +++ b/lib-php/template/address-xml.php @@ -12,17 +12,29 @@ echo " querystring='".htmlspecialchars($_SERVER['QUERY_STRING'], ENT_QUOTES)."'" echo ">\n"; if (empty($aPlace)) { - if (isset($sError)) + if (isset($sError)) { echo "$sError"; - else echo 'Unable to geocode'; + } else { + echo 'Unable to geocode'; + } } else { echo 'contains($sToken)) { - if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { // US ZIP+4 codes - merge in the 5-digit ZIP code $oValidTokens->addToken( $sToken, new Token\Postcode(null, $aData[1], 'us') ); - } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + } elseif (preg_match('/^[0-9]+$/', $sToken)) { // Unknown single word token with a number. // Assume it is a house number. $oValidTokens->addToken( @@ -195,17 +195,28 @@ class Tokenizer ) { $oToken = new Token\Country($iId, $aWord['country_code']); } + } elseif ($aWord['word_token'][0] == ' ') { + $oToken = new Token\Word( + $iId, + $aWord['word_token'][0] != ' ', + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') + ); } else { - $oToken = new Token\Word( + $oToken = new Token\Partial( $iId, - $aWord['word_token'][0] != ' ', - (int) $aWord['count'], - substr_count($aWord['word_token'], ' ') + $aWord['word_token'], + (int) $aWord['count'] ); } if ($oToken) { - $oValidTokens->addToken($aWord['word_token'], $oToken); + // remove any leading spaces + if ($aWord['word_token'][0] == ' ') { + $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken); + } else { + $oValidTokens->addToken($aWord['word_token'], $oToken); + } } } } diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php index 0fb37fd0..064b4166 100644 --- a/lib-php/tokenizer/legacy_tokenizer.php +++ b/lib-php/tokenizer/legacy_tokenizer.php @@ -105,7 +105,7 @@ class Tokenizer // now compute all possible tokens $aWordLists = array(); $aTokens = array(); - foreach ($aNormPhrases as $sTitle => $sPhrase) { + foreach ($aNormPhrases as $sPhrase) { if (strlen($sPhrase) > 0) { $aWords = explode(' ', $sPhrase); Tokenizer::addTokens($aTokens, $aWords); @@ -137,14 +137,14 @@ class Tokenizer // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { - if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { - if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { // US ZIP+4 codes - merge in the 5-digit ZIP code $oValidTokens->addToken( $sToken, new Token\Postcode(null, $aData[1], 'us') ); - } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + } elseif (preg_match('/^[0-9]+$/', $sToken)) { // Unknown single word token with a number. // Assume it is a house number. $oValidTokens->addToken( @@ -212,17 +212,29 @@ class Tokenizer ) { $oToken = new Token\Country($iId, $aWord['country_code']); } - } else { + } elseif ($aWord['word_token'][0] == ' ') { $oToken = new Token\Word( $iId, - $aWord['word_token'][0] != ' ', (int) $aWord['count'], substr_count($aWord['word_token'], ' ') ); + // For backward compatibility: ignore all partial tokens with more + // than one word. + } elseif (strpos($aWord['word_token'], ' ') === false) { + $oToken = new Token\Partial( + $iId, + $aWord['word_token'], + (int) $aWord['count'] + ); } if ($oToken) { - $oValidTokens->addToken($aWord['word_token'], $oToken); + // remove any leading spaces + if ($aWord['word_token'][0] == ' ') { + $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken); + } else { + $oValidTokens->addToken($aWord['word_token'], $oToken); + } } } } diff --git a/lib-php/website/details.php b/lib-php/website/details.php index 55a088d1..c16725e2 100644 --- a/lib-php/website/details.php +++ b/lib-php/website/details.php @@ -95,16 +95,22 @@ $iPlaceID = (int)$sPlaceId; if (CONST_Use_US_Tiger_Data) { $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_tiger WHERE place_id = '.$iPlaceID); - if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; + if ($iParentPlaceID) { + $iPlaceID = $iParentPlaceID; + } } // interpolated house numbers $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_property_osmline WHERE place_id = '.$iPlaceID); -if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; +if ($iParentPlaceID) { + $iPlaceID = $iParentPlaceID; +} // artificial postcodes $iParentPlaceID = $oDB->getOne('SELECT parent_place_id FROM location_postcode WHERE place_id = '.$iPlaceID); -if ($iParentPlaceID) $iPlaceID = $iParentPlaceID; +if ($iParentPlaceID) { + $iPlaceID = $iParentPlaceID; +} $hLog = logStart($oDB, 'details', $_SERVER['QUERY_STRING'], $aLangPrefOrder); diff --git a/lib-php/website/lookup.php b/lib-php/website/lookup.php index 737edc62..eb3705fc 100644 --- a/lib-php/website/lookup.php +++ b/lib-php/website/lookup.php @@ -35,8 +35,10 @@ if (count($aOsmIds) > CONST_Places_Max_ID_count) { foreach ($aOsmIds as $sItem) { // Skip empty sItem - if (empty($sItem)) continue; - + if (empty($sItem)) { + continue; + } + $sType = $sItem[0]; $iId = (int) substr($sItem, 1); if ($iId > 0 && ($sType == 'N' || $sType == 'W' || $sType == 'R')) { @@ -48,7 +50,9 @@ foreach ($aOsmIds as $sItem) { // key names $oResult = $oPlace; unset($oResult['aAddress']); - if (isset($oPlace['aAddress'])) $oResult['address'] = $oPlace['aAddress']; + if (isset($oPlace['aAddress'])) { + $oResult['address'] = $oPlace['aAddress']; + } if ($sOutputFormat != 'geocodejson') { unset($oResult['langaddress']); $oResult['name'] = $oPlace['langaddress']; @@ -71,7 +75,9 @@ foreach ($aOsmIds as $sItem) { } -if (CONST_Debug) exit; +if (CONST_Debug) { + exit; +} $sXmlRootTag = 'lookupresults'; $sQuery = join(',', $aCleanedQueryParts); diff --git a/lib-php/website/polygons.php b/lib-php/website/polygons.php index 88b48ae8..c1382dbf 100644 --- a/lib-php/website/polygons.php +++ b/lib-php/website/polygons.php @@ -30,8 +30,12 @@ while ($iTotalBroken && empty($aPolygons)) { $iDays++; } - if ($bReduced) $aWhere[] = "errormessage like 'Area reduced%'"; - if ($sClass) $sWhere[] = "class = '".pg_escape_string($sClass)."'"; + if ($bReduced) { + $aWhere[] = "errormessage like 'Area reduced%'"; + } + if ($sClass) { + $sWhere[] = "class = '".pg_escape_string($sClass)."'"; + } if (!empty($aWhere)) { $sSQL .= ' WHERE '.join(' and ', $aWhere); diff --git a/lib-php/website/search.php b/lib-php/website/search.php index 56064e7a..e553ef39 100644 --- a/lib-php/website/search.php +++ b/lib-php/website/search.php @@ -82,7 +82,9 @@ if (isset($_SERVER['REQUEST_SCHEME']) $sMoreURL = '/search.php?'.http_build_query($aMoreParams); } -if (CONST_Debug) exit; +if (CONST_Debug) { + exit; +} $sOutputTemplate = ($sOutputFormat == 'jsonv2') ? 'json' : $sOutputFormat; include(CONST_LibDir.'/template/search-'.$sOutputTemplate.'.php'); diff --git a/nominatim/cli.py b/nominatim/cli.py index 533a920e..5626deb4 100644 --- a/nominatim/cli.py +++ b/nominatim/cli.py @@ -103,7 +103,7 @@ class CommandlineParser: return 1 -##### Subcommand classes +# Subcommand classes # # Each class needs to implement two functions: add_args() adds the CLI parameters # for the subfunction, run() executes the subcommand. diff --git a/nominatim/clicmd/api.py b/nominatim/clicmd/api.py index a5556952..b99d37b8 100644 --- a/nominatim/clicmd/api.py +++ b/nominatim/clicmd/api.py @@ -90,7 +90,7 @@ class APISearch: if args.query: params = dict(q=args.query) else: - params = {k : getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} + params = {k: getattr(args, k) for k, _ in STRUCTURED_QUERY if getattr(args, k)} for param, _ in EXTRADATA_PARAMS: if getattr(args, param): diff --git a/nominatim/clicmd/args.py b/nominatim/clicmd/args.py index ee194187..996f48f2 100644 --- a/nominatim/clicmd/args.py +++ b/nominatim/clicmd/args.py @@ -24,4 +24,4 @@ class NominatimArgs: main_data=self.config.TABLESPACE_PLACE_DATA, main_index=self.config.TABLESPACE_PLACE_INDEX ) - ) + ) diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index fbc23350..969998ad 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -61,7 +61,7 @@ class UpdateRefresh: args.threads or 1) indexer.index_postcodes() else: - LOG.error("The place table doesn\'t exist. " \ + LOG.error("The place table doesn't exist. " "Postcode updates on a frozen database is not possible.") if args.word_counts: diff --git a/nominatim/clicmd/replication.py b/nominatim/clicmd/replication.py index 242b0f6a..4c8cd44e 100644 --- a/nominatim/clicmd/replication.py +++ b/nominatim/clicmd/replication.py @@ -93,7 +93,7 @@ class UpdateReplication: indexed_only=not args.once) # Sanity check to not overwhelm the Geofabrik servers. - if 'download.geofabrik.de'in params['base_url']\ + if 'download.geofabrik.de' in params['base_url']\ and params['update_interval'] < 86400: LOG.fatal("Update interval too low for download.geofabrik.de.\n" "Please check install documentation " diff --git a/nominatim/config.py b/nominatim/config.py index 72aaf0bd..a8436440 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -68,9 +68,9 @@ class Configuration: """ try: return int(self.__getattr__(name)) - except ValueError: + except ValueError as exp: LOG.fatal("Invalid setting NOMINATIM_%s. Needs to be a number.", name) - raise UsageError("Configuration error.") + raise UsageError("Configuration error.") from exp def get_libpq_dsn(self): diff --git a/nominatim/db/async_connection.py b/nominatim/db/async_connection.py index db4b89ce..a86c5bdc 100644 --- a/nominatim/db/async_connection.py +++ b/nominatim/db/async_connection.py @@ -33,18 +33,17 @@ class DeadlockHandler: self.ignore_sql_errors = ignore_sql_errors def __enter__(self): - pass + return self def __exit__(self, exc_type, exc_value, traceback): if __has_psycopg2_errors__: if exc_type == psycopg2.errors.DeadlockDetected: # pylint: disable=E1101 self.handler() return True - else: - if exc_type == psycopg2.extensions.TransactionRollbackError: - if exc_value.pgcode == '40P01': - self.handler() - return True + elif exc_type == psycopg2.extensions.TransactionRollbackError \ + and exc_value.pgcode == '40P01': + self.handler() + return True if self.ignore_sql_errors and isinstance(exc_value, psycopg2.Error): LOG.info("SQL error ignored: %s", exc_value) @@ -86,7 +85,7 @@ class DBConnection: # Use a dict to hand in the parameters because async is a reserved # word in Python3. - self.conn = psycopg2.connect(**{'dsn' : self.dsn, 'async' : True}) + self.conn = psycopg2.connect(**{'dsn': self.dsn, 'async': True}) self.wait() self.cursor = self.conn.cursor(cursor_factory=cursor_factory) @@ -191,10 +190,7 @@ class WorkerPool: yield thread if command_stat > self.REOPEN_CONNECTIONS_AFTER: - for thread in self.threads: - while not thread.is_done(): - thread.wait() - thread.connect() + self._reconnect_threads() ready = self.threads command_stat = 0 else: @@ -203,6 +199,13 @@ class WorkerPool: self.wait_time += time.time() - tstart + def _reconnect_threads(self): + for thread in self.threads: + while not thread.is_done(): + thread.wait() + thread.connect() + + def __enter__(self): return self diff --git a/nominatim/db/connection.py b/nominatim/db/connection.py index ac8d7c85..1319ac16 100644 --- a/nominatim/db/connection.py +++ b/nominatim/db/connection.py @@ -8,6 +8,7 @@ import os import psycopg2 import psycopg2.extensions import psycopg2.extras +from psycopg2 import sql as pysql from nominatim.errors import UsageError @@ -25,6 +26,16 @@ class _Cursor(psycopg2.extras.DictCursor): super().execute(query, args) + + def execute_values(self, sql, argslist, template=None): + """ Wrapper for the psycopg2 convenience function to execute + SQL for a list of values. + """ + LOG.debug("SQL execute_values(%s, %s)", sql, argslist) + + psycopg2.extras.execute_values(self, sql, argslist, template=template) + + def scalar(self, sql, args=None): """ Execute query that returns a single value. The value is returned. If the query yields more than one row, a ValueError is raised. @@ -37,6 +48,22 @@ class _Cursor(psycopg2.extras.DictCursor): return self.fetchone()[0] + def drop_table(self, name, if_exists=True, cascade=False): + """ Drop the table with the given name. + Set `if_exists` to False if a non-existant table should raise + an exception instead of just being ignored. If 'cascade' is set + to True then all dependent tables are deleted as well. + """ + sql = 'DROP TABLE ' + if if_exists: + sql += 'IF EXISTS ' + sql += '{}' + if cascade: + sql += ' CASCADE' + + self.execute(pysql.SQL(sql).format(pysql.Identifier(name))) + + class _Connection(psycopg2.extensions.connection): """ A connection that provides the specialised cursor by default and adds convenience functions for administrating the database. @@ -75,14 +102,13 @@ class _Connection(psycopg2.extensions.connection): return True - def drop_table(self, name, if_exists=True): + def drop_table(self, name, if_exists=True, cascade=False): """ Drop the table with the given name. Set `if_exists` to False if a non-existant table should raise an exception instead of just being ignored. """ with self.cursor() as cur: - cur.execute("""DROP TABLE {} "{}" - """.format('IF EXISTS' if if_exists else '', name)) + cur.drop_table(name, if_exists, cascade) self.commit() diff --git a/nominatim/db/sql_preprocessor.py b/nominatim/db/sql_preprocessor.py index dafc5de4..d756a215 100644 --- a/nominatim/db/sql_preprocessor.py +++ b/nominatim/db/sql_preprocessor.py @@ -61,7 +61,7 @@ def _setup_postgresql_features(conn): """ pg_version = conn.server_version_tuple() return { - 'has_index_non_key_column' : pg_version >= (11, 0, 0) + 'has_index_non_key_column': pg_version >= (11, 0, 0) } class SQLPreprocessor: diff --git a/nominatim/db/utils.py b/nominatim/db/utils.py index 4d4305e7..9a4a41a5 100644 --- a/nominatim/db/utils.py +++ b/nominatim/db/utils.py @@ -61,9 +61,9 @@ def execute_file(dsn, fname, ignore_errors=False, pre_code=None, post_code=None) # List of characters that need to be quoted for the copy command. -_SQL_TRANSLATION = {ord(u'\\') : u'\\\\', - ord(u'\t') : u'\\t', - ord(u'\n') : u'\\n'} +_SQL_TRANSLATION = {ord(u'\\'): u'\\\\', + ord(u'\t'): u'\\t', + ord(u'\n'): u'\\n'} class CopyBuffer: """ Data collector for the copy_from command. diff --git a/nominatim/indexer/indexer.py b/nominatim/indexer/indexer.py index 5ab0eac3..d0cfb391 100644 --- a/nominatim/indexer/indexer.py +++ b/nominatim/indexer/indexer.py @@ -92,7 +92,7 @@ class Indexer: def index_full(self, analyse=True): - """ Index the complete database. This will first index boudnaries + """ Index the complete database. This will first index boundaries followed by all other objects. When `analyse` is True, then the database will be analysed at the appropriate places to ensure that database statistics are updated. @@ -100,13 +100,10 @@ class Indexer: with connect(self.dsn) as conn: conn.autocommit = True - if analyse: - def _analyze(): + def _analyze(): + if analyse: with conn.cursor() as cur: cur.execute('ANALYZE') - else: - def _analyze(): - pass self.index_by_rank(0, 4) _analyze() @@ -206,7 +203,7 @@ class Indexer: # And insert the curent batch for idx in range(0, len(places), batch): - part = places[idx:idx+batch] + part = places[idx:idx + batch] LOG.debug("Processing places: %s", str(part)) runner.index_places(pool.next_free_worker(), part) progress.add(len(part)) diff --git a/nominatim/indexer/progress.py b/nominatim/indexer/progress.py index 177e67b8..634b1fae 100644 --- a/nominatim/indexer/progress.py +++ b/nominatim/indexer/progress.py @@ -63,7 +63,7 @@ class ProgressLogger: places_per_sec = self.done_places else: diff_seconds = (rank_end_time - self.rank_start_time).total_seconds() - places_per_sec = self.done_places/diff_seconds + places_per_sec = self.done_places / diff_seconds LOG.warning("Done %d/%d in %d @ %.3f per second - FINISHED %s\n", self.done_places, self.total_places, int(diff_seconds), diff --git a/nominatim/indexer/runners.py b/nominatim/indexer/runners.py index aa607faa..068d7d0f 100644 --- a/nominatim/indexer/runners.py +++ b/nominatim/indexer/runners.py @@ -5,13 +5,17 @@ tasks. import functools import psycopg2.extras +from psycopg2 import sql as pysql # pylint: disable=C0111 +def _mk_valuelist(template, num): + return pysql.SQL(',').join([pysql.SQL(template)] * num) + class AbstractPlacexRunner: """ Returns SQL commands for indexing of the placex table. """ - SELECT_SQL = 'SELECT place_id FROM placex' + SELECT_SQL = pysql.SQL('SELECT place_id FROM placex ') def __init__(self, rank, analyzer): self.rank = rank @@ -21,11 +25,12 @@ class AbstractPlacexRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE placex - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL( + """ UPDATE placex + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) @staticmethod @@ -52,14 +57,15 @@ class RankRunner(AbstractPlacexRunner): return "rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE rank_address = {} and indexed_status > 0 - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE rank_address = {} and indexed_status > 0 + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_address = {} - ORDER BY geometry_sector - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_address = {} + ORDER BY geometry_sector + """).format(pysql.Literal(self.rank)) class BoundaryRunner(AbstractPlacexRunner): @@ -71,17 +77,18 @@ class BoundaryRunner(AbstractPlacexRunner): return "boundaries rank {}".format(self.rank) def sql_count_objects(self): - return """SELECT count(*) FROM placex - WHERE indexed_status > 0 - AND rank_search = {} - AND class = 'boundary' and type = 'administrative' - """.format(self.rank) + return pysql.SQL("""SELECT count(*) FROM placex + WHERE indexed_status > 0 + AND rank_search = {} + AND class = 'boundary' and type = 'administrative' + """).format(pysql.Literal(self.rank)) def sql_get_objects(self): - return """{} WHERE indexed_status > 0 and rank_search = {} - and class = 'boundary' and type = 'administrative' - ORDER BY partition, admin_level - """.format(self.SELECT_SQL, self.rank) + return self.SELECT_SQL + pysql.SQL( + """WHERE indexed_status > 0 and rank_search = {} + and class = 'boundary' and type = 'administrative' + ORDER BY partition, admin_level + """).format(pysql.Literal(self.rank)) class InterpolationRunner: @@ -120,11 +127,11 @@ class InterpolationRunner: @staticmethod @functools.lru_cache(maxsize=1) def _index_sql(num_places): - return """ UPDATE location_property_osmline - SET indexed_status = 0, address = v.addr, token_info = v.ti - FROM (VALUES {}) as v(id, addr, ti) - WHERE place_id = v.id - """.format(','.join(["(%s, %s::hstore, %s::jsonb)"] * num_places)) + return pysql.SQL("""UPDATE location_property_osmline + SET indexed_status = 0, address = v.addr, token_info = v.ti + FROM (VALUES {}) as v(id, addr, ti) + WHERE place_id = v.id + """).format(_mk_valuelist("(%s, %s::hstore, %s::jsonb)", num_places)) def index_places(self, worker, places): @@ -157,6 +164,6 @@ class PostcodeRunner: @staticmethod def index_places(worker, ids): - worker.perform(""" UPDATE location_postcode SET indexed_status = 0 - WHERE place_id IN ({}) - """.format(','.join((str(i[0]) for i in ids)))) + worker.perform(pysql.SQL("""UPDATE location_postcode SET indexed_status = 0 + WHERE place_id IN ({})""") + .format(pysql.SQL(',').join((pysql.Literal(i[0]) for i in ids)))) diff --git a/nominatim/tokenizer/icu_name_processor.py b/nominatim/tokenizer/icu_name_processor.py index 28719df1..93d2b0ff 100644 --- a/nominatim/tokenizer/icu_name_processor.py +++ b/nominatim/tokenizer/icu_name_processor.py @@ -119,18 +119,22 @@ class ICUNameProcessor: pos += 1 force_space = False - results = set() - + # No variants detected? Fast return. if startpos == 0: trans_name = self.to_ascii.transliterate(norm_name).strip() + return [trans_name] if trans_name else [] + + return self._compute_result_set(partials, baseform[startpos:]) + + + def _compute_result_set(self, partials, prefix): + results = set() + + for variant in partials: + vname = variant + prefix + trans_name = self.to_ascii.transliterate(vname[1:-1]).strip() if trans_name: results.add(trans_name) - else: - for variant in partials: - name = variant + baseform[startpos:] - trans_name = self.to_ascii.transliterate(name[1:-1]).strip() - if trans_name: - results.add(trans_name) return list(results) diff --git a/nominatim/tokenizer/icu_variants.py b/nominatim/tokenizer/icu_variants.py index 5148f3e2..9ebe3684 100644 --- a/nominatim/tokenizer/icu_variants.py +++ b/nominatim/tokenizer/icu_variants.py @@ -7,12 +7,11 @@ import json _ICU_VARIANT_PORPERTY_FIELDS = ['lang'] -class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS, - defaults=(None, )*len(_ICU_VARIANT_PORPERTY_FIELDS))): +class ICUVariantProperties(namedtuple('_ICUVariantProperties', _ICU_VARIANT_PORPERTY_FIELDS)): """ Data container for saving properties that describe when a variant should be applied. - Porperty instances are hashable. + Property instances are hashable. """ @classmethod def from_rules(cls, _): @@ -52,7 +51,7 @@ def unpickle_variant_set(variant_string): """ data = json.loads(variant_string) - properties = {int(k): ICUVariantProperties(**v) for k, v in data['properties'].items()} - print(properties) + properties = {int(k): ICUVariantProperties.from_rules(v) + for k, v in data['properties'].items()} return set((ICUVariant(src, repl, properties[pid]) for src, repl, pid in data['variants'])) diff --git a/nominatim/tokenizer/legacy_icu_tokenizer.py b/nominatim/tokenizer/legacy_icu_tokenizer.py index c585c5af..6d3d11c1 100644 --- a/nominatim/tokenizer/legacy_icu_tokenizer.py +++ b/nominatim/tokenizer/legacy_icu_tokenizer.py @@ -9,8 +9,6 @@ import re from textwrap import dedent from pathlib import Path -import psycopg2.extras - from nominatim.db.connection import connect from nominatim.db.properties import set_property, get_property from nominatim.db.utils import CopyBuffer @@ -341,7 +339,7 @@ class LegacyICUNameAnalyzer: term = self.name_processor.get_search_normalized(word) if term: copystr.add(word, ' ' + term, cls, typ, - oper if oper in ('in', 'near') else None, 0) + oper if oper in ('in', 'near') else None, 0) added += 1 copystr.copy_out(cursor, 'word', @@ -359,8 +357,7 @@ class LegacyICUNameAnalyzer: to_delete = existing_phrases - new_phrases if to_delete: - psycopg2.extras.execute_values( - cursor, + cursor.execute_values( """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) WHERE word = name and class = in_class and type = in_type and ((op = '-' and operator is null) or op = operator)""", @@ -386,9 +383,9 @@ class LegacyICUNameAnalyzer: if word_tokens: cur.execute("""INSERT INTO word (word_id, word_token, country_code, search_name_count) - (SELECT nextval('seq_word'), token, '{}', 0 + (SELECT nextval('seq_word'), token, %s, 0 FROM unnest(%s) as token) - """.format(country_code), (list(word_tokens),)) + """, (country_code, list(word_tokens))) def process_place(self, place): @@ -411,33 +408,36 @@ class LegacyICUNameAnalyzer: self.add_country_names(country_feature.lower(), names) address = place.get('address') - if address: - hnrs = [] - addr_terms = [] - for key, value in address.items(): - if key == 'postcode': - self._add_postcode(value) - elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): - hnrs.append(value) - elif key == 'street': - token_info.add_street(*self._compute_name_tokens({'name': value})) - elif key == 'place': - token_info.add_place(*self._compute_name_tokens({'name': value})) - elif not key.startswith('_') and \ - key not in ('country', 'full'): - addr_terms.append((key, *self._compute_name_tokens({'name': value}))) - - if hnrs: - hnrs = self._split_housenumbers(hnrs) - token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs]) - - if addr_terms: - token_info.add_address_terms(addr_terms) + self._process_place_address(token_info, address) return token_info.data + def _process_place_address(self, token_info, address): + hnrs = [] + addr_terms = [] + for key, value in address.items(): + if key == 'postcode': + self._add_postcode(value) + elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): + hnrs.append(value) + elif key == 'street': + token_info.add_street(*self._compute_name_tokens({'name': value})) + elif key == 'place': + token_info.add_place(*self._compute_name_tokens({'name': value})) + elif not key.startswith('_') and \ + key not in ('country', 'full'): + addr_terms.append((key, *self._compute_name_tokens({'name': value}))) + + if hnrs: + hnrs = self._split_housenumbers(hnrs) + token_info.add_housenumbers(self.conn, [self._make_standard_hnr(n) for n in hnrs]) + + if addr_terms: + token_info.add_address_terms(addr_terms) + + def _compute_name_tokens(self, names): """ Computes the full name and partial name tokens for the given dictionary of names. diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 6040f88f..c19dce2f 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -370,8 +370,7 @@ class LegacyNameAnalyzer: to_delete = existing_phrases - norm_phrases if to_add: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ INSERT INTO word (word_id, word_token, word, class, type, search_name_count, operator) (SELECT nextval('seq_word'), ' ' || make_standard_name(name), name, @@ -381,8 +380,7 @@ class LegacyNameAnalyzer: to_add) if to_delete and should_replace: - psycopg2.extras.execute_values( - cur, + cur.execute_values( """ DELETE FROM word USING (VALUES %s) as v(name, in_class, in_type, op) WHERE word = name and class = in_class and type = in_type and ((op = '-' and operator is null) or op = operator)""", @@ -424,37 +422,37 @@ class LegacyNameAnalyzer: self.add_country_names(country_feature.lower(), names) address = place.get('address') - if address: - hnrs = [] - addr_terms = [] - for key, value in address.items(): - if key == 'postcode': - self._add_postcode(value) - elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): - hnrs.append(value) - elif key == 'street': - token_info.add_street(self.conn, value) - elif key == 'place': - token_info.add_place(self.conn, value) - elif not key.startswith('_') and \ - key not in ('country', 'full'): - addr_terms.append((key, value)) - - if hnrs: - token_info.add_housenumbers(self.conn, hnrs) - - if addr_terms: - token_info.add_address_terms(self.conn, addr_terms) + self._process_place_address(token_info, address) return token_info.data - def _add_postcode(self, postcode): - """ Make sure the normalized postcode is present in the word table. - """ - if re.search(r'[:,;]', postcode) is None: - self._cache.add_postcode(self.conn, self.normalize_postcode(postcode)) + def _process_place_address(self, token_info, address): + hnrs = [] + addr_terms = [] + + for key, value in address.items(): + if key == 'postcode': + # Make sure the normalized postcode is present in the word table. + if re.search(r'[:,;]', value) is None: + self._cache.add_postcode(self.conn, + self.normalize_postcode(value)) + elif key in ('housenumber', 'streetnumber', 'conscriptionnumber'): + hnrs.append(value) + elif key == 'street': + token_info.add_street(self.conn, value) + elif key == 'place': + token_info.add_place(self.conn, value) + elif not key.startswith('_') and key not in ('country', 'full'): + addr_terms.append((key, value)) + + if hnrs: + token_info.add_housenumbers(self.conn, hnrs) + + if addr_terms: + token_info.add_address_terms(self.conn, addr_terms) + class _TokenInfo: @@ -582,7 +580,7 @@ class _TokenCache: with conn.cursor() as cur: cur.execute("""SELECT i, ARRAY[getorcreate_housenumber_id(i::text)]::text FROM generate_series(1, 100) as i""") - self._cached_housenumbers = {str(r[0]) : r[1] for r in cur} + self._cached_housenumbers = {str(r[0]): r[1] for r in cur} # For postcodes remember the ones that have already been added self.postcodes = set() diff --git a/nominatim/tools/check_database.py b/nominatim/tools/check_database.py index d4f793b4..d116554f 100644 --- a/nominatim/tools/check_database.py +++ b/nominatim/tools/check_database.py @@ -24,6 +24,7 @@ def _check(hint=None): """ def decorator(func): title = func.__doc__.split('\n', 1)[0].strip() + def run_check(conn, config): print(title, end=' ... ') ret = func(conn, config) @@ -98,13 +99,12 @@ def _get_indexes(conn): if conn.table_exists('place'): indexes.extend(('idx_placex_pendingsector', 'idx_location_area_country_place_id', - 'idx_place_osm_unique' - )) + 'idx_place_osm_unique')) return indexes -### CHECK FUNCTIONS +# CHECK FUNCTIONS # # Functions are exectured in the order they appear here. diff --git a/nominatim/tools/database_import.py b/nominatim/tools/database_import.py index efbf2ec8..a4d7220f 100644 --- a/nominatim/tools/database_import.py +++ b/nominatim/tools/database_import.py @@ -9,6 +9,7 @@ from pathlib import Path import psutil import psycopg2.extras +from psycopg2 import sql as pysql from nominatim.db.connection import connect, get_pg_env from nominatim.db import utils as db_utils @@ -130,9 +131,8 @@ def import_osm_data(osm_file, options, drop=False, ignore_errors=False): if drop: conn.drop_table('planet_osm_nodes') - if drop: - if options['flatnode_file']: - Path(options['flatnode_file']).unlink() + if drop and options['flatnode_file']: + Path(options['flatnode_file']).unlink() def create_tables(conn, config, reverse_only=False): @@ -185,7 +185,12 @@ def truncate_data_tables(conn): conn.commit() -_COPY_COLUMNS = 'osm_type, osm_id, class, type, name, admin_level, address, extratags, geometry' + +_COPY_COLUMNS = pysql.SQL(',').join(map(pysql.Identifier, + ('osm_type', 'osm_id', 'class', 'type', + 'name', 'admin_level', 'address', + 'extratags', 'geometry'))) + def load_data(dsn, threads): """ Copy data into the word and placex table. @@ -196,12 +201,15 @@ def load_data(dsn, threads): for imod in range(place_threads): conn = DBConnection(dsn) conn.connect() - conn.perform("""INSERT INTO placex ({0}) - SELECT {0} FROM place - WHERE osm_id % {1} = {2} - AND NOT (class='place' and (type='houses' or type='postcode')) - AND ST_IsValid(geometry) - """.format(_COPY_COLUMNS, place_threads, imod)) + conn.perform( + pysql.SQL("""INSERT INTO placex ({columns}) + SELECT {columns} FROM place + WHERE osm_id % {total} = {mod} + AND NOT (class='place' and (type='houses' or type='postcode')) + AND ST_IsValid(geometry) + """).format(columns=_COPY_COLUMNS, + total=pysql.Literal(place_threads), + mod=pysql.Literal(imod))) sel.register(conn, selectors.EVENT_READ, conn) # Address interpolations go into another table. @@ -251,6 +259,7 @@ def create_search_indices(conn, config, drop=False): sql.run_sql_file(conn, 'indices.sql', drop=drop) + def create_country_names(conn, tokenizer, languages=None): """ Add default country names to search index. `languages` is a comma- separated list of language codes as used in OSM. If `languages` is not @@ -262,8 +271,7 @@ def create_country_names(conn, tokenizer, languages=None): def _include_key(key): return key == 'name' or \ - (key.startswith('name:') \ - and (not languages or key[5:] in languages)) + (key.startswith('name:') and (not languages or key[5:] in languages)) with conn.cursor() as cur: psycopg2.extras.register_hstore(cur) @@ -272,7 +280,7 @@ def create_country_names(conn, tokenizer, languages=None): with tokenizer.name_analyzer() as analyzer: for code, name in cur: - names = {'countrycode' : code} + names = {'countrycode': code} if code == 'gb': names['short_name'] = 'UK' if code == 'us': diff --git a/nominatim/tools/exec_utils.py b/nominatim/tools/exec_utils.py index 9888d96a..72d252b7 100644 --- a/nominatim/tools/exec_utils.py +++ b/nominatim/tools/exec_utils.py @@ -70,7 +70,9 @@ def run_api_script(endpoint, project_dir, extra_env=None, phpcgi_bin=None, else: cmd = [str(phpcgi_bin)] - proc = subprocess.run(cmd, cwd=str(project_dir), env=env, capture_output=True, + proc = subprocess.run(cmd, cwd=str(project_dir), env=env, + stdout=subprocess.PIPE, + stderr=subprocess.PIPE, check=False) if proc.returncode != 0 or proc.stderr: @@ -134,11 +136,11 @@ def run_osm2pgsql(options): def get_url(url): """ Get the contents from the given URL and return it as a UTF-8 string. """ - headers = {"User-Agent" : "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} + headers = {"User-Agent": "Nominatim/{0[0]}.{0[1]}.{0[2]}-{0[3]}".format(NOMINATIM_VERSION)} try: with urlrequest.urlopen(urlrequest.Request(url, headers=headers)) as response: return response.read().decode('utf-8') - except: + except Exception: LOG.fatal('Failed to load URL: %s', url) raise diff --git a/nominatim/tools/freeze.py b/nominatim/tools/freeze.py index cc1bf97e..a182fc8b 100644 --- a/nominatim/tools/freeze.py +++ b/nominatim/tools/freeze.py @@ -3,6 +3,8 @@ Functions for removing unnecessary data from the database. """ from pathlib import Path +from psycopg2 import sql as pysql + UPDATE_TABLES = [ 'address_levels', 'gb_postcode', @@ -21,15 +23,15 @@ def drop_update_tables(conn): """ Drop all tables only necessary for updating the database from OSM replication data. """ - - where = ' or '.join(["(tablename LIKE '{}')".format(t) for t in UPDATE_TABLES]) + parts = (pysql.SQL("(tablename LIKE {})").format(pysql.Literal(t)) for t in UPDATE_TABLES) with conn.cursor() as cur: - cur.execute("SELECT tablename FROM pg_tables WHERE " + where) + cur.execute(pysql.SQL("SELECT tablename FROM pg_tables WHERE ") + + pysql.SQL(' or ').join(parts)) tables = [r[0] for r in cur] for table in tables: - cur.execute('DROP TABLE IF EXISTS "{}" CASCADE'.format(table)) + cur.drop_table(table, cascade=True) conn.commit() diff --git a/nominatim/tools/migration.py b/nominatim/tools/migration.py index de1e5101..d7faca31 100644 --- a/nominatim/tools/migration.py +++ b/nominatim/tools/migration.py @@ -142,7 +142,8 @@ def change_housenumber_transliteration(conn, **_): BEGIN SELECT array_to_string(array_agg(trans), ';') INTO normtext - FROM (SELECT lookup_word as trans, getorcreate_housenumber_id(lookup_word) + FROM (SELECT lookup_word as trans, + getorcreate_housenumber_id(lookup_word) FROM (SELECT make_standard_name(h) as lookup_word FROM regexp_split_to_table(housenumber, '[,;]') h) x) y; return normtext; diff --git a/nominatim/tools/postcodes.py b/nominatim/tools/postcodes.py index cfd242e2..d00fc97a 100644 --- a/nominatim/tools/postcodes.py +++ b/nominatim/tools/postcodes.py @@ -7,7 +7,7 @@ import gzip import logging from math import isfinite -from psycopg2.extras import execute_values +from psycopg2 import sql as pysql from nominatim.db.connection import connect @@ -52,27 +52,26 @@ class _CountryPostcodesCollector: with conn.cursor() as cur: if to_add: - execute_values(cur, - """INSERT INTO location_postcode - (place_id, indexed_status, country_code, - postcode, geometry) VALUES %s""", - to_add, - template="""(nextval('seq_place'), 1, '{}', - %s, 'SRID=4326;POINT(%s %s)') - """.format(self.country)) + cur.execute_values( + """INSERT INTO location_postcode + (place_id, indexed_status, country_code, + postcode, geometry) VALUES %s""", + to_add, + template=pysql.SQL("""(nextval('seq_place'), 1, {}, + %s, 'SRID=4326;POINT(%s %s)') + """).format(pysql.Literal(self.country))) if to_delete: cur.execute("""DELETE FROM location_postcode WHERE country_code = %s and postcode = any(%s) """, (self.country, to_delete)) if to_update: - execute_values(cur, - """UPDATE location_postcode - SET indexed_status = 2, - geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326) - FROM (VALUES %s) AS v (pc, x, y) - WHERE country_code = '{}' and postcode = pc - """.format(self.country), - to_update) + cur.execute_values( + pysql.SQL("""UPDATE location_postcode + SET indexed_status = 2, + geometry = ST_SetSRID(ST_Point(v.x, v.y), 4326) + FROM (VALUES %s) AS v (pc, x, y) + WHERE country_code = {} and postcode = pc + """).format(pysql.Literal(self.country)), to_update) def _compute_changes(self, conn): @@ -165,11 +164,14 @@ def update_postcodes(dsn, project_dir, tokenizer): with conn.cursor(name="placex_postcodes") as cur: cur.execute(""" SELECT cc as country_code, pc, ST_X(centroid), ST_Y(centroid) - FROM (SELECT - COALESCE(plx.country_code, get_country_code(ST_Centroid(pl.geometry))) as cc, + FROM (SELECT + COALESCE(plx.country_code, + get_country_code(ST_Centroid(pl.geometry))) as cc, token_normalized_postcode(pl.address->'postcode') as pc, - ST_Centroid(ST_Collect(COALESCE(plx.centroid, ST_Centroid(pl.geometry)))) as centroid - FROM place AS pl LEFT OUTER JOIN placex AS plx ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type + ST_Centroid(ST_Collect(COALESCE(plx.centroid, + ST_Centroid(pl.geometry)))) as centroid + FROM place AS pl LEFT OUTER JOIN placex AS plx + ON pl.osm_id = plx.osm_id AND pl.osm_type = plx.osm_type WHERE pl.address ? 'postcode' AND pl.geometry IS NOT null GROUP BY cc, pc) xx WHERE pc IS NOT null AND cc IS NOT null diff --git a/nominatim/tools/refresh.py b/nominatim/tools/refresh.py index 25a97127..5aaee0c8 100644 --- a/nominatim/tools/refresh.py +++ b/nominatim/tools/refresh.py @@ -5,7 +5,7 @@ import json import logging from textwrap import dedent -from psycopg2.extras import execute_values +from psycopg2 import sql as pysql from nominatim.db.utils import execute_file from nominatim.db.sql_preprocessor import SQLPreprocessor @@ -49,7 +49,7 @@ def load_address_levels(conn, table, levels): _add_address_level_rows_from_entry(rows, entry) with conn.cursor() as cur: - cur.execute('DROP TABLE IF EXISTS {}'.format(table)) + cur.drop_table(table) cur.execute("""CREATE TABLE {} (country_code varchar(2), class TEXT, @@ -57,7 +57,8 @@ def load_address_levels(conn, table, levels): rank_search SMALLINT, rank_address SMALLINT)""".format(table)) - execute_values(cur, "INSERT INTO {} VALUES %s".format(table), rows) + cur.execute_values(pysql.SQL("INSERT INTO {} VALUES %s") + .format(pysql.Identifier(table)), rows) cur.execute('CREATE UNIQUE INDEX ON {} (country_code, class, type)'.format(table)) @@ -155,6 +156,20 @@ def recompute_importance(conn): conn.commit() +def _quote_php_variable(var_type, config, conf_name): + if var_type == bool: + return 'true' if config.get_bool(conf_name) else 'false' + + if var_type == int: + return getattr(config, conf_name) + + if not getattr(config, conf_name): + return 'false' + + quoted = getattr(config, conf_name).replace("'", "\\'") + return f"'{quoted}'" + + def setup_website(basedir, config, conn): """ Create the website script stubs. """ @@ -174,18 +189,11 @@ def setup_website(basedir, config, conn): config.project_dir / 'tokenizer')) for php_name, conf_name, var_type in PHP_CONST_DEFS: - if var_type == bool: - varout = 'true' if config.get_bool(conf_name) else 'false' - elif var_type == int: - varout = getattr(config, conf_name) - elif not getattr(config, conf_name): - varout = 'false' - else: - varout = "'{}'".format(getattr(config, conf_name).replace("'", "\\'")) + varout = _quote_php_variable(var_type, config, conf_name) - template += "@define('CONST_{}', {});\n".format(php_name, varout) + template += f"@define('CONST_{php_name}', {varout});\n" - template += "\nrequire_once('{}/website/{{}}');\n".format(config.lib_dir.php) + template += f"\nrequire_once('{config.lib_dir.php}/website/{{}}');\n" search_name_table_exists = bool(conn and conn.table_exists('search_name')) diff --git a/nominatim/tools/special_phrases/sp_importer.py b/nominatim/tools/special_phrases/sp_importer.py index 48764518..791f4dc3 100644 --- a/nominatim/tools/special_phrases/sp_importer.py +++ b/nominatim/tools/special_phrases/sp_importer.py @@ -20,6 +20,12 @@ from nominatim.errors import UsageError from nominatim.tools.special_phrases.importer_statistics import SpecialPhrasesImporterStatistics LOG = logging.getLogger() + +def _classtype_table(phrase_class, phrase_type): + """ Return the name of the table for the given class and type. + """ + return f'place_classtype_{phrase_class}_{phrase_type}' + class SPImporter(): # pylint: disable-msg=too-many-instance-attributes """ @@ -38,8 +44,8 @@ class SPImporter(): # This set will contain all existing phrases to be added. # It contains tuples with the following format: (lable, class, type, operator) self.word_phrases = set() - #This set will contain all existing place_classtype tables which doesn't match any - #special phrases class/type on the wiki. + # This set will contain all existing place_classtype tables which doesn't match any + # special phrases class/type on the wiki. self.table_phrases_to_delete = set() def import_phrases(self, tokenizer, should_replace): @@ -54,7 +60,7 @@ class SPImporter(): LOG.warning('Special phrases importation starting') self._fetch_existing_place_classtype_tables() - #Store pairs of class/type for further processing + # Store pairs of class/type for further processing class_type_pairs = set() for loaded_phrases in self.sp_loader: @@ -125,17 +131,17 @@ class SPImporter(): Return the class/type pair corresponding to the phrase. """ - #blacklisting: disallow certain class/type combinations + # blacklisting: disallow certain class/type combinations if phrase.p_class in self.black_list.keys() \ and phrase.p_type in self.black_list[phrase.p_class]: return None - #whitelisting: if class is in whitelist, allow only tags in the list + # whitelisting: if class is in whitelist, allow only tags in the list if phrase.p_class in self.white_list.keys() \ and phrase.p_type not in self.white_list[phrase.p_class]: return None - #sanity check, in case somebody added garbage in the wiki + # sanity check, in case somebody added garbage in the wiki if not self._check_sanity(phrase): self.statistics_handler.notify_one_phrase_invalid() return None @@ -155,7 +161,7 @@ class SPImporter(): sql_tablespace = self.config.TABLESPACE_AUX_DATA if sql_tablespace: - sql_tablespace = ' TABLESPACE '+sql_tablespace + sql_tablespace = ' TABLESPACE ' + sql_tablespace with self.db_connection.cursor() as db_cursor: db_cursor.execute("CREATE INDEX idx_placex_classtype ON placex (class, type)") @@ -164,23 +170,23 @@ class SPImporter(): phrase_class = pair[0] phrase_type = pair[1] - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) if table_name in self.table_phrases_to_delete: self.statistics_handler.notify_one_table_ignored() - #Remove this table from the ones to delete as it match a class/type - #still existing on the special phrases of the wiki. + # Remove this table from the ones to delete as it match a + # class/type still existing on the special phrases of the wiki. self.table_phrases_to_delete.remove(table_name) - #So dont need to create the table and indexes. + # So don't need to create the table and indexes. continue - #Table creation + # Table creation self._create_place_classtype_table(sql_tablespace, phrase_class, phrase_type) - #Indexes creation + # Indexes creation self._create_place_classtype_indexes(sql_tablespace, phrase_class, phrase_type) - #Grant access on read to the web user. + # Grant access on read to the web user. self._grant_access_to_webuser(phrase_class, phrase_type) self.statistics_handler.notify_one_table_created() @@ -193,11 +199,11 @@ class SPImporter(): """ Create table place_classtype of the given phrase_class/phrase_type if doesn't exit. """ - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" - CREATE TABLE IF NOT EXISTS {{}} {} - AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex + CREATE TABLE IF NOT EXISTS {{}} {} + AS SELECT place_id AS place_id,st_centroid(geometry) AS centroid FROM placex WHERE class = {{}} AND type = {{}}""".format(sql_tablespace)) .format(Identifier(table_name), Literal(phrase_class), Literal(phrase_type))) @@ -208,8 +214,8 @@ class SPImporter(): Create indexes on centroid and place_id for the place_classtype table. """ index_prefix = 'idx_place_classtype_{}_{}_'.format(phrase_class, phrase_type) - base_table = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) - #Index on centroid + base_table = _classtype_table(phrase_class, phrase_type) + # Index on centroid if not self.db_connection.index_exists(index_prefix + 'centroid'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL(""" @@ -217,7 +223,7 @@ class SPImporter(): .format(Identifier(index_prefix + 'centroid'), Identifier(base_table)), sql_tablespace) - #Index on place_id + # Index on place_id if not self.db_connection.index_exists(index_prefix + 'place_id'): with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL( @@ -230,7 +236,7 @@ class SPImporter(): """ Grant access on read to the table place_classtype for the webuser. """ - table_name = 'place_classtype_{}_{}'.format(phrase_class, phrase_type) + table_name = _classtype_table(phrase_class, phrase_type) with self.db_connection.cursor() as db_cursor: db_cursor.execute(SQL("""GRANT SELECT ON {} TO {}""") .format(Identifier(table_name), @@ -242,18 +248,14 @@ class SPImporter(): Delete the place_classtype tables. """ LOG.warning('Cleaning database...') - #Array containing all queries to execute. Contain tuples of format (query, parameters) - queries_parameters = [] - - #Delete place_classtype tables corresponding to class/type which are not on the wiki anymore - for table in self.table_phrases_to_delete: - self.statistics_handler.notify_one_table_deleted() - query = SQL('DROP TABLE IF EXISTS {}').format(Identifier(table)) - queries_parameters.append((query, ())) + # Delete place_classtype tables corresponding to class/type which + # are not on the wiki anymore. with self.db_connection.cursor() as db_cursor: - for query, parameters in queries_parameters: - db_cursor.execute(query, parameters) + for table in self.table_phrases_to_delete: + self.statistics_handler.notify_one_table_deleted() + db_cursor.drop_table(table) + def _convert_php_settings_if_needed(self, file_path): """ @@ -265,7 +267,7 @@ class SPImporter(): file, extension = os.path.splitext(file_path) json_file_path = Path(file + '.json').resolve() - if extension not in('.php', '.json'): + if extension not in ('.php', '.json'): raise UsageError('The custom NOMINATIM_PHRASE_CONFIG file has not a valid extension.') if extension == '.php' and not isfile(json_file_path): @@ -274,9 +276,8 @@ class SPImporter(): (self.phplib_dir / 'migration/PhraseSettingsToJson.php').resolve(), file_path], check=True) LOG.warning('special_phrase configuration file has been converted to json.') - return json_file_path except subprocess.CalledProcessError: LOG.error('Error while converting %s to json.', file_path) raise - else: - return json_file_path + + return json_file_path diff --git a/nominatim/tools/special_phrases/sp_wiki_loader.py b/nominatim/tools/special_phrases/sp_wiki_loader.py index 914e1539..1ad9de7e 100644 --- a/nominatim/tools/special_phrases/sp_wiki_loader.py +++ b/nominatim/tools/special_phrases/sp_wiki_loader.py @@ -15,7 +15,7 @@ class SPWikiLoader(Iterator): def __init__(self, config, languages=None): super().__init__() self.config = config - #Compile the regex here to increase performances. + # Compile the regex here to increase performances. self.occurence_pattern = re.compile( r'\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([^\|]+) *\|\| *([\-YN])' ) @@ -35,7 +35,7 @@ class SPWikiLoader(Iterator): Parses XML content and extracts special phrases from it. Return a list of SpecialPhrase. """ - #One match will be of format [label, class, type, operator, plural] + # One match will be of format [label, class, type, operator, plural] matches = self.occurence_pattern.findall(xml) returned_phrases = set() for match in matches: @@ -65,5 +65,6 @@ class SPWikiLoader(Iterator): Requested URL Example : https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/EN """ - url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' + lang.upper() # pylint: disable=line-too-long + url = 'https://wiki.openstreetmap.org/wiki/Special:Export/Nominatim/Special_Phrases/' \ + + lang.upper() return get_url(url) diff --git a/nominatim/tools/special_phrases/special_phrase.py b/nominatim/tools/special_phrases/special_phrase.py index 448fbee4..da7968ca 100644 --- a/nominatim/tools/special_phrases/special_phrase.py +++ b/nominatim/tools/special_phrases/special_phrase.py @@ -13,7 +13,7 @@ class SpecialPhrase(): def __init__(self, p_label, p_class, p_type, p_operator): self.p_label = p_label.strip() self.p_class = p_class.strip() - #Hack around a bug where building=yes was imported with quotes into the wiki + # Hack around a bug where building=yes was imported with quotes into the wiki self.p_type = re.sub(r'\"|"', '', p_type.strip()) - #Needed if some operator in the wiki are not written in english + # Needed if some operator in the wiki are not written in english self.p_operator = '-' if p_operator not in ('near', 'in') else p_operator diff --git a/vagrant/Install-on-Centos-7.sh b/vagrant/Install-on-Centos-7.sh index 9fb90150..d3fd9ef0 100755 --- a/vagrant/Install-on-Centos-7.sh +++ b/vagrant/Install-on-Centos-7.sh @@ -42,7 +42,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel libicu-dev - pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie + pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie pyyaml # diff --git a/vagrant/Install-on-Centos-8.sh b/vagrant/Install-on-Centos-8.sh index 2330fc3b..a41e846c 100755 --- a/vagrant/Install-on-Centos-8.sh +++ b/vagrant/Install-on-Centos-8.sh @@ -35,7 +35,7 @@ python3-pip python3-setuptools python3-devel \ expat-devel zlib-devel libicu-dev - pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie + pip3 install --user psycopg2 python-dotenv psutil Jinja2 PyICU datrie pyyaml # diff --git a/vagrant/Install-on-Ubuntu-18.sh b/vagrant/Install-on-Ubuntu-18.sh index 63c07bec..dadce086 100755 --- a/vagrant/Install-on-Ubuntu-18.sh +++ b/vagrant/Install-on-Ubuntu-18.sh @@ -32,10 +32,10 @@ export DEBIAN_FRONTEND=noninteractive #DOCS: php php-pgsql php-intl libicu-dev python3-pip \ python3-psycopg2 python3-psutil python3-jinja2 python3-icu git -# The python-dotenv adn datrie package that comes with Ubuntu 18.04 is too old, so +# Some of the Python packages that come with Ubuntu 18.04 are too old, so # install the latest version from pip: - pip3 install python-dotenv datrie + pip3 install python-dotenv datrie pyyaml # # System Configuration