From: Sarah Hoffmann Date: Mon, 21 May 2018 10:01:56 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~328 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/1d8e2961f0527a548989c1e38bb062f2f9e5877e?hp=-c Merge remote-tracking branch 'upstream/master' --- 1d8e2961f0527a548989c1e38bb062f2f9e5877e diff --combined lib/Geocode.php index e9b304d2,6cc2e4cb..18526f43 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@@ -7,6 -7,7 +7,7 @@@ require_once(CONST_BasePath.'/lib/Phras require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); require_once(CONST_BasePath.'/lib/SearchDescription.php'); require_once(CONST_BasePath.'/lib/SearchContext.php'); + require_once(CONST_BasePath.'/lib/TokenList.php'); class Geocode { @@@ -19,7 -20,7 +20,7 @@@ protected $bIncludeAddressDetails = false; protected $aExcludePlaceIDs = array(); - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@@ -332,10 -333,10 +333,10 @@@ return false; } - public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured) + public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured) { /* - Calculate all searches using aValidTokens i.e. + Calculate all searches using oValidTokens i.e. 'Wodsworth Road, Sheffield' => Phrase Wordset @@@ -365,38 -366,37 +366,37 @@@ //var_dump($oCurrentSearch); //echo ""; - // If the token is valid - if (isset($aValidTokens[' '.$sToken])) { - foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) { - $aNewSearches = $oCurrentSearch->extendWithFullTerm( - $aSearchTerm, - isset($aValidTokens[$sToken]) - && strpos($sToken, ' ') === false, - $sPhraseType, - $iToken == 0 && $iPhrase == 0, - $iPhrase == 0, - $iToken + 1 == count($aWordset) - && $iPhrase + 1 == count($aPhrases) - ); - - foreach ($aNewSearches as $oSearch) { - if ($oSearch->getRank() < $this->iMaxRank) { - $aNewWordsetSearches[] = $oSearch; - } + // Tokens with full name matches. + foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) { + $aNewSearches = $oCurrentSearch->extendWithFullTerm( + $oSearchTerm, + $oValidTokens->contains($sToken) + && strpos($sToken, ' ') === false, + $sPhraseType, + $iToken == 0 && $iPhrase == 0, + $iPhrase == 0, + $iToken + 1 == count($aWordset) + && $iPhrase + 1 == count($aPhrases) + ); + + foreach ($aNewSearches as $oSearch) { + if ($oSearch->getRank() < $this->iMaxRank) { + $aNewWordsetSearches[] = $oSearch; } } } // Look for partial matches. // Note that there is no point in adding country terms here // because country is omitted in the address. - if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') { + if ($sPhraseType != 'country') { // Allow searching for a word - but at extra cost - foreach ($aValidTokens[$sToken] as $aSearchTerm) { + foreach ($oValidTokens->get($sToken) as $oSearchTerm) { $aNewSearches = $oCurrentSearch->extendWithPartialTerm( - $aSearchTerm, + $sToken, + $oSearchTerm, $bIsStructured, $iPhrase, - isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array() + $oValidTokens->get(' '.$sToken) ); foreach ($aNewSearches as $oSearch) { @@@ -645,73 -645,51 +645,51 @@@ Debug::printDebugTable('Phrases', $aPhrases); Debug::printVar('Tokens', $aTokens); + $oValidTokens = new TokenList(); + if (!empty($aTokens)) { - // Check which tokens we have, get the ID numbers $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count'; $sSQL .= ' FROM word '; $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')'; Debug::printSQL($sSQL); - $aValidTokens = array(); - $aDatabaseWords = chksql( - $this->oDB->getAll($sSQL), - 'Could not get word tokens.' + $oValidTokens->addTokensFromDB( + $this->oDB, + $aTokens, + $this->aCountryCodes, + $sNormQuery, + $this->oNormalizer ); - foreach ($aDatabaseWords as $aToken) { - // Filter country tokens that do not match restricted countries. - if ($this->aCountryCodes - && $aToken['country_code'] - && !in_array($aToken['country_code'], $this->aCountryCodes) - ) { - continue; - } - - // Special terms need to appear in their normalized form. - if ($aToken['word'] && $aToken['class']) { - $sNormWord = $this->normTerm($aToken['word']); - if (strpos($sNormQuery, $sNormWord) === false) { - continue; - } - } - if (isset($aValidTokens[$aToken['word_token']])) { - $aValidTokens[$aToken['word_token']][] = $aToken; - } else { - $aValidTokens[$aToken['word_token']] = array($aToken); - } - } - - // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { - if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { - if (isset($aValidTokens[$aData[1]])) { - foreach ($aValidTokens[$aData[1]] as $aToken) { - if (!$aToken['class']) { - if (isset($aValidTokens[$sToken])) { - $aValidTokens[$sToken][] = $aToken; - } else { - $aValidTokens[$sToken] = array($aToken); - } - } - } + if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { + if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) { + // US ZIP+4 codes - merge in the 5-digit ZIP code + $oValidTokens->addToken( + $sToken, + new Token\Postcode(null, $aData[1], 'us') + ); + } elseif (preg_match('/^ [0-9]+$/', $sToken)) { + // Unknown single word token with a number. + // Assume it is a house number. + $oValidTokens->addToken( + $sToken, + new Token\HouseNumber(null, trim($sToken)) + ); } } } - foreach ($aTokens as $sToken) { - // Unknown single word token with a number - assume it is a house number - if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) { - $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken)); - } - } - Debug::printGroupTable('Valid Tokens', $aValidTokens); - // Any words that have failed completely? // TODO: suggestions + + Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo()); + Debug::newSection('Search candidates'); - $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases); + $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases); if ($this->bReverseInPlan) { // Reverse phrase array and also reverse the order of the wordsets in @@@ -722,7 -700,7 +700,7 @@@ if (count($aPhrases) > 1) { $aPhrases[count($aPhrases)-1]->invertWordSets(); } - $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false); + $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false); foreach ($aGroupedSearches as $aSearches) { foreach ($aSearches as $aSearch) { @@@ -762,7 -740,10 +740,10 @@@ } } - if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens); + Debug::printGroupedSearch( + $aGroupedSearches, + $oValidTokens->debugTokenByWordIdList() + ); // Start the search process $iGroupLoop = 0; @@@ -772,10 -753,11 +753,11 @@@ foreach ($aSearches as $oSearch) { $iQueryLoop++; - if (CONST_Debug) { - echo "
Search Loop, group $iGroupLoop, loop $iQueryLoop"; - _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens); - } + Debug::newSection("Search Loop, group $iGroupLoop, loop $iQueryLoop"); + Debug::printGroupedSearch( + array($iGroupedRank => array($oSearch)), + $oValidTokens->debugTokenByWordIdList() + ); $aResults += $oSearch->query( $this->oDB, diff --combined lib/lib.php index fa71d296,0b939da4..317ba549 --- a/lib/lib.php +++ b/lib/lib.php @@@ -426,32 -426,6 +426,6 @@@ function javascript_renderData($xVal, $ } - function _debugDumpGroupedSearches($aData, $aTokens) - { - $aWordsIDs = array(); - if ($aTokens) { - foreach ($aTokens as $sToken => $aWords) { - if ($aWords) { - foreach ($aWords as $aToken) { - $aWordsIDs[$aToken['word_id']] = - '#'.$sToken.'('.$aToken['word_id'].')#'; - } - } - } - } - echo ''; - echo ''; - echo ''; - echo ''; - foreach ($aData as $iRank => $aRankedSet) { - foreach ($aRankedSet as $aRow) { - $aRow->dumpAsHtmlTableRow($aWordsIDs); - } - } - echo '
rankName TokensName NotAddress TokensAddress Notcountryoperatorclasstypepostcodehousenumber
'; - } - - function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $housenumber = -1, $bRaw = false) { $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, $housenumber)"; @@@ -585,10 -559,10 +559,10 @@@ function geometryText2Points($geometry_ // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); // - } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { +/* } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) { // preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER); - // + */ } elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#', $geometry_as_text, $aMatch)) { // $aPolyPoints = createPointsAroundCenter($aMatch[1], $aMatch[2], $fRadius);