X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/e62ae5b090661f7fa56cc68981457e5b4d4bd9ca..4c2fa68c552e2a32a63ea2d9f141dbcaeebec4d1:/lib/Geocode.php
diff --git a/lib/Geocode.php b/lib/Geocode.php
index 7f00de6e..11e47d9a 100644
--- a/lib/Geocode.php
+++ b/lib/Geocode.php
@@ -441,6 +441,293 @@
return $aSearchResults;
}
+ function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases)
+ {
+ /*
+ Calculate all searches using aValidTokens i.e.
+ 'Wodsworth Road, Sheffield' =>
+
+ Phrase Wordset
+ 0 0 (wodsworth road)
+ 0 1 (wodsworth)(road)
+ 1 0 (sheffield)
+
+ Score how good the search is so they can be ordered
+ */
+ foreach($aPhrases as $iPhrase => $sPhrase)
+ {
+ $aNewPhraseSearches = array();
+ if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
+ else $sPhraseType = '';
+
+ foreach($aPhrases[$iPhrase]['wordsets'] as $iWordSet => $aWordset)
+ {
+ // Too many permutations - too expensive
+ if ($iWordSet > 120) break;
+
+ $aWordsetSearches = $aSearches;
+
+ // Add all words from this wordset
+ foreach($aWordset as $iToken => $sToken)
+ {
+ //echo "
$sToken";
+ $aNewWordsetSearches = array();
+
+ foreach($aWordsetSearches as $aCurrentSearch)
+ {
+ //echo "";
+ //var_dump($aCurrentSearch);
+ //echo "";
+
+ // If the token is valid
+ if (isset($aValidTokens[' '.$sToken]))
+ {
+ foreach($aValidTokens[' '.$sToken] as $aSearchTerm)
+ {
+ $aSearch = $aCurrentSearch;
+ $aSearch['iSearchRank']++;
+ if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
+ {
+ if ($aSearch['sCountryCode'] === false)
+ {
+ $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
+ // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
+ if (($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)))
+ {
+ $aSearch['iSearchRank'] += 5;
+ }
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null)
+ {
+ if ($aSearch['fLat'] === '')
+ {
+ $aSearch['fLat'] = $aSearchTerm['lat'];
+ $aSearch['fLon'] = $aSearchTerm['lon'];
+ $aSearch['fRadius'] = $aSearchTerm['radius'];
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ elseif ($sPhraseType == 'postalcode')
+ {
+ // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
+ if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
+ {
+ // If we already have a name try putting the postcode first
+ if (sizeof($aSearch['aName']))
+ {
+ $aNewSearch = $aSearch;
+ $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
+ $aNewSearch['aName'] = array();
+ $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
+ }
+
+ if (sizeof($aSearch['aName']))
+ {
+ if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
+ {
+ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ }
+ else
+ {
+ $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ $aSearch['iSearchRank'] += 1000; // skip;
+ }
+ }
+ else
+ {
+ $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ //$aSearch['iNamePhrase'] = $iPhrase;
+ }
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+
+ }
+ elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house')
+ {
+ if ($aSearch['sHouseNumber'] === '')
+ {
+ $aSearch['sHouseNumber'] = $sToken;
+ // sanity check: if the housenumber is not mainly made
+ // up of numbers, add a penalty
+ if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ /*
+ // Fall back to not searching for this item (better than nothing)
+ $aSearch = $aCurrentSearch;
+ $aSearch['iSearchRank'] += 1;
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ */
+ }
+ }
+ elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null)
+ {
+ if ($aSearch['sClass'] === '')
+ {
+ $aSearch['sOperator'] = $aSearchTerm['operator'];
+ $aSearch['sClass'] = $aSearchTerm['class'];
+ $aSearch['sType'] = $aSearchTerm['type'];
+ if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
+ else $aSearch['sOperator'] = 'near'; // near = in for the moment
+ if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
+
+ // Do we have a shortcut id?
+ if ($aSearch['sOperator'] == 'name')
+ {
+ $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')";
+ if ($iAmenityID = $this->oDB->getOne($sSQL))
+ {
+ $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID);
+ $aSearch['aName'][$iAmenityID] = $iAmenityID;
+ $aSearch['sClass'] = '';
+ $aSearch['sType'] = '';
+ }
+ }
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
+ {
+ if (sizeof($aSearch['aName']))
+ {
+ if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
+ {
+ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ }
+ else
+ {
+ $aCurrentSearch['aFullNameAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ $aSearch['iSearchRank'] += 1000; // skip;
+ }
+ }
+ else
+ {
+ $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ //$aSearch['iNamePhrase'] = $iPhrase;
+ }
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ }
+ // Look for partial matches.
+ // Note that there is no point in adding country terms here
+ // because country are omitted in the address.
+ if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country')
+ {
+ // Allow searching for a word - but at extra cost
+ foreach($aValidTokens[$sToken] as $aSearchTerm)
+ {
+ if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
+ {
+ if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false)
+ {
+ $aSearch = $aCurrentSearch;
+ $aSearch['iSearchRank'] += 1;
+ if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+ {
+ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version?
+ {
+ $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ $aSearch['iSearchRank'] += 1;
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
+ {
+ if (empty($aSearchTermToken['country_code'])
+ && empty($aSearchTermToken['lat'])
+ && empty($aSearchTermToken['class']))
+ {
+ $aSearch = $aCurrentSearch;
+ $aSearch['iSearchRank'] += 1;
+ $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ }
+ else
+ {
+ $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+
+ if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
+ {
+ $aSearch = $aCurrentSearch;
+ $aSearch['iSearchRank'] += 1;
+ if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
+ if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
+ if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+ $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ else
+ $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+ $aSearch['iNamePhrase'] = $iPhrase;
+ if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ }
+ }
+ else
+ {
+ // Allow skipping a word - but at EXTREAM cost
+ //$aSearch = $aCurrentSearch;
+ //$aSearch['iSearchRank']+=100;
+ //$aNewWordsetSearches[] = $aSearch;
+ }
+ }
+ // Sort and cut
+ usort($aNewWordsetSearches, 'bySearchRank');
+ $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
+ }
+ //var_Dump('