X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/1787892d32351988231a60b71900c6a623609c54..13291274e73e1d2fc3582e49fd635f92a9b061e1:/lib/SearchDescription.php?ds=sidebyside
diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php
index c4f05a0e..079cb8a6 100644
--- a/lib/SearchDescription.php
+++ b/lib/SearchDescription.php
@@ -17,6 +17,8 @@ class SearchDescription
private $sCountryCode = '';
/// List of word ids making up the name of the object.
private $aName = array();
+ /// True if the name is rare enough to force index use on name.
+ private $bRareName = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
/// Subset of word ids of full words making up the address.
@@ -43,7 +45,6 @@ class SearchDescription
/// Index of phrase currently processed.
private $iNamePhrase = -1;
-
/**
* Create an empty search description.
*
@@ -165,30 +166,29 @@ class SearchDescription
/**
* Derive new searches by adding a full term to the existing search.
*
- * @param mixed[] $aSearchTerm Description of the token.
- * @param bool $bHasPartial True if there are also tokens of partial terms
- * with the same name.
- * @param string $sPhraseType Type of phrase the token is contained in.
- * @param bool $bFirstToken True if the token is at the beginning of the
- * query.
- * @param bool $bFirstPhrase True if the token is in the first phrase of
- * the query.
- * @param bool $bLastToken True if the token is at the end of the query.
+ * @param object $oSearchTerm Description of the token.
+ * @param bool $bHasPartial True if there are also tokens of partial terms
+ * with the same name.
+ * @param string $sPhraseType Type of phrase the token is contained in.
+ * @param bool $bFirstToken True if the token is at the beginning of the
+ * query.
+ * @param bool $bFirstPhrase True if the token is in the first phrase of
+ * the query.
+ * @param bool $bLastToken True if the token is at the end of the query.
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+ public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
{
$aNewSearches = array();
if (($sPhraseType == '' || $sPhraseType == 'country')
- && !empty($aSearchTerm['country_code'])
- && $aSearchTerm['country_code'] != '0'
+ && is_a($oSearchTerm, '\Nominatim\Token\Country')
) {
if (!$this->sCountryCode) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
- $oSearch->sCountryCode = $aSearchTerm['country_code'];
+ $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
// Country is almost always at the end of the string
// - increase score for finding it anywhere else (optimisation)
if (!$bLastToken) {
@@ -197,15 +197,12 @@ class SearchDescription
$aNewSearches[] = $oSearch;
}
} elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
- && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode'
+ && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
) {
// We need to try the case where the postal code is the primary element
// (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
// so try both.
- if (!$this->sPostcode
- && $aSearchTerm['word']
- && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word']
- ) {
+ if (!$this->sPostcode) {
// If we have structured search or this is the first term,
// make the postcode the primary search element.
if ($this->iOperator == Operator::NONE
@@ -216,7 +213,7 @@ class SearchDescription
$oSearch->iOperator = Operator::POSTCODE;
$oSearch->aAddress = array_merge($this->aAddress, $this->aName);
$oSearch->aName =
- array($aSearchTerm['word_id'] => $aSearchTerm['word']);
+ array($oSearchTerm->iId => $oSearchTerm->sPostcode);
$aNewSearches[] = $oSearch;
}
@@ -227,23 +224,23 @@ class SearchDescription
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
- $oSearch->sPostcode = $aSearchTerm['word'];
+ $oSearch->sPostcode = $oSearchTerm->sPostcode;
$aNewSearches[] = $oSearch;
}
}
} elseif (($sPhraseType == '' || $sPhraseType == 'street')
- && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house'
+ && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
) {
if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
- $oSearch->sHouseNumber = trim($aSearchTerm['word_token']);
+ $oSearch->sHouseNumber = $oSearchTerm->sToken;
// sanity check: if the housenumber is not mainly made
// up of numbers, add a penalty
if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
$oSearch->iSearchRank++;
}
- if (!isset($aSearchTerm['word_id'])) {
+ if (empty($oSearchTerm->iId)) {
$oSearch->iSearchRank++;
}
// also must not appear in the middle of the address
@@ -255,27 +252,34 @@ class SearchDescription
}
$aNewSearches[] = $oSearch;
}
- } elseif ($sPhraseType == '' && $aSearchTerm['class']) {
+ } elseif ($sPhraseType == ''
+ && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
+ ) {
if ($this->iOperator == Operator::NONE) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
- $iOp = Operator::NEAR; // near == in for the moment
- if ($aSearchTerm['operator'] == '') {
+ $iOp = $oSearchTerm->iOperator;
+ if ($iOp == Operator::NONE) {
if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
$iOp = Operator::NAME;
+ } else {
+ $iOp = Operator::NEAR;
}
$oSearch->iSearchRank += 2;
}
- $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']);
+ $oSearch->setPoiSearch(
+ $iOp,
+ $oSearchTerm->sClass,
+ $oSearchTerm->sType
+ );
$aNewSearches[] = $oSearch;
}
- } elseif (isset($aSearchTerm['word_id'])
- && $aSearchTerm['word_id']
- && $sPhraseType != 'country'
+ } elseif ($sPhraseType != 'country'
+ && is_a($oSearchTerm, '\Nominatim\Token\Word')
) {
- $iWordID = $aSearchTerm['word_id'];
+ $iWordID = $oSearchTerm->iId;
// Full words can only be a name if they appear at the beginning
// of the phrase. In structured search the name must forcably in
// the first phrase. In unstructured search it may be in a later
@@ -293,6 +297,11 @@ class SearchDescription
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aName = array($iWordID => $iWordID);
+ if (CONST_Search_NameOnlySearchFrequencyThreshold) {
+ $oSearch->bRareName =
+ $oSearchTerm->iSearchNameCount
+ < CONST_Search_NameOnlySearchFrequencyThreshold;
+ }
$aNewSearches[] = $oSearch;
}
}
@@ -303,7 +312,8 @@ class SearchDescription
/**
* Derive new searches by adding a partial term to the existing search.
*
- * @param mixed[] $aSearchTerm Description of the token.
+ * @param string $sToken Term for the token.
+ * @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
* @param array[] $aFullTokens List of full term tokens with the
@@ -311,21 +321,21 @@ class SearchDescription
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+ public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
{
// Only allow name terms.
- if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) {
+ if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
return array();
}
$aNewSearches = array();
- $iWordID = $aSearchTerm['word_id'];
+ $iWordID = $oSearchTerm->iId;
if ((!$bStructuredPhrases || $iPhrase > 0)
&& (!empty($this->aName))
- && strpos($aSearchTerm['word_token'], ' ') === false
+ && strpos($sToken, ' ') === false
) {
- if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+ if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
$oSearch->aAddress[$iWordID] = $iWordID;
@@ -334,7 +344,7 @@ class SearchDescription
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
- if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+ if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank += 2;
}
if (!empty($aFullTokens)) {
@@ -343,14 +353,12 @@ class SearchDescription
$aNewSearches[] = $oSearch;
// revert to the token version?
- foreach ($aFullTokens as $aSearchTermToken) {
- if (empty($aSearchTermToken['country_code'])
- && empty($aSearchTermToken['lat'])
- && empty($aSearchTermToken['class'])
- ) {
+ foreach ($aFullTokens as $oSearchTermToken) {
+ if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
- $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+ $oSearch->aAddress[$oSearchTermToken->iId]
+ = $oSearchTermToken->iId;
$aNewSearches[] = $oSearch;
}
}
@@ -365,10 +373,19 @@ class SearchDescription
if (empty($this->aName)) {
$oSearch->iSearchRank += 1;
}
- if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+ if (preg_match('#^[0-9]+$#', $sToken)) {
$oSearch->iSearchRank += 2;
}
- if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+ if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
+ if (empty($this->aName)
+ && CONST_Search_NameOnlySearchFrequencyThreshold
+ ) {
+ $oSearch->bRareName =
+ $oSearchTerm->iSearchNameCount
+ < CONST_Search_NameOnlySearchFrequencyThreshold;
+ } else {
+ $oSearch->bRareName = false;
+ }
$oSearch->aName[$iWordID] = $iWordID;
} else {
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
@@ -386,20 +403,16 @@ class SearchDescription
/**
* Query database for places that match this search.
*
- * @param object $oDB Database connection to use.
- * @param mixed[] $aWordFrequencyScores Number of times tokens appears
- * overall in a planet database.
- * @param integer $iMinRank Minimum address rank to restrict
- * search to.
- * @param integer $iMaxRank Maximum address rank to restrict
- * search to.
- * @param integer $iLimit Maximum number of results.
+ * @param object $oDB Database connection to use.
+ * @param integer $iMinRank Minimum address rank to restrict search to.
+ * @param integer $iMaxRank Maximum address rank to restrict search to.
+ * @param integer $iLimit Maximum number of results.
*
* @return mixed[] An array with two fields: IDs contains the list of
* matching place IDs and houseNumber the houseNumber
* if appicable or -1 if not.
*/
- public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit)
+ public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
{
$aResults = array();
$iHousenumber = -1;
@@ -428,7 +441,6 @@ class SearchDescription
// First search for places according to name and address.
$aResults = $this->queryNamedPlace(
$oDB,
- $aWordFrequencyScores,
$iMinRank,
$iMaxRank,
$iLimit
@@ -437,7 +449,7 @@ class SearchDescription
//now search for housenumber, if housenumber provided
if ($this->sHouseNumber && !empty($aResults)) {
$aNamedPlaceIDs = $aResults;
- $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs, $iLimit);
+ $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs);
if (empty($aResults) && $this->looksLikeFullAddress()) {
$aResults = $aNamedPlaceIDs;
@@ -450,10 +462,7 @@ class SearchDescription
}
}
- if (CONST_Debug) {
- echo '
Place IDs: ';
- var_dump(array_keys($aResults));
- }
+ Debug::printDebugTable('Place IDs', $aResults);
if (!empty($aResults) && $this->sPostcode) {
$sPlaceIds = Result::joinIdsByTable($aResults, Result::TABLE_PLACEX);
@@ -461,7 +470,7 @@ class SearchDescription
$sSQL = 'SELECT place_id FROM placex';
$sSQL .= ' WHERE place_id in ('.$sPlaceIds.')';
$sSQL .= " AND postcode = '".$this->sPostcode."'";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aFilteredPlaceIDs = chksql($oDB->getCol($sSQL));
if ($aFilteredPlaceIDs) {
$aNewResults = array();
@@ -469,10 +478,7 @@ class SearchDescription
$aNewResults[$iPlaceId] = $aResults[$iPlaceId];
}
$aResults = $aNewResults;
- if (CONST_Debug) {
- echo '
Place IDs after postcode filtering: ';
- var_dump(array_keys($aResults));
- }
+ Debug::printVar('Place IDs after postcode filtering', $aResults);
}
}
}
@@ -491,7 +497,7 @@ class SearchDescription
}
$sSQL .= ' ORDER BY st_area(geometry) DESC LIMIT 1';
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -532,7 +538,7 @@ class SearchDescription
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC';
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
}
@@ -546,7 +552,7 @@ class SearchDescription
}
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid').' ASC';
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
}
@@ -576,7 +582,7 @@ class SearchDescription
$sSQL .= $this->oContext->excludeSQL(' AND p.place_id');
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -586,12 +592,16 @@ class SearchDescription
return $aResults;
}
- private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit)
+ private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
{
$aTerms = array();
$aOrder = array();
- if ($this->sHouseNumber && !empty($this->aAddress)) {
+ // Sort by existence of the requested house number but only if not
+ // too many results are expected for the street, i.e. if the result
+ // will be narrowed down by an address. Remember that with ordering
+ // every single result has to be checked.
+ if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$aOrder[] = ' (';
$aOrder[0] .= 'EXISTS(';
@@ -622,11 +632,7 @@ class SearchDescription
}
if (!empty($this->aAddress)) {
// For infrequent name terms disable index usage for address
- if (CONST_Search_NameOnlySearchFrequencyThreshold
- && count($this->aName) == 1
- && $aWordFrequencyScores[$this->aName[reset($this->aName)]]
- < CONST_Search_NameOnlySearchFrequencyThreshold
- ) {
+ if ($this->bRareName) {
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
} else {
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
@@ -707,7 +713,7 @@ class SearchDescription
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT '.$iLimit;
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql(
$oDB->getAll($sSQL),
@@ -724,7 +730,7 @@ class SearchDescription
return $aResults;
}
- private function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit)
+ private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
{
$aResults = array();
$sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX);
@@ -738,9 +744,8 @@ class SearchDescription
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
// XXX should inherit the exactMatches from its parent
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -766,9 +771,8 @@ class SearchDescription
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
@@ -783,9 +787,8 @@ class SearchDescription
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
@@ -805,9 +808,8 @@ class SearchDescription
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_TIGER);
@@ -841,7 +843,7 @@ class SearchDescription
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -855,7 +857,7 @@ class SearchDescription
$bCacheTable = (bool) chksql($oDB->getOne($sSQL));
$sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$iMaxRank = (int)chksql($oDB->getOne($sSQL));
// For state / country level searches the normal radius search doesn't work very well
@@ -868,7 +870,7 @@ class SearchDescription
$sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')";
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= ' LIMIT 1';
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$sPlaceGeom = chksql($oDB->getOne($sSQL));
}
@@ -878,7 +880,7 @@ class SearchDescription
$iMaxRank += 5;
$sSQL = 'SELECT place_id FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aPlaceIDs = chksql($oDB->getCol($sSQL));
$sPlaceIDs = join(',', $aPlaceIDs);
}
@@ -923,7 +925,7 @@ class SearchDescription
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -955,7 +957,7 @@ class SearchDescription
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -1000,6 +1002,24 @@ class SearchDescription
//////////// Debugging functions
+ public function debugInfo()
+ {
+ return array(
+ 'Search rank' => $this->iSearchRank,
+ 'Country code' => $this->sCountryCode,
+ 'Name terms' => $this->aName,
+ 'Name terms (stop words)' => $this->aNameNonSearch,
+ 'Address terms' => $this->aAddress,
+ 'Address terms (stop words)' => $this->aAddressNonSearch,
+ 'Address terms (full words)' => $this->aFullNameAddress,
+ 'Special search' => $this->iOperator,
+ 'Class' => $this->sClass,
+ 'Type' => $this->sType,
+ 'House number' => $this->sHouseNumber,
+ 'Postcode' => $this->sPostcode
+ );
+ }
+
public function dumpAsHtmlTableRow(&$aWordIDs)
{
$kf = function ($k) use (&$aWordIDs) {