X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/cdfa31c390851487af59562b90bc451eee56664d..ae0bf810cc05fdd140193920cf8c2de459c45ea8:/lib/SearchDescription.php
diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php
index 13775d65..5f01e01b 100644
--- a/lib/SearchDescription.php
+++ b/lib/SearchDescription.php
@@ -17,6 +17,8 @@ class SearchDescription
private $sCountryCode = '';
/// List of word ids making up the name of the object.
private $aName = array();
+ /// True if the name is rare enough to force index use on name.
+ private $bRareName = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
/// Subset of word ids of full words making up the address.
@@ -43,7 +45,6 @@ class SearchDescription
/// Index of phrase currently processed.
private $iNamePhrase = -1;
-
/**
* Create an empty search description.
*
@@ -58,7 +59,7 @@ class SearchDescription
/**
* Get current search rank.
*
- * The higher the search rank the lower the likelyhood that the
+ * The higher the search rank the lower the likelihood that the
* search is a correct interpretation of the search query.
*
* @return integer Search rank.
@@ -94,8 +95,8 @@ class SearchDescription
*/
public function looksLikeFullAddress()
{
- return sizeof($this->aName)
- && (sizeof($this->aAddress || $this->sCountryCode))
+ return (!empty($this->aName))
+ && (!empty($this->aAddress) || $this->sCountryCode)
&& preg_match('/[0-9]+/', $this->sHouseNumber);
}
@@ -147,7 +148,7 @@ class SearchDescription
*/
public function isValidSearch()
{
- if (!sizeof($this->aName)) {
+ if (empty($this->aName)) {
if ($this->sHouseNumber) {
return false;
}
@@ -223,7 +224,7 @@ class SearchDescription
// If we have a structured search or this is not the first term,
// add the postcode as an addendum.
if ($this->iOperator != Operator::POSTCODE
- && ($sPhraseType == 'postalcode' || sizeof($this->aName))
+ && ($sPhraseType == 'postalcode' || !empty($this->aName))
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
@@ -247,8 +248,8 @@ class SearchDescription
$oSearch->iSearchRank++;
}
// also must not appear in the middle of the address
- if (sizeof($this->aAddress)
- || sizeof($this->aAddressNonSearch)
+ if (!empty($this->aAddress)
+ || (!empty($this->aAddressNonSearch))
|| $this->sPostcode
) {
$oSearch->iSearchRank++;
@@ -262,7 +263,7 @@ class SearchDescription
$iOp = Operator::NEAR; // near == in for the moment
if ($aSearchTerm['operator'] == '') {
- if (sizeof($this->aName)) {
+ if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
$iOp = Operator::NAME;
}
$oSearch->iSearchRank += 2;
@@ -276,11 +277,12 @@ class SearchDescription
&& $sPhraseType != 'country'
) {
$iWordID = $aSearchTerm['word_id'];
- if (sizeof($this->aName)) {
- if (($sPhraseType == '' || !$bFirstPhrase)
- && $sPhraseType != 'country'
- && !$bHasPartial
- ) {
+ // Full words can only be a name if they appear at the beginning
+ // of the phrase. In structured search the name must necessarily be in
+ // the first phrase. In unstructured search it may be in a later
+ // phrase when the first phrase is a house number.
+ if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
+ if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddress[$iWordID] = $iWordID;
@@ -292,6 +294,11 @@ class SearchDescription
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aName = array($iWordID => $iWordID);
+ if (CONST_Search_NameOnlySearchFrequencyThreshold) {
+ $oSearch->bRareName =
+ $aSearchTerm['search_name_count'] + 1
+ < CONST_Search_NameOnlySearchFrequencyThreshold;
+ }
$aNewSearches[] = $oSearch;
}
}
@@ -321,7 +328,7 @@ class SearchDescription
$iWordID = $aSearchTerm['word_id'];
if ((!$bStructuredPhrases || $iPhrase > 0)
- && sizeof($this->aName)
+ && (!empty($this->aName))
&& strpos($aSearchTerm['word_token'], ' ') === false
) {
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
@@ -336,7 +343,7 @@ class SearchDescription
if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
$oSearch->iSearchRank += 2;
}
- if (sizeof($aFullTokens)) {
+ if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
$aNewSearches[] = $oSearch;
@@ -357,17 +364,24 @@ class SearchDescription
}
if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
- && (!sizeof($this->aName) || $this->iNamePhrase == $iPhrase)
+ && (empty($this->aName) || $this->iNamePhrase == $iPhrase)
) {
$oSearch = clone $this;
$oSearch->iSearchRank += 2;
- if (!sizeof($this->aName)) {
+ if (empty($this->aName)) {
$oSearch->iSearchRank += 1;
}
if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
$oSearch->iSearchRank += 2;
}
if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+ if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
+ $oSearch->bRareName =
+ $aSearchTerm['search_name_count'] + 1
+ < CONST_Search_NameOnlySearchFrequencyThreshold;
+ } else {
+ $oSearch->bRareName = false;
+ }
$oSearch->aName[$iWordID] = $iWordID;
} else {
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
@@ -385,26 +399,22 @@ class SearchDescription
/**
* Query database for places that match this search.
*
- * @param object $oDB Database connection to use.
- * @param mixed[] $aWordFrequencyScores Number of times tokens appears
- * overall in a planet database.
- * @param integer $iMinRank Minimum address rank to restrict
- * search to.
- * @param integer $iMaxRank Maximum address rank to restrict
- * search to.
- * @param integer $iLimit Maximum number of results.
+ * @param object $oDB Database connection to use.
+ * @param integer $iMinRank Minimum address rank to restrict search to.
+ * @param integer $iMaxRank Maximum address rank to restrict search to.
+ * @param integer $iLimit Maximum number of results.
*
* @return mixed[] An array with two fields: IDs contains the list of
* matching place IDs and houseNumber the houseNumber
* if appicable or -1 if not.
*/
- public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit)
+ public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
{
$aResults = array();
$iHousenumber = -1;
if ($this->sCountryCode
- && !sizeof($this->aName)
+ && empty($this->aName)
&& !$this->iOperator
&& !$this->sClass
&& !$this->oContext->hasNearPoint()
@@ -413,7 +423,7 @@ class SearchDescription
if (4 >= $iMinRank && 4 <= $iMaxRank) {
$aResults = $this->queryCountry($oDB);
}
- } elseif (!sizeof($this->aName) && !sizeof($this->aAddress)) {
+ } elseif (empty($this->aName) && empty($this->aAddress)) {
// Neither name nor address? Then we must be
// looking for a POI in a geographic area.
if ($this->oContext->isBoundedSearch()) {
@@ -427,40 +437,36 @@ class SearchDescription
// First search for places according to name and address.
$aResults = $this->queryNamedPlace(
$oDB,
- $aWordFrequencyScores,
$iMinRank,
$iMaxRank,
$iLimit
);
//now search for housenumber, if housenumber provided
- if ($this->sHouseNumber && sizeof($aResults)) {
+ if ($this->sHouseNumber && !empty($aResults)) {
$aNamedPlaceIDs = $aResults;
- $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs, $iLimit);
+ $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs);
- if (!sizeof($aResults) && $this->looksLikeFullAddress()) {
+ if (empty($aResults) && $this->looksLikeFullAddress()) {
$aResults = $aNamedPlaceIDs;
}
}
// finally get POIs if requested
- if ($this->sClass && sizeof($aResults)) {
+ if ($this->sClass && !empty($aResults)) {
$aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit);
}
}
- if (CONST_Debug) {
- echo '
Place IDs: ';
- var_dump(array_keys($aResults));
- }
+ Debug::printDebugTable('Place IDs', $aResults);
- if (sizeof($aResults) && $this->sPostcode) {
+ if (!empty($aResults) && $this->sPostcode) {
$sPlaceIds = Result::joinIdsByTable($aResults, Result::TABLE_PLACEX);
if ($sPlaceIds) {
$sSQL = 'SELECT place_id FROM placex';
$sSQL .= ' WHERE place_id in ('.$sPlaceIds.')';
$sSQL .= " AND postcode = '".$this->sPostcode."'";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aFilteredPlaceIDs = chksql($oDB->getCol($sSQL));
if ($aFilteredPlaceIDs) {
$aNewResults = array();
@@ -468,10 +474,7 @@ class SearchDescription
$aNewResults[$iPlaceId] = $aResults[$iPlaceId];
}
$aResults = $aNewResults;
- if (CONST_Debug) {
- echo '
Place IDs after postcode filtering: ';
- var_dump(array_keys($aResults));
- }
+ Debug::printVar('Place IDs after postcode filtering', $aResults);
}
}
}
@@ -490,7 +493,7 @@ class SearchDescription
}
$sSQL .= ' ORDER BY st_area(geometry) DESC LIMIT 1';
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -531,7 +534,7 @@ class SearchDescription
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC';
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
}
@@ -545,7 +548,7 @@ class SearchDescription
}
$sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid').' ASC';
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql($oDB->getCol($sSQL));
}
@@ -561,7 +564,7 @@ class SearchDescription
{
$sSQL = 'SELECT p.place_id FROM location_postcode p ';
- if (sizeof($this->aAddress)) {
+ if (!empty($this->aAddress)) {
$sSQL .= ', search_name s ';
$sSQL .= 'WHERE s.place_id = p.parent_place_id ';
$sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)';
@@ -575,7 +578,7 @@ class SearchDescription
$sSQL .= $this->oContext->excludeSQL(' AND p.place_id');
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aResults = array();
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -585,12 +588,16 @@ class SearchDescription
return $aResults;
}
- private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit)
+ private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
{
$aTerms = array();
$aOrder = array();
- if ($this->sHouseNumber && sizeof($this->aAddress)) {
+ // Sort by existence of the requested house number but only if not
+ // too many results are expected for the street, i.e. if the result
+ // will be narrowed down by an address. Remember that with ordering
+ // every single result has to be checked.
+ if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) {
$sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
$aOrder[] = ' (';
$aOrder[0] .= 'EXISTS(';
@@ -616,16 +623,12 @@ class SearchDescription
$aOrder[0] .= ') DESC';
}
- if (sizeof($this->aName)) {
+ if (!empty($this->aName)) {
$aTerms[] = 'name_vector @> '.getArraySQL($this->aName);
}
- if (sizeof($this->aAddress)) {
+ if (!empty($this->aAddress)) {
// For infrequent name terms disable index usage for address
- if (CONST_Search_NameOnlySearchFrequencyThreshold
- && sizeof($this->aName) == 1
- && $aWordFrequencyScores[$this->aName[reset($this->aName)]]
- < CONST_Search_NameOnlySearchFrequencyThreshold
- ) {
+ if ($this->bRareName) {
$aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
} else {
$aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
@@ -652,7 +655,7 @@ class SearchDescription
$aTerms[] = $this->oContext->withinSQL('centroid');
$aOrder[] = $this->oContext->distanceSQL('centroid');
} elseif ($this->sPostcode) {
- if (!sizeof($this->aAddress)) {
+ if (empty($this->aAddress)) {
$aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))";
} else {
$aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')";
@@ -675,12 +678,12 @@ class SearchDescription
if ($this->sHouseNumber) {
$sImportanceSQL = '- abs(26 - address_rank) + 3';
} else {
- $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75-(search_rank::float/40) ELSE importance END)';
+ $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75001-(search_rank::float/40) ELSE importance END)';
}
$sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid');
$aOrder[] = "$sImportanceSQL DESC";
- if (sizeof($this->aFullNameAddress)) {
+ if (!empty($this->aFullNameAddress)) {
$sExactMatchSQL = ' ( ';
$sExactMatchSQL .= ' SELECT count(*) FROM ( ';
$sExactMatchSQL .= ' SELECT unnest('.getArraySQL($this->aFullNameAddress).')';
@@ -699,14 +702,14 @@ class SearchDescription
$aResults = array();
- if (sizeof($aTerms)) {
+ if (!empty($aTerms)) {
$sSQL = 'SELECT place_id,'.$sExactMatchSQL;
$sSQL .= ' FROM search_name';
$sSQL .= ' WHERE '.join(' and ', $aTerms);
$sSQL .= ' ORDER BY '.join(', ', $aOrder);
$sSQL .= ' LIMIT '.$iLimit;
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aDBResults = chksql(
$oDB->getAll($sSQL),
@@ -723,7 +726,7 @@ class SearchDescription
return $aResults;
}
- private function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit)
+ private function queryHouseNumber(&$oDB, $aRoadPlaceIDs)
{
$aResults = array();
$sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX);
@@ -737,9 +740,8 @@ class SearchDescription
$sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
// XXX should inherit the exactMatches from its parent
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
@@ -748,7 +750,7 @@ class SearchDescription
$bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber);
$iHousenumber = intval($this->sHouseNumber);
- if ($bIsIntHouseNumber && !sizeof($aResults)) {
+ if ($bIsIntHouseNumber && empty($aResults)) {
// if nothing found, search in the interpolation line table
$sSQL = 'SELECT distinct place_id FROM location_property_osmline';
$sSQL .= ' WHERE startnumber is not NULL';
@@ -765,9 +767,8 @@ class SearchDescription
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_OSMLINE);
@@ -777,14 +778,13 @@ class SearchDescription
}
// If nothing found try the aux fallback table
- if (CONST_Use_Aux_Location_data && !sizeof($aResults)) {
+ if (CONST_Use_Aux_Location_data && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_aux';
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')';
$sSQL .= " AND housenumber = '".$this->sHouseNumber."'";
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX);
@@ -792,7 +792,7 @@ class SearchDescription
}
// If nothing found then search in Tiger data (location_property_tiger)
- if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && !sizeof($aResults)) {
+ if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) {
$sSQL = 'SELECT place_id FROM location_property_tiger';
$sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and (';
if ($iHousenumber % 2 == 0) {
@@ -804,9 +804,8 @@ class SearchDescription
$sSQL .= $iHousenumber.'>=startnumber and ';
$sSQL .= $iHousenumber.'<=endnumber';
$sSQL .= $this->oContext->excludeSQL(' AND place_id');
- $sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$oResult = new Result($iPlaceId, Result::TABLE_TIGER);
@@ -840,7 +839,7 @@ class SearchDescription
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= " LIMIT $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -854,7 +853,7 @@ class SearchDescription
$bCacheTable = (bool) chksql($oDB->getOne($sSQL));
$sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$iMaxRank = (int)chksql($oDB->getOne($sSQL));
// For state / country level searches the normal radius search doesn't work very well
@@ -867,7 +866,7 @@ class SearchDescription
$sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')";
$sSQL .= ' ORDER BY rank_search ASC ';
$sSQL .= ' LIMIT 1';
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$sPlaceGeom = chksql($oDB->getOne($sSQL));
}
@@ -877,7 +876,7 @@ class SearchDescription
$iMaxRank += 5;
$sSQL = 'SELECT place_id FROM placex';
$sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
$aPlaceIDs = chksql($oDB->getCol($sSQL));
$sPlaceIDs = join(',', $aPlaceIDs);
}
@@ -922,7 +921,7 @@ class SearchDescription
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -954,7 +953,7 @@ class SearchDescription
}
$sSQL .= " limit $iLimit";
- if (CONST_Debug) var_dump($sSQL);
+ Debug::printSQL($sSQL);
foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) {
$aResults[$iPlaceId] = new Result($iPlaceId);
@@ -999,6 +998,24 @@ class SearchDescription
//////////// Debugging functions
+ public function debugInfo()
+ {
+ return array(
+ 'Search rank' => $this->iSearchRank,
+ 'Country code' => $this->sCountryCode,
+ 'Name terms' => $this->aName,
+ 'Name terms (stop words)' => $this->aNameNonSearch,
+ 'Address terms' => $this->aAddress,
+ 'Address terms (stop words)' => $this->aAddressNonSearch,
+ 'Address terms (full words)' => $this->aFullNameAddress,
+ 'Special search' => $this->iOperator,
+ 'Class' => $this->sClass,
+ 'Type' => $this->sType,
+ 'House number' => $this->sHouseNumber,
+ 'Postcode' => $this->sPostcode
+ );
+ }
+
public function dumpAsHtmlTableRow(&$aWordIDs)
{
$kf = function ($k) use (&$aWordIDs) {