X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/9a5d5d9aec4cf785c8190c37a3136cf09aca6902..c3940466b817a4c10a956f61c43d6bee3214dd40:/lib/SearchDescription.php?ds=inline diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index f3afaff2..edf4d059 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -4,6 +4,7 @@ namespace Nominatim; require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); require_once(CONST_BasePath.'/lib/SearchContext.php'); +require_once(CONST_BasePath.'/lib/Result.php'); /** * Description of a single interpretation of a search query. @@ -43,22 +44,42 @@ class SearchDescription private $iNamePhrase = -1; + /** + * Create an empty search description. + * + * @param object $oContext Global context to use. Will be inherited by + * all derived search objects. + */ public function __construct($oContext) { $this->oContext = $oContext; } + /** + * Get current search rank. + * + * The higher the search rank the lower the likelihood that the + * search is a correct interpretation of the search query. + * + * @return integer Search rank. + */ public function getRank() { return $this->iSearchRank; } - public function addToRank($iAddRank) - { - $this->iSearchRank += $iAddRank; - return $this->iSearchRank; - } - + /** + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). + * + * @param integer $iOperator Type of POI search. + * @param string $sClass Class (or OSM tag key) of POI. + * @param string $sType Type (or OSM tag value) of POI. + * + * @return void + */ public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; @@ -66,6 +87,11 @@ class SearchDescription $this->sType = $sType; } + /** + * Check if this might be a full address search. + * + * @return bool True if the search contains name, address and housenumber.
+ */ public function looksLikeFullAddress() { return sizeof($this->aName) @@ -73,28 +99,27 @@ class SearchDescription && preg_match('/[0-9]+/', $this->sHouseNumber); } - private function poiTable() - { - return 'place_classtype_'.$this->sClass.'_'.$this->sType; - } - - public function countryCodeSQL($sVar) - { - if ($this->sCountryCode) { - return $sVar.' = \''.$this->sCountryCode."'"; - } - if ($this->oContext->sqlCountryList) { - return $sVar.' in '.$this->oContext->sqlCountryList; - } - - return ''; - } - + /** + * Check if any operator is set. + * + * @return bool True, if this is a special search operation. + */ public function hasOperator() { return $this->iOperator != Operator::NONE; } + /** + * Extract key/value pairs from a query. + * + * Key/value pairs are recognised if they are of the form [<key>=<value>]. + * If multiple terms of this kind are found then all terms are removed + * but only the first is used for search. + * + * @param string $sQuery Original query string. + * + * @return string The query string with the special search patterns removed. + */ public function extractKeyValuePairs($sQuery) { // Search for terms of kind [<key>=<value>]. @@ -115,18 +140,20 @@ class SearchDescription return $sQuery; } - public function isValidSearch(&$aCountryCodes) + /** + * Check if the combination of parameters is sensible. + * + * @return bool True, if the search looks valid.
+ */ + public function isValidSearch() { if (!sizeof($this->aName)) { if ($this->sHouseNumber) { return false; } - } - if ($aCountryCodes - && $this->sCountryCode - && !in_array($this->sCountryCode, $aCountryCodes) - ) { - return false; + if (!$this->sClass && !$this->sCountryCode) { + return false; + } } return true; @@ -135,7 +162,22 @@ class SearchDescription /////////// Search building functions - public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) + /** + * Derive new searches by adding a full term to the existing search. + * + * @param mixed[] $aSearchTerm Description of the token. + * @param bool $bHasPartial True if there are also tokens of partial terms + * with the same name. + * @param string $sPhraseType Type of phrase the token is contained in. + * @param bool $bFirstToken True if the token is at the beginning of the + * query. + * @param bool $bFirstPhrase True if the token is in the first phrase of + * the query. + * @param bool $bLastToken True if the token is at the end of the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) { $aNewSearches = array(); @@ -153,12 +195,6 @@ class SearchDescription $oSearch->iSearchRank += 5; } $aNewSearches[] = $oSearch; - - // If it is at the beginning, we can be almost sure that - // the terms are in the wrong order. Increase score for all searches. - if ($bFirstToken) { - $iGlobalRank++; - } } } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode' @@ -166,7 +202,8 @@ class SearchDescription // We need to try the case where the postal code is the primary element // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) // so try both. 
- if (!$this->sPostcode && $bWordInQuery + if (!$this->sPostcode + && $aSearchTerm['word'] && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word'] ) { // If we have structured search or this is the first term, @@ -210,21 +247,16 @@ class SearchDescription $oSearch->iSearchRank++; } // also must not appear in the middle of the address - if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) { + if (sizeof($this->aAddress) + || sizeof($this->aAddressNonSearch) + || $this->sPostcode + ) { $oSearch->iSearchRank++; } $aNewSearches[] = $oSearch; } - } elseif ($sPhraseType == '' - && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null - ) { - // require a normalized exact match of the term - // if we have the normalizer version of the query - // available - if ($this->iOperator == Operator::NONE - && (isset($aSearchTerm['word']) && $aSearchTerm['word']) - && $bWordInQuery - ) { + } elseif ($sPhraseType == '' && $aSearchTerm['class']) { + if ($this->iOperator == Operator::NONE) { $oSearch = clone $this; $oSearch->iSearchRank++; @@ -239,7 +271,10 @@ class SearchDescription $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); $aNewSearches[] = $oSearch; } - } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { + } elseif (isset($aSearchTerm['word_id']) + && $aSearchTerm['word_id'] + && $sPhraseType != 'country' + ) { $iWordID = $aSearchTerm['word_id']; if (sizeof($this->aName)) { if (($sPhraseType == '' || !$bFirstPhrase) @@ -264,7 +299,18 @@ class SearchDescription return $aNewSearches; } - public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens) + /** + * Derive new searches by adding a partial term to the existing search. + * + * @param mixed[] $aSearchTerm Description of the token. + * @param bool $bStructuredPhrases True if the search is structured. + * @param integer $iPhrase Number of the phrase the token is in. 
+ * @param array[] $aFullTokens List of full term tokens with the + * same name. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) { // Only allow name terms. if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) { @@ -278,7 +324,7 @@ class SearchDescription && sizeof($this->aName) && strpos($aSearchTerm['word_token'], ' ') === false ) { - if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddress[$iWordID] = $iWordID; @@ -321,7 +367,7 @@ class SearchDescription if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { $oSearch->iSearchRank += 2; } - if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { $oSearch->aName[$iWordID] = $iWordID; } else { $oSearch->aNameNonSearch[$iWordID] = $iWordID; @@ -335,9 +381,26 @@ class SearchDescription /////////// Query functions - public function query(&$oDB, &$aWordFrequencyScores, &$aExactMatchCache, $iMinRank, $iMaxRank, $iLimit) + + /** + * Query database for places that match this search. + * + * @param object $oDB Database connection to use. + * @param mixed[] $aWordFrequencyScores Number of times tokens appear + * overall in a planet database. + * @param integer $iMinRank Minimum address rank to restrict + * search to. + * @param integer $iMaxRank Maximum address rank to restrict + * search to. + * @param integer $iLimit Maximum number of results. + * + * @return Result[] Matching places as Result objects indexed by + * place ID. Results from interpolation or Tiger + * data carry the house number in iHouseNumber.
+ */ + public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit) { - $aPlaceIDs = array(); + $aResults = array(); $iHousenumber = -1; if ($this->sCountryCode @@ -348,21 +411,21 @@ class SearchDescription ) { // Just looking for a country - look it up if (4 >= $iMinRank && 4 <= $iMaxRank) { - $aPlaceIDs = $this->queryCountry($oDB); + $aResults = $this->queryCountry($oDB); } } elseif (!sizeof($this->aName) && !sizeof($this->aAddress)) { // Neither name nor address? Then we must be // looking for a POI in a geographic area. if ($this->oContext->isBoundedSearch()) { - $aPlaceIDs = $this->queryNearbyPoi($oDB, $iLimit); + $aResults = $this->queryNearbyPoi($oDB, $iLimit); } } elseif ($this->iOperator == Operator::POSTCODE) { // looking for postcode - $aPlaceIDs = $this->queryPostcode($oDB, $iLimit); + $aResults = $this->queryPostcode($oDB, $iLimit); } else { // Ordinary search: // First search for places according to name and address. - $aNamedPlaceIDs = $this->queryNamedPlace( + $aResults = $this->queryNamedPlace( $oDB, $aWordFrequencyScores, $iMinRank, @@ -370,52 +433,50 @@ class SearchDescription $iLimit ); - if (sizeof($aNamedPlaceIDs)) { - foreach ($aNamedPlaceIDs as $aRow) { - $aPlaceIDs[] = $aRow['place_id']; - $aExactMatchCache[$aRow['place_id']] = $aRow['exactmatch']; - } - } - //now search for housenumber, if housenumber provided - if ($this->sHouseNumber && sizeof($aPlaceIDs)) { - $aResult = $this->queryHouseNumber($oDB, $aPlaceIDs, $iLimit); - - if (sizeof($aResult)) { - $iHousenumber = $aResult['iHouseNumber']; - $aPlaceIDs = $aResult['aPlaceIDs']; - } elseif (!$this->looksLikeFullAddress()) { - $aPlaceIDs = array(); + if ($this->sHouseNumber && sizeof($aResults)) { + $aNamedPlaceIDs = $aResults; + $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs, $iLimit); + + if (!sizeof($aResults) && $this->looksLikeFullAddress()) { + $aResults = $aNamedPlaceIDs; } } // finally get POIs if requested - if ($this->sClass && 
sizeof($aPlaceIDs)) { - $aPlaceIDs = $this->queryPoiByOperator($oDB, $aPlaceIDs, $iLimit); + if ($this->sClass && sizeof($aResults)) { + $aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit); } } if (CONST_Debug) { echo "
Place IDs: "; - var_Dump($aPlaceIDs); + var_dump(array_keys($aResults)); } - if (sizeof($aPlaceIDs) && $this->sPostcode) { - $sSQL = 'SELECT place_id FROM placex'; - $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; - $sSQL .= " AND postcode = '".$this->sPostcode."'"; - if (CONST_Debug) var_dump($sSQL); - $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL)); - if ($aFilteredPlaceIDs) { - $aPlaceIDs = $aFilteredPlaceIDs; - if (CONST_Debug) { - echo "
Place IDs after postcode filtering: "; - var_Dump($aPlaceIDs); + if (sizeof($aResults) && $this->sPostcode) { + $sPlaceIds = Result::joinIdsByTable($aResults, Result::TABLE_PLACEX); + if ($sPlaceIds) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.$sPlaceIds.')'; + $sSQL .= " AND postcode = '".$this->sPostcode."'"; + if (CONST_Debug) var_dump($sSQL); + $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL)); + if ($aFilteredPlaceIDs) { + $aNewResults = array(); + foreach ($aFilteredPlaceIDs as $iPlaceId) { + $aNewResults[$iPlaceId] = $aResults[$iPlaceId]; + } + $aResults = $aNewResults; + if (CONST_Debug) { + echo "
Place IDs after postcode filtering: "; + var_dump(array_keys($aResults)); + } } } } - return array('IDs' => $aPlaceIDs, 'houseNumber' => $iHousenumber); + return $aResults; } @@ -431,7 +492,12 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + $aResults = array(); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } + + return $aResults; } private function queryNearbyPoi(&$oDB, $iLimit) @@ -440,6 +506,7 @@ class SearchDescription return array(); } + $aDBResults = array(); $sPoiTable = $this->poiTable(); $sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'"; @@ -450,7 +517,7 @@ class SearchDescription } if ($this->oContext->hasNearPoint()) { $sSQL .= ' WHERE '.$this->oContext->withinSQL('ct.centroid'); - } else if ($this->oContext->bViewboxBounded) { + } elseif ($this->oContext->bViewboxBounded) { $sSQL .= ' WHERE ST_Contains('.$this->oContext->sqlViewboxSmall.', ct.centroid)'; } if ($this->oContext->sqlCountryList) { @@ -465,7 +532,7 @@ class SearchDescription } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + $aDBResults = chksql($oDB->getCol($sSQL)); } if ($this->oContext->hasNearPoint()) { @@ -479,10 +546,15 @@ class SearchDescription $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid')." 
ASC"; $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + $aDBResults = chksql($oDB->getCol($sSQL)); + } + + $aResults = array(); + foreach ($aDBResults as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); } - return array(); + return $aResults; } private function queryPostcode(&$oDB, $iLimit) @@ -505,7 +577,12 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + $aResults = array(); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_POSTCODE); + } + + return $aResults; } private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) @@ -620,6 +697,8 @@ class SearchDescription $iLimit = 20; } + $aResults = array(); + if (sizeof($aTerms)) { $sSQL = 'SELECT place_id,'.$sExactMatchSQL; $sSQL .= ' FROM search_name'; @@ -629,18 +708,29 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - return chksql( + $aDBResults = chksql( $oDB->getAll($sSQL), "Could not get places for search terms." 
); + + foreach ($aDBResults as $aResult) { + $oResult = new Result($aResult['place_id']); + $oResult->iExactMatches = $aResult['exactmatch']; + $aResults[$aResult['place_id']] = $oResult; + } } - return array(); + return $aResults; } private function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) { - $sPlaceIDs = join(',', $aRoadPlaceIDs); + $aResults = array(); + $sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX); + + if (!$sPlaceIDs) { + return $aResults; + } $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; $sSQL = 'SELECT place_id FROM placex '; @@ -651,15 +741,14 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + // XXX should inherit the exactMatches from its parent + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); } $bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber); $iHousenumber = intval($this->sHouseNumber); - if ($bIsIntHouseNumber) { + if ($bIsIntHouseNumber && !sizeof($aResults)) { // if nothing found, search in the interpolation line table $sSQL = 'SELECT distinct place_id FROM location_property_osmline'; $sSQL .= ' WHERE startnumber is not NULL'; @@ -680,15 +769,15 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $oResult = new Result($iPlaceId, Result::TABLE_OSMLINE); + $oResult->iHouseNumber = $iHousenumber; + $aResults[$iPlaceId] = $oResult; } } // If nothing found try the aux fallback table - if (CONST_Use_Aux_Location_data) { + if (CONST_Use_Aux_Location_data && !sizeof($aResults)) { $sSQL = 'SELECT place_id FROM location_property_aux'; $sSQL .= ' WHERE parent_place_id in 
('.$sPlaceIDs.')'; $sSQL .= " AND housenumber = '".$this->sHouseNumber."'"; @@ -697,16 +786,14 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX); } } // If nothing found then search in Tiger data (location_property_tiger) - if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber) { - $sSQL = 'SELECT distinct place_id FROM location_property_tiger'; + if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && !sizeof($aResults)) { + $sSQL = 'SELECT place_id FROM location_property_tiger'; $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and ('; if ($iHousenumber % 2 == 0) { $sSQL .= "interpolationtype='even'"; @@ -721,21 +808,25 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $oResult = new Result($iPlaceId, Result::TABLE_TIGER); + $oResult->iHouseNumber = $iHousenumber; + $aResults[$iPlaceId] = $oResult; } } - return array(); + return $aResults; } private function queryPoiByOperator(&$oDB, $aParentIDs, $iLimit) { - $sPlaceIDs = join(',', $aParentIDs); - $aClassPlaceIDs = array(); + $aResults = array(); + $sPlaceIDs = Result::joinIdsByTable($aParentIDs, Result::TABLE_PLACEX); + + if (!$sPlaceIDs) { + return $aResults; + } if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NAME) { // If they were searching for a named class (i.e. 
'Kings Head pub') @@ -751,7 +842,9 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = chksql($oDB->getCol($sSQL)); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } // NEAR and IN are handled the same @@ -831,7 +924,9 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } else { if ($this->oContext->hasNearPoint()) { $fRange = $this->oContext->nearRadius(); @@ -861,14 +956,32 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); + foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } } } - return $aClassPlaceIDs; + return $aResults; } + private function poiTable() + { + return 'place_classtype_'.$this->sClass.'_'.$this->sType; + } + + private function countryCodeSQL($sVar) + { + if ($this->sCountryCode) { + return $sVar.' = \''.$this->sCountryCode."'"; + } + if ($this->oContext->sqlCountryList) { + return $sVar.' in '.$this->oContext->sqlCountryList; + } + + return ''; + } /////////// Sort functions