X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/c02bf4986f4c6a820325727fa4b56e54a63c40d6..64ace51e02790bf88e162a15236c5ac2af865c5e:/lib/SearchDescription.php diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index c287c898..bb478b29 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -4,6 +4,7 @@ namespace Nominatim; require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); require_once(CONST_BasePath.'/lib/SearchContext.php'); +require_once(CONST_BasePath.'/lib/Result.php'); /** * Description of a single interpretation of a search query. @@ -16,6 +17,8 @@ class SearchDescription private $sCountryCode = ''; /// List of word ids making up the name of the object. private $aName = array(); + /// True if the name is rare enough to force index use on name. + private $bRareName = false; /// List of word ids making up the address of the object. private $aAddress = array(); /// Subset of word ids of full words making up the address. @@ -42,23 +45,42 @@ class SearchDescription /// Index of phrase currently processed. private $iNamePhrase = -1; - + /** + * Create an empty search description. + * + * @param object $oContext Global context to use. Will be inherited by + * all derived search objects. + */ public function __construct($oContext) { $this->oContext = $oContext; } + /** + * Get current search rank. + * + * The higher the search rank the lower the likelihood that the + * search is a correct interpretation of the search query. + * + * @return integer Search rank. + */ public function getRank() { return $this->iSearchRank; } - public function addToRank($iAddRank) - { - $this->iSearchRank += $iAddRank; - return $this->iSearchRank; - } - + /** + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). + * + * @param integer $iOperator Type of POI search + * @param string $sClass Class (or OSM tag key) of POI. + * @param string $sType Type (or OSM tag value) of POI. + * + * @return void + */ public function setPoiSearch($iOperator, $sClass, $sType) { $this->iOperator = $iOperator; @@ -66,18 +88,39 @@ class SearchDescription $this->sType = $sType; } + /** + * Check if this might be a full address search. + * + * @return bool True if the search contains name, address and housenumber. + */ public function looksLikeFullAddress() { - return sizeof($this->aName) - && (sizeof($this->aAddress || $this->sCountryCode)) + return (!empty($this->aName)) + && (!empty($this->aAddress) || $this->sCountryCode) && preg_match('/[0-9]+/', $this->sHouseNumber); } + /** + * Check if any operator is set. + * + * @return bool True, if this is a special search operation. + */ public function hasOperator() { return $this->iOperator != Operator::NONE; } + /** + * Extract key/value pairs from a query. + * + * Key/value pairs are recognised if they are of the form [=]. + * If multiple terms of this kind are found then all terms are removed + * but only the first is used for search. + * + * @param string $sQuery Original query string. + * + * @return string The query string with the special search patterns removed. + */ public function extractKeyValuePairs($sQuery) { // Search for terms of kind [=]. @@ -98,18 +141,20 @@ class SearchDescription return $sQuery; } - public function isValidSearch(&$aCountryCodes) + /** + * Check if the combination of parameters is sensible. + * + * @return bool True, if the search looks valid. + */ + public function isValidSearch() { - if (!sizeof($this->aName)) { + if (empty($this->aName)) { if ($this->sHouseNumber) { return false; } - } - if ($aCountryCodes - && $this->sCountryCode - && !in_array($this->sCountryCode, $aCountryCodes) - ) { - return false; + if (!$this->sClass && !$this->sCountryCode) { + return false; + } } return true; @@ -118,40 +163,46 @@ class SearchDescription /////////// Search building functions - public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) + /** + * Derive new searches by adding a full term to the existing search. + * + * @param object $oSearchTerm Description of the token. + * @param bool $bHasPartial True if there are also tokens of partial terms + * with the same name. + * @param string $sPhraseType Type of phrase the token is contained in. + * @param bool $bFirstToken True if the token is at the beginning of the + * query. + * @param bool $bFirstPhrase True if the token is in the first phrase of + * the query. + * @param bool $bLastToken True if the token is at the end of the query. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) { $aNewSearches = array(); if (($sPhraseType == '' || $sPhraseType == 'country') - && !empty($aSearchTerm['country_code']) - && $aSearchTerm['country_code'] != '0' + && is_a($oSearchTerm, '\Nominatim\Token\Country') ) { if (!$this->sCountryCode) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sCountryCode = $aSearchTerm['country_code']; + $oSearch->sCountryCode = $oSearchTerm->sCountryCode; // Country is almost always at the end of the string // - increase score for finding it anywhere else (optimisation) if (!$bLastToken) { $oSearch->iSearchRank += 5; } $aNewSearches[] = $oSearch; - - // If it is at the beginning, we can be almost sure that - // the terms are in the wrong order. Increase score for all searches. - if ($bFirstToken) { - $iGlobalRank++; - } } } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') - && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode' + && is_a($oSearchTerm, '\Nominatim\Token\Postcode') ) { // We need to try the case where the postal code is the primary element // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) // so try both. - if (!$this->sPostcode && $bWordInQuery - && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word'] - ) { + if (!$this->sPostcode) { // If we have structured search or this is the first term, // make the postcode the primary search element. if ($this->iOperator == Operator::NONE @@ -162,75 +213,82 @@ class SearchDescription $oSearch->iOperator = Operator::POSTCODE; $oSearch->aAddress = array_merge($this->aAddress, $this->aName); $oSearch->aName = - array($aSearchTerm['word_id'] => $aSearchTerm['word']); + array($oSearchTerm->iId => $oSearchTerm->sPostcode); $aNewSearches[] = $oSearch; } // If we have a structured search or this is not the first term, // add the postcode as an addendum. if ($this->iOperator != Operator::POSTCODE - && ($sPhraseType == 'postalcode' || sizeof($this->aName)) + && ($sPhraseType == 'postalcode' || !empty($this->aName)) ) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sPostcode = $aSearchTerm['word']; + $oSearch->sPostcode = $oSearchTerm->sPostcode; $aNewSearches[] = $oSearch; } } } elseif (($sPhraseType == '' || $sPhraseType == 'street') - && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house' + && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') ) { if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sHouseNumber = trim($aSearchTerm['word_token']); + $oSearch->sHouseNumber = $oSearchTerm->sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty - if (preg_match_all("/[^0-9]/", $oSearch->sHouseNumber, $aMatches) > 2) { + if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0 + || preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) { $oSearch->iSearchRank++; } - if (!isset($aSearchTerm['word_id'])) { + if (empty($oSearchTerm->iId)) { $oSearch->iSearchRank++; } // also must not appear in the middle of the address - if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) { + if (!empty($this->aAddress) + || (!empty($this->aAddressNonSearch)) + || $this->sPostcode + ) { $oSearch->iSearchRank++; } $aNewSearches[] = $oSearch; } } elseif ($sPhraseType == '' - && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null + && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') ) { - // require a normalized exact match of the term - // if we have the normalizer version of the query - // available - if ($this->iOperator == Operator::NONE - && (isset($aSearchTerm['word']) && $aSearchTerm['word']) - && $bWordInQuery - ) { + if ($this->iOperator == Operator::NONE) { $oSearch = clone $this; $oSearch->iSearchRank++; - $iOp = Operator::NEAR; // near == in for the moment - if ($aSearchTerm['operator'] == '') { - if (sizeof($this->aName)) { + $iOp = $oSearchTerm->iOperator; + if ($iOp == Operator::NONE) { + if (!empty($this->aName) || $this->oContext->isBoundedSearch()) { $iOp = Operator::NAME; + } else { + $iOp = Operator::NEAR; } $oSearch->iSearchRank += 2; } - $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); + $oSearch->setPoiSearch( + $iOp, + $oSearchTerm->sClass, + $oSearchTerm->sType + ); $aNewSearches[] = $oSearch; } - } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - $iWordID = $aSearchTerm['word_id']; - if (sizeof($this->aName)) { - if (($sPhraseType == '' || !$bFirstPhrase) - && $sPhraseType != 'country' - && !$bHasPartial - ) { + } elseif ($sPhraseType != 'country' + && is_a($oSearchTerm, '\Nominatim\Token\Word') + ) { + $iWordID = $oSearchTerm->iId; + // Full words can only be a name if they appear at the beginning + // of the phrase. In structured search the name must forcably in + // the first phrase. In unstructured search it may be in a later + // phrase when the first phrase is a house number. + if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { + if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 2; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { @@ -240,6 +298,11 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aName = array($iWordID => $iWordID); + if (CONST_Search_NameOnlySearchFrequencyThreshold) { + $oSearch->bRareName = + $oSearchTerm->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold; + } $aNewSearches[] = $oSearch; } } @@ -247,46 +310,56 @@ class SearchDescription return $aNewSearches; } - public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens) + /** + * Derive new searches by adding a partial term to the existing search. + * + * @param string $sToken Term for the token. + * @param object $oSearchTerm Description of the token. + * @param bool $bStructuredPhrases True if the search is structured. + * @param integer $iPhrase Number of the phrase the token is in. + * @param array[] $aFullTokens List of full term tokens with the + * same name. + * + * @return SearchDescription[] List of derived search descriptions. + */ + public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) { // Only allow name terms. - if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) { + if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) { return array(); } $aNewSearches = array(); - $iWordID = $aSearchTerm['word_id']; + $iWordID = $oSearchTerm->iId; if ((!$bStructuredPhrases || $iPhrase > 0) - && sizeof($this->aName) - && strpos($aSearchTerm['word_token'], ' ') === false + && (!empty($this->aName)) + && strpos($sToken, ' ') === false ) { - if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 2; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + if (preg_match('#^[0-9]+$#', $sToken)) { $oSearch->iSearchRank += 2; } - if (sizeof($aFullTokens)) { + if (!empty($aFullTokens)) { $oSearch->iSearchRank++; } $aNewSearches[] = $oSearch; // revert to the token version? - foreach ($aFullTokens as $aSearchTermToken) { - if (empty($aSearchTermToken['country_code']) - && empty($aSearchTermToken['lat']) - && empty($aSearchTermToken['class']) - ) { + foreach ($aFullTokens as $oSearchTermToken) { + if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + $oSearch->aAddress[$oSearchTermToken->iId] + = $oSearchTermToken->iId; $aNewSearches[] = $oSearch; } } @@ -294,17 +367,26 @@ class SearchDescription } if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && (!sizeof($this->aName) || $this->iNamePhrase == $iPhrase) + && (empty($this->aName) || $this->iNamePhrase == $iPhrase) ) { $oSearch = clone $this; - $oSearch->iSearchRank++; - if (!sizeof($this->aName)) { + $oSearch->iSearchRank += 2; + if (empty($this->aName)) { $oSearch->iSearchRank += 1; } - if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + if (preg_match('#^[0-9]+$#', $sToken)) { $oSearch->iSearchRank += 2; } - if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { + if (empty($this->aName) + && CONST_Search_NameOnlySearchFrequencyThreshold + ) { + $oSearch->bRareName = + $oSearchTerm->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold; + } else { + $oSearch->bRareName = false; + } $oSearch->aName[$iWordID] = $iWordID; } else { $oSearch->aNameNonSearch[$iWordID] = $iWordID; @@ -319,87 +401,94 @@ class SearchDescription /////////// Query functions - public function query(&$oDB, &$aWordFrequencyScores, &$aExactMatchCache, $iMinRank, $iMaxRank, $iLimit) + /** + * Query database for places that match this search. + * + * @param object $oDB Nominatim::DB instance to use. + * @param integer $iMinRank Minimum address rank to restrict search to. + * @param integer $iMaxRank Maximum address rank to restrict search to. + * @param integer $iLimit Maximum number of results. + * + * @return mixed[] An array with two fields: IDs contains the list of + * matching place IDs and houseNumber the houseNumber + * if appicable or -1 if not. + */ + public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit) { - $aPlaceIDs = array(); + $aResults = array(); $iHousenumber = -1; if ($this->sCountryCode - && !sizeof($this->aName) + && empty($this->aName) && !$this->iOperator && !$this->sClass && !$this->oContext->hasNearPoint() ) { // Just looking for a country - look it up if (4 >= $iMinRank && 4 <= $iMaxRank) { - $aPlaceIDs = $this->queryCountry($oDB); + $aResults = $this->queryCountry($oDB); } - } elseif (!sizeof($this->aName) && !sizeof($this->aAddress)) { + } elseif (empty($this->aName) && empty($this->aAddress)) { // Neither name nor address? Then we must be // looking for a POI in a geographic area. if ($this->oContext->isBoundedSearch()) { - $aPlaceIDs = $this->queryNearbyPoi($oDB, $iLimit); + $aResults = $this->queryNearbyPoi($oDB, $iLimit); } } elseif ($this->iOperator == Operator::POSTCODE) { // looking for postcode - $aPlaceIDs = $this->queryPostcode($oDB, $iLimit); + $aResults = $this->queryPostcode($oDB, $iLimit); } else { // Ordinary search: // First search for places according to name and address. - $aNamedPlaceIDs = $this->queryNamedPlace( + $aResults = $this->queryNamedPlace( $oDB, - $aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit ); - if (sizeof($aNamedPlaceIDs)) { - foreach ($aNamedPlaceIDs as $aRow) { - $aPlaceIDs[] = $aRow['place_id']; - $aExactMatchCache[$aRow['place_id']] = $aRow['exactmatch']; + // Now search for housenumber, if housenumber provided. Can be zero. + if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) { + // Downgrade the rank of the street results, they are missing + // the housenumber. + foreach ($aResults as $oRes) { + $oRes->iResultRank++; } - } - //now search for housenumber, if housenumber provided - if ($this->sHouseNumber && sizeof($aPlaceIDs)) { - $aResult = $this->queryHouseNumber($oDB, $aPlaceIDs, $iLimit); + $aHnResults = $this->queryHouseNumber($oDB, $aResults); - if (sizeof($aResult)) { - $iHousenumber = $aResult['iHouseNumber']; - $aPlaceIDs = $aResult['aPlaceIDs']; - } elseif (!$this->looksLikeFullAddress()) { - $aPlaceIDs = array(); + if (!empty($aHnResults)) { + foreach ($aHnResults as $oRes) { + $aResults[$oRes->iId] = $oRes; + } } } // finally get POIs if requested - if ($this->sClass && sizeof($aPlaceIDs)) { - $aPlaceIDs = $this->queryPoiByOperator($oDB, $aPlaceIDs, $iLimit); + if ($this->sClass && !empty($aResults)) { + $aResults = $this->queryPoiByOperator($oDB, $aResults, $iLimit); } } - if (CONST_Debug) { - echo "
Place IDs: "; - var_Dump($aPlaceIDs); - } + Debug::printDebugTable('Place IDs', $aResults); - if (sizeof($aPlaceIDs) && $this->sPostcode) { - $sSQL = 'SELECT place_id FROM placex'; - $sSQL .= ' WHERE place_id in ('.join(',', $aPlaceIDs).')'; - $sSQL .= " AND postcode = '".$this->sPostcode."'"; - if (CONST_Debug) var_dump($sSQL); - $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL)); - if ($aFilteredPlaceIDs) { - $aPlaceIDs = $aFilteredPlaceIDs; - if (CONST_Debug) { - echo "
Place IDs after postcode filtering: "; - var_Dump($aPlaceIDs); + if (!empty($aResults) && $this->sPostcode) { + $sPlaceIds = Result::joinIdsByTable($aResults, Result::TABLE_PLACEX); + if ($sPlaceIds) { + $sSQL = 'SELECT place_id FROM placex'; + $sSQL .= ' WHERE place_id in ('.$sPlaceIds.')'; + $sSQL .= " AND postcode != '".$this->sPostcode."'"; + Debug::printSQL($sSQL); + $aFilteredPlaceIDs = $oDB->getCol($sSQL); + if ($aFilteredPlaceIDs) { + foreach ($aFilteredPlaceIDs as $iPlaceId) { + $aResults[$iPlaceId]->iResultRank++; + } } } } - return array('IDs' => $aPlaceIDs, 'houseNumber' => $iHousenumber); + return $aResults; } @@ -411,11 +500,18 @@ class SearchDescription if ($this->oContext->bViewboxBounded) { $sSQL .= ' AND ST_Intersects('.$this->oContext->sqlViewboxSmall.', geometry)'; } - $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; + $sSQL .= ' ORDER BY st_area(geometry) DESC LIMIT 1'; + + Debug::printSQL($sSQL); + + $iPlaceId = $oDB->getOne($sSQL); - if (CONST_Debug) var_dump($sSQL); + $aResults = array(); + if ($iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } - return chksql($oDB->getCol($sSQL)); + return $aResults; } private function queryNearbyPoi(&$oDB, $iLimit) @@ -424,10 +520,10 @@ class SearchDescription return array(); } + $aDBResults = array(); $sPoiTable = $this->poiTable(); - $sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'"; - if (chksql($oDB->getOne($sSQL))) { + if ($oDB->tableExists($sPoiTable)) { $sSQL = 'SELECT place_id FROM '.$sPoiTable.' ct'; if ($this->oContext->sqlCountryList) { $sSQL .= ' JOIN placex USING (place_id)'; @@ -447,57 +543,77 @@ class SearchDescription } elseif ($this->oContext->hasNearPoint()) { $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC'; } - $sSQL .= " limit $iLimit"; - if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + $sSQL .= " LIMIT $iLimit"; + Debug::printSQL($sSQL); + $aDBResults = $oDB->getCol($sSQL); } if ($this->oContext->hasNearPoint()) { $sSQL = 'SELECT place_id FROM placex WHERE '; - $sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'"; + $sSQL .= 'class = :class and type = :type'; $sSQL .= ' AND '.$this->oContext->withinSQL('geometry'); $sSQL .= ' AND linked_place_id is null'; if ($this->oContext->sqlCountryList) { $sSQL .= ' AND country_code in '.$this->oContext->sqlCountryList; } - $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid')." ASC"; + $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid').' ASC'; $sSQL .= " LIMIT $iLimit"; - if (CONST_Debug) var_dump($sSQL); - return chksql($oDB->getCol($sSQL)); + Debug::printSQL($sSQL); + $aDBResults = $oDB->getCol( + $sSQL, + array(':class' => $this->sClass, ':type' => $this->sType) + ); + } + + $aResults = array(); + foreach ($aDBResults as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); } - return array(); + return $aResults; } private function queryPostcode(&$oDB, $iLimit) { $sSQL = 'SELECT p.place_id FROM location_postcode p '; - if (sizeof($this->aAddress)) { + if (!empty($this->aAddress)) { $sSQL .= ', search_name s '; $sSQL .= 'WHERE s.place_id = p.parent_place_id '; $sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)'; - $sSQL .= ' @> '.getArraySQL($this->aAddress).' AND '; + $sSQL .= ' @> '.$oDB->getArraySQL($this->aAddress).' AND '; } else { $sSQL .= 'WHERE '; } $sSQL .= "p.postcode = '".reset($this->aName)."'"; $sSQL .= $this->countryCodeSQL(' AND p.country_code'); + if ($this->oContext->bViewboxBounded) { + $sSQL .= ' AND ST_Intersects('.$this->oContext->sqlViewboxSmall.', geometry)'; + } $sSQL .= $this->oContext->excludeSQL(' AND p.place_id'); $sSQL .= " LIMIT $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - return chksql($oDB->getCol($sSQL)); + $aResults = array(); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_POSTCODE); + } + + return $aResults; } - private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) + private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); - if ($this->sHouseNumber && sizeof($this->aAddress)) { + // Sort by existence of the requested house number but only if not + // too many results are expected for the street, i.e. if the result + // will be narrowed down by an address. Remeber that with ordering + // every single result has to be checked. + if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) { $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; $aOrder[] = ' ('; $aOrder[0] .= 'EXISTS('; @@ -523,19 +639,15 @@ class SearchDescription $aOrder[0] .= ') DESC'; } - if (sizeof($this->aName)) { - $aTerms[] = 'name_vector @> '.getArraySQL($this->aName); + if (!empty($this->aName)) { + $aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName); } - if (sizeof($this->aAddress)) { + if (!empty($this->aAddress)) { // For infrequent name terms disable index usage for address - if (CONST_Search_NameOnlySearchFrequencyThreshold - && sizeof($this->aName) == 1 - && $aWordFrequencyScores[$this->aName[reset($this->aName)]] - < CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress); + if ($this->bRareName) { + $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.$oDB->getArraySQL($this->aAddress); } else { - $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress); + $aTerms[] = 'nameaddress_vector @> '.$oDB->getArraySQL($this->aAddress); } } @@ -545,13 +657,10 @@ class SearchDescription } if ($this->sHouseNumber) { - $aTerms[] = "address_rank between 16 and 27"; + $aTerms[] = 'address_rank between 16 and 27'; } elseif (!$this->sClass || $this->iOperator == Operator::NAME) { if ($iMinAddressRank > 0) { - $aTerms[] = "address_rank >= ".$iMinAddressRank; - } - if ($iMaxAddressRank < 30) { - $aTerms[] = "address_rank <= ".$iMaxAddressRank; + $aTerms[] = "((address_rank between $iMinAddressRank and $iMaxAddressRank) or (search_rank between $iMinAddressRank and $iMaxAddressRank))"; } } @@ -559,7 +668,7 @@ class SearchDescription $aTerms[] = $this->oContext->withinSQL('centroid'); $aOrder[] = $this->oContext->distanceSQL('centroid'); } elseif ($this->sPostcode) { - if (!sizeof($this->aAddress)) { + if (empty($this->aAddress)) { $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; } else { $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')"; @@ -582,15 +691,15 @@ class SearchDescription if ($this->sHouseNumber) { $sImportanceSQL = '- abs(26 - address_rank) + 3'; } else { - $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75-(search_rank::float/40) ELSE importance END)'; + $sImportanceSQL = '(CASE WHEN importance = 0 OR importance IS NULL THEN 0.75001-(search_rank::float/40) ELSE importance END)'; } $sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid'); $aOrder[] = "$sImportanceSQL DESC"; - if (sizeof($this->aFullNameAddress)) { + if (!empty($this->aFullNameAddress)) { $sExactMatchSQL = ' ( '; $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest('.getArraySQL($this->aFullNameAddress).')'; + $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')'; $sExactMatchSQL .= ' INTERSECT '; $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; $sExactMatchSQL .= ' ) s'; @@ -601,49 +710,57 @@ class SearchDescription } if ($this->sHouseNumber || $this->sClass) { - $iLimit = 20; + $iLimit = 40; } - if (sizeof($aTerms)) { + $aResults = array(); + + if (!empty($aTerms)) { $sSQL = 'SELECT place_id,'.$sExactMatchSQL; $sSQL .= ' FROM search_name'; $sSQL .= ' WHERE '.join(' and ', $aTerms); $sSQL .= ' ORDER BY '.join(', ', $aOrder); $sSQL .= ' LIMIT '.$iLimit; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - return chksql( - $oDB->getAll($sSQL), - "Could not get places for search terms." - ); + $aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.'); + + foreach ($aDBResults as $aResult) { + $oResult = new Result($aResult['place_id']); + $oResult->iExactMatches = $aResult['exactmatch']; + $aResults[$aResult['place_id']] = $oResult; + } } - return array(); + return $aResults; } - private function queryHouseNumber(&$oDB, $aRoadPlaceIDs, $iLimit) + private function queryHouseNumber(&$oDB, $aRoadPlaceIDs) { - $sPlaceIDs = join(',', $aRoadPlaceIDs); + $aResults = array(); + $sPlaceIDs = Result::joinIdsByTable($aRoadPlaceIDs, Result::TABLE_PLACEX); + + if (!$sPlaceIDs) { + return $aResults; + } $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; $sSQL = 'SELECT place_id FROM placex '; $sSQL .= 'WHERE parent_place_id in ('.$sPlaceIDs.')'; $sSQL .= " AND transliteration(housenumber) ~* E'".$sHouseNumberRegex."'"; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - $sSQL .= " LIMIT $iLimit"; - - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); + Debug::printSQL($sSQL); - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + // XXX should inherit the exactMatches from its parent + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); } $bIsIntHouseNumber= (bool) preg_match('/[0-9]+/', $this->sHouseNumber); $iHousenumber = intval($this->sHouseNumber); - if ($bIsIntHouseNumber) { + if ($bIsIntHouseNumber && empty($aResults)) { // if nothing found, search in the interpolation line table $sSQL = 'SELECT distinct place_id FROM location_property_osmline'; $sSQL .= ' WHERE startnumber is not NULL'; @@ -657,40 +774,36 @@ class SearchDescription $sSQL .= "interpolationtype='odd'"; } $sSQL .= " or interpolationtype='all') and "; - $sSQL .= $iHousenumber.">=startnumber and "; - $sSQL .= $iHousenumber."<=endnumber"; + $sSQL .= $iHousenumber.'>=startnumber and '; + $sSQL .= $iHousenumber.'<=endnumber'; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - $sSQL .= " limit $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $oResult = new Result($iPlaceId, Result::TABLE_OSMLINE); + $oResult->iHouseNumber = $iHousenumber; + $aResults[$iPlaceId] = $oResult; } } // If nothing found try the aux fallback table - if (CONST_Use_Aux_Location_data) { + if (CONST_Use_Aux_Location_data && empty($aResults)) { $sSQL = 'SELECT place_id FROM location_property_aux'; $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.')'; $sSQL .= " AND housenumber = '".$this->sHouseNumber."'"; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - $sSQL .= " limit $iLimit"; - - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); + Debug::printSQL($sSQL); - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX); } } // If nothing found then search in Tiger data (location_property_tiger) - if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber) { - $sSQL = 'SELECT distinct place_id FROM location_property_tiger'; + if (CONST_Use_US_Tiger_Data && $bIsIntHouseNumber && empty($aResults)) { + $sSQL = 'SELECT place_id FROM location_property_tiger'; $sSQL .= ' WHERE parent_place_id in ('.$sPlaceIDs.') and ('; if ($iHousenumber % 2 == 0) { $sSQL .= "interpolationtype='even'"; @@ -698,28 +811,31 @@ class SearchDescription $sSQL .= "interpolationtype='odd'"; } $sSQL .= " or interpolationtype='all') and "; - $sSQL .= $iHousenumber.">=startnumber and "; - $sSQL .= $iHousenumber."<=endnumber"; + $sSQL .= $iHousenumber.'>=startnumber and '; + $sSQL .= $iHousenumber.'<=endnumber'; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - $sSQL .= " limit $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); - - if (sizeof($aPlaceIDs)) { - return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $oResult = new Result($iPlaceId, Result::TABLE_TIGER); + $oResult->iHouseNumber = $iHousenumber; + $aResults[$iPlaceId] = $oResult; } } - return array(); + return $aResults; } private function queryPoiByOperator(&$oDB, $aParentIDs, $iLimit) { - $sPlaceIDs = join(',', $aParentIDs); - $aClassPlaceIDs = array(); + $aResults = array(); + $sPlaceIDs = Result::joinIdsByTable($aParentIDs, Result::TABLE_PLACEX); + + if (!$sPlaceIDs) { + return $aResults; + } if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NAME) { // If they were searching for a named class (i.e. 'Kings Head pub') @@ -728,25 +844,26 @@ class SearchDescription $sSQL .= " WHERE place_id in ($sPlaceIDs)"; $sSQL .= " AND class='".$this->sClass."' "; $sSQL .= " AND type='".$this->sType."'"; - $sSQL .= " AND linked_place_id is null"; + $sSQL .= ' AND linked_place_id is null'; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); - $sSQL .= " ORDER BY rank_search ASC "; + $sSQL .= ' ORDER BY rank_search ASC '; $sSQL .= " LIMIT $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - $aClassPlaceIDs = chksql($oDB->getCol($sSQL)); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } // NEAR and IN are handled the same if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) { $sClassTable = $this->poiTable(); - $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'"; - $bCacheTable = (bool) chksql($oDB->getOne($sSQL)); + $bCacheTable = $oDB->tableExists($sClassTable); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; - if (CONST_Debug) var_dump($sSQL); - $iMaxRank = (int)chksql($oDB->getOne($sSQL)); + Debug::printSQL($sSQL); + $iMaxRank = (int) $oDB->getOne($sSQL); // For state / country level searches the normal radius search doesn't work very well $sPlaceGeom = false; @@ -756,10 +873,10 @@ class SearchDescription $sSQL .= " WHERE place_id in ($sPlaceIDs)"; $sSQL .= " AND rank_search < $iMaxRank + 5"; $sSQL .= " AND ST_GeometryType(geometry) in ('ST_Polygon','ST_MultiPolygon')"; - $sSQL .= " ORDER BY rank_search ASC "; - $sSQL .= " LIMIT 1"; - if (CONST_Debug) var_dump($sSQL); - $sPlaceGeom = chksql($oDB->getOne($sSQL)); + $sSQL .= ' ORDER BY rank_search ASC '; + $sSQL .= ' LIMIT 1'; + Debug::printSQL($sSQL); + $sPlaceGeom = $oDB->getOne($sSQL); } if ($sPlaceGeom) { @@ -768,8 +885,8 @@ class SearchDescription $iMaxRank += 5; $sSQL = 'SELECT place_id FROM placex'; $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank"; - if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); + Debug::printSQL($sSQL); + $aPlaceIDs = $oDB->getCol($sSQL); $sPlaceIDs = join(',', $aPlaceIDs); } @@ -783,7 +900,7 @@ class SearchDescription if ($this->oContext->hasNearPoint()) { $sOrderBySQL = $this->oContext->distanceSQL('l.centroid'); } elseif ($sPlaceIDs) { - $sOrderBySQL = "ST_Distance(l.centroid, f.geometry)"; + $sOrderBySQL = 'ST_Distance(l.centroid, f.geometry)'; } elseif ($sPlaceGeom) { $sOrderBySQL = "ST_Distance(st_centroid('".$sPlaceGeom."'), l.centroid)"; } @@ -799,7 +916,7 @@ class SearchDescription $sSQL .= ' from '.$sClassTable.' as l'; if ($sPlaceIDs) { - $sSQL .= ",placex as f WHERE "; + $sSQL .= ',placex as f WHERE '; $sSQL .= "f.place_id in ($sPlaceIDs) "; $sSQL .= " AND ST_DWithin(l.centroid, f.centroid, $fRange)"; } elseif ($sPlaceGeom) { @@ -813,9 +930,11 @@ class SearchDescription } $sSQL .= " limit $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } else { if ($this->oContext->hasNearPoint()) { $fRange = $this->oContext->nearRadius(); @@ -825,7 +944,7 @@ class SearchDescription if ($this->oContext->hasNearPoint()) { $sOrderBySQL = $this->oContext->distanceSQL('l.geometry'); } else { - $sOrderBySQL = "ST_Distance(l.geometry, f.geometry)"; + $sOrderBySQL = 'ST_Distance(l.geometry, f.geometry)'; } $sSQL = 'SELECT distinct l.place_id'; @@ -839,18 +958,20 @@ class SearchDescription $sSQL .= " AND l.type='".$this->sType."'"; $sSQL .= $this->oContext->excludeSQL(' AND l.place_id'); if ($sOrderBySQL) { - $sSQL .= "ORDER BY orderterm ASC"; + $sSQL .= 'ORDER BY orderterm ASC'; } $sSQL .= " limit $iLimit"; - if (CONST_Debug) var_dump($sSQL); + Debug::printSQL($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); + foreach ($oDB->getCol($sSQL) as $iPlaceId) { + $aResults[$iPlaceId] = new Result($iPlaceId); + } } } } - return $aClassPlaceIDs; + return $aResults; } private function poiTable() @@ -886,25 +1007,43 @@ class SearchDescription //////////// Debugging functions + public function debugInfo() + { + return array( + 'Search rank' => $this->iSearchRank, + 'Country code' => $this->sCountryCode, + 'Name terms' => $this->aName, + 'Name terms (stop words)' => $this->aNameNonSearch, + 'Address terms' => $this->aAddress, + 'Address terms (stop words)' => $this->aAddressNonSearch, + 'Address terms (full words)' => $this->aFullNameAddress, + 'Special search' => $this->iOperator, + 'Class' => $this->sClass, + 'Type' => $this->sType, + 'House number' => $this->sHouseNumber, + 'Postcode' => $this->sPostcode + ); + } + public function dumpAsHtmlTableRow(&$aWordIDs) { $kf = function ($k) use (&$aWordIDs) { return $aWordIDs[$k]; }; - echo ""; + echo ''; echo "$this->iSearchRank"; - echo "".join(', ', array_map($kf, $this->aName)).""; - echo "".join(', ', array_map($kf, $this->aNameNonSearch)).""; - echo "".join(', ', array_map($kf, $this->aAddress)).""; - echo "".join(', ', array_map($kf, $this->aAddressNonSearch)).""; - echo "".$this->sCountryCode.""; - echo "".Operator::toString($this->iOperator).""; - echo "".$this->sClass.""; - echo "".$this->sType.""; - echo "".$this->sPostcode.""; - echo "".$this->sHouseNumber.""; - - echo ""; + echo ''.join(', ', array_map($kf, $this->aName)).''; + echo ''.join(', ', array_map($kf, $this->aNameNonSearch)).''; + echo ''.join(', ', array_map($kf, $this->aAddress)).''; + echo ''.join(', ', array_map($kf, $this->aAddressNonSearch)).''; + echo ''.$this->sCountryCode.''; + echo ''.Operator::toString($this->iOperator).''; + echo ''.$this->sClass.''; + echo ''.$this->sType.''; + echo ''.$this->sPostcode.''; + echo ''.$this->sHouseNumber.''; + + echo ''; } }