X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d72c8633531c859ebf20ed366f1d6976853ffe0d..4bff2814a9527e4d4f7645e9eedadf6bfeba698e:/lib/SearchDescription.php diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 1a994acd..533c0ab4 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -2,24 +2,7 @@ namespace Nominatim; -/** - * Operators describing special searches. - */ -abstract final class Operator -{ - /// No operator selected. - const NONE = 0; - /// Search for POI of the given type. - const TYPE = 1; - /// Search for POIs near the given place. - const NEAR = 2; - /// Search for POIS in the given place. - const IN = 3; - /// Search for POIS named as given. - const NAME = 4; - /// Search for postcodes. - const POSTCODE = 5; -} +require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); /** * Description of a single interpretation of a search query. @@ -58,19 +41,23 @@ class SearchDescription /// Index of phrase currently processed private $iNamePhrase = -1; + public function getRank() { return $this->iSearchRank; } + public function addToRank($iAddRank) + { + $this->iSearchRank += $iAddRank; + return $this->iSearchRank; + } + public function getPostCode() { return $this->sPostcode; } - /** - * Set the geographic search radius. - */ public function setNear(&$oNearPoint) { $this->oNearPoint = $oNearPoint; @@ -83,29 +70,20 @@ class SearchDescription $this->sType = $sType; } - /** - * Check if name or address for the search are specified. - */ public function isNamedSearch() { return sizeof($this->aName) > 0 || sizeof($this->aAddress) > 0; } - /** - * Check if only a country is requested. - */ public function isCountrySearch() { return $this->sCountryCode && sizeof($this->aName) == 0 - && !$this->iOperator && !$this->oNear; + && !$this->iOperator && !$this->oNearPoint; } - /** - * Check if a search near a geographic location is requested. - */ public function isNearSearch() { - return (bool) $this->oNear; + return (bool) $this->oNearPoint; } public function isPoiSearch() @@ -141,7 +119,7 @@ class SearchDescription return $sVar.' = \''.$this->sCountryCode."'"; } if ($sCountryList) { - return $sVar.' in ('.$this->sCountryCode.')'; + return $sVar.' in ('.$sCountryList.')'; } return ''; @@ -152,13 +130,6 @@ class SearchDescription return $this->iOperator != Operator::NONE; } - /** - * Extract special terms from the query, amend the search - * and return the shortended query. - * - * Only the first special term found will be used but all will - * be removed from the query. - */ public function extractKeyValuePairs($sQuery) { // Search for terms of kind [=]. @@ -179,12 +150,228 @@ class SearchDescription return $sQuery; } + public function isValidSearch(&$aCountryCodes) + { + if (!sizeof($this->aName)) { + if ($this->sHouseNumber) { + return false; + } + } + if ($aCountryCodes + && $this->sCountryCode + && !in_array($this->sCountryCode, $aCountryCodes) + ) { + return false; + } + + return true; + } + + /////////// Search building functions + + + public function extendWithFullTerm($aSearchTerm, $bWordInQuery, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken, &$iGlobalRank) + { + $aNewSearches = array(); + + if (($sPhraseType == '' || $sPhraseType == 'country') + && !empty($aSearchTerm['country_code']) + && $aSearchTerm['country_code'] != '0' + ) { + if (!$this->sCountryCode) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sCountryCode = $aSearchTerm['country_code']; + // Country is almost always at the end of the string + // - increase score for finding it anywhere else (optimisation) + if (!$bLastToken) { + $oSearch->iSearchRank += 5; + } + $aNewSearches[] = $oSearch; + + // If it is at the beginning, we can be almost sure that + // the terms are in the wrong order. Increase score for all searches. + if ($bFirstToken) { + $iGlobalRank++; + } + } + } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') + && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode' + ) { + // We need to try the case where the postal code is the primary element + // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) + // so try both. + if (!$this->sPostcode && $bWordInQuery) { + // If we have structured search or this is the first term, + // make the postcode the primary search element. + if ($this->iOperator == Operator::NONE + && ($sPhraseType == 'postalcode' || $bFirstToken) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->iOperator = Operator::POSTCODE; + $oSearch->aAddress = array_merge($this->aAddress, $this->aName); + $oSearch->aName = + array($aSearchTerm['word_id'] => $aSearchTerm['word']); + $aNewSearches[] = $oSearch; + } + + // If we have a structured search or this is not the first term, + // add the postcode as an addendum. + if ($this->iOperator != Operator::POSTCODE + && ($sPhraseType == 'postalcode' || sizeof($this->aName)) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sPostcode = $aSearchTerm['word']; + $aNewSearches[] = $oSearch; + } + } + } elseif (($sPhraseType == '' || $sPhraseType == 'street') + && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house' + ) { + if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->sHouseNumber = trim($aSearchTerm['word_token']); + // sanity check: if the housenumber is not mainly made + // up of numbers, add a penalty + if (preg_match_all("/[^0-9]/", $oSearch->sHouseNumber, $aMatches) > 2) { + $oSearch->iSearchRank++; + } + // also must not appear in the middle of the address + if (sizeof($this->aAddress) || sizeof($this->aAddressNonSearch)) { + $oSearch->iSearchRank++; + } + $aNewSearches[] = $oSearch; + } + } elseif ($sPhraseType == '' + && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null + ) { + // require a normalized exact match of the term + // if we have the normalizer version of the query + // available + if ($this->iOperator == Operator::NONE + && (isset($aSearchTerm['word']) && $aSearchTerm['word']) + && $bWordInQuery + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + + $iOp = Operator::NEAR; // near == in for the moment + if ($aSearchTerm['operator'] == '') { + if (sizeof($this->aName)) { + $iOp = Operator::NAME; + } + $oSearch->iSearchRank += 2; + } + + $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); + $aNewSearches[] = $oSearch; + } + } elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { + $iWordID = $aSearchTerm['word_id']; + if (sizeof($this->aName)) { + if (($sPhraseType == '' || !$bFirstPhrase) + && $sPhraseType != 'country' + && !$bHasPartial + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$iWordID] = $iWordID; + $aNewSearches[] = $oSearch; + } else { + $this->aFullNameAddress[$iWordID] = $iWordID; + } + } else { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aName = array($iWordID => $iWordID); + $aNewSearches[] = $oSearch; + } + } + + return $aNewSearches; + } + + public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, &$aWordFrequencyScores, $aFullTokens) + { + // Only allow name terms. + if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) { + return array(); + } + + $aNewSearches = array(); + $iWordID = $aSearchTerm['word_id']; + + if ((!$bStructuredPhrases || $iPhrase > 0) + && sizeof($this->aName) + && strpos($aSearchTerm['word_token'], ' ') === false + ) { + if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$iWordID] = $iWordID; + $aNewSearches[] = $oSearch; + } else { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddressNonSearch[$iWordID] = $iWordID; + if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + $oSearch->iSearchRank += 2; + } + if (sizeof($aFullTokens)) { + $oSearch->iSearchRank++; + } + $aNewSearches[] = $oSearch; + + // revert to the token version? + foreach ($aFullTokens as $aSearchTermToken) { + if (empty($aSearchTermToken['country_code']) + && empty($aSearchTermToken['lat']) + && empty($aSearchTermToken['class']) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + $aNewSearches[] = $oSearch; + } + } + } + } + + if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) + && (!sizeof($this->aName) || $this->iNamePhrase == $iPhrase) + ) { + $oSearch = clone $this; + $oSearch->iSearchRank++; + if (!sizeof($this->aName)) { + $oSearch->iSearchRank += 1; + } + if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + $oSearch->iSearchRank += 2; + } + if ($aWordFrequencyScores[$iWordID] < CONST_Max_Word_Frequency) { + $oSearch->aName[$iWordID] = $iWordID; + } else { + $oSearch->aNameNonSearch[$iWordID] = $iWordID; + } + $oSearch->iNamePhrase = $iPhrase; + $aNewSearches[] = $oSearch; + } + + return $aNewSearches; + } + + /////////// Query functions + + public function queryCountry(&$oDB, $sViewboxSQL) { $sSQL = 'SELECT place_id FROM placex '; $sSQL .= "WHERE country_code='".$this->sCountryCode."'"; $sSQL .= ' AND rank_search = 4'; - if ($ViewboxSQL) { + if ($sViewboxSQL) { $sSQL .= " AND ST_Intersects($sViewboxSQL, geometry)"; } $sSQL .= " ORDER BY st_area(geometry) DESC LIMIT 1"; @@ -226,7 +413,7 @@ class SearchDescription } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } if ($this->oNearPoint) { @@ -240,7 +427,7 @@ class SearchDescription $sSQL .= ' ORDER BY '.$this->oNearPoint->distanceSQL('centroid')." ASC"; $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } return array(); @@ -248,7 +435,7 @@ class SearchDescription public function queryPostcode(&$oDB, $sCountryList, $iLimit) { - $sSQL = 'SELECT p.place_id FROM location_postcode p '; + $sSQL = 'SELECT p.place_id FROM location_postcode p '; if (sizeof($this->aAddress)) { $sSQL .= ', search_name s '; @@ -259,16 +446,16 @@ class SearchDescription $sSQL .= 'WHERE '; } - $sSQL .= "p.postcode = '".pg_escape_string(reset($this->$aName))."'"; + $sSQL .= "p.postcode = '".pg_escape_string(reset($this->aName))."'"; $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); if ($sCountryTerm) { - $sSQL .= ' AND '.$sCountyTerm; + $sSQL .= ' AND '.$sCountryTerm; } $sSQL .= " LIMIT $iLimit"; if (CONST_Debug) var_dump($sSQL); - return chksql($this->oDB->getCol($sSQL)); + return chksql($oDB->getCol($sSQL)); } public function queryNamedPlace(&$oDB, $aWordFrequencyScores, $sCountryList, $iMinAddressRank, $iMaxAddressRank, $sExcludeSQL, $sViewboxSmall, $sViewboxLarge, $iLimit) @@ -318,7 +505,7 @@ class SearchDescription } } - $sCountryTerm = $this->countryCodeSQL('p.country_code', $sCountryList); + $sCountryTerm = $this->countryCodeSQL('country_code', $sCountryList); if ($sCountryTerm) { $aTerms[] = $sCountryTerm; } @@ -346,11 +533,11 @@ class SearchDescription } if ($sExcludeSQL) { - $aTerms = 'place_id not in ('.$sExcludeSQL.')'; + $aTerms[] = 'place_id not in ('.$sExcludeSQL.')'; } if ($sViewboxSmall) { - $aTerms[] = 'centroid && '.$sViewboxSmall; + $aTerms[] = 'centroid && '.$sViewboxSmall; } if ($this->oNearPoint) { @@ -397,7 +584,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); return chksql( - $this->oDB->getAll($sSQL), + $oDB->getAll($sSQL), "Could not get places for search terms." ); } @@ -421,7 +608,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); @@ -446,14 +633,14 @@ class SearchDescription $sSQL .= $iHousenumber.">=startnumber and "; $sSQL .= $iHousenumber."<=endnumber"; - if ($sExcludeSQL)) { + if ($sExcludeSQL) { $sSQL .= ' AND place_id not in ('.$sExcludeSQL.')'; } $sSQL .= " limit $iLimit"; if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); @@ -472,7 +659,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => -1); @@ -499,7 +686,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL, 0)); + $aPlaceIDs = chksql($oDB->getCol($sSQL, 0)); if (sizeof($aPlaceIDs)) { return array('aPlaceIDs' => $aPlaceIDs, 'iHouseNumber' => $iHousenumber); @@ -528,18 +715,18 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aClassPlaceIDs = chksql($oDB->getCol($sSQL)); } // NEAR and IN are handled the same if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) { $sClassTable = $this->poiTable(); $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'"; - $bCacheTable = (bool) chksql($this->oDB->getOne($sSQL)); + $bCacheTable = (bool) chksql($oDB->getOne($sSQL)); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; if (CONST_Debug) var_dump($sSQL); - $iMaxRank = (int)chksql($this->oDB->getOne($sSQL)); + $iMaxRank = (int)chksql($oDB->getOne($sSQL)); // For state / country level searches the normal radius search doesn't work very well $sPlaceGeom = false; @@ -552,7 +739,7 @@ class SearchDescription $sSQL .= " ORDER BY rank_search ASC "; $sSQL .= " LIMIT 1"; if (CONST_Debug) var_dump($sSQL); - $sPlaceGeom = chksql($this->oDB->getOne($sSQL)); + $sPlaceGeom = chksql($oDB->getOne($sSQL)); } if ($sPlaceGeom) { @@ -562,7 +749,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM placex'; $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank"; if (CONST_Debug) var_dump($sSQL); - $aPlaceIDs = chksql($this->oDB->getCol($sSQL)); + $aPlaceIDs = chksql($oDB->getCol($sSQL)); $sPlaceIDs = join(',', $aPlaceIDs); } @@ -610,7 +797,7 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); } else { if ($this->oNearPoint) { $fRange = $this->oNearPoint->radius(); @@ -642,11 +829,58 @@ class SearchDescription if (CONST_Debug) var_dump($sSQL); - $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($this->oDB->getCol($sSQL))); + $aClassPlaceIDs = array_merge($aClassPlaceIDs, chksql($oDB->getCol($sSQL))); } } } return $aClassPlaceIDs; } -}; + + + /////////// Sort functions + + + public static function bySearchRank($a, $b) + { + if ($a->iSearchRank == $b->iSearchRank) { + return $a->iOperator + strlen($a->sHouseNumber) + - $b->iOperator - strlen($b->sHouseNumber); + } + + return $a->iSearchRank < $b->iSearchRank ? -1 : 1; + } + + //////////// Debugging functions + + + public function dumpAsHtmlTableRow(&$aWordIDs) + { + $kf = function ($k) use (&$aWordIDs) { + return $aWordIDs[$k]; + }; + + echo ""; + echo "$this->iSearchRank"; + echo "".join(', ', array_map($kf, $this->aName)).""; + echo "".join(', ', array_map($kf, $this->aNameNonSearch)).""; + echo "".join(', ', array_map($kf, $this->aAddress)).""; + echo "".join(', ', array_map($kf, $this->aAddressNonSearch)).""; + echo "".$this->sCountryCode.""; + echo "".Operator::toString($this->iOperator).""; + echo "".$this->sClass.""; + echo "".$this->sType.""; + echo "".$this->sPostcode.""; + echo "".$this->sHouseNumber.""; + + if ($this->oNearPoint) { + echo "".$this->oNearPoint->lat().""; + echo "".$this->oNearPoint->lon().""; + echo "".$this->oNearPoint->radius().""; + } else { + echo ""; + } + + echo ""; + } +}