X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/ec3f6c9c42dd89e71d4edd3cfb2a911453aba58d..79effae93387958e2716aa679137305c8db9d11d:/lib-php/SearchDescription.php diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index 8924287a..be5623af 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -19,6 +19,8 @@ class SearchDescription private $aName = array(); /// True if the name is rare enough to force index use on name. private $bRareName = false; + /// True if the name requires to be accompanied by address terms. + private $bNameNeedsAddress = false; /// List of word ids making up the address of the object. private $aAddress = array(); /// List of word ids that appear in the name but should be ignored. @@ -67,35 +69,6 @@ class SearchDescription return $this->iSearchRank; } - /** - * Make this search a POI search. - * - * In a POI search, objects are not (only) searched by their name - * but also by the primary OSM key/value pair (class and type in Nominatim). - * - * @param integer $iOperator Type of POI search - * @param string $sClass Class (or OSM tag key) of POI. - * @param string $sType Type (or OSM tag value) of POI. - * - * @return void - */ - public function setPoiSearch($iOperator, $sClass, $sType) - { - $this->iOperator = $iOperator; - $this->sClass = $sClass; - $this->sType = $sType; - } - - /** - * Check if any operator is set. - * - * @return bool True, if this is a special search operation. - */ - public function hasOperator() - { - return $this->iOperator != Operator::NONE; - } - /** * Extract key/value pairs from a query. * @@ -142,253 +115,253 @@ class SearchDescription return false; } } + if ($this->bNameNeedsAddress && empty($this->aAddress)) { + return false; + } return true; } /////////// Search building functions - /** - * Derive new searches by adding a full term to the existing search. + * Create a copy of this search description adding to search rank. * - * @param string $sToken Term for the token. - * @param object $oSearchTerm Description of the token. - * @param object $oPosition Description of the token position within - the query. + * @param integer $iTermCost Cost to add to the current search rank. * - * @return SearchDescription[] List of derived search descriptions. + * @return object Cloned search description. */ - public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition) + public function clone($iTermCost) { - $aNewSearches = array(); + $oSearch = clone $this; + $oSearch->iSearchRank += $iTermCost; - if ($oPosition->maybePhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Country') - ) { - if (!$this->sCountryCode) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->sCountryCode = $oSearchTerm->sCountryCode; - // Country is almost always at the end of the string - // - increase score for finding it anywhere else (optimisation) - if (!$oPosition->isLastToken()) { - $oSearch->iSearchRank += 5; - $oSearch->iNamePhrase = -1; - } - $aNewSearches[] = $oSearch; - } - } elseif ($oPosition->maybePhrase('postalcode') - && is_a($oSearchTerm, '\Nominatim\Token\Postcode') - ) { - if (!$this->sPostcode) { - // If we have structured search or this is the first term, - // make the postcode the primary search element. - if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iOperator = Operator::POSTCODE; - $oSearch->aAddress = array_merge($this->aAddress, $this->aName); - $oSearch->aName = - array($oSearchTerm->iId => $oSearchTerm->sPostcode); - $aNewSearches[] = $oSearch; - } + return $oSearch; + } - // If we have a structured search or this is not the first term, - // add the postcode as an addendum. - if ($this->iOperator != Operator::POSTCODE - && ($oPosition->isPhrase('postalcode') || !empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->iNamePhrase = -1; - if (strlen($oSearchTerm->sPostcode) < 4) { - $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); - } - $oSearch->sPostcode = $oSearchTerm->sPostcode; - $aNewSearches[] = $oSearch; - } - } - } elseif ($oPosition->maybePhrase('street') - && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') - ) { - if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { - // sanity check: if the housenumber is not mainly made - // up of numbers, add a penalty - $iSearchCost = 1; - if (preg_match('/\\d/', $oSearchTerm->sToken) === 0 - || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) { - $iSearchCost++; - } - if ($this->iOperator != Operator::NONE) { - $iSearchCost++; - } - if (empty($oSearchTerm->iId)) { - $iSearchCost++; - } - // also must not appear in the middle of the address - if (!empty($this->aAddress) - || (!empty($this->aAddressNonSearch)) - || $this->sPostcode - ) { - $iSearchCost++; - } + /** + * Check if the search currently includes a name. + * + * @param bool bIncludeNonNames If true stop-word tokens are taken into + * account, too. + * + * @return bool True, if search has a name. + */ + public function hasName($bIncludeNonNames = false) + { + return !empty($this->aName) + || (!empty($this->aNameNonSearch) && $bIncludeNonNames); + } - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->iNamePhrase = -1; - $oSearch->sHouseNumber = $oSearchTerm->sToken; - $aNewSearches[] = $oSearch; - - // Housenumbers may appear in the name when the place has its own - // address terms. - if ($oSearchTerm->iId !== null - && ($this->iNamePhrase >= 0 || empty($this->aName)) - && empty($this->aAddress) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank += $iSearchCost; - $oSearch->aAddress = $this->aName; - $oSearch->bRareName = false; - $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId); - $aNewSearches[] = $oSearch; - } - } - } elseif ($oPosition->isPhrase('') - && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') - ) { - if ($this->iOperator == Operator::NONE) { - $oSearch = clone $this; - $oSearch->iSearchRank += 2; - $oSearch->iNamePhrase = -1; - - $iOp = $oSearchTerm->iOperator; - if ($iOp == Operator::NONE) { - if (!empty($this->aName) || $this->oContext->isBoundedSearch()) { - $iOp = Operator::NAME; - } else { - $iOp = Operator::NEAR; - } - $oSearch->iSearchRank += 2; - } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) { - $oSearch->iSearchRank += 2; - } - if ($this->sHouseNumber) { - $oSearch->iSearchRank++; - } + /** + * Check if the search currently includes an address term. + * + * @return bool True, if any address term is included, including stop-word + * terms. + */ + public function hasAddress() + { + return !empty($this->aAddress) || !empty($this->aAddressNonSearch); + } - $oSearch->setPoiSearch( - $iOp, - $oSearchTerm->sClass, - $oSearchTerm->sType - ); - $aNewSearches[] = $oSearch; - } - } elseif (!$oPosition->isPhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Word') - ) { - $iWordID = $oSearchTerm->iId; - // Full words can only be a name if they appear at the beginning - // of the phrase. In structured search the name must forcably in - // the first phrase. In unstructured search it may be in a later - // phrase when the first phrase is a house number. - if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) { - if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) - && $oSearchTerm->iTermCount > 1 - ) { - $oSearch = clone $this; - $oSearch->iNamePhrase = -1; - $oSearch->iSearchRank += 1; - $oSearch->aAddress[$iWordID] = $iWordID; - $aNewSearches[] = $oSearch; - } - } elseif (empty($this->aNameNonSearch)) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - $oSearch->aName = array($iWordID => $iWordID); - if (CONST_Search_NameOnlySearchFrequencyThreshold) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } - $aNewSearches[] = $oSearch; - } - } elseif (!$oPosition->isPhrase('country') - && is_a($oSearchTerm, '\Nominatim\Token\Partial') - && strpos($sToken, ' ') === false - ) { - $aNewSearches = $this->extendWithPartialTerm( - $sToken, - $oSearchTerm, - $oPosition - ); - } + /** + * Check if a country restriction is currently included in the search. + * + * @return bool True, if a country restriction is set. + */ + public function hasCountry() + { + return $this->sCountryCode !== ''; + } + + /** + * Check if a postcode is currently included in the search. + * + * @return bool True, if a postcode is set. + */ + public function hasPostcode() + { + return $this->sPostcode !== ''; + } - return $aNewSearches; + /** + * Check if a house number is set for the search. + * + * @return bool True, if a house number is set. + */ + public function hasHousenumber() + { + return $this->sHouseNumber !== ''; } /** - * Derive new searches by adding a partial term to the existing search. + * Check if a special type of place is requested. * - * @param string $sToken Term for the token. - * @param object $oSearchTerm Description of the token. - * @param object $oPosition Description of the token position within - the query. + * param integer iOperator When set, check for the particular + * operator used for the special type. * - * @return SearchDescription[] List of derived search descriptions. + * @return bool True, if speial type is requested or, if requested, + * a special type with the given operator. */ - private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition) + public function hasOperator($iOperator = null) { - $aNewSearches = array(); - $iWordID = $oSearchTerm->iId; + return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator; + } - if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase()) - && (!empty($this->aName)) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (preg_match('#^[0-9 ]+$#', $sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - $oSearch->aAddress[$iWordID] = $iWordID; - } else { - $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - } - $aNewSearches[] = $oSearch; + /** + * Add the given token to the list of terms to search for in the address. + * + * @param integer iID ID of term to add. + * @param bool bSearchable Term should be used to search for result + * (i.e. term is not a stop word). + */ + public function addAddressToken($iId, $bSearchable = true) + { + if ($bSearchable) { + $this->aAddress[$iId] = $iId; + } else { + $this->aAddressNonSearch[$iId] = $iId; } + } - if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch) - && ((empty($this->aName) && empty($this->aNameNonSearch)) - || $this->iNamePhrase == $oPosition->getPhrase()) - ) { - $oSearch = clone $this; - $oSearch->iSearchRank++; - if (empty($this->aName) && empty($this->aNameNonSearch)) { - $oSearch->iSearchRank++; - } - if (preg_match('#^[0-9 ]+$#', $sToken)) { - $oSearch->iSearchRank++; - } - if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { - if (empty($this->aName) - && CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $oSearch->bRareName = - $oSearchTerm->iSearchNameCount - < CONST_Search_NameOnlySearchFrequencyThreshold; - } else { - $oSearch->bRareName = false; - } - $oSearch->aName[$iWordID] = $iWordID; - } else { - $oSearch->aNameNonSearch[$iWordID] = $iWordID; - } - $oSearch->iNamePhrase = $oPosition->getPhrase(); - $aNewSearches[] = $oSearch; + /** + * Add the given full-word token to the list of terms to search for in the + * name. + * + * @param interger iId ID of term to add. + * @param bool bRareName True if the term is infrequent enough to not + * require other constraints for efficient search. + */ + public function addNameToken($iId, $bRareName) + { + $this->aName[$iId] = $iId; + $this->bRareName = $bRareName; + $this->bNameNeedsAddress = false; + } + + /** + * Add the given partial token to the list of terms to search for in + * the name. + * + * @param integer iID ID of term to add. + * @param bool bSearchable Term should be used to search for result + * (i.e. term is not a stop word). + * @param bool bNeedsAddress True if the term is too unspecific to be used + * in a stand-alone search without an address + * to narrow down the search. + * @param integer iPhraseNumber Index of phrase, where the partial term + * appears. + */ + public function addPartialNameToken($iId, $bSearchable, $bNeedsAddress, $iPhraseNumber) + { + if (empty($this->aName)) { + $this->bNameNeedsAddress = $bNeedsAddress; + } else { + $this->bNameNeedsAddress &= $bNeedsAddress; + } + if ($bSearchable) { + $this->aName[$iId] = $iId; + } else { + $this->aNameNonSearch[$iId] = $iId; } + $this->iNamePhrase = $iPhraseNumber; + } + + /** + * Set country restriction for the search. + * + * @param string sCountryCode Country code of country to restrict search to. + */ + public function setCountry($sCountryCode) + { + $this->sCountryCode = $sCountryCode; + $this->iNamePhrase = -1; + } + + /** + * Set postcode search constraint. + * + * @param string sPostcode Postcode the result should have. + */ + public function setPostcode($sPostcode) + { + $this->sPostcode = $sPostcode; + $this->iNamePhrase = -1; + } + + /** + * Make this search a search for a postcode object. + * + * @param integer iId Token Id for the postcode. + * @param string sPostcode Postcode to look for. + */ + public function setPostcodeAsName($iId, $sPostcode) + { + $this->iOperator = Operator::POSTCODE; + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->aName = array($iId => $sPostcode); + $this->bRareName = true; + $this->iNamePhrase = -1; + } + + /** + * Set house number search cnstraint. + * + * @param string sNumber House number the result should have. + */ + public function setHousenumber($sNumber) + { + $this->sHouseNumber = $sNumber; + $this->iNamePhrase = -1; + } + + /** + * Make this search a search for a house number. + * + * @param integer iId Token Id for the house number. + */ + public function setHousenumberAsName($iId) + { + $this->aAddress = array_merge($this->aAddress, $this->aName); + $this->bRareName = false; + $this->bNameNeedsAddress = true; + $this->aName = array($iId => $iId); + $this->iNamePhrase = -1; + } + + /** + * Make this search a POI search. + * + * In a POI search, objects are not (only) searched by their name + * but also by the primary OSM key/value pair (class and type in Nominatim). + * + * @param integer $iOperator Type of POI search + * @param string $sClass Class (or OSM tag key) of POI. + * @param string $sType Type (or OSM tag value) of POI. + * + * @return void + */ + public function setPoiSearch($iOperator, $sClass, $sType) + { + $this->iOperator = $iOperator; + $this->sClass = $sClass; + $this->sType = $sType; + $this->iNamePhrase = -1; + } + + public function getNamePhrase() + { + return $this->iNamePhrase; + } - return $aNewSearches; + /** + * Get the global search context. + * + * @return object Objects of global search constraints. + */ + public function getContext() + { + return $this->oContext; } /////////// Query functions @@ -608,32 +581,40 @@ class SearchDescription // Sort by existence of the requested house number but only if not // too many results are expected for the street, i.e. if the result - // will be narrowed down by an address. Remeber that with ordering + // will be narrowed down by an address. Remember that with ordering // every single result has to be checked. if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) { - $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; - $aOrder[] = ' ('; - $aOrder[0] .= 'EXISTS('; - $aOrder[0] .= ' SELECT place_id'; - $aOrder[0] .= ' FROM placex'; - $aOrder[0] .= ' WHERE parent_place_id = search_name.place_id'; - $aOrder[0] .= " AND housenumber ~* E'".$sHouseNumberRegex."'"; - $aOrder[0] .= ' LIMIT 1'; - $aOrder[0] .= ') '; - // also housenumbers from interpolation lines table are needed - if (preg_match('/[0-9]+/', $this->sHouseNumber)) { - $iHouseNumber = intval($this->sHouseNumber); - $aOrder[0] .= 'OR EXISTS('; - $aOrder[0] .= ' SELECT place_id '; - $aOrder[0] .= ' FROM location_property_osmline '; - $aOrder[0] .= ' WHERE parent_place_id = search_name.place_id'; - $aOrder[0] .= ' AND startnumber is not NULL'; - $aOrder[0] .= ' AND '.$iHouseNumber.'>=startnumber '; - $aOrder[0] .= ' AND '.$iHouseNumber.'<=endnumber '; - $aOrder[0] .= ' LIMIT 1'; - $aOrder[0] .= ')'; + $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M'); + + // Housenumbers on streets and places. + $sChildHnr = 'SELECT * FROM placex WHERE parent_place_id = search_name.place_id'; + $sChildHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex; + // Interpolations on streets and places. + if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) { + $sIpolHnr = 'WHERE parent_place_id = search_name.place_id '; + $sIpolHnr .= ' AND startnumber is not NULL'; + $sIpolHnr .= ' AND '.$this->sHouseNumber.'>=startnumber '; + $sIpolHnr .= ' AND '.$this->sHouseNumber.'<=endnumber '; + } else { + $sIpolHnr = false; + } + // Housenumbers on the object iteself for unlisted places. + $sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id'; + $sSelfHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex; + + $sSql = '(CASE WHEN address_rank = 30 THEN EXISTS('.$sSelfHnr.') '; + $sSql .= ' ELSE EXISTS('.$sChildHnr.') '; + if ($sIpolHnr) { + $sSql .= 'OR EXISTS(SELECT * FROM location_property_osmline '.$sIpolHnr.') '; + if (CONST_Use_US_Tiger_Data) { + $sSql .= "OR (country_code = 'us' AND "; + $sSql .= ' EXISTS(SELECT * FROM location_property_tiger '.$sIpolHnr.')) '; + } } - $aOrder[0] .= ') DESC'; + $sSql .= 'END) DESC'; + + + $aOrder[] = $sSql; } if (!empty($this->aName)) { @@ -666,7 +647,7 @@ class SearchDescription $aOrder[] = $this->oContext->distanceSQL('centroid'); } elseif ($this->sPostcode) { if (empty($this->aAddress)) { - $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.1))"; + $aTerms[] = "EXISTS(SELECT place_id FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."' AND ST_DWithin(search_name.centroid, p.geometry, 0.12))"; } else { $aOrder[] = "(SELECT min(ST_Distance(search_name.centroid, p.geometry)) FROM location_postcode p WHERE p.postcode = '".$this->sPostcode."')"; } @@ -761,9 +742,9 @@ class SearchDescription return $aResults; } - $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; + $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M'); $sSQL = 'SELECT place_id FROM placex WHERE'; - $sSQL .= " housenumber ~* E'".$sHouseNumberRegex."'"; + $sSQL .= ' housenumber ~* E'.$sHouseNumberRegex; $sSQL .= ' AND ('.join(' OR ', $aIDCondition).')'; $sSQL .= $this->oContext->excludeSQL(' AND place_id');