X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/2613ebfa0143cdd1e3220577d48d9aa8341dbf38..de45152028125edfb7467202c3a5edb87f46659f:/lib/SearchDescription.php diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index 6345f50f..4fafbec2 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -17,6 +17,8 @@ class SearchDescription private $sCountryCode = ''; /// List of word ids making up the name of the object. private $aName = array(); + /// True if the name is rare enough to force index use on name. + private $bRareName = false; /// List of word ids making up the address of the object. private $aAddress = array(); /// Subset of word ids of full words making up the address. @@ -164,30 +166,29 @@ class SearchDescription /** * Derive new searches by adding a full term to the existing search. * - * @param mixed[] $aSearchTerm Description of the token. - * @param bool $bHasPartial True if there are also tokens of partial terms - * with the same name. - * @param string $sPhraseType Type of phrase the token is contained in. - * @param bool $bFirstToken True if the token is at the beginning of the - * query. - * @param bool $bFirstPhrase True if the token is in the first phrase of - * the query. - * @param bool $bLastToken True if the token is at the end of the query. + * @param object $oSearchTerm Description of the token. + * @param bool $bHasPartial True if there are also tokens of partial terms + * with the same name. + * @param string $sPhraseType Type of phrase the token is contained in. + * @param bool $bFirstToken True if the token is at the beginning of the + * query. + * @param bool $bFirstPhrase True if the token is in the first phrase of + * the query. + * @param bool $bLastToken True if the token is at the end of the query. * * @return SearchDescription[] List of derived search descriptions. */ - public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) + public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken) { $aNewSearches = array(); if (($sPhraseType == '' || $sPhraseType == 'country') - && !empty($aSearchTerm['country_code']) - && $aSearchTerm['country_code'] != '0' + && is_a($oSearchTerm, '\Nominatim\Token\Country') ) { if (!$this->sCountryCode) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sCountryCode = $aSearchTerm['country_code']; + $oSearch->sCountryCode = $oSearchTerm->sCountryCode; // Country is almost always at the end of the string // - increase score for finding it anywhere else (optimisation) if (!$bLastToken) { @@ -196,15 +197,12 @@ class SearchDescription $aNewSearches[] = $oSearch; } } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') - && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode' + && is_a($oSearchTerm, '\Nominatim\Token\Postcode') ) { // We need to try the case where the postal code is the primary element // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) // so try both. - if (!$this->sPostcode - && $aSearchTerm['word'] - && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word'] - ) { + if (!$this->sPostcode) { // If we have structured search or this is the first term, // make the postcode the primary search element. if ($this->iOperator == Operator::NONE @@ -215,7 +213,7 @@ class SearchDescription $oSearch->iOperator = Operator::POSTCODE; $oSearch->aAddress = array_merge($this->aAddress, $this->aName); $oSearch->aName = - array($aSearchTerm['word_id'] => $aSearchTerm['word']); + array($oSearchTerm->iId => $oSearchTerm->sPostcode); $aNewSearches[] = $oSearch; } @@ -226,23 +224,24 @@ class SearchDescription ) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sPostcode = $aSearchTerm['word']; + $oSearch->sPostcode = $oSearchTerm->sPostcode; $aNewSearches[] = $oSearch; } } } elseif (($sPhraseType == '' || $sPhraseType == 'street') - && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house' + && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber') ) { if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->sHouseNumber = trim($aSearchTerm['word_token']); + $oSearch->sHouseNumber = $oSearchTerm->sToken; // sanity check: if the housenumber is not mainly made // up of numbers, add a penalty - if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) { + if (preg_match('/\\d/', $oSearch->sHouseNumber) === 0 + || preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) { $oSearch->iSearchRank++; } - if (!isset($aSearchTerm['word_id'])) { + if (empty($oSearchTerm->iId)) { $oSearch->iSearchRank++; } // also must not appear in the middle of the address @@ -254,27 +253,34 @@ class SearchDescription } $aNewSearches[] = $oSearch; } - } elseif ($sPhraseType == '' && $aSearchTerm['class']) { + } elseif ($sPhraseType == '' + && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm') + ) { if ($this->iOperator == Operator::NONE) { $oSearch = clone $this; $oSearch->iSearchRank++; - $iOp = Operator::NEAR; // near == in for the moment - if ($aSearchTerm['operator'] == '') { + $iOp = $oSearchTerm->iOperator; + if ($iOp == Operator::NONE) { if (!empty($this->aName) || $this->oContext->isBoundedSearch()) { $iOp = Operator::NAME; + } else { + $iOp = Operator::NEAR; } $oSearch->iSearchRank += 2; } - $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']); + $oSearch->setPoiSearch( + $iOp, + $oSearchTerm->sClass, + $oSearchTerm->sType + ); $aNewSearches[] = $oSearch; } - } elseif (isset($aSearchTerm['word_id']) - && $aSearchTerm['word_id'] - && $sPhraseType != 'country' + } elseif ($sPhraseType != 'country' + && is_a($oSearchTerm, '\Nominatim\Token\Word') ) { - $iWordID = $aSearchTerm['word_id']; + $iWordID = $oSearchTerm->iId; // Full words can only be a name if they appear at the beginning // of the phrase. In structured search the name must forcably in // the first phrase. In unstructured search it may be in a later @@ -282,7 +288,7 @@ class SearchDescription if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 2; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { @@ -292,6 +298,11 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aName = array($iWordID => $iWordID); + if (CONST_Search_NameOnlySearchFrequencyThreshold) { + $oSearch->bRareName = + $oSearchTerm->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold; + } $aNewSearches[] = $oSearch; } } @@ -302,7 +313,8 @@ class SearchDescription /** * Derive new searches by adding a partial term to the existing search. * - * @param mixed[] $aSearchTerm Description of the token. + * @param string $sToken Term for the token. + * @param object $oSearchTerm Description of the token. * @param bool $bStructuredPhrases True if the search is structured. * @param integer $iPhrase Number of the phrase the token is in. * @param array[] $aFullTokens List of full term tokens with the @@ -310,21 +322,21 @@ class SearchDescription * * @return SearchDescription[] List of derived search descriptions. */ - public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) + public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens) { // Only allow name terms. - if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) { + if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) { return array(); } $aNewSearches = array(); - $iWordID = $aSearchTerm['word_id']; + $iWordID = $oSearchTerm->iId; if ((!$bStructuredPhrases || $iPhrase > 0) && (!empty($this->aName)) - && strpos($aSearchTerm['word_token'], ' ') === false + && strpos($sToken, ' ') === false ) { - if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { + if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { $oSearch = clone $this; $oSearch->iSearchRank += 2; $oSearch->aAddress[$iWordID] = $iWordID; @@ -333,7 +345,7 @@ class SearchDescription $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + if (preg_match('#^[0-9]+$#', $sToken)) { $oSearch->iSearchRank += 2; } if (!empty($aFullTokens)) { @@ -342,14 +354,12 @@ class SearchDescription $aNewSearches[] = $oSearch; // revert to the token version? - foreach ($aFullTokens as $aSearchTermToken) { - if (empty($aSearchTermToken['country_code']) - && empty($aSearchTermToken['lat']) - && empty($aSearchTermToken['class']) - ) { + foreach ($aFullTokens as $oSearchTermToken) { + if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) { $oSearch = clone $this; $oSearch->iSearchRank++; - $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + $oSearch->aAddress[$oSearchTermToken->iId] + = $oSearchTermToken->iId; $aNewSearches[] = $oSearch; } } @@ -364,10 +374,19 @@ class SearchDescription if (empty($this->aName)) { $oSearch->iSearchRank += 1; } - if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) { + if (preg_match('#^[0-9]+$#', $sToken)) { $oSearch->iSearchRank += 2; } - if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) { + if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { + if (empty($this->aName) + && CONST_Search_NameOnlySearchFrequencyThreshold + ) { + $oSearch->bRareName = + $oSearchTerm->iSearchNameCount + < CONST_Search_NameOnlySearchFrequencyThreshold; + } else { + $oSearch->bRareName = false; + } $oSearch->aName[$iWordID] = $iWordID; } else { $oSearch->aNameNonSearch[$iWordID] = $iWordID; @@ -385,20 +404,16 @@ class SearchDescription /** * Query database for places that match this search. * - * @param object $oDB Database connection to use. - * @param mixed[] $aWordFrequencyScores Number of times tokens appears - * overall in a planet database. - * @param integer $iMinRank Minimum address rank to restrict - * search to. - * @param integer $iMaxRank Maximum address rank to restrict - * search to. - * @param integer $iLimit Maximum number of results. + * @param object $oDB Nominatim::DB instance to use. + * @param integer $iMinRank Minimum address rank to restrict search to. + * @param integer $iMaxRank Maximum address rank to restrict search to. + * @param integer $iLimit Maximum number of results. * * @return mixed[] An array with two fields: IDs contains the list of * matching place IDs and houseNumber the houseNumber * if appicable or -1 if not. */ - public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit) + public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit) { $aResults = array(); $iHousenumber = -1; @@ -427,19 +442,25 @@ class SearchDescription // First search for places according to name and address. $aResults = $this->queryNamedPlace( $oDB, - $aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit ); - //now search for housenumber, if housenumber provided - if ($this->sHouseNumber && !empty($aResults)) { - $aNamedPlaceIDs = $aResults; - $aResults = $this->queryHouseNumber($oDB, $aNamedPlaceIDs); + // Now search for housenumber, if housenumber provided. Can be zero. + if (($this->sHouseNumber || $this->sHouseNumber === '0') && !empty($aResults)) { + // Downgrade the rank of the street results, they are missing + // the housenumber. + foreach ($aResults as $oRes) { + $oRes->iResultRank++; + } + + $aHnResults = $this->queryHouseNumber($oDB, $aResults); - if (empty($aResults) && $this->looksLikeFullAddress()) { - $aResults = $aNamedPlaceIDs; + if (!empty($aHnResults)) { + foreach ($aHnResults as $oRes) { + $aResults[$oRes->iId] = $oRes; + } } } @@ -456,16 +477,13 @@ class SearchDescription if ($sPlaceIds) { $sSQL = 'SELECT place_id FROM placex'; $sSQL .= ' WHERE place_id in ('.$sPlaceIds.')'; - $sSQL .= " AND postcode = '".$this->sPostcode."'"; + $sSQL .= " AND postcode != '".$this->sPostcode."'"; Debug::printSQL($sSQL); - $aFilteredPlaceIDs = chksql($oDB->getCol($sSQL)); + $aFilteredPlaceIDs = $oDB->getCol($sSQL); if ($aFilteredPlaceIDs) { - $aNewResults = array(); foreach ($aFilteredPlaceIDs as $iPlaceId) { - $aNewResults[$iPlaceId] = $aResults[$iPlaceId]; + $aResults[$iPlaceId]->iResultRank++; } - $aResults = $aNewResults; - Debug::printVar('Place IDs after postcode filtering', $aResults); } } } @@ -486,8 +504,10 @@ class SearchDescription Debug::printSQL($sSQL); + $iPlaceId = $oDB->getOne($sSQL); + $aResults = array(); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + if ($iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId); } @@ -503,8 +523,7 @@ class SearchDescription $aDBResults = array(); $sPoiTable = $this->poiTable(); - $sSQL = 'SELECT count(*) FROM pg_tables WHERE tablename = \''.$sPoiTable."'"; - if (chksql($oDB->getOne($sSQL))) { + if ($oDB->tableExists($sPoiTable)) { $sSQL = 'SELECT place_id FROM '.$sPoiTable.' ct'; if ($this->oContext->sqlCountryList) { $sSQL .= ' JOIN placex USING (place_id)'; @@ -524,14 +543,14 @@ class SearchDescription } elseif ($this->oContext->hasNearPoint()) { $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('ct.centroid').' ASC'; } - $sSQL .= " limit $iLimit"; + $sSQL .= " LIMIT $iLimit"; Debug::printSQL($sSQL); - $aDBResults = chksql($oDB->getCol($sSQL)); + $aDBResults = $oDB->getCol($sSQL); } if ($this->oContext->hasNearPoint()) { $sSQL = 'SELECT place_id FROM placex WHERE '; - $sSQL .= 'class=\''.$this->sClass."' and type='".$this->sType."'"; + $sSQL .= 'class = :class and type = :type'; $sSQL .= ' AND '.$this->oContext->withinSQL('geometry'); $sSQL .= ' AND linked_place_id is null'; if ($this->oContext->sqlCountryList) { @@ -540,7 +559,10 @@ class SearchDescription $sSQL .= ' ORDER BY '.$this->oContext->distanceSQL('centroid').' ASC'; $sSQL .= " LIMIT $iLimit"; Debug::printSQL($sSQL); - $aDBResults = chksql($oDB->getCol($sSQL)); + $aDBResults = $oDB->getCol( + $sSQL, + array(':class' => $this->sClass, ':type' => $this->sType) + ); } $aResults = array(); @@ -559,32 +581,39 @@ class SearchDescription $sSQL .= ', search_name s '; $sSQL .= 'WHERE s.place_id = p.parent_place_id '; $sSQL .= 'AND array_cat(s.nameaddress_vector, s.name_vector)'; - $sSQL .= ' @> '.getArraySQL($this->aAddress).' AND '; + $sSQL .= ' @> '.$oDB->getArraySQL($this->aAddress).' AND '; } else { $sSQL .= 'WHERE '; } $sSQL .= "p.postcode = '".reset($this->aName)."'"; $sSQL .= $this->countryCodeSQL(' AND p.country_code'); + if ($this->oContext->bViewboxBounded) { + $sSQL .= ' AND ST_Intersects('.$this->oContext->sqlViewboxSmall.', geometry)'; + } $sSQL .= $this->oContext->excludeSQL(' AND p.place_id'); $sSQL .= " LIMIT $iLimit"; Debug::printSQL($sSQL); $aResults = array(); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_POSTCODE); } return $aResults; } - private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit) + private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit) { $aTerms = array(); $aOrder = array(); - if ($this->sHouseNumber && !empty($this->aAddress)) { + // Sort by existence of the requested house number but only if not + // too many results are expected for the street, i.e. if the result + // will be narrowed down by an address. Remeber that with ordering + // every single result has to be checked. + if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) { $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; $aOrder[] = ' ('; $aOrder[0] .= 'EXISTS('; @@ -611,18 +640,14 @@ class SearchDescription } if (!empty($this->aName)) { - $aTerms[] = 'name_vector @> '.getArraySQL($this->aName); + $aTerms[] = 'name_vector @> '.$oDB->getArraySQL($this->aName); } if (!empty($this->aAddress)) { // For infrequent name terms disable index usage for address - if (CONST_Search_NameOnlySearchFrequencyThreshold - && count($this->aName) == 1 - && $aWordFrequencyScores[$this->aName[reset($this->aName)]] - < CONST_Search_NameOnlySearchFrequencyThreshold - ) { - $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress); + if ($this->bRareName) { + $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.$oDB->getArraySQL($this->aAddress); } else { - $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress); + $aTerms[] = 'nameaddress_vector @> '.$oDB->getArraySQL($this->aAddress); } } @@ -677,7 +702,7 @@ class SearchDescription if (!empty($this->aFullNameAddress)) { $sExactMatchSQL = ' ( '; $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest('.getArraySQL($this->aFullNameAddress).')'; + $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')'; $sExactMatchSQL .= ' INTERSECT '; $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; $sExactMatchSQL .= ' ) s'; @@ -688,7 +713,7 @@ class SearchDescription } if ($this->sHouseNumber || $this->sClass) { - $iLimit = 20; + $iLimit = 40; } $aResults = array(); @@ -702,10 +727,7 @@ class SearchDescription Debug::printSQL($sSQL); - $aDBResults = chksql( - $oDB->getAll($sSQL), - 'Could not get places for search terms.' - ); + $aDBResults = $oDB->getAll($sSQL, null, 'Could not get places for search terms.'); foreach ($aDBResults as $aResult) { $oResult = new Result($aResult['place_id']); @@ -735,7 +757,7 @@ class SearchDescription Debug::printSQL($sSQL); // XXX should inherit the exactMatches from its parent - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId); } @@ -761,7 +783,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $oResult = new Result($iPlaceId, Result::TABLE_OSMLINE); $oResult->iHouseNumber = $iHousenumber; $aResults[$iPlaceId] = $oResult; @@ -777,7 +799,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId, Result::TABLE_AUX); } } @@ -798,7 +820,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $oResult = new Result($iPlaceId, Result::TABLE_TIGER); $oResult->iHouseNumber = $iHousenumber; $aResults[$iPlaceId] = $oResult; @@ -832,7 +854,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId); } } @@ -840,12 +862,11 @@ class SearchDescription // NEAR and IN are handled the same if ($this->iOperator == Operator::TYPE || $this->iOperator == Operator::NEAR) { $sClassTable = $this->poiTable(); - $sSQL = "SELECT count(*) FROM pg_tables WHERE tablename = '$sClassTable'"; - $bCacheTable = (bool) chksql($oDB->getOne($sSQL)); + $bCacheTable = $oDB->tableExists($sClassTable); $sSQL = "SELECT min(rank_search) FROM placex WHERE place_id in ($sPlaceIDs)"; Debug::printSQL($sSQL); - $iMaxRank = (int)chksql($oDB->getOne($sSQL)); + $iMaxRank = (int) $oDB->getOne($sSQL); // For state / country level searches the normal radius search doesn't work very well $sPlaceGeom = false; @@ -858,7 +879,7 @@ class SearchDescription $sSQL .= ' ORDER BY rank_search ASC '; $sSQL .= ' LIMIT 1'; Debug::printSQL($sSQL); - $sPlaceGeom = chksql($oDB->getOne($sSQL)); + $sPlaceGeom = $oDB->getOne($sSQL); } if ($sPlaceGeom) { @@ -868,7 +889,7 @@ class SearchDescription $sSQL = 'SELECT place_id FROM placex'; $sSQL .= " WHERE place_id in ($sPlaceIDs) and rank_search < $iMaxRank"; Debug::printSQL($sSQL); - $aPlaceIDs = chksql($oDB->getCol($sSQL)); + $aPlaceIDs = $oDB->getCol($sSQL); $sPlaceIDs = join(',', $aPlaceIDs); } @@ -914,7 +935,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId); } } else { @@ -946,7 +967,7 @@ class SearchDescription Debug::printSQL($sSQL); - foreach (chksql($oDB->getCol($sSQL)) as $iPlaceId) { + foreach ($oDB->getCol($sSQL) as $iPlaceId) { $aResults[$iPlaceId] = new Result($iPlaceId); } }