]> git.openstreetmap.org Git - nominatim.git/blobdiff - lib/SearchDescription.php
address tokens get a double search rank also as full terms
[nominatim.git] / lib / SearchDescription.php
index 6345f50fc5aa570c06ccb1099e1544ad11bb88e2..35424b529244c9071a89d808a2419e3dba00f0ab 100644 (file)
@@ -17,6 +17,8 @@ class SearchDescription
     private $sCountryCode = '';
     /// List of word ids making up the name of the object.
     private $aName = array();
     private $sCountryCode = '';
     /// List of word ids making up the name of the object.
     private $aName = array();
+    /// True if the name is rare enough to force index use on name.
+    private $bRareName = false;
     /// List of word ids making up the address of the object.
     private $aAddress = array();
     /// Subset of word ids of full words making up the address.
     /// List of word ids making up the address of the object.
     private $aAddress = array();
     /// Subset of word ids of full words making up the address.
@@ -164,30 +166,29 @@ class SearchDescription
     /**
      * Derive new searches by adding a full term to the existing search.
      *
     /**
      * Derive new searches by adding a full term to the existing search.
      *
-     * @param mixed[] $aSearchTerm  Description of the token.
-     * @param bool    $bHasPartial  True if there are also tokens of partial terms
-     *                              with the same name.
-     * @param string  $sPhraseType  Type of phrase the token is contained in.
-     * @param bool    $bFirstToken  True if the token is at the beginning of the
-     *                              query.
-     * @param bool    $bFirstPhrase True if the token is in the first phrase of
-     *                              the query.
-     * @param bool    $bLastToken   True if the token is at the end of the query.
+     * @param object $oSearchTerm  Description of the token.
+     * @param bool   $bHasPartial  True if there are also tokens of partial terms
+     *                             with the same name.
+     * @param string $sPhraseType  Type of phrase the token is contained in.
+     * @param bool   $bFirstToken  True if the token is at the beginning of the
+     *                             query.
+     * @param bool   $bFirstPhrase True if the token is in the first phrase of
+     *                             the query.
+     * @param bool   $bLastToken   True if the token is at the end of the query.
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+    public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
     {
         $aNewSearches = array();
 
         if (($sPhraseType == '' || $sPhraseType == 'country')
     {
         $aNewSearches = array();
 
         if (($sPhraseType == '' || $sPhraseType == 'country')
-            && !empty($aSearchTerm['country_code'])
-            && $aSearchTerm['country_code'] != '0'
+            && is_a($oSearchTerm, '\Nominatim\Token\Country')
         ) {
             if (!$this->sCountryCode) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
         ) {
             if (!$this->sCountryCode) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
-                $oSearch->sCountryCode = $aSearchTerm['country_code'];
+                $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
                 // Country is almost always at the end of the string
                 // - increase score for finding it anywhere else (optimisation)
                 if (!$bLastToken) {
                 // Country is almost always at the end of the string
                 // - increase score for finding it anywhere else (optimisation)
                 if (!$bLastToken) {
@@ -196,15 +197,12 @@ class SearchDescription
                 $aNewSearches[] = $oSearch;
             }
         } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
                 $aNewSearches[] = $oSearch;
             }
         } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
-                  && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode'
+                  && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
         ) {
             // We need to try the case where the postal code is the primary element
             // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
             // so try both.
         ) {
             // We need to try the case where the postal code is the primary element
             // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
             // so try both.
-            if (!$this->sPostcode
-                && $aSearchTerm['word']
-                && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word']
-            ) {
+            if (!$this->sPostcode) {
                 // If we have structured search or this is the first term,
                 // make the postcode the primary search element.
                 if ($this->iOperator == Operator::NONE
                 // If we have structured search or this is the first term,
                 // make the postcode the primary search element.
                 if ($this->iOperator == Operator::NONE
@@ -215,7 +213,7 @@ class SearchDescription
                     $oSearch->iOperator = Operator::POSTCODE;
                     $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
                     $oSearch->aName =
                     $oSearch->iOperator = Operator::POSTCODE;
                     $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
                     $oSearch->aName =
-                        array($aSearchTerm['word_id'] => $aSearchTerm['word']);
+                        array($oSearchTerm->iId => $oSearchTerm->sPostcode);
                     $aNewSearches[] = $oSearch;
                 }
 
                     $aNewSearches[] = $oSearch;
                 }
 
@@ -226,23 +224,23 @@ class SearchDescription
                 ) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
                 ) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
-                    $oSearch->sPostcode = $aSearchTerm['word'];
+                    $oSearch->sPostcode = $oSearchTerm->sPostcode;
                     $aNewSearches[] = $oSearch;
                 }
             }
         } elseif (($sPhraseType == '' || $sPhraseType == 'street')
                     $aNewSearches[] = $oSearch;
                 }
             }
         } elseif (($sPhraseType == '' || $sPhraseType == 'street')
-                 && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house'
+                 && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
         ) {
             if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
         ) {
             if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
-                $oSearch->sHouseNumber = trim($aSearchTerm['word_token']);
+                $oSearch->sHouseNumber = $oSearchTerm->sToken;
                 // sanity check: if the housenumber is not mainly made
                 // up of numbers, add a penalty
                 if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
                     $oSearch->iSearchRank++;
                 }
                 // sanity check: if the housenumber is not mainly made
                 // up of numbers, add a penalty
                 if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
                     $oSearch->iSearchRank++;
                 }
-                if (!isset($aSearchTerm['word_id'])) {
+                if (empty($oSearchTerm->iId)) {
                     $oSearch->iSearchRank++;
                 }
                 // also must not appear in the middle of the address
                     $oSearch->iSearchRank++;
                 }
                 // also must not appear in the middle of the address
@@ -254,27 +252,34 @@ class SearchDescription
                 }
                 $aNewSearches[] = $oSearch;
             }
                 }
                 $aNewSearches[] = $oSearch;
             }
-        } elseif ($sPhraseType == '' && $aSearchTerm['class']) {
+        } elseif ($sPhraseType == ''
+                  && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
+        ) {
             if ($this->iOperator == Operator::NONE) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
 
             if ($this->iOperator == Operator::NONE) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
 
-                $iOp = Operator::NEAR; // near == in for the moment
-                if ($aSearchTerm['operator'] == '') {
+                $iOp = $oSearchTerm->iOperator;
+                if ($iOp == Operator::NONE) {
                     if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
                         $iOp = Operator::NAME;
                     if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
                         $iOp = Operator::NAME;
+                    } else {
+                        $iOp = Operator::NEAR;
                     }
                     $oSearch->iSearchRank += 2;
                 }
 
                     }
                     $oSearch->iSearchRank += 2;
                 }
 
-                $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']);
+                $oSearch->setPoiSearch(
+                    $iOp,
+                    $oSearchTerm->sClass,
+                    $oSearchTerm->sType
+                );
                 $aNewSearches[] = $oSearch;
             }
                 $aNewSearches[] = $oSearch;
             }
-        } elseif (isset($aSearchTerm['word_id'])
-                  && $aSearchTerm['word_id']
-                  && $sPhraseType != 'country'
+        } elseif ($sPhraseType != 'country'
+                  && is_a($oSearchTerm, '\Nominatim\Token\Word')
         ) {
         ) {
-            $iWordID = $aSearchTerm['word_id'];
+            $iWordID = $oSearchTerm->iId;
             // Full words can only be a name if they appear at the beginning
             // of the phrase. In structured search the name must necessarily be in
             // the first phrase. In unstructured search it may be in a later
             // Full words can only be a name if they appear at the beginning
             // of the phrase. In structured search the name must necessarily be in
             // the first phrase. In unstructured search it may be in a later
@@ -282,7 +287,7 @@ class SearchDescription
             if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
                 if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
                     $oSearch = clone $this;
             if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
                 if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
                     $oSearch = clone $this;
-                    $oSearch->iSearchRank++;
+                    $oSearch->iSearchRank += 2;
                     $oSearch->aAddress[$iWordID] = $iWordID;
                     $aNewSearches[] = $oSearch;
                 } else {
                     $oSearch->aAddress[$iWordID] = $iWordID;
                     $aNewSearches[] = $oSearch;
                 } else {
@@ -292,6 +297,11 @@ class SearchDescription
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aName = array($iWordID => $iWordID);
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aName = array($iWordID => $iWordID);
+                if (CONST_Search_NameOnlySearchFrequencyThreshold) {
+                    $oSearch->bRareName =
+                        $oSearchTerm->iSearchNameCount
+                          < CONST_Search_NameOnlySearchFrequencyThreshold;
+                }
                 $aNewSearches[] = $oSearch;
             }
         }
                 $aNewSearches[] = $oSearch;
             }
         }
@@ -302,7 +312,8 @@ class SearchDescription
     /**
      * Derive new searches by adding a partial term to the existing search.
      *
     /**
      * Derive new searches by adding a partial term to the existing search.
      *
-     * @param mixed[] $aSearchTerm        Description of the token.
+     * @param string  $sToken             Term for the token.
+     * @param object  $oSearchTerm        Description of the token.
      * @param bool    $bStructuredPhrases True if the search is structured.
      * @param integer $iPhrase            Number of the phrase the token is in.
      * @param array[] $aFullTokens        List of full term tokens with the
      * @param bool    $bStructuredPhrases True if the search is structured.
      * @param integer $iPhrase            Number of the phrase the token is in.
      * @param array[] $aFullTokens        List of full term tokens with the
@@ -310,21 +321,21 @@ class SearchDescription
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+    public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
     {
         // Only allow name terms.
     {
         // Only allow name terms.
-        if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) {
+        if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
             return array();
         }
 
         $aNewSearches = array();
             return array();
         }
 
         $aNewSearches = array();
-        $iWordID = $aSearchTerm['word_id'];
+        $iWordID = $oSearchTerm->iId;
 
         if ((!$bStructuredPhrases || $iPhrase > 0)
             && (!empty($this->aName))
 
         if ((!$bStructuredPhrases || $iPhrase > 0)
             && (!empty($this->aName))
-            && strpos($aSearchTerm['word_token'], ' ') === false
+            && strpos($sToken, ' ') === false
         ) {
         ) {
-            if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank += 2;
                 $oSearch->aAddress[$iWordID] = $iWordID;
                 $oSearch = clone $this;
                 $oSearch->iSearchRank += 2;
                 $oSearch->aAddress[$iWordID] = $iWordID;
@@ -333,7 +344,7 @@ class SearchDescription
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-                if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+                if (preg_match('#^[0-9]+$#', $sToken)) {
                     $oSearch->iSearchRank += 2;
                 }
                 if (!empty($aFullTokens)) {
                     $oSearch->iSearchRank += 2;
                 }
                 if (!empty($aFullTokens)) {
@@ -342,14 +353,12 @@ class SearchDescription
                 $aNewSearches[] = $oSearch;
 
                 // revert to the token version?
                 $aNewSearches[] = $oSearch;
 
                 // revert to the token version?
-                foreach ($aFullTokens as $aSearchTermToken) {
-                    if (empty($aSearchTermToken['country_code'])
-                        && empty($aSearchTermToken['lat'])
-                        && empty($aSearchTermToken['class'])
-                    ) {
+                foreach ($aFullTokens as $oSearchTermToken) {
+                    if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
                         $oSearch = clone $this;
                         $oSearch->iSearchRank++;
                         $oSearch = clone $this;
                         $oSearch->iSearchRank++;
-                        $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+                        $oSearch->aAddress[$oSearchTermToken->iId]
+                            = $oSearchTermToken->iId;
                         $aNewSearches[] = $oSearch;
                     }
                 }
                         $aNewSearches[] = $oSearch;
                     }
                 }
@@ -364,10 +373,19 @@ class SearchDescription
             if (empty($this->aName)) {
                 $oSearch->iSearchRank += 1;
             }
             if (empty($this->aName)) {
                 $oSearch->iSearchRank += 1;
             }
-            if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+            if (preg_match('#^[0-9]+$#', $sToken)) {
                 $oSearch->iSearchRank += 2;
             }
                 $oSearch->iSearchRank += 2;
             }
-            if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
+                if (empty($this->aName)
+                    && CONST_Search_NameOnlySearchFrequencyThreshold
+                ) {
+                    $oSearch->bRareName =
+                        $oSearchTerm->iSearchNameCount
+                          < CONST_Search_NameOnlySearchFrequencyThreshold;
+                } else {
+                    $oSearch->bRareName = false;
+                }
                 $oSearch->aName[$iWordID] = $iWordID;
             } else {
                 $oSearch->aNameNonSearch[$iWordID] = $iWordID;
                 $oSearch->aName[$iWordID] = $iWordID;
             } else {
                 $oSearch->aNameNonSearch[$iWordID] = $iWordID;
@@ -385,20 +403,16 @@ class SearchDescription
     /**
      * Query database for places that match this search.
      *
     /**
      * Query database for places that match this search.
      *
-     * @param object  $oDB                  Database connection to use.
-     * @param mixed[] $aWordFrequencyScores Number of times tokens appears
-     *                                      overall in a planet database.
-     * @param integer $iMinRank             Minimum address rank to restrict
-     *                                      search to.
-     * @param integer $iMaxRank             Maximum address rank to restrict
-     *                                      search to.
-     * @param integer $iLimit               Maximum number of results.
+     * @param object  $oDB      Database connection to use.
+     * @param integer $iMinRank Minimum address rank to restrict search to.
+     * @param integer $iMaxRank Maximum address rank to restrict search to.
+     * @param integer $iLimit   Maximum number of results.
      *
      * @return mixed[] An array with two fields: IDs contains the list of
      *                 matching place IDs and houseNumber the houseNumber
      *                 if applicable or -1 if not.
      */
      *
      * @return mixed[] An array with two fields: IDs contains the list of
      *                 matching place IDs and houseNumber the houseNumber
      *                 if applicable or -1 if not.
      */
-    public function query(&$oDB, &$aWordFrequencyScores, $iMinRank, $iMaxRank, $iLimit)
+    public function query(&$oDB, $iMinRank, $iMaxRank, $iLimit)
     {
         $aResults = array();
         $iHousenumber = -1;
     {
         $aResults = array();
         $iHousenumber = -1;
@@ -427,7 +441,6 @@ class SearchDescription
             // First search for places according to name and address.
             $aResults = $this->queryNamedPlace(
                 $oDB,
             // First search for places according to name and address.
             $aResults = $this->queryNamedPlace(
                 $oDB,
-                $aWordFrequencyScores,
                 $iMinRank,
                 $iMaxRank,
                 $iLimit
                 $iMinRank,
                 $iMaxRank,
                 $iLimit
@@ -579,12 +592,16 @@ class SearchDescription
         return $aResults;
     }
 
         return $aResults;
     }
 
-    private function queryNamedPlace(&$oDB, $aWordFrequencyScores, $iMinAddressRank, $iMaxAddressRank, $iLimit)
+    private function queryNamedPlace(&$oDB, $iMinAddressRank, $iMaxAddressRank, $iLimit)
     {
         $aTerms = array();
         $aOrder = array();
 
     {
         $aTerms = array();
         $aOrder = array();
 
-        if ($this->sHouseNumber && !empty($this->aAddress)) {
+        // Sort by existence of the requested house number but only if not
+        // too many results are expected for the street, i.e. if the result
+        // will be narrowed down by an address. Remember that with ordering
+        // every single result has to be checked.
+        if ($this->sHouseNumber && (!empty($this->aAddress) || $this->sPostcode)) {
             $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
             $aOrder[] = ' (';
             $aOrder[0] .= 'EXISTS(';
             $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
             $aOrder[] = ' (';
             $aOrder[0] .= 'EXISTS(';
@@ -615,11 +632,7 @@ class SearchDescription
         }
         if (!empty($this->aAddress)) {
             // For infrequent name terms disable index usage for address
         }
         if (!empty($this->aAddress)) {
             // For infrequent name terms disable index usage for address
-            if (CONST_Search_NameOnlySearchFrequencyThreshold
-                && count($this->aName) == 1
-                && $aWordFrequencyScores[$this->aName[reset($this->aName)]]
-                     < CONST_Search_NameOnlySearchFrequencyThreshold
-            ) {
+            if ($this->bRareName) {
                 $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
             } else {
                 $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);
                 $aTerms[] = 'array_cat(nameaddress_vector,ARRAY[]::integer[]) @> '.getArraySQL($this->aAddress);
             } else {
                 $aTerms[] = 'nameaddress_vector @> '.getArraySQL($this->aAddress);