git.openstreetmap.org Git - nominatim.git/commitdiff
factor out query position
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 15 Jul 2021 12:12:59 +0000 (14:12 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Thu, 15 Jul 2021 12:12:59 +0000 (14:12 +0200)
Moves token and phrase position and phrase type into a separate
class that is handed in when assembling the search description.
This drastically reduces the number of parameters for the function
to extend the search descriptions and gives us more flexibility
in the future for more complex positional analysis.

lib-php/Geocode.php
lib-php/SearchDescription.php
lib-php/SearchPosition.php [new file with mode: 0644]

index eda6df5492c0e9c160e5c4ae7507b2fa1c96cd9b..c2b4f4e4c0704e035f37518b784c251bc62a8c02 100644 (file)
@@ -7,6 +7,7 @@ require_once(CONST_LibDir.'/Phrase.php');
 require_once(CONST_LibDir.'/ReverseGeocode.php');
 require_once(CONST_LibDir.'/SearchDescription.php');
 require_once(CONST_LibDir.'/SearchContext.php');
 require_once(CONST_LibDir.'/ReverseGeocode.php');
 require_once(CONST_LibDir.'/SearchDescription.php');
 require_once(CONST_LibDir.'/SearchContext.php');
+require_once(CONST_LibDir.'/SearchPosition.php');
 require_once(CONST_LibDir.'/TokenList.php');
 require_once(CONST_TokenizerDir.'/tokenizer.php');
 
 require_once(CONST_LibDir.'/TokenList.php');
 require_once(CONST_TokenizerDir.'/tokenizer.php');
 
@@ -345,7 +346,11 @@ class Geocode
          */
         foreach ($aPhrases as $iPhrase => $oPhrase) {
             $aNewPhraseSearches = array();
          */
         foreach ($aPhrases as $iPhrase => $oPhrase) {
             $aNewPhraseSearches = array();
-            $sPhraseType = $oPhrase->getPhraseType();
+            $oPosition = new SearchPosition(
+                $oPhrase->getPhraseType(),
+                $iPhrase,
+                count($aPhrases)
+            );
 
             foreach ($oPhrase->getWordSets() as $aWordset) {
                 $aWordsetSearches = $aSearches;
 
             foreach ($oPhrase->getWordSets() as $aWordset) {
                 $aWordsetSearches = $aSearches;
@@ -353,17 +358,14 @@ class Geocode
                 // Add all words from this wordset
                 foreach ($aWordset as $iToken => $sToken) {
                     $aNewWordsetSearches = array();
                 // Add all words from this wordset
                 foreach ($aWordset as $iToken => $sToken) {
                     $aNewWordsetSearches = array();
+                    $oPosition->setTokenPosition($iToken, count($aWordset));
 
                     foreach ($aWordsetSearches as $oCurrentSearch) {
                         foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
                             $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
                                 $sToken,
                                 $oSearchTerm,
 
                     foreach ($aWordsetSearches as $oCurrentSearch) {
                         foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
                             $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
                                 $sToken,
                                 $oSearchTerm,
-                                $sPhraseType,
-                                $iToken == 0 && $iPhrase == 0,
-                                $iToken + 1 == count($aWordset)
-                                  && $iPhrase + 1 == count($aPhrases),
-                                $iPhrase
+                                $oPosition
                             );
 
                             foreach ($aNewSearches as $oSearch) {
                             );
 
                             foreach ($aNewSearches as $oSearch) {
index 938beb61206d457cc577c3ade63bc072c2b8ed17..8924287aa06483cff4b70cbd7c7fc17420bacb97 100644 (file)
@@ -154,19 +154,16 @@ class SearchDescription
      *
      * @param string  $sToken       Term for the token.
      * @param object  $oSearchTerm  Description of the token.
      *
      * @param string  $sToken       Term for the token.
      * @param object  $oSearchTerm  Description of the token.
-     * @param string  $sPhraseType  Type of phrase the token is contained in.
-     * @param bool    $bFirstToken  True if the token is at the beginning of the
-     *                              query.
-     * @param bool    $bLastToken   True if the token is at the end of the query.
-     * @param integer $iPhrase      Number of the phrase the token is in.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase)
+    public function extendWithSearchTerm($sToken, $oSearchTerm, $oPosition)
     {
         $aNewSearches = array();
 
     {
         $aNewSearches = array();
 
-        if (($sPhraseType == '' || $sPhraseType == 'country')
+        if ($oPosition->maybePhrase('country')
             && is_a($oSearchTerm, '\Nominatim\Token\Country')
         ) {
             if (!$this->sCountryCode) {
             && is_a($oSearchTerm, '\Nominatim\Token\Country')
         ) {
             if (!$this->sCountryCode) {
@@ -175,19 +172,19 @@ class SearchDescription
                 $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
                 // Country is almost always at the end of the string
                 // - increase score for finding it anywhere else (optimisation)
                 $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
                 // Country is almost always at the end of the string
                 // - increase score for finding it anywhere else (optimisation)
-                if (!$bLastToken) {
+                if (!$oPosition->isLastToken()) {
                     $oSearch->iSearchRank += 5;
                     $oSearch->iNamePhrase = -1;
                 }
                 $aNewSearches[] = $oSearch;
             }
                     $oSearch->iSearchRank += 5;
                     $oSearch->iNamePhrase = -1;
                 }
                 $aNewSearches[] = $oSearch;
             }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
+        } elseif ($oPosition->maybePhrase('postalcode')
                   && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
         ) {
             if (!$this->sPostcode) {
                 // If we have structured search or this is the first term,
                 // make the postcode the primary search element.
                   && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
         ) {
             if (!$this->sPostcode) {
                 // If we have structured search or this is the first term,
                 // make the postcode the primary search element.
-                if ($this->iOperator == Operator::NONE && $bFirstToken) {
+                if ($this->iOperator == Operator::NONE && $oPosition->isFirstToken()) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
                     $oSearch->iOperator = Operator::POSTCODE;
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
                     $oSearch->iOperator = Operator::POSTCODE;
@@ -200,7 +197,7 @@ class SearchDescription
                 // If we have a structured search or this is not the first term,
                 // add the postcode as an addendum.
                 if ($this->iOperator != Operator::POSTCODE
                 // If we have a structured search or this is not the first term,
                 // add the postcode as an addendum.
                 if ($this->iOperator != Operator::POSTCODE
-                    && ($sPhraseType == 'postalcode' || !empty($this->aName))
+                    && ($oPosition->isPhrase('postalcode') || !empty($this->aName))
                 ) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
                 ) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
@@ -212,7 +209,7 @@ class SearchDescription
                     $aNewSearches[] = $oSearch;
                 }
             }
                     $aNewSearches[] = $oSearch;
                 }
             }
-        } elseif (($sPhraseType == '' || $sPhraseType == 'street')
+        } elseif ($oPosition->maybePhrase('street')
                  && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
         ) {
             if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
                  && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
         ) {
             if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
@@ -257,7 +254,7 @@ class SearchDescription
                     $aNewSearches[] = $oSearch;
                 }
             }
                     $aNewSearches[] = $oSearch;
                 }
             }
-        } elseif ($sPhraseType == ''
+        } elseif ($oPosition->isPhrase('')
                   && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
         ) {
             if ($this->iOperator == Operator::NONE) {
                   && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
         ) {
             if ($this->iOperator == Operator::NONE) {
@@ -273,7 +270,7 @@ class SearchDescription
                         $iOp = Operator::NEAR;
                     }
                     $oSearch->iSearchRank += 2;
                         $iOp = Operator::NEAR;
                     }
                     $oSearch->iSearchRank += 2;
-                } elseif (!$bFirstToken && !$bLastToken) {
+                } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
                     $oSearch->iSearchRank += 2;
                 }
                 if ($this->sHouseNumber) {
                     $oSearch->iSearchRank += 2;
                 }
                 if ($this->sHouseNumber) {
@@ -287,7 +284,7 @@ class SearchDescription
                 );
                 $aNewSearches[] = $oSearch;
             }
                 );
                 $aNewSearches[] = $oSearch;
             }
-        } elseif ($sPhraseType != 'country'
+        } elseif (!$oPosition->isPhrase('country')
                   && is_a($oSearchTerm, '\Nominatim\Token\Word')
         ) {
             $iWordID = $oSearchTerm->iId;
                   && is_a($oSearchTerm, '\Nominatim\Token\Word')
         ) {
             $iWordID = $oSearchTerm->iId;
@@ -295,8 +292,10 @@ class SearchDescription
             // of the phrase. In structured search the name must necessarily be in
             // the first phrase. In unstructured search it may be in a later
             // phrase when the first phrase is a house number.
             // of the phrase. In structured search the name must necessarily be in
             // the first phrase. In unstructured search it may be in a later
             // phrase when the first phrase is a house number.
-            if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) {
-                if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) {
+            if (!empty($this->aName) || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))) {
+                if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+                    && $oSearchTerm->iTermCount > 1
+                ) {
                     $oSearch = clone $this;
                     $oSearch->iNamePhrase = -1;
                     $oSearch->iSearchRank += 1;
                     $oSearch = clone $this;
                     $oSearch->iNamePhrase = -1;
                     $oSearch->iSearchRank += 1;
@@ -314,15 +313,14 @@ class SearchDescription
                 }
                 $aNewSearches[] = $oSearch;
             }
                 }
                 $aNewSearches[] = $oSearch;
             }
-        } elseif ($sPhraseType != 'country'
+        } elseif (!$oPosition->isPhrase('country')
                   && is_a($oSearchTerm, '\Nominatim\Token\Partial')
                   && strpos($sToken, ' ') === false
         ) {
             $aNewSearches = $this->extendWithPartialTerm(
                 $sToken,
                 $oSearchTerm,
                   && is_a($oSearchTerm, '\Nominatim\Token\Partial')
                   && strpos($sToken, ' ') === false
         ) {
             $aNewSearches = $this->extendWithPartialTerm(
                 $sToken,
                 $oSearchTerm,
-                (bool) $sPhraseType,
-                $iPhrase
+                $oPosition
             );
         }
 
             );
         }
 
@@ -332,19 +330,19 @@ class SearchDescription
     /**
      * Derive new searches by adding a partial term to the existing search.
      *
     /**
      * Derive new searches by adding a partial term to the existing search.
      *
-     * @param string  $sToken             Term for the token.
-     * @param object  $oSearchTerm        Description of the token.
-     * @param bool    $bStructuredPhrases True if the search is structured.
-     * @param integer $iPhrase            Number of the phrase the token is in.
+     * @param string  $sToken       Term for the token.
+     * @param object  $oSearchTerm  Description of the token.
+     * @param object  $oPosition    Description of the token position within
+     *                              the query.
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
      *
      * @return SearchDescription[] List of derived search descriptions.
      */
-    private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase)
+    private function extendWithPartialTerm($sToken, $oSearchTerm, $oPosition)
     {
         $aNewSearches = array();
         $iWordID = $oSearchTerm->iId;
 
     {
         $aNewSearches = array();
         $iWordID = $oSearchTerm->iId;
 
-        if ((!$bStructuredPhrases || $iPhrase > 0)
+        if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
             && (!empty($this->aName))
         ) {
             $oSearch = clone $this;
             && (!empty($this->aName))
         ) {
             $oSearch = clone $this;
@@ -361,7 +359,8 @@ class SearchDescription
         }
 
         if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
         }
 
         if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
-            && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
+            && ((empty($this->aName) && empty($this->aNameNonSearch))
+                || $this->iNamePhrase == $oPosition->getPhrase())
         ) {
             $oSearch = clone $this;
             $oSearch->iSearchRank++;
         ) {
             $oSearch = clone $this;
             $oSearch->iSearchRank++;
@@ -385,7 +384,7 @@ class SearchDescription
             } else {
                 $oSearch->aNameNonSearch[$iWordID] = $iWordID;
             }
             } else {
                 $oSearch->aNameNonSearch[$iWordID] = $iWordID;
             }
-            $oSearch->iNamePhrase = $iPhrase;
+            $oSearch->iNamePhrase = $oPosition->getPhrase();
             $aNewSearches[] = $oSearch;
         }
 
             $aNewSearches[] = $oSearch;
         }
 
diff --git a/lib-php/SearchPosition.php b/lib-php/SearchPosition.php
new file mode 100644 (file)
index 0000000..e4260bf
--- /dev/null
@@ -0,0 +1,87 @@
+<?php
+
+namespace Nominatim;
+
+/**
+ * Description of the position of a token within a query.
+ */
+class SearchPosition
+{
+    private $sPhraseType;
+
+    private $iPhrase;
+    private $iNumPhrases;
+
+    private $iToken;
+    private $iNumTokens;
+
+
+    public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
+    {
+        $this->sPhraseType = $sPhraseType;
+        $this->iPhrase = $iPhrase;
+        $this->iNumPhrases = $iNumPhrases;
+    }
+
+    public function setTokenPosition($iToken, $iNumTokens)
+    {
+        $this->iToken = $iToken;
+        $this->iNumTokens = $iNumTokens;
+    }
+
+    /**
+     * Check if the phrase can be of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return True if the phrase is untyped or of the given type.
+     */
+    public function maybePhrase($sType)
+    {
+        return $this->sPhraseType == '' || $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Check if the phrase is exactly of the given type.
+     *
+     * @param string  $sType  Type of phrase requested.
+     *
+     * @return True if the phrase is of the given type.
+     */
+    public function isPhrase($sType)
+    {
+        return $this->sPhraseType == $sType;
+    }
+
+    /**
+     * Return true if the token is the very first in the query.
+     */
+    public function isFirstToken()
+    {
+        return $this->iPhrase == 0 && $this->iToken == 0;
+    }
+
+    /**
+     * Check if the token is the final one in the query.
+     */
+    public function isLastToken()
+    {
+        return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
+    }
+
+    /**
+     * Check if the current token is part of the first phrase in the query.
+     */
+    public function isFirstPhrase()
+    {
+        return $this->iPhrase == 0;
+    }
+
+    /**
+     * Get the phrase position in the query.
+     */
+    public function getPhrase()
+    {
+        return $this->iPhrase;
+    }
+}