git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2074 from lonvia/add-housenumber-to-unknown-places
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 25 Nov 2020 15:57:09 +0000 (16:57 +0100)
committer GitHub <noreply@github.com>
Wed, 25 Nov 2020 15:57:09 +0000 (16:57 +0100)
Improve finding addresses that have their own search_name entry because of unknown addr:* parts

lib/Geocode.php
lib/SearchContext.php
lib/SearchDescription.php
lib/TokenList.php
lib/TokenWord.php
test/php/Nominatim/TokenListTest.php

index 69b6f41ca46e1085d2917592dc2757780bbbc64a..ed02848eac4c2c846ea1a655089caa75aca879cd 100644 (file)
@@ -650,6 +650,8 @@ class Geocode
                     $this->oNormalizer
                 );
 
+                $oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
+
                 // Try more interpretations for Tokens that could not be matched.
                 foreach ($aTokens as $sToken) {
                     if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
index 3d399bdcf62967d63fe96b5f3d614d315f1c9c4e..c2898d274b44266aa567fb74cddfd9790a67df13 100644 (file)
@@ -32,7 +32,18 @@ class SearchContext
     public $sqlCountryList = '';
     /// List of place IDs to exclude (as SQL).
     private $sqlExcludeList = '';
+    /// Subset of word ids of full words in the query.
+    private $aFullNameWords = array();
 
+    public function setFullNameWords($aWordList)
+    {
+        $this->aFullNameWords = $aWordList;
+    }
+
+    public function getFullNameTerms()
+    {
+        return $this->aFullNameWords;
+    }
 
     /**
      * Check if a reference point is defined.
index d8c541dbc8f8514d04a5f297d5681c4fb6e8ab54..1e1955c249a7e8a498a9c1d92d4bc08991fbf6b1 100644 (file)
@@ -21,8 +21,6 @@ class SearchDescription
     private $bRareName = false;
     /// List of word ids making up the address of the object.
     private $aAddress = array();
-    /// Subset of word ids of full words making up the address.
-    private $aFullNameAddress = array();
     /// List of word ids that appear in the name but should be ignored.
     private $aNameNonSearch = array();
     /// List of word ids that appear in the address but should be ignored.
@@ -219,6 +217,9 @@ class SearchDescription
                 ) {
                     $oSearch = clone $this;
                     $oSearch->iSearchRank++;
+                    if (strlen($oSearchTerm->sPostcode) < 4) {
+                        $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
+                    }
                     $oSearch->sPostcode = $oSearchTerm->sPostcode;
                     $aNewSearches[] = $oSearch;
                 }
@@ -295,11 +296,9 @@ class SearchDescription
             if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
                 if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
                     $oSearch = clone $this;
-                    $oSearch->iSearchRank += 2;
+                    $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
                     $oSearch->aAddress[$iWordID] = $iWordID;
                     $aNewSearches[] = $oSearch;
-                } else {
-                    $this->aFullNameAddress[$iWordID] = $iWordID;
                 }
             } else {
                 $oSearch = clone $this;
@@ -345,16 +344,19 @@ class SearchDescription
         ) {
             if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
                 $oSearch = clone $this;
-                $oSearch->iSearchRank += 2;
+                $oSearch->iSearchRank += $oSearchTerm->iTermCount;
+                if (empty($this->aName)) {
+                    $oSearch->iSearchRank++;
+                }
+                if (preg_match('#^[0-9]+$#', $sToken)) {
+                    $oSearch->iSearchRank++;
+                }
                 $oSearch->aAddress[$iWordID] = $iWordID;
                 $aNewSearches[] = $oSearch;
             } else {
                 $oSearch = clone $this;
                 $oSearch->iSearchRank++;
                 $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-                if (preg_match('#^[0-9]+$#', $sToken)) {
-                    $oSearch->iSearchRank += 2;
-                }
                 if (!empty($aFullTokens)) {
                     $oSearch->iSearchRank++;
                 }
@@ -364,7 +366,7 @@ class SearchDescription
                 foreach ($aFullTokens as $oSearchTermToken) {
                     if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
                         $oSearch = clone $this;
-                        $oSearch->iSearchRank++;
+                        $oSearch->iSearchRank += 3;
                         $oSearch->aAddress[$oSearchTermToken->iId]
                             = $oSearchTermToken->iId;
                         $aNewSearches[] = $oSearch;
@@ -703,10 +705,11 @@ class SearchDescription
         $sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid');
         $aOrder[] = "$sImportanceSQL DESC";
 
-        if (!empty($this->aFullNameAddress)) {
+        $aFullNameAddress = $this->oContext->getFullNameTerms();
+        if (!empty($aFullNameAddress)) {
             $sExactMatchSQL = ' ( ';
             $sExactMatchSQL .= ' SELECT count(*) FROM ( ';
-            $sExactMatchSQL .= '  SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')';
+            $sExactMatchSQL .= '  SELECT unnest('.$oDB->getArraySQL($aFullNameAddress).')';
             $sExactMatchSQL .= '    INTERSECT ';
             $sExactMatchSQL .= '  SELECT unnest(nameaddress_vector)';
             $sExactMatchSQL .= ' ) s';
index fce5f940b84513a6bc1850cbbbdb5e9fa043682c..1b6a1dcf37f7629667d6a388d4052ff81cc3e1ed 100644 (file)
@@ -80,6 +80,21 @@ class TokenList
         return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
     }
 
+    public function getFullWordIDs()
+    {
+        $ids = array();
+
+        foreach ($this->aTokens as $aTokenList) {
+            foreach ($aTokenList as $oToken) {
+                if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
+                    $ids[$oToken->iId] = $oToken->iId;
+                }
+            }
+        }
+
+        return $ids;
+    }
+
     /**
      * Add token information from the word table in the database.
      *
@@ -151,7 +166,8 @@ class TokenList
                 $oToken = new Token\Word(
                     $iId,
                     $aWord['word_token'][0] != ' ',
-                    (int) $aWord['count']
+                    (int) $aWord['count'],
+                    substr_count($aWord['word_token'], ' ')
                 );
             }
 
index 54622cbcb081c05cd3bd3b67d663ef938455763c..fc28535d4582e459f5d88c72b8977efaf1930fa9 100644 (file)
@@ -13,12 +13,15 @@ class Word
     public $bPartial;
     /// Number of appearances in the database.
     public $iSearchNameCount;
+    /// Number of terms in the word.
+    public $iTermCount;
 
-    public function __construct($iId, $bPartial, $iSearchNameCount)
+    public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
     {
         $this->iId = $iId;
         $this->bPartial = $bPartial;
         $this->iSearchNameCount = $iSearchNameCount;
+        $this->iTermCount = $iTermCount;
     }
 
     public function debugInfo()
index 191a09dceeb6a002a5c6c4103cb0557ad06178a3..ca43aabb3d71bfa6780d50b24d26aa6f5cbcf81c 100644 (file)
@@ -121,6 +121,6 @@ class TokenTest extends \PHPUnit\Framework\TestCase
         $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
         $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
         $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
-        $this->assertEquals(array(new Token\Word(999, true, 533)), $TL->get('darmstadt'));
+        $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));
     }
 }