From: Sarah Hoffmann Date: Wed, 25 Nov 2020 15:57:09 +0000 (+0100) Subject: Merge pull request #2074 from lonvia/add-housenumber-to-unknown-places X-Git-Tag: v3.6.0~21 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/c5d98effc0f5fcd8d154b0a3ba3fccd374445685?hp=3cf763475ffe3b5b5d65955a7128bd77aebd69de Merge pull request #2074 from lonvia/add-housenumber-to-unknown-places Improve finding addresses that have their own search_name entry because of unknown addr:* parts --- diff --git a/lib/Geocode.php b/lib/Geocode.php index 69b6f41c..ed02848e 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -650,6 +650,8 @@ class Geocode $this->oNormalizer ); + $oCtx->setFullNameWords($oValidTokens->getFullWordIDs()); + // Try more interpretations for Tokens that could not be matched. foreach ($aTokens as $sToken) { if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) { diff --git a/lib/SearchContext.php b/lib/SearchContext.php index 3d399bdc..c2898d27 100644 --- a/lib/SearchContext.php +++ b/lib/SearchContext.php @@ -32,7 +32,18 @@ class SearchContext public $sqlCountryList = ''; /// List of place IDs to exclude (as SQL). private $sqlExcludeList = ''; + /// Subset of word ids of full words in the query. + private $aFullNameWords = array(); + public function setFullNameWords($aWordList) + { + $this->aFullNameWords = $aWordList; + } + + public function getFullNameTerms() + { + return $this->aFullNameWords; + } /** * Check if a reference point is defined. diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php index d8c541db..1e1955c2 100644 --- a/lib/SearchDescription.php +++ b/lib/SearchDescription.php @@ -21,8 +21,6 @@ class SearchDescription private $bRareName = false; /// List of word ids making up the address of the object. private $aAddress = array(); - /// Subset of word ids of full words making up the address. - private $aFullNameAddress = array(); /// List of word ids that appear in the name but should be ignored. private $aNameNonSearch = array(); /// List of word ids that appear in the address but should be ignored. @@ -219,6 +217,9 @@ class SearchDescription ) { $oSearch = clone $this; $oSearch->iSearchRank++; + if (strlen($oSearchTerm->sPostcode) < 4) { + $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode); + } $oSearch->sPostcode = $oSearchTerm->sPostcode; $aNewSearches[] = $oSearch; } @@ -295,11 +296,9 @@ class SearchDescription if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) { if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) { $oSearch = clone $this; - $oSearch->iSearchRank += 2; + $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount; $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; - } else { - $this->aFullNameAddress[$iWordID] = $iWordID; } } else { $oSearch = clone $this; @@ -345,16 +344,19 @@ class SearchDescription ) { if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) { $oSearch = clone $this; - $oSearch->iSearchRank += 2; + $oSearch->iSearchRank += $oSearchTerm->iTermCount; + if (empty($this->aName)) { + $oSearch->iSearchRank++; + } + if (preg_match('#^[0-9]+$#', $sToken)) { + $oSearch->iSearchRank++; + } $oSearch->aAddress[$iWordID] = $iWordID; $aNewSearches[] = $oSearch; } else { $oSearch = clone $this; $oSearch->iSearchRank++; $oSearch->aAddressNonSearch[$iWordID] = $iWordID; - if (preg_match('#^[0-9]+$#', $sToken)) { - $oSearch->iSearchRank += 2; - } if (!empty($aFullTokens)) { $oSearch->iSearchRank++; } @@ -364,7 +366,7 @@ class SearchDescription foreach ($aFullTokens as $oSearchTermToken) { if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) { $oSearch = clone $this; - $oSearch->iSearchRank++; + $oSearch->iSearchRank += 3; $oSearch->aAddress[$oSearchTermToken->iId] = $oSearchTermToken->iId; $aNewSearches[] = $oSearch; @@ -703,10 +705,11 @@ class SearchDescription $sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid'); $aOrder[] = "$sImportanceSQL DESC"; - if (!empty($this->aFullNameAddress)) { + $aFullNameAddress = $this->oContext->getFullNameTerms(); + if (!empty($aFullNameAddress)) { $sExactMatchSQL = ' ( '; $sExactMatchSQL .= ' SELECT count(*) FROM ( '; - $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')'; + $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($aFullNameAddress).')'; $sExactMatchSQL .= ' INTERSECT '; $sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)'; $sExactMatchSQL .= ' ) s'; diff --git a/lib/TokenList.php b/lib/TokenList.php index fce5f940..1b6a1dcf 100644 --- a/lib/TokenList.php +++ b/lib/TokenList.php @@ -80,6 +80,21 @@ class TokenList return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array(); } + public function getFullWordIDs() + { + $ids = array(); + + foreach ($this->aTokens as $aTokenList) { + foreach ($aTokenList as $oToken) { + if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) { + $ids[$oToken->iId] = $oToken->iId; + } + } + } + + return $ids; + } + /** * Add token information from the word table in the database. * @@ -151,7 +166,8 @@ class TokenList $oToken = new Token\Word( $iId, $aWord['word_token'][0] != ' ', - (int) $aWord['count'] + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') ); } diff --git a/lib/TokenWord.php b/lib/TokenWord.php index 54622cbc..fc28535d 100644 --- a/lib/TokenWord.php +++ b/lib/TokenWord.php @@ -13,12 +13,15 @@ class Word public $bPartial; /// Number of appearances in the database. public $iSearchNameCount; + /// Number of terms in the word. + public $iTermCount; - public function __construct($iId, $bPartial, $iSearchNameCount) + public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount) { $this->iId = $iId; $this->bPartial = $bPartial; $this->iSearchNameCount = $iSearchNameCount; + $this->iTermCount = $iTermCount; } public function debugInfo() diff --git a/test/php/Nominatim/TokenListTest.php b/test/php/Nominatim/TokenListTest.php index 191a09dc..ca43aabb 100644 --- a/test/php/Nominatim/TokenListTest.php +++ b/test/php/Nominatim/TokenListTest.php @@ -121,6 +121,6 @@ class TokenTest extends \PHPUnit\Framework\TestCase $this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051')); $this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne')); $this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286')); - $this->assertEquals(array(new Token\Word(999, true, 533)), $TL->get('darmstadt')); + $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt')); } }