Multi-word partial terms had an undue advantage over separate partial
terms because they only needed to pay the penalty once. This changes
the behaviour by setting the penalty according to the number of
words in the token. This should get rid of search interpretations
with a low chance of matching.
This also fixes the handling of exact term matching. We now match against
all exact terms of the query, not just the few that were collected
while building the interpretations.
Also adds a penalty for very short postcodes (fewer than four characters).
+ $oCtx->setFullNameWords($oValidTokens->getFullWordIDs());
+
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
public $sqlCountryList = '';
/// List of place IDs to exclude (as SQL).
private $sqlExcludeList = '';
public $sqlCountryList = '';
/// List of place IDs to exclude (as SQL).
private $sqlExcludeList = '';
+ /// Word ids of the full (non-partial) words in the query; replaced wholesale by setFullNameWords().
+ private $aFullNameWords = array();
+ public function setFullNameWords($aWordList) // Store $aWordList as the full-word id list, replacing any earlier one.
+ {
+ $this->aFullNameWords = $aWordList;
+ }
+
+ public function getFullNameTerms() // Return the list stored by setFullNameWords() (an empty array until it is called).
+ {
+ return $this->aFullNameWords;
+ }
/**
* Check if a reference point is defined.
/**
* Check if a reference point is defined.
private $bRareName = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
private $bRareName = false;
/// List of word ids making up the address of the object.
private $aAddress = array();
- /// Subset of word ids of full words making up the address.
- private $aFullNameAddress = array();
/// List of word ids that appear in the name but should be ignored.
private $aNameNonSearch = array();
/// List of word ids that appear in the address but should be ignored.
/// List of word ids that appear in the name but should be ignored.
private $aNameNonSearch = array();
/// List of word ids that appear in the address but should be ignored.
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
) {
$oSearch = clone $this;
$oSearch->iSearchRank++;
+ if (strlen($oSearchTerm->sPostcode) < 4) {
+ $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
+ }
$oSearch->sPostcode = $oSearchTerm->sPostcode;
$aNewSearches[] = $oSearch;
}
$oSearch->sPostcode = $oSearchTerm->sPostcode;
$aNewSearches[] = $oSearch;
}
if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
$oSearch = clone $this;
if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
if (($sPhraseType == '' || !$bFirstPhrase) && !$bHasPartial) {
$oSearch = clone $this;
- $oSearch->iSearchRank += 2;
+ $oSearch->iSearchRank += 3 * $oSearchTerm->iTermCount;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
- } else {
- $this->aFullNameAddress[$iWordID] = $iWordID;
}
} else {
$oSearch = clone $this;
}
} else {
$oSearch = clone $this;
) {
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch = clone $this;
) {
if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
$oSearch = clone $this;
- $oSearch->iSearchRank += 2;
+ $oSearch->iSearchRank += $oSearchTerm->iTermCount;
+ if (empty($this->aName)) {
+ $oSearch->iSearchRank++;
+ }
+ if (preg_match('#^[0-9]+$#', $sToken)) {
+ $oSearch->iSearchRank++;
+ }
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
} else {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
$oSearch->aAddress[$iWordID] = $iWordID;
$aNewSearches[] = $oSearch;
} else {
$oSearch = clone $this;
$oSearch->iSearchRank++;
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
- if (preg_match('#^[0-9]+$#', $sToken)) {
- $oSearch->iSearchRank += 2;
- }
if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
if (!empty($aFullTokens)) {
$oSearch->iSearchRank++;
}
foreach ($aFullTokens as $oSearchTermToken) {
if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
$oSearch = clone $this;
foreach ($aFullTokens as $oSearchTermToken) {
if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
$oSearch = clone $this;
- $oSearch->iSearchRank++;
+ $oSearch->iSearchRank += 3;
$oSearch->aAddress[$oSearchTermToken->iId]
= $oSearchTermToken->iId;
$aNewSearches[] = $oSearch;
$oSearch->aAddress[$oSearchTermToken->iId]
= $oSearchTermToken->iId;
$aNewSearches[] = $oSearch;
$sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid');
$aOrder[] = "$sImportanceSQL DESC";
$sImportanceSQL .= $this->oContext->viewboxImportanceSQL('centroid');
$aOrder[] = "$sImportanceSQL DESC";
- if (!empty($this->aFullNameAddress)) {
+ $aFullNameAddress = $this->oContext->getFullNameTerms();
+ if (!empty($aFullNameAddress)) {
$sExactMatchSQL = ' ( ';
$sExactMatchSQL .= ' SELECT count(*) FROM ( ';
$sExactMatchSQL = ' ( ';
$sExactMatchSQL .= ' SELECT count(*) FROM ( ';
- $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($this->aFullNameAddress).')';
+ $sExactMatchSQL .= ' SELECT unnest('.$oDB->getArraySQL($aFullNameAddress).')';
$sExactMatchSQL .= ' INTERSECT ';
$sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)';
$sExactMatchSQL .= ' ) s';
$sExactMatchSQL .= ' INTERSECT ';
$sExactMatchSQL .= ' SELECT unnest(nameaddress_vector)';
$sExactMatchSQL .= ' ) s';
return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
}
return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
}
+ /**
+  * Collect the ids of all full (non-partial) word tokens held in this list.
+  *
+  * Every token list in aTokens is scanned; only objects of class
+  * \Nominatim\Token\Word whose bPartial flag is not set contribute.
+  *
+  * @return array Word ids, keyed by the id itself so that an id which
+  *               appears under several tokens is only reported once.
+  */
+ public function getFullWordIDs()
+ {
+     $aFullIds = array();
+
+     foreach ($this->aTokens as $aCandidates) {
+         foreach ($aCandidates as $oCandidate) {
+             // Skip anything that is not a word token at all.
+             if (!is_a($oCandidate, '\Nominatim\Token\Word')) {
+                 continue;
+             }
+             // Partial words are not exact terms.
+             if ($oCandidate->bPartial) {
+                 continue;
+             }
+             $aFullIds[$oCandidate->iId] = $oCandidate->iId;
+         }
+     }
+
+     return $aFullIds;
+ }
+
/**
* Add token information from the word table in the database.
*
/**
* Add token information from the word table in the database.
*
$oToken = new Token\Word(
$iId,
$aWord['word_token'][0] != ' ',
$oToken = new Token\Word(
$iId,
$aWord['word_token'][0] != ' ',
+ (int) $aWord['count'],
+ substr_count($aWord['word_token'], ' ')
public $bPartial;
/// Number of appearances in the database.
public $iSearchNameCount;
public $bPartial;
/// Number of appearances in the database.
public $iSearchNameCount;
+ /// Number of terms in the word. The call site visible in this change passes the count of spaces in the word token.
+ public $iTermCount;
- public function __construct($iId, $bPartial, $iSearchNameCount)
+ public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
{
$this->iId = $iId;
$this->bPartial = $bPartial;
$this->iSearchNameCount = $iSearchNameCount;
{
$this->iId = $iId;
$this->bPartial = $bPartial;
$this->iSearchNameCount = $iSearchNameCount;
+ $this->iTermCount = $iTermCount;
}
public function debugInfo()
}
public function debugInfo()
$this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
$this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
$this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
$this->assertEquals(array(new Token\HouseNumber(999, '1051')), $TL->get('1051'));
$this->assertEquals(array(new Token\Country(999, 'de')), $TL->get('alemagne'));
$this->assertEquals(array(new Token\Postcode(999, '64286')), $TL->get('64286'));
- $this->assertEquals(array(new Token\Word(999, true, 533)), $TL->get('darmstadt'));
+ $this->assertEquals(array(new Token\Word(999, true, 533, 0)), $TL->get('darmstadt'));