Full-word tokens are no longer marked by a space at the
beginning of the token. Partial terms are identified by the new
Partial token category instead. This removes a couple of special
cases that we don't really need.
The word table still has the space for compatibility reasons,
so the tokenizer code needs to get rid of it when loading the
tokens.
$aNewWordsetSearches = array();
foreach ($aWordsetSearches as $oCurrentSearch) {
$aNewWordsetSearches = array();
foreach ($aWordsetSearches as $oCurrentSearch) {
- // Tokens with full name matches.
- foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithFullTerm(
+ foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
+ $aNewSearches = $oCurrentSearch->extendWithSearchTerm(
+ $sToken,
$oSearchTerm,
$sPhraseType,
$iToken == 0 && $iPhrase == 0,
$oSearchTerm,
$sPhraseType,
$iToken == 0 && $iPhrase == 0,
$iToken + 1 == count($aWordset)
$iToken + 1 == count($aWordset)
- && $iPhrase + 1 == count($aPhrases)
+ && $iPhrase + 1 == count($aPhrases),
+ $iPhrase
);
foreach ($aNewSearches as $oSearch) {
);
foreach ($aNewSearches as $oSearch) {
- // Look for partial matches.
- // Note that there is no point in adding country terms here
- // because country is omitted in the address.
- if ($sPhraseType != 'country') {
- // Allow searching for a word - but at extra cost
- foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
- $sToken,
- $oSearchTerm,
- (bool) $sPhraseType,
- $iPhrase,
- $oValidTokens->get(' '.$sToken)
- );
-
- foreach ($aNewSearches as $oSearch) {
- if ($oSearch->getRank() < $this->iMaxRank) {
- $aNewWordsetSearches[] = $oSearch;
- }
- }
- }
- }
}
// Sort and cut
usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
}
// Sort and cut
usort($aNewWordsetSearches, array('Nominatim\SearchDescription', 'bySearchRank'));
/**
* Derive new searches by adding a full term to the existing search.
*
/**
* Derive new searches by adding a full term to the existing search.
*
- * @param object $oSearchTerm Description of the token.
- * @param string $sPhraseType Type of phrase the token is contained in.
- * @param bool $bFirstToken True if the token is at the beginning of the
- * query.
- * @param bool $bFirstPhrase True if the token is in the first phrase of
- * the query.
- * @param bool $bLastToken True if the token is at the end of the query.
+ * @param string $sToken Term for the token.
+ * @param object $oSearchTerm Description of the token.
+ * @param string $sPhraseType Type of phrase the token is contained in.
+ * @param bool $bFirstToken True if the token is at the beginning of the
+ * query.
+ * @param bool $bLastToken True if the token is at the end of the query.
+ * @param integer $iPhrase Number of the phrase the token is in.
*
* @return SearchDescription[] List of derived search descriptions.
*/
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+ public function extendWithSearchTerm($sToken, $oSearchTerm, $sPhraseType, $bFirstToken, $bLastToken, $iPhrase)
{
$aNewSearches = array();
{
$aNewSearches = array();
// of the phrase. In structured search the name must forcably in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
// of the phrase. In structured search the name must forcably in
// the first phrase. In unstructured search it may be in a later
// phrase when the first phrase is a house number.
- if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
- if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) {
+ if (!empty($this->aName) || !($iPhrase == 0 || $sPhraseType == '')) {
+ if (($sPhraseType == '' || $iPhrase > 0) && $oSearchTerm->iTermCount > 1) {
$oSearch = clone $this;
$oSearch->iNamePhrase = -1;
$oSearch->iSearchRank += 1;
$oSearch = clone $this;
$oSearch->iNamePhrase = -1;
$oSearch->iSearchRank += 1;
}
$aNewSearches[] = $oSearch;
}
}
$aNewSearches[] = $oSearch;
}
+ } elseif ($sPhraseType != 'country'
+ && is_a($oSearchTerm, '\Nominatim\Token\Partial')
+ && strpos($sToken, ' ') === false
+ ) {
+ $aNewSearches = $this->extendWithPartialTerm(
+ $sToken,
+ $oSearchTerm,
+ (bool) $sPhraseType,
+ $iPhrase
+ );
* @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
* @param object $oSearchTerm Description of the token.
* @param bool $bStructuredPhrases True if the search is structured.
* @param integer $iPhrase Number of the phrase the token is in.
- * @param array[] $aFullTokens List of full term tokens with the
- * same name.
*
* @return SearchDescription[] List of derived search descriptions.
*/
*
* @return SearchDescription[] List of derived search descriptions.
*/
- public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+ private function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase)
- // Only allow name terms.
- if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))
- || strpos($sToken, ' ') !== false
- ) {
- return array();
- }
-
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
$aNewSearches = array();
$iWordID = $oSearchTerm->iId;
$oSearch->aAddress[$iWordID] = $iWordID;
} else {
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
$oSearch->aAddress[$iWordID] = $iWordID;
} else {
$oSearch->aAddressNonSearch[$iWordID] = $iWordID;
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
}
$aNewSearches[] = $oSearch;
}
}
$aNewSearches[] = $oSearch;
}
}
$oSearch->aName[$iWordID] = $iWordID;
} else {
}
$oSearch->aName[$iWordID] = $iWordID;
} else {
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
$oSearch->aNameNonSearch[$iWordID] = $iWordID;
}
$oSearch->iNamePhrase = $iPhrase;
* tokens do not have a common base class. All tokens need to have a field
* with the word id that points to an entry in the `word` database table
* but otherwise the information saved about a token can be very different.
* tokens do not have a common base class. All tokens need to have a field
* with the word id that points to an entry in the `word` database table
* but otherwise the information saved about a token can be very different.
- *
- * There are two different kinds of token words: full words and partial terms.
- *
- * Full words start with a space. They represent a complete name of a place.
- * All special tokens are normally full words.
- *
- * Partial terms have no space at the beginning. They may represent a part of
- * a name of a place (e.g. in the name 'World Trade Center' a partial term
- * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
*/
public function containsAny($sWord)
{
*/
public function containsAny($sWord)
{
- return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
+ return isset($this->aTokens[$sWord]);
foreach ($this->aTokens as $aTokenList) {
foreach ($aTokenList as $oToken) {
foreach ($this->aTokens as $aTokenList) {
foreach ($aTokenList as $oToken) {
- if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
+ if (is_a($oToken, '\Nominatim\Token\Word')) {
$ids[$oToken->iId] = $oToken->iId;
}
}
$ids[$oToken->iId] = $oToken->iId;
}
}
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(