require_once(CONST_LibDir.'/ReverseGeocode.php');
require_once(CONST_LibDir.'/SearchDescription.php');
require_once(CONST_LibDir.'/SearchContext.php');
+require_once(CONST_LibDir.'/SearchPosition.php');
require_once(CONST_LibDir.'/TokenList.php');
require_once(CONST_TokenizerDir.'/tokenizer.php');
*/
foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
- $sPhraseType = $oPhrase->getPhraseType();
+ $oPosition = new SearchPosition(
+ $oPhrase->getPhraseType(),
+ $iPhrase,
+ count($aPhrases)
+ );
foreach ($oPhrase->getWordSets() as $aWordset) {
$aWordsetSearches = $aSearches;
// Add all words from this wordset
foreach ($aWordset as $iToken => $sToken) {
$aNewWordsetSearches = array();
+ $oPosition->setTokenPosition($iToken, count($aWordset));
foreach ($aWordsetSearches as $oCurrentSearch) {
- // Tokens with full name matches.
- foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithFullTerm(
- $oSearchTerm,
- $sPhraseType,
- $iToken == 0 && $iPhrase == 0,
- $iPhrase == 0,
- $iToken + 1 == count($aWordset)
- && $iPhrase + 1 == count($aPhrases)
- );
-
- foreach ($aNewSearches as $oSearch) {
- if ($oSearch->getRank() < $this->iMaxRank) {
- $aNewWordsetSearches[] = $oSearch;
- }
- }
- }
- // Look for partial matches.
- // Note that there is no point in adding country terms here
- // because country is omitted in the address.
- if ($sPhraseType != 'country') {
- // Allow searching for a word - but at extra cost
- foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
- $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
- $sToken,
- $oSearchTerm,
- (bool) $sPhraseType,
- $iPhrase,
- $oValidTokens->get(' '.$sToken)
+ foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
+ if ($oSearchTerm->isExtendable($oCurrentSearch, $oPosition)) {
+ $aNewSearches = $oSearchTerm->extendSearch(
+ $oCurrentSearch,
+ $oPosition
);
foreach ($aNewSearches as $oSearch) {
if (!empty($aTokens)) {
$aNewSearches = array();
+ $oPosition = new SearchPosition('', 0, 1);
+ $oPosition->setTokenPosition(0, 1);
+
foreach ($aSearches as $oSearch) {
foreach ($aTokens as $oToken) {
- $oNewSearch = clone $oSearch;
- $oNewSearch->setPoiSearch(
- $oToken->iOperator,
- $oToken->sClass,
- $oToken->sType
+ $aNewSearches = array_merge(
+ $aNewSearches,
+ $oToken->extendSearch($oSearch, $oPosition)
);
- $aNewSearches[] = $oNewSearch;
}
}
$aSearches = $aNewSearches;
return $this->iSearchRank;
}
- /**
- * Make this search a POI search.
- *
- * In a POI search, objects are not (only) searched by their name
- * but also by the primary OSM key/value pair (class and type in Nominatim).
- *
- * @param integer $iOperator Type of POI search
- * @param string $sClass Class (or OSM tag key) of POI.
- * @param string $sType Type (or OSM tag value) of POI.
- *
- * @return void
- */
- public function setPoiSearch($iOperator, $sClass, $sType)
- {
- $this->iOperator = $iOperator;
- $this->sClass = $sClass;
- $this->sType = $sType;
- }
-
- /**
- * Check if any operator is set.
- *
- * @return bool True, if this is a special search operation.
- */
- public function hasOperator()
- {
- return $this->iOperator != Operator::NONE;
- }
-
/**
* Extract key/value pairs from a query.
*
/////////// Search building functions
-
/**
- * Derive new searches by adding a full term to the existing search.
+ * Create a copy of this search description adding to search rank.
*
- * @param object $oSearchTerm Description of the token.
- * @param string $sPhraseType Type of phrase the token is contained in.
- * @param bool $bFirstToken True if the token is at the beginning of the
- * query.
- * @param bool $bFirstPhrase True if the token is in the first phrase of
- * the query.
- * @param bool $bLastToken True if the token is at the end of the query.
+ * @param integer $iTermCost Cost to add to the current search rank.
*
- * @return SearchDescription[] List of derived search descriptions.
+ * @return object Cloned search description.
*/
- public function extendWithFullTerm($oSearchTerm, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+ public function clone($iTermCost)
{
- $aNewSearches = array();
+ $oSearch = clone $this;
+ $oSearch->iSearchRank += $iTermCost;
- if (($sPhraseType == '' || $sPhraseType == 'country')
- && is_a($oSearchTerm, '\Nominatim\Token\Country')
- ) {
- if (!$this->sCountryCode) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
- // Country is almost always at the end of the string
- // - increase score for finding it anywhere else (optimisation)
- if (!$bLastToken) {
- $oSearch->iSearchRank += 5;
- $oSearch->iNamePhrase = -1;
- }
- $aNewSearches[] = $oSearch;
- }
- } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
- && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
- ) {
- if (!$this->sPostcode) {
- // If we have structured search or this is the first term,
- // make the postcode the primary search element.
- if ($this->iOperator == Operator::NONE && $bFirstToken) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- $oSearch->iOperator = Operator::POSTCODE;
- $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
- $oSearch->aName =
- array($oSearchTerm->iId => $oSearchTerm->sPostcode);
- $aNewSearches[] = $oSearch;
- }
+ return $oSearch;
+ }
- // If we have a structured search or this is not the first term,
- // add the postcode as an addendum.
- if ($this->iOperator != Operator::POSTCODE
- && ($sPhraseType == 'postalcode' || !empty($this->aName))
- ) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- $oSearch->iNamePhrase = -1;
- if (strlen($oSearchTerm->sPostcode) < 4) {
- $oSearch->iSearchRank += 4 - strlen($oSearchTerm->sPostcode);
- }
- $oSearch->sPostcode = $oSearchTerm->sPostcode;
- $aNewSearches[] = $oSearch;
- }
- }
- } elseif (($sPhraseType == '' || $sPhraseType == 'street')
- && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
- ) {
- if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
- // sanity check: if the housenumber is not mainly made
- // up of numbers, add a penalty
- $iSearchCost = 1;
- if (preg_match('/\\d/', $oSearchTerm->sToken) === 0
- || preg_match_all('/[^0-9]/', $oSearchTerm->sToken, $aMatches) > 2) {
- $iSearchCost++;
- }
- if ($this->iOperator != Operator::NONE) {
- $iSearchCost++;
- }
- if (empty($oSearchTerm->iId)) {
- $iSearchCost++;
- }
- // also must not appear in the middle of the address
- if (!empty($this->aAddress)
- || (!empty($this->aAddressNonSearch))
- || $this->sPostcode
- ) {
- $iSearchCost++;
- }
+ /**
+ * Check if the search currently includes a name.
+ *
+ * @param bool $bIncludeNonNames If true, name tokens that were stored as
+ * non-searchable (stop-word tokens) are
+ * taken into account, too.
+ *
+ * @return bool True, if the search has at least one searchable name token
+ * or, when $bIncludeNonNames is set, at least one
+ * non-searchable name token.
+ */
+ public function hasName($bIncludeNonNames = false)
+ {
+ return !empty($this->aName)
+ || (!empty($this->aNameNonSearch) && $bIncludeNonNames);
+ }
- $oSearch = clone $this;
- $oSearch->iSearchRank += $iSearchCost;
- $oSearch->iNamePhrase = -1;
- $oSearch->sHouseNumber = $oSearchTerm->sToken;
- $aNewSearches[] = $oSearch;
-
- // Housenumbers may appear in the name when the place has its own
- // address terms.
- if ($oSearchTerm->iId !== null
- && ($this->iNamePhrase >= 0 || empty($this->aName))
- && empty($this->aAddress)
- ) {
- $oSearch = clone $this;
- $oSearch->iSearchRank += $iSearchCost;
- $oSearch->aAddress = $this->aName;
- $oSearch->bRareName = false;
- $oSearch->aName = array($oSearchTerm->iId => $oSearchTerm->iId);
- $aNewSearches[] = $oSearch;
- }
- }
- } elseif ($sPhraseType == ''
- && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
- ) {
- if ($this->iOperator == Operator::NONE) {
- $oSearch = clone $this;
- $oSearch->iSearchRank += 2;
- $oSearch->iNamePhrase = -1;
-
- $iOp = $oSearchTerm->iOperator;
- if ($iOp == Operator::NONE) {
- if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
- $iOp = Operator::NAME;
- } else {
- $iOp = Operator::NEAR;
- }
- $oSearch->iSearchRank += 2;
- } elseif (!$bFirstToken && !$bLastToken) {
- $oSearch->iSearchRank += 2;
- }
- if ($this->sHouseNumber) {
- $oSearch->iSearchRank++;
- }
+ /**
+ * Check if the search currently includes an address term.
+ *
+ * @return bool True, if any address term is included, including stop-word
+ * terms.
+ */
+ public function hasAddress()
+ {
+ return !empty($this->aAddress) || !empty($this->aAddressNonSearch);
+ }
- $oSearch->setPoiSearch(
- $iOp,
- $oSearchTerm->sClass,
- $oSearchTerm->sType
- );
- $aNewSearches[] = $oSearch;
- }
- } elseif ($sPhraseType != 'country'
- && is_a($oSearchTerm, '\Nominatim\Token\Word')
- ) {
- $iWordID = $oSearchTerm->iId;
- // Full words can only be a name if they appear at the beginning
- // of the phrase. In structured search the name must forcably in
- // the first phrase. In unstructured search it may be in a later
- // phrase when the first phrase is a house number.
- if (!empty($this->aName) || !($bFirstPhrase || $sPhraseType == '')) {
- if (($sPhraseType == '' || !$bFirstPhrase) && $oSearchTerm->iTermCount > 1) {
- $oSearch = clone $this;
- $oSearch->iNamePhrase = -1;
- $oSearch->iSearchRank += 1;
- $oSearch->aAddress[$iWordID] = $iWordID;
- $aNewSearches[] = $oSearch;
- }
- } elseif (empty($this->aNameNonSearch)) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- $oSearch->aName = array($iWordID => $iWordID);
- if (CONST_Search_NameOnlySearchFrequencyThreshold) {
- $oSearch->bRareName =
- $oSearchTerm->iSearchNameCount
- < CONST_Search_NameOnlySearchFrequencyThreshold;
- }
- $aNewSearches[] = $oSearch;
- }
- }
+ /**
+ * Check if a country restriction is currently included in the search.
+ *
+ * @return bool True, if a country restriction is set.
+ */
+ public function hasCountry()
+ {
+ return $this->sCountryCode !== '';
+ }
- return $aNewSearches;
+ /**
+ * Check if a postcode is currently included in the search.
+ *
+ * @return bool True, if a postcode is set.
+ */
+ public function hasPostcode()
+ {
+ return $this->sPostcode !== '';
}
/**
- * Derive new searches by adding a partial term to the existing search.
+ * Check if a house number is set for the search.
*
- * @param string $sToken Term for the token.
- * @param object $oSearchTerm Description of the token.
- * @param bool $bStructuredPhrases True if the search is structured.
- * @param integer $iPhrase Number of the phrase the token is in.
- * @param array[] $aFullTokens List of full term tokens with the
- * same name.
+ * @return bool True, if a house number is set.
+ */
+ public function hasHousenumber()
+ {
+ return $this->sHouseNumber !== '';
+ }
+
+ /**
+ * Check if a special type of place is requested.
*
- * @return SearchDescription[] List of derived search descriptions.
+ * @param integer $iOperator When set, check for the particular
+ * operator used for the special type.
+ *
+ * @return bool True, if a special type is requested or, when an operator
+ * is given, if exactly that operator is set.
*/
- public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+ public function hasOperator($iOperator = null)
{
- // Only allow name terms.
- if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))
- || strpos($sToken, ' ') !== false
- ) {
- return array();
+ return $iOperator === null ? $this->iOperator != Operator::NONE : $this->iOperator == $iOperator;
+ }
+
+ /**
+ * Add the given token to the list of terms to search for in the address.
+ *
+ * Searchable tokens go into the address term list, all others into the
+ * separate list of non-searchable address terms.
+ *
+ * @param integer $iId ID of term to add.
+ * @param bool $bSearchable Term should be used to search for result
+ * (i.e. term is not a stop word).
+ *
+ * @return void
+ */
+ public function addAddressToken($iId, $bSearchable = true)
+ {
+ if ($bSearchable) {
+ $this->aAddress[$iId] = $iId;
+ } else {
+ $this->aAddressNonSearch[$iId] = $iId;
}
+ }
- $aNewSearches = array();
- $iWordID = $oSearchTerm->iId;
+ /**
+ * Add the given full-word token to the list of terms to search for in the
+ * name.
+ *
+ * The rare-name flag of the search is overwritten with $bRareName, it is
+ * not combined with a previously set value.
+ *
+ * @param integer $iId ID of term to add.
+ * @param bool $bRareName True if the term is infrequent enough to not
+ * require other constraints for efficient search.
+ *
+ * @return void
+ */
+ public function addNameToken($iId, $bRareName)
+ {
+ $this->aName[$iId] = $iId;
+ $this->bRareName = $bRareName;
+ }
- if ((!$bStructuredPhrases || $iPhrase > 0)
- && (!empty($this->aName))
- ) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- if (preg_match('#^[0-9 ]+$#', $sToken)) {
- $oSearch->iSearchRank++;
- }
- if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
- $oSearch->aAddress[$iWordID] = $iWordID;
- } else {
- $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
- }
- $aNewSearches[] = $oSearch;
+ /**
+ * Add the given partial token to the list of terms to search for in
+ * the name.
+ *
+ * Searchable tokens go into the name term list, all others into the
+ * separate list of non-searchable name terms. In both cases the phrase
+ * number is remembered as the phrase the name belongs to. The rare-name
+ * flag is left untouched.
+ *
+ * @param integer $iId ID of term to add.
+ * @param bool $bSearchable Term should be used to search for result
+ * (i.e. term is not a stop word).
+ * @param integer $iPhraseNumber Index of phrase, where the partial term
+ * appears.
+ *
+ * @return void
+ */
+ public function addPartialNameToken($iId, $bSearchable, $iPhraseNumber)
+ {
+ if ($bSearchable) {
+ $this->aName[$iId] = $iId;
+ } else {
+ $this->aNameNonSearch[$iId] = $iId;
}
+ $this->iNamePhrase = $iPhraseNumber;
+ }
- if ((!$this->sPostcode && !$this->aAddress && !$this->aAddressNonSearch)
- && ((empty($this->aName) && empty($this->aNameNonSearch)) || $this->iNamePhrase == $iPhrase)
- ) {
- $oSearch = clone $this;
- $oSearch->iSearchRank++;
- if (empty($this->aName) && empty($this->aNameNonSearch)) {
- $oSearch->iSearchRank++;
- }
- if (preg_match('#^[0-9 ]+$#', $sToken)) {
- $oSearch->iSearchRank++;
- }
- if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
- if (empty($this->aName)
- && CONST_Search_NameOnlySearchFrequencyThreshold
- ) {
- $oSearch->bRareName =
- $oSearchTerm->iSearchNameCount
- < CONST_Search_NameOnlySearchFrequencyThreshold;
- } else {
- $oSearch->bRareName = false;
- }
- $oSearch->aName[$iWordID] = $iWordID;
- } else {
- if (!empty($aFullTokens)) {
- $oSearch->iSearchRank++;
- }
- $oSearch->aNameNonSearch[$iWordID] = $iWordID;
- }
- $oSearch->iNamePhrase = $iPhrase;
- $aNewSearches[] = $oSearch;
- }
+ /**
+ * Set country restriction for the search.
+ *
+ * Also resets the name phrase index to -1, a value that never equals a
+ * phrase position (presumably to stop further partial terms from being
+ * appended to the name - confirm against the token classes).
+ *
+ * @param string $sCountryCode Country code of country to restrict search to.
+ *
+ * @return void
+ */
+ public function setCountry($sCountryCode)
+ {
+ $this->sCountryCode = $sCountryCode;
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Set postcode search constraint.
+ *
+ * Also resets the name phrase index to -1.
+ *
+ * @param string $sPostcode Postcode the result should have.
+ *
+ * @return void
+ */
+ public function setPostcode($sPostcode)
+ {
+ $this->sPostcode = $sPostcode;
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Make this search a search for a postcode object.
+ *
+ * Switches the operator to Operator::POSTCODE, moves all name terms
+ * collected so far to the address terms and makes the postcode the only
+ * name term. The search is flagged as rare-name search and the name
+ * phrase index is reset to -1.
+ *
+ * Note that array_merge() renumbers integer keys, so the address term
+ * list is no longer keyed by token id after this call.
+ *
+ * @param integer $iId Token Id for the postcode.
+ * @param string $sPostcode Postcode to look for.
+ *
+ * @return void
+ */
+ public function setPostcodeAsName($iId, $sPostcode)
+ {
+ $this->iOperator = Operator::POSTCODE;
+ $this->aAddress = array_merge($this->aAddress, $this->aName);
+ $this->aName = array($iId => $sPostcode);
+ $this->bRareName = true;
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Set house number search constraint.
+ *
+ * Also resets the name phrase index to -1.
+ *
+ * @param string $sNumber House number the result should have.
+ *
+ * @return void
+ */
+ public function setHousenumber($sNumber)
+ {
+ $this->sHouseNumber = $sNumber;
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Make this search a search for a house number.
+ *
+ * Moves all name terms collected so far to the address terms and makes
+ * the house number token the only name term. The rare-name flag is
+ * cleared and the name phrase index is reset to -1. The house number
+ * constraint itself is not set by this function.
+ *
+ * Note that array_merge() renumbers integer keys, so the address term
+ * list is no longer keyed by token id after this call.
+ *
+ * @param integer $iId Token Id for the house number.
+ *
+ * @return void
+ */
+ public function setHousenumberAsName($iId)
+ {
+ $this->aAddress = array_merge($this->aAddress, $this->aName);
+ $this->bRareName = false;
+ $this->aName = array($iId => $iId);
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Make this search a POI search.
+ *
+ * In a POI search, objects are not (only) searched by their name
+ * but also by the primary OSM key/value pair (class and type in Nominatim).
+ *
+ * Also resets the name phrase index to -1.
+ *
+ * @param integer $iOperator Type of POI search
+ * @param string $sClass Class (or OSM tag key) of POI.
+ * @param string $sType Type (or OSM tag value) of POI.
+ *
+ * @return void
+ */
+ public function setPoiSearch($iOperator, $sClass, $sType)
+ {
+ $this->iOperator = $iOperator;
+ $this->sClass = $sClass;
+ $this->sType = $sType;
+ $this->iNamePhrase = -1;
+ }
+
+ /**
+ * Get the index of the phrase the name terms were taken from.
+ *
+ * @return integer Phrase index as last set when adding a partial name
+ * token, or -1 after one of the setters that reset it.
+ */
+ public function getNamePhrase()
+ {
+ return $this->iNamePhrase;
+ }
- return $aNewSearches;
+ /**
+ * Get the global search context.
+ *
+ * @return object Objects of global search constraints.
+ */
+ public function getContext()
+ {
+ return $this->oContext;
}
/////////// Query functions
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+/**
+ * Description of the position of a token within a query.
+ *
+ * The position consists of two parts. The phrase part (phrase type,
+ * zero-based phrase index and total number of phrases) is fixed by the
+ * constructor. The token part (zero-based token index within the phrase
+ * and number of tokens in the phrase) is set with setTokenPosition() and
+ * may be updated repeatedly while iterating over the tokens of a phrase.
+ *
+ * The token part is unset after construction. Call setTokenPosition()
+ * before using isFirstToken() or isLastToken().
+ */
+class SearchPosition
+{
+ private $sPhraseType;
+
+ private $iPhrase;
+ private $iNumPhrases;
+
+ private $iToken;
+ private $iNumTokens;
+
+
+ public function __construct($sPhraseType, $iPhrase, $iNumPhrases)
+ {
+ $this->sPhraseType = $sPhraseType;
+ $this->iPhrase = $iPhrase;
+ $this->iNumPhrases = $iNumPhrases;
+ }
+
+ public function setTokenPosition($iToken, $iNumTokens)
+ {
+ $this->iToken = $iToken;
+ $this->iNumTokens = $iNumTokens;
+ }
+
+ /**
+ * Check if the phrase can be of the given type.
+ *
+ * @param string $sType Type of phrase requested.
+ *
+ * @return bool True if the phrase is untyped (its type is the empty
+ * string) or of the given type.
+ */
+ public function maybePhrase($sType)
+ {
+ return $this->sPhraseType == '' || $this->sPhraseType == $sType;
+ }
+
+ /**
+ * Check if the phrase is exactly of the given type.
+ *
+ * Use the empty string as type to check for an untyped phrase.
+ *
+ * @param string $sType Type of phrase requested.
+ *
+ * @return bool True if the phrase is of the given type.
+ */
+ public function isPhrase($sType)
+ {
+ return $this->sPhraseType == $sType;
+ }
+
+ /**
+ * Return true if the token is the very first in the query.
+ *
+ * @return bool True if this is the first token of the first phrase.
+ */
+ public function isFirstToken()
+ {
+ return $this->iPhrase == 0 && $this->iToken == 0;
+ }
+
+ /**
+ * Check if the token is the final one in the query.
+ *
+ * @return bool True if this is the last token of the last phrase.
+ */
+ public function isLastToken()
+ {
+ return $this->iToken + 1 == $this->iNumTokens && $this->iPhrase + 1 == $this->iNumPhrases;
+ }
+
+ /**
+ * Check if the current token is part of the first phrase in the query.
+ *
+ * @return bool True if the phrase index is 0.
+ */
+ public function isFirstPhrase()
+ {
+ return $this->iPhrase == 0;
+ }
+
+ /**
+ * Get the phrase position in the query.
+ *
+ * @return integer Phrase index as given to the constructor.
+ */
+ public function getPhrase()
+ {
+ return $this->iPhrase;
+ }
+}
class Country
{
/// Database word id, if available.
- public $iId;
+ private $iId;
/// Two-letter country code (lower-cased).
- public $sCountryCode;
+ private $sCountryCode;
public function __construct($iId, $sCountryCode)
{
$this->sCountryCode = $sCountryCode;
}
+ public function getId()
+ {
+ return $this->iId;
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * A country token is accepted when the search has no country restriction
+ * yet and the phrase is either untyped or a country phrase.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oSearch->hasCountry() && $oPosition->maybePhrase('country');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * The derived search is restricted to the country of this token. The
+ * added cost is 1 when the token is the last one in the query and 6
+ * for any other position.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return SearchDescription[] List of derived search descriptions
+ * (always exactly one).
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ $oNewSearch = $oSearch->clone($oPosition->isLastToken() ? 1 : 6);
+ $oNewSearch->setCountry($this->sCountryCode);
+
+ return array($oNewSearch);
+ }
+
public function debugInfo()
{
return array(
'Info' => $this->sCountryCode
);
}
+
+ public function debugCode()
+ {
+ return 'C';
+ }
}
class HouseNumber
{
/// Database word id, if available.
- public $iId;
+ private $iId;
/// Normalized house number.
- public $sToken;
+ private $sToken;
public function __construct($iId, $sToken)
{
$this->sToken = $sToken;
}
+ public function getId()
+ {
+ return $this->iId;
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * A house number token is accepted when the search has no house number
+ * yet, is not a postcode search and the phrase is either untyped or a
+ * street phrase.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oSearch->hasHousenumber()
+ && !$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+ && $oPosition->maybePhrase('street');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * Always derives a search with the token as house number constraint.
+ * When the token has a word id and the search has no address terms
+ * (and either no name or a name phrase index >= 0), a second search is
+ * derived which uses the house number as the name. Both use the same
+ * cost: 1 plus one penalty point for each of the conditions checked
+ * below.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query (not used by this token type).
+ *
+ * @return SearchDescription[] List of derived search descriptions.
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ $aNewSearches = array();
+
+ // sanity check: if the housenumber is not mainly made
+ // up of numbers, add a penalty
+ $iSearchCost = 1;
+ if (preg_match('/\\d/', $this->sToken) === 0
+ || preg_match_all('/[^0-9]/', $this->sToken, $aMatches) > 2) {
+ $iSearchCost++;
+ }
+ if (!$oSearch->hasOperator(\Nominatim\Operator::NONE)) {
+ $iSearchCost++;
+ }
+ if (empty($this->iId)) {
+ $iSearchCost++;
+ }
+ // also must not appear in the middle of the address
+ if ($oSearch->hasAddress() || $oSearch->hasPostcode()) {
+ $iSearchCost++;
+ }
+
+ $oNewSearch = $oSearch->clone($iSearchCost);
+ $oNewSearch->setHousenumber($this->sToken);
+ $aNewSearches[] = $oNewSearch;
+
+ // Housenumbers may appear in the name when the place has its own
+ // address terms.
+ if ($this->iId !== null
+ && ($oSearch->getNamePhrase() >= 0 || !$oSearch->hasName())
+ && !$oSearch->hasAddress()
+ ) {
+ $oNewSearch = $oSearch->clone($iSearchCost);
+ $oNewSearch->setHousenumberAsName($this->iId);
+
+ $aNewSearches[] = $oNewSearch;
+ }
+
+ return $aNewSearches;
+ }
+
+
public function debugInfo()
{
return array(
'Info' => array('nr' => $this->sToken)
);
}
+
+ public function debugCode()
+ {
+ return 'H';
+ }
}
require_once(CONST_LibDir.'/TokenPostcode.php');
require_once(CONST_LibDir.'/TokenSpecialTerm.php');
require_once(CONST_LibDir.'/TokenWord.php');
+require_once(CONST_LibDir.'/TokenPartial.php');
require_once(CONST_LibDir.'/SpecialSearchOperator.php');
/**
* tokens do not have a common base class. All tokens need to have a field
* with the word id that points to an entry in the `word` database table
* but otherwise the information saved about a token can be very different.
- *
- * There are two different kinds of token words: full words and partial terms.
- *
- * Full words start with a space. They represent a complete name of a place.
- * All special tokens are normally full words.
- *
- * Partial terms have no space at the beginning. They may represent a part of
- * a name of a place (e.g. in the name 'World Trade Center' a partial term
- * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
*/
class TokenList
{
*/
public function containsAny($sWord)
{
- return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]);
+ return isset($this->aTokens[$sWord]);
}
/**
foreach ($this->aTokens as $aTokenList) {
foreach ($aTokenList as $oToken) {
- if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
- $ids[$oToken->iId] = $oToken->iId;
+ if (is_a($oToken, '\Nominatim\Token\Word')) {
+ $ids[$oToken->getId()] = $oToken->getId();
}
}
}
$aWordsIDs = array();
foreach ($this->aTokens as $sToken => $aWords) {
foreach ($aWords as $aToken) {
- if ($aToken->iId !== null) {
- $aWordsIDs[$aToken->iId] =
- '#'.$sToken.'('.$aToken->iId.')#';
+ $iId = $aToken->getId();
+ if ($iId !== null) {
+ $aWordsIDs[$iId] = '#'.$sToken.'('.$aToken->debugCode().' '.$iId.')#';
}
}
}
--- /dev/null
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A partial word token, i.e. a term that may match only a part of a name.
+ */
+class Partial
+{
+ /// Database word id, if applicable.
+ private $iId;
+ /// Number of appearances in the database.
+ private $iSearchNameCount;
+ /// True, if the token consists exclusively of digits and spaces.
+ private $bNumberToken;
+
+ public function __construct($iId, $sToken, $iSearchNameCount)
+ {
+ $this->iId = $iId;
+ $this->bNumberToken = (bool) preg_match('#^[0-9 ]+$#', $sToken);
+ $this->iSearchNameCount = $iSearchNameCount;
+ }
+
+ public function getId()
+ {
+ return $this->iId;
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * Partial tokens are accepted everywhere except in country phrases.
+ * The state of the search is not taken into account.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oPosition->isPhrase('country');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * Up to two searches are derived: one that adds the token to the
+ * address terms and one that adds it to the name terms. Tokens with a
+ * count of CONST_Max_Word_Frequency or more are added as
+ * non-searchable terms.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return SearchDescription[] List of derived search descriptions,
+ * may be empty.
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ $aNewSearches = array();
+
+ // Partial token in Address.
+ if (($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+ && $oSearch->hasName()
+ ) {
+ $iSearchCost = $this->bNumberToken ? 2 : 1;
+ if ($this->iSearchNameCount >= CONST_Max_Word_Frequency) {
+ $iSearchCost += 1;
+ }
+
+ $oNewSearch = $oSearch->clone($iSearchCost);
+ $oNewSearch->addAddressToken(
+ $this->iId,
+ $this->iSearchNameCount < CONST_Max_Word_Frequency
+ );
+
+ $aNewSearches[] = $oNewSearch;
+ }
+
+ // Partial token in Name.
+ if ((!$oSearch->hasPostcode() && !$oSearch->hasAddress())
+ && (!$oSearch->hasName(true)
+ || $oSearch->getNamePhrase() == $oPosition->getPhrase())
+ ) {
+ $iSearchCost = 1;
+ if (!$oSearch->hasName(true)) {
+ $iSearchCost += 1;
+ }
+ if ($this->bNumberToken) {
+ $iSearchCost += 1;
+ }
+
+ $oNewSearch = $oSearch->clone($iSearchCost);
+ $oNewSearch->addPartialNameToken(
+ $this->iId,
+ $this->iSearchNameCount < CONST_Max_Word_Frequency,
+ $oPosition->getPhrase()
+ );
+
+ $aNewSearches[] = $oNewSearch;
+ }
+
+ return $aNewSearches;
+ }
+
+
+ public function debugInfo()
+ {
+ return array(
+ 'ID' => $this->iId,
+ 'Type' => 'partial',
+ 'Info' => array(
+ 'count' => $this->iSearchNameCount
+ )
+ );
+ }
+
+ public function debugCode()
+ {
+ return 'w';
+ }
+}
class Postcode
{
/// Database word id, if available.
- public $iId;
+ private $iId;
/// Full nomralized postcode (upper cased).
- public $sPostcode;
+ private $sPostcode;
// Optional country code the postcode belongs to (currently unused).
- public $sCountryCode;
+ private $sCountryCode;
public function __construct($iId, $sPostcode, $sCountryCode = '')
{
$this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
}
+ public function getId()
+ {
+ return $this->iId;
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * A postcode token is accepted when the search has no postcode
+ * constraint yet and the phrase is either untyped or a postalcode
+ * phrase.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oSearch->hasPostcode() && $oPosition->maybePhrase('postalcode');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * Up to two searches are derived: one that looks for the postcode
+ * object itself (cost 1) and one that uses the postcode as an
+ * additional constraint (cost 1, plus one point for each character
+ * the postcode is shorter than 4 characters).
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return SearchDescription[] List of derived search descriptions,
+ * may be empty.
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ $aNewSearches = array();
+
+ // If we have structured search or this is the first term,
+ // make the postcode the primary search element.
+ if ($oSearch->hasOperator(\Nominatim\Operator::NONE) && $oPosition->isFirstToken()) {
+ $oNewSearch = $oSearch->clone(1);
+ $oNewSearch->setPostcodeAsName($this->iId, $this->sPostcode);
+
+ $aNewSearches[] = $oNewSearch;
+ }
+
+ // If we have a structured search or this is not the first term,
+ // add the postcode as an addendum.
+ if (!$oSearch->hasOperator(\Nominatim\Operator::POSTCODE)
+ && ($oPosition->isPhrase('postalcode') || $oSearch->hasName())
+ ) {
+ $iPenalty = 1;
+ if (strlen($this->sPostcode) < 4) {
+ $iPenalty += 4 - strlen($this->sPostcode);
+ }
+ $oNewSearch = $oSearch->clone($iPenalty);
+ $oNewSearch->setPostcode($this->sPostcode);
+
+ $aNewSearches[] = $oNewSearch;
+ }
+
+ return $aNewSearches;
+ }
+
public function debugInfo()
{
return array(
'Info' => $this->sPostcode.'('.$this->sCountryCode.')'
);
}
+
+ public function debugCode()
+ {
+ return 'P';
+ }
}
class SpecialTerm
{
/// Database word id, if applicable.
- public $iId;
+ private $iId;
/// Class (or OSM tag key) of the place to look for.
- public $sClass;
+ private $sClass;
/// Type (or OSM tag value) of the place to look for.
- public $sType;
+ private $sType;
/// Relationship of the operator to the object (see Operator class).
- public $iOperator;
+ private $iOperator;
public function __construct($iID, $sClass, $sType, $iOperator)
{
$this->iOperator = $iOperator;
}
+ public function getId()
+ {
+ return $this->iId;
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * A special term is accepted when the search has no operator set yet
+ * and the phrase is untyped.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oSearch->hasOperator() && $oPosition->isPhrase('');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * The derived search is a POI search for the class and type of this
+ * token. When the token has no operator of its own, Operator::NAME is
+ * used if the search already has a name or the search context is
+ * bounded, and Operator::NEAR otherwise.
+ *
+ * The base cost is 2. Another 2 are added when the token has no
+ * operator or, if it has one, when the token is neither the first nor
+ * the last token of the query. One more point is added when the search
+ * already has a house number.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return SearchDescription[] List of derived search descriptions
+ * (always exactly one).
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ $iSearchCost = 2;
+
+ $iOp = $this->iOperator;
+ if ($iOp == \Nominatim\Operator::NONE) {
+ if ($oSearch->hasName() || $oSearch->getContext()->isBoundedSearch()) {
+ $iOp = \Nominatim\Operator::NAME;
+ } else {
+ $iOp = \Nominatim\Operator::NEAR;
+ }
+ $iSearchCost += 2;
+ } elseif (!$oPosition->isFirstToken() && !$oPosition->isLastToken()) {
+ $iSearchCost += 2;
+ }
+ if ($oSearch->hasHousenumber()) {
+ $iSearchCost ++;
+ }
+
+ $oNewSearch = $oSearch->clone($iSearchCost);
+ $oNewSearch->setPoiSearch($iOp, $this->sClass, $this->sType);
+
+ return array($oNewSearch);
+ }
+
+
public function debugInfo()
{
return array(
)
);
}
+
+ public function debugCode()
+ {
+ return 'S';
+ }
}
/**
 * A word token: carries a word id, the number of appearances of the word
 * in the database and the number of terms the word consists of.
 */
class Word
{
/// Database word id, if applicable.
- public $iId;
/// If true, the word may represent only part of a place name.
- public $bPartial;
+ private $iId;
/// Number of appearances in the database.
- public $iSearchNameCount;
+ private $iSearchNameCount;
/// Number of terms in the word.
- public $iTermCount;
+ private $iTermCount;
- public function __construct($iId, $bPartial, $iSearchNameCount, $iTermCount)
+ public function __construct($iId, $iSearchNameCount, $iTermCount)
{
$this->iId = $iId;
- $this->bPartial = $bPartial;
$this->iSearchNameCount = $iSearchNameCount;
$this->iTermCount = $iTermCount;
}
+ public function getId()
+ {
+ return $this->iId; // database word id passed to the constructor
+ }
+
+ /**
+ * Check if the token can be added to the given search.
+ *
+ * Word tokens are accepted at every position except inside a phrase of
+ * type 'country'. The search itself is not inspected.
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return bool True if the token is compatible with the search
+ * configuration given the position.
+ */
+ public function isExtendable($oSearch, $oPosition)
+ {
+ return !$oPosition->isPhrase('country');
+ }
+
+ /**
+ * Derive new searches by adding this token to an existing search.
+ *
+ * Three outcomes are possible:
+ * - The search already has a name, or the position is neither in the
+ * first phrase nor in a phrase of type '': the word is added as an
+ * address token, but only if it has more than one term and the
+ * position is in a '' phrase or not in the first phrase.
+ * - Otherwise, if hasName(true) is false for the search: the word is
+ * added as a name token. The second argument of addNameToken() is
+ * true when CONST_Search_NameOnlySearchFrequencyThreshold is set and
+ * the word's search-name count lies below it.
+ * - In all remaining cases no search is derived.
+ * Derived searches are created with $oSearch->clone(1).
+ *
+ * @param object $oSearch Partial search description derived so far.
+ * @param object $oPosition Description of the token position within
+ * the query.
+ *
+ * @return SearchDescription[] List of derived search descriptions
+ * (empty or containing one element).
+ */
+ public function extendSearch($oSearch, $oPosition)
+ {
+ // Full words can only be a name if they appear at the beginning
+ // of the phrase. In structured search the name must necessarily be
+ // in the first phrase. In unstructured search it may be in a later
+ // phrase when the first phrase is a house number.
+ if ($oSearch->hasName()
+ || !($oPosition->isFirstPhrase() || $oPosition->isPhrase(''))
+ ) {
+ if ($this->iTermCount > 1
+ && ($oPosition->isPhrase('') || !$oPosition->isFirstPhrase())
+ ) {
+ $oNewSearch = $oSearch->clone(1);
+ $oNewSearch->addAddressToken($this->iId);
+
+ return array($oNewSearch);
+ }
+ } elseif (!$oSearch->hasName(true)) {
+ $oNewSearch = $oSearch->clone(1);
+ $oNewSearch->addNameToken(
+ $this->iId,
+ CONST_Search_NameOnlySearchFrequencyThreshold
+ && $this->iSearchNameCount
+ < CONST_Search_NameOnlySearchFrequencyThreshold
+ );
+
+ return array($oNewSearch);
+ }
+
+ return array(); // token cannot extend this search
+ }
+
public function debugInfo()
{
return array(
'ID' => $this->iId,
'Type' => 'word',
'Info' => array(
- 'partial' => $this->bPartial,
- 'count' => $this->iSearchNameCount
+ 'count' => $this->iSearchNameCount,
+ 'terms' => $this->iTermCount
)
);
}
+
+ public function debugCode()
+ {
+ return 'W'; // fixed one-letter code returned for this token class
+ }
}
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
) {
$oToken = new Token\Country($iId, $aWord['country_code']);
}
+ } elseif ($aWord['word_token'][0] == ' ') {
+ $oToken = new Token\Word(
+ $iId,
+ $aWord['word_token'][0] != ' ',
+ (int) $aWord['count'],
+ substr_count($aWord['word_token'], ' ')
+ );
} else {
- $oToken = new Token\Word(
+ $oToken = new Token\Partial(
$iId,
- $aWord['word_token'][0] != ' ',
- (int) $aWord['count'],
- substr_count($aWord['word_token'], ' ')
+ $aWord['word_token'],
+ (int) $aWord['count']
);
}
if ($oToken) {
- $oValidTokens->addToken($aWord['word_token'], $oToken);
+ // remove any leading spaces
+ if ($aWord['word_token'][0] == ' ') {
+ $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+ } else {
+ $oValidTokens->addToken($aWord['word_token'], $oToken);
+ }
}
}
}
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
) {
$oToken = new Token\Country($iId, $aWord['country_code']);
}
- } else {
+ } elseif ($aWord['word_token'][0] == ' ') {
$oToken = new Token\Word(
$iId,
- $aWord['word_token'][0] != ' ',
(int) $aWord['count'],
substr_count($aWord['word_token'], ' ')
);
+ // For backward compatibility: ignore all partial tokens with more
+ // than one word.
+ } elseif (strpos($aWord['word_token'], ' ') === false) {
+ $oToken = new Token\Partial(
+ $iId,
+ $aWord['word_token'],
+ (int) $aWord['count']
+ );
}
if ($oToken) {
- $oValidTokens->addToken($aWord['word_token'], $oToken);
+ // remove any leading spaces
+ if ($aWord['word_token'][0] == ' ') {
+ $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+ } else {
+ $oValidTokens->addToken($aWord['word_token'], $oToken);
+ }
}
}
}