X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/f29c7bf910ea36fdc2cc70ba63c6dcece79c7b6c..ff85da0a31874050c32712d9bb1d1a5592c50a81:/lib/TokenList.php?ds=inline diff --git a/lib/TokenList.php b/lib/TokenList.php index 1dcaa7f5..1b6a1dcf 100644 --- a/lib/TokenList.php +++ b/lib/TokenList.php @@ -12,26 +12,93 @@ require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); /** * Saves information about the tokens that appear in a search query. * + * Tokens are sorted by their normalized form, the token word. There are different + * kinds of tokens, represented by different Token* classes. Note that + * tokens do not have a common base class. All tokens need to have a field + * with the word id that points to an entry in the `word` database table + * but otherwise the information saved about a token can be very different. + * + * There are two different kinds of token words: full words and partial terms. + * + * Full words start with a space. They represent a complete name of a place. + * All special tokens are normally full words. + * + * Partial terms have no space at the beginning. They may represent a part of + * a name of a place (e.g. in the name 'World Trade Center' a partial term + * would be 'Trade' or 'Trade Center'). They are only used in TokenWord. */ class TokenList { // List of list of tokens indexed by their word_token. private $aTokens = array(); + + /** + * Return total number of tokens. + * + * @return Integer + */ + public function count() + { + return count($this->aTokens); + } + + /** + * Check if there are tokens for the given token word. + * + * @param string $sWord Token word to look for. + * + * @return bool True if there is one or more token for the token word. + */ public function contains($sWord) { return isset($this->aTokens[$sWord]); } + /** + * Check if there are partial or full tokens for the given word. + * + * @param string $sWord Token word to look for. + * + * @return bool True if there is one or more token for the token word. + */ + public function containsAny($sWord) + { + return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]); + } + + /** + * Get the list of tokens for the given token word. + * + * @param string $sWord Token word to look for. + * + * @return object[] Array of tokens for the given token word or an + * empty array if no tokens could be found. + */ public function get($sWord) { return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array(); } + public function getFullWordIDs() + { + $ids = array(); + + foreach ($this->aTokens as $aTokenList) { + foreach ($aTokenList as $oToken) { + if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) { + $ids[$oToken->iId] = $oToken->iId; + } + } + } + + return $ids; + } + /** * Add token information from the word table in the database. * - * @param object $oDB Database connection. + * @param object $oDB Nominatim::DB instance. * @param string[] $aTokens List of tokens to look up in the database. * @param string[] $aCountryCodes List of country restrictions. * @param string $sNormQuery Normalized query string. @@ -45,11 +112,11 @@ class TokenList $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,'; $sSQL .= ' operator, coalesce(search_name_count, 0) as count'; $sSQL .= ' FROM word WHERE word_token in ('; - $sSQL .= join(',', array_map('getDBQuoted', $aTokens)).')'; + $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')'; Debug::printSQL($sSQL); - $aDBWords = chksql($oDB->getAll($sSQL), 'Could not get word tokens.'); + $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.'); foreach ($aDBWords as $aWord) { $oToken = null; @@ -85,7 +152,7 @@ class TokenList $iId, $aWord['class'], $aWord['type'], - $aWord['operator'] ? Operator::NONE : Operator::NEAR + $aWord['operator'] ? Operator::NEAR : Operator::NONE ); } } elseif ($aWord['country_code']) { @@ -98,8 +165,9 @@ class TokenList } else { $oToken = new Token\Word( $iId, - $aWord['word'][0] != ' ', - (int) $aWord['count'] + $aWord['word_token'][0] != ' ', + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') ); }