X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/f29c7bf910ea36fdc2cc70ba63c6dcece79c7b6c..5b20fa7e38d81d0a7e63aaf3c64c3ab80916ff38:/lib/TokenList.php diff --git a/lib/TokenList.php b/lib/TokenList.php index 1dcaa7f5..fce5f940 100644 --- a/lib/TokenList.php +++ b/lib/TokenList.php @@ -12,17 +12,69 @@ require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php'); /** * Saves information about the tokens that appear in a search query. * + * Tokens are sorted by their normalized form, the token word. There are different + * kinds of tokens, represented by different Token* classes. Note that + * tokens do not have a common base class. All tokens need to have a field + * with the word id that points to an entry in the `word` database table + * but otherwise the information saved about a token can be very different. + * + * There are two different kinds of token words: full words and partial terms. + * + * Full words start with a space. They represent a complete name of a place. + * All special tokens are normally full words. + * + * Partial terms have no space at the beginning. They may represent a part of + * a name of a place (e.g. in the name 'World Trade Center' a partial term + * would be 'Trade' or 'Trade Center'). They are only used in TokenWord. */ class TokenList { // List of list of tokens indexed by their word_token. private $aTokens = array(); + + /** + * Return total number of tokens. + * + * @return Integer + */ + public function count() + { + return count($this->aTokens); + } + + /** + * Check if there are tokens for the given token word. + * + * @param string $sWord Token word to look for. + * + * @return bool True if there is one or more token for the token word. + */ public function contains($sWord) { return isset($this->aTokens[$sWord]); } + /** + * Check if there are partial or full tokens for the given word. + * + * @param string $sWord Token word to look for. + * + * @return bool True if there is one or more token for the token word. + */ + public function containsAny($sWord) + { + return isset($this->aTokens[$sWord]) || isset($this->aTokens[' '.$sWord]); + } + + /** + * Get the list of tokens for the given token word. + * + * @param string $sWord Token word to look for. + * + * @return object[] Array of tokens for the given token word or an + * empty array if no tokens could be found. + */ public function get($sWord) { return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array(); @@ -31,7 +83,7 @@ class TokenList /** * Add token information from the word table in the database. * - * @param object $oDB Database connection. + * @param object $oDB Nominatim::DB instance. * @param string[] $aTokens List of tokens to look up in the database. * @param string[] $aCountryCodes List of country restrictions. * @param string $sNormQuery Normalized query string. @@ -45,11 +97,11 @@ class TokenList $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,'; $sSQL .= ' operator, coalesce(search_name_count, 0) as count'; $sSQL .= ' FROM word WHERE word_token in ('; - $sSQL .= join(',', array_map('getDBQuoted', $aTokens)).')'; + $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')'; Debug::printSQL($sSQL); - $aDBWords = chksql($oDB->getAll($sSQL), 'Could not get word tokens.'); + $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.'); foreach ($aDBWords as $aWord) { $oToken = null; @@ -85,7 +137,7 @@ class TokenList $iId, $aWord['class'], $aWord['type'], - $aWord['operator'] ? Operator::NONE : Operator::NEAR + $aWord['operator'] ? Operator::NEAR : Operator::NONE ); } } elseif ($aWord['country_code']) { @@ -98,7 +150,7 @@ class TokenList } else { $oToken = new Token\Word( $iId, - $aWord['word'][0] != ' ', + $aWord['word_token'][0] != ' ', (int) $aWord['count'] ); }