5 require_once(CONST_BasePath.'/lib/TokenCountry.php');
6 require_once(CONST_BasePath.'/lib/TokenHousenumber.php');
7 require_once(CONST_BasePath.'/lib/TokenPostcode.php');
8 require_once(CONST_BasePath.'/lib/TokenSpecialTerm.php');
9 require_once(CONST_BasePath.'/lib/TokenWord.php');
10 require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
13 * Saves information about the tokens that appear in a search query.
15 * Tokens are sorted by their normalized form, the token word. There are different
16 * kinds of tokens, represented by different Token* classes. Note that
17 * tokens do not have a common base class. All tokens need to have a field
18 * with the word id that points to an entry in the `word` database table
19 * but otherwise the information saved about a token can be very different.
21 * There are two different kinds of token words: full words and partial terms.
23 * Full words start with a space. They represent a complete name of a place.
24 * All special tokens are normally full words.
26 * Partial terms have no space at the beginning. They may represent a part of
27 * a name of a place (e.g. in the name 'World Trade Center' a partial term
28 * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
32 // Token objects grouped by token word: maps each word_token string to the
// list of tokens that were registered for it (see addToken()).
33 private $aTokens = array();
/**
 * Report how many token words are stored.
 *
 * Tokens sharing a token word are kept in one list, so this is the number
 * of distinct token words, not the number of individual token objects.
 *
 * @return int Number of entries in the token table.
 */
public function count()
{
    $iNumWords = count($this->aTokens);

    return $iNumWords;
}
/**
 * Tell whether any token is registered under exactly this token word.
 *
 * @param string $sWord Token word to look for.
 *
 * @return bool True if at least one token exists for the token word.
 */
public function contains($sWord)
{
    $bKnown = isset($this->aTokens[$sWord]);

    return $bKnown;
}
/**
 * Tell whether the word is known either as given or as a full word.
 *
 * The lookup is done twice: once with the word unchanged and once with a
 * single space put in front of it (the form used for full words).
 *
 * @param string $sWord Token word to look for, without leading space.
 *
 * @return bool True if at least one token exists under either form.
 */
public function containsAny($sWord)
{
    if (isset($this->aTokens[$sWord])) {
        return true;
    }

    return isset($this->aTokens[' '.$sWord]);
}
/**
 * Fetch all tokens registered for a token word.
 *
 * @param string $sWord Token word to look for.
 *
 * @return object[] Tokens stored for the token word, or an empty array
 *                  when the word is unknown.
 */
public function get($sWord)
{
    if (!isset($this->aTokens[$sWord])) {
        return array();
    }

    return $this->aTokens[$sWord];
}
/**
 * Collect the word ids of all full-word tokens.
 *
 * Only tokens that are instances of \Nominatim\Token\Word and are not
 * flagged as partial are considered; every other token is skipped.
 *
 * @return array Word ids, keyed by the id itself so that each id is
 *               listed only once. Empty when no token qualifies.
 */
public function getFullWordIDs()
{
    // Start from an empty result so that a list without any matching
    // token yields an empty array instead of an undefined variable.
    $ids = array();

    foreach ($this->aTokens as $aTokenList) {
        foreach ($aTokenList as $oToken) {
            if (is_a($oToken, '\Nominatim\Token\Word') && !$oToken->bPartial) {
                $ids[$oToken->iId] = $oToken->iId;
            }
        }
    }

    return $ids;
}
99 * Add token information from the word table in the database.
101 * @param object $oDB Nominatim::DB instance.
102 * @param string[] $aTokens List of tokens to look up in the database.
103 * @param string[] $aCountryCodes List of country restrictions.
104 * @param string $sNormQuery Normalized query string.
105 * @param object|null $oNormalizer Normalizer object whose transliterate() method is applied to special-term words; may be null to compare the words as stored.
109 public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
// Looks up every entry of $aTokens in the `word` table and registers a token
// object for matching rows through addToken().
// NOTE(review): this listing appears to omit some short lines (braces, a few
// constructor arguments, the opening half of two conditions). The notes
// below describe only what the visible lines establish.
111 // Check which tokens we have, get the ID numbers
112 $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
113 $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
114 $sSQL .= ' FROM word WHERE word_token in (';
// The token strings go through getDBQuotedList() before they are joined
// into the IN (...) list.
115 $sSQL .= join(',', $oDB->getDBQuotedList($aTokens)).')';
117 Debug::printSQL($sSQL);
119 $aDBWords = $oDB->getAll($sSQL, null, 'Could not get word tokens.');
// Which kind of token object is built for a row depends on which of its
// columns are set.
121 foreach ($aDBWords as $aWord) {
123 $iId = (int) $aWord['word_id'];
// Rows with a class describe special tokens: house number, postcode or
// special term.
125 if ($aWord['class']) {
126 // Special terms need to appear in their normalized form.
127 if ($aWord['word']) {
128 $sNormWord = $aWord['word'];
// Without a normalizer the word is compared exactly as stored.
129 if ($oNormalizer != null) {
130 $sNormWord = $oNormalizer->transliterate($aWord['word']);
// The (possibly transliterated) word has to occur literally in the
// normalized query string.
// NOTE(review): the body of this check is not visible here; presumably
// the row is skipped when the word is absent. Confirm.
132 if (strpos($sNormQuery, $sNormWord) === false) {
137 if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
// trim() removes surrounding whitespace from the token word, which
// includes the leading space carried by full words.
138 $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
139 } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
// Second half of a condition: the word must come back unchanged from
// pg_escape_string(), i.e. contain nothing that needs SQL escaping.
// NOTE(review): pg_escape_string() is called without an explicit
// connection; relying on the default connection is deprecated as of
// PHP 8.1.
141 && pg_escape_string($aWord['word']) == $aWord['word']
143 $oToken = new Token\Postcode(
146 $aWord['country_code']
150 // near and in operator the same at the moment
151 $oToken = new Token\SpecialTerm(
// Any non-empty operator value is mapped to Operator::NEAR.
155 $aWord['operator'] ? Operator::NEAR : Operator::NONE
158 } elseif ($aWord['country_code']) {
159 // Filter country tokens that do not match restricted countries.
// NOTE(review): in_array() is used without the strict flag, so the
// country code is compared loosely.
161 || in_array($aWord['country_code'], $aCountryCodes)
163 $oToken = new Token\Country($iId, $aWord['country_code']);
// Remaining rows are ordinary words.
166 $oToken = new Token\Word(
// True when the token word does not start with a space, i.e. for
// partial terms.
168 $aWord['word_token'][0] != ' ',
169 (int) $aWord['count'],
// Number of spaces in the token word.
// NOTE(review): presumably used as a term count; verify against
// Token\Word.
170 substr_count($aWord['word_token'], ' ')
// The token is filed under the unmodified word_token of its row.
175 $this->addToken($aWord['word_token'], $oToken);
/**
 * Register a token under the given token word.
 *
 * Tokens added for the same word accumulate in insertion order.
 *
 * @param string $sWord  Word the token describes.
 * @param object $oToken Token object to add.
 *
 * @return void
 */
public function addToken($sWord, $oToken)
{
    // Make sure a list exists for the word before appending to it.
    if (!isset($this->aTokens[$sWord])) {
        $this->aTokens[$sWord] = array();
    }

    $this->aTokens[$sWord][] = $oToken;
}
/**
 * Build a lookup table from word id to a printable token label.
 *
 * Tokens whose word id is null are left out. When several tokens share a
 * word id, the label of the one visited last is kept.
 *
 * @return string[] Labels of the form '#<token word>(<word id>)#',
 *                  indexed by word id.
 */
public function debugTokenByWordIdList()
{
    $aWordsIDs = array();

    foreach ($this->aTokens as $sToken => $aWords) {
        foreach ($aWords as $oToken) {
            if ($oToken->iId !== null) {
                $aWordsIDs[$oToken->iId] =
                    '#'.$sToken.'('.$oToken->iId.')#';
            }
        }
    }

    // Hand the finished table back to the caller; without this the work
    // done above would be discarded.
    return $aWordsIDs;
}
/**
 * Expose the internal token table for debugging output.
 *
 * @return array Token lists indexed by token word.
 */
public function debugInfo()
{
    $aInfo = $this->aTokens;

    return $aInfo;
}