X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8413075249e1bb2832df4edd0f66d61f77fb9f99..6070c3d1d58fb8737b387e8a3ef1f17fb1eb5d54:/lib-php/tokenizer/legacy_icu_tokenizer.php?ds=sidebyside diff --git a/lib-php/tokenizer/legacy_icu_tokenizer.php b/lib-php/tokenizer/legacy_icu_tokenizer.php index 92dd7272..8cff6f32 100644 --- a/lib-php/tokenizer/legacy_icu_tokenizer.php +++ b/lib-php/tokenizer/legacy_icu_tokenizer.php @@ -195,17 +195,27 @@ class Tokenizer ) { $oToken = new Token\Country($iId, $aWord['country_code']); } + } elseif ($aWord['word_token'][0] == ' ') { + $oToken = new Token\Word( + $iId, + $aWord['word_token'][0] != ' ', + (int) $aWord['count'], + substr_count($aWord['word_token'], ' ') + ); } else { - $oToken = new Token\Word( + $oToken = new Token\Partial( $iId, - $aWord['word_token'][0] != ' ', - (int) $aWord['count'], - substr_count($aWord['word_token'], ' ') + (int) $aWord['count'] ); } if ($oToken) { - $oValidTokens->addToken($aWord['word_token'], $oToken); + // remove any leading spaces + if ($aWord['word_token'][0] == ' ') { + $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken); + } else { + $oValidTokens->addToken($aWord['word_token'], $oToken); + } } } }