private $oDB;
private $oNormalizer = null;
- private $aCountryRestriction = null;
public function __construct(&$oDB)
{
{
$sStandardWord = $this->oDB->getOne("SELECT make_standard_name('a')");
if ($sStandardWord === false) {
- throw new Exception('Module failed', 701);
+ throw new \Exception('Module failed', 701);
}
if ($sStandardWord != 'a') {
- throw new Exception('Module call failed', 702);
+ throw new \Exception('Module call failed', 702);
}
$sSQL = "SELECT word_id FROM word WHERE word_token IN (' a')";
$iWordID = $this->oDB->getOne($sSQL);
if ($iWordID === false) {
- throw new Exception('Query failed', 703);
+ throw new \Exception('Query failed', 703);
}
if (!$iWordID) {
- throw new Exception('No value', 704);
+ throw new \Exception('No value', 704);
}
}
- public function setCountryRestriction($aCountries)
- {
- $this->aCountryRestriction = $aCountries;
- }
-
-
public function normalizeString($sTerm)
{
if ($this->oNormalizer === null) {
// now compute all possible tokens
$aWordLists = array();
$aTokens = array();
- foreach ($aNormPhrases as $sTitle => $sPhrase) {
+ foreach ($aNormPhrases as $sPhrase) {
if (strlen($sPhrase) > 0) {
$aWords = explode(' ', $sPhrase);
Tokenizer::addTokens($aTokens, $aWords);
// Try more interpretations for Tokens that could not be matched.
foreach ($aTokens as $sToken) {
- if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
- if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+ if ($sToken[0] != ' ' && !$oValidTokens->contains($sToken)) {
+ if (preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
// US ZIP+4 codes - merge in the 5-digit ZIP code
$oValidTokens->addToken(
$sToken,
new Token\Postcode(null, $aData[1], 'us')
);
- } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+ } elseif (preg_match('/^[0-9]+$/', $sToken)) {
// Unknown single word token with a number.
// Assume it is a house number.
$oValidTokens->addToken(
);
}
} elseif ($aWord['country_code']) {
- // Filter country tokens that do not match restricted countries.
- if (!$this->aCountryRestriction
- || in_array($aWord['country_code'], $this->aCountryRestriction)
- ) {
- $oToken = new Token\Country($iId, $aWord['country_code']);
- }
- } else {
+ $oToken = new Token\Country($iId, $aWord['country_code']);
+ } elseif ($aWord['word_token'][0] == ' ') {
$oToken = new Token\Word(
$iId,
- $aWord['word_token'][0] != ' ',
(int) $aWord['count'],
substr_count($aWord['word_token'], ' ')
);
+ // For backward compatibility: ignore all partial tokens with more
+ // than one word.
+ } elseif (strpos($aWord['word_token'], ' ') === false) {
+ $oToken = new Token\Partial(
+ $iId,
+ $aWord['word_token'],
+ (int) $aWord['count']
+ );
}
if ($oToken) {
- $oValidTokens->addToken($aWord['word_token'], $oToken);
+ // Strip the single leading space, if present (only the first character is removed).
+ if ($aWord['word_token'][0] == ' ') {
+ $oValidTokens->addToken(substr($aWord['word_token'], 1), $oToken);
+ } else {
+ $oValidTokens->addToken($aWord['word_token'], $oToken);
+ }
}
}
}