X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/4ae5ba7fc4a3451c806d89a583158412ea26bd01..452021ef0c9ac746949a2ef7fd12db4c2d5fee35:/lib-php/tokenizer/legacy_tokenizer.php

Patch summary (class Tokenizer in lib-php/tokenizer/legacy_tokenizer.php):

  1. Adds mostFrequentWords($iNum): selects the `word` column from the
     `word` table for rows where word is not null, ordered by
     search_name_count descending and limited to $iNum rows, and returns
     whatever $this->oDB->getCol() yields for that query.
  2. While the make_standard_name() bind parameters are built per phrase,
     a phrase that lower-cases to 'il', 'al' or 'la' is bound as
     'illinois', 'alabama' or 'louisiana' respectively; any other phrase
     is bound unchanged.

Review notes:
  * $iNum is concatenated into the SQL text instead of being bound as a
    parameter, so callers must only pass a trusted integer.
  * The `default` branch of the new switch repeats the assignment made on
    the context line directly above the switch.
  * Line breaks and indentation were reconstructed from a
    whitespace-collapsed copy of this diff. Leading whitespace assumes
    4-space indentation; verify against the repository, or apply with
    `git apply --ignore-whitespace`.

diff --git a/lib-php/tokenizer/legacy_tokenizer.php b/lib-php/tokenizer/legacy_tokenizer.php
index e5ffbe02..d5686f64 100644
--- a/lib-php/tokenizer/legacy_tokenizer.php
+++ b/lib-php/tokenizer/legacy_tokenizer.php
@@ -48,6 +48,14 @@ class Tokenizer
     }
 
 
+    public function mostFrequentWords($iNum)
+    {
+        $sSQL = 'SELECT word FROM word WHERE word is not null ';
+        $sSQL .= 'ORDER BY search_name_count DESC LIMIT '.$iNum;
+        return $this->oDB->getCol($sSQL);
+    }
+
+
     public function tokensForSpecialTerm($sTerm)
     {
         $aResults = array();
@@ -87,6 +95,23 @@ class Tokenizer
             $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
             $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
             $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+
+            // Conflicts between US state abbreviations and various words
+            // for 'the' in different languages
+            switch (strtolower($oPhrase->getPhrase())) {
+                case 'il':
+                    $aParams[':'.$iPhrase] = 'illinois';
+                    break;
+                case 'al':
+                    $aParams[':'.$iPhrase] = 'alabama';
+                    break;
+                case 'la':
+                    $aParams[':'.$iPhrase] = 'louisiana';
+                    break;
+                default:
+                    $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+                    break;
+            }
         }
         $sSQL = substr($sSQL, 0, -1);