X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/925195725dfcb7f1a6795c50244c1df6cb7242ce..674185a0651a4f6719e7fe71e00cca147783a1f4:/lib-php/tokenizer/icu_tokenizer.php?ds=sidebyside

diff --git a/lib-php/tokenizer/icu_tokenizer.php b/lib-php/tokenizer/icu_tokenizer.php
index ca224a22..ccce99ca 100644
--- a/lib-php/tokenizer/icu_tokenizer.php
+++ b/lib-php/tokenizer/icu_tokenizer.php
@@ -1,4 +1,12 @@ oNormalizer->transliterate($sTerm);
     }
+
+    public function mostFrequentWords($iNum)
+    {
+        $sSQL = "SELECT word FROM word WHERE type = 'W'"; // full words only, ranked below by stored count
+        $sSQL .= " ORDER BY info->'count' DESC LIMIT ".(int) $iNum; // leading space separates clauses; cast keeps LIMIT a plain integer
+        return $this->oDB->getCol($sSQL);
+    }
+
+
     private function makeStandardWord($sTerm)
     {
         return trim($this->oTransliterator->transliterate(' '.$sTerm.' '));
@@ -140,7 +157,8 @@ class Tokenizer
         $sSQL = 'SELECT word_id, word_token, type, word,';
         $sSQL .= " info->>'op' as operator,";
         $sSQL .= " info->>'class' as class, info->>'type' as ctype,";
-        $sSQL .= " info->>'count' as count";
+        $sSQL .= " info->>'count' as count,";
+        $sSQL .= " info->>'lookup' as lookup";
         $sSQL .= ' FROM word WHERE word_token in (';
         $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')';
 
@@ -162,7 +180,8 @@ class Tokenizer
                     }
                     break;
                 case 'H': // house number tokens
-                    $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $aWord['word_token']));
+                    $sLookup = $aWord['lookup'] ?? $aWord['word_token'];
+                    $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $sLookup));
                     break;
                 case 'P': // postcode tokens
                     // Postcodes are not normalized, so they may have content