X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8b1a509442a3fa051146f82b8293126916ad8617..2cae37ccdee1fc5bad88a492af91020ad4060069:/lib-php/Phrase.php diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php index d14c842d..4ed4d402 100644 --- a/lib-php/Phrase.php +++ b/lib-php/Phrase.php @@ -1,4 +1,12 @@ sPhrase = trim($sPhrase); @@ -39,7 +32,7 @@ class Phrase } /** - * Get the orginal phrase of the string. + * Get the original phrase of the string. */ public function getPhrase() { @@ -57,6 +50,11 @@ class Phrase return $this->sPhraseType; } + public function setWordSets($aWordSets) + { + $this->aWordSets = $aWordSets; + } + /** * Return the array of possible segmentations of the phrase. * @@ -80,61 +78,6 @@ class Phrase } } - public function computeWordSets($aWords, $oTokens) - { - $iNumWords = count($aWords); - - if ($iNumWords == 0) { - $this->aWordSets = null; - return; - } - - // Caches the word set for the partial phrase up to word i. - $aSetCache = array_fill(0, $iNumWords, array()); - - // Initialise first element of cache. There can only be the word. - if ($oTokens->containsAny($aWords[0])) { - $aSetCache[0][] = array($aWords[0]); - } - - // Now do the next elements using what we already have. - for ($i = 1; $i < $iNumWords; $i++) { - for ($j = $i; $j > 0; $j--) { - $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial; - if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) { - $aPartial = array($sPartial); - foreach ($aSetCache[$j - 1] as $aSet) { - if (count($aSet) < Phrase::MAX_WORDSET_LEN) { - $aSetCache[$i][] = array_merge($aSet, $aPartial); - } - } - if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) { - usort( - $aSetCache[$i], - array('\Nominatim\Phrase', 'cmpByArraylen') - ); - $aSetCache[$i] = array_slice( - $aSetCache[$i], - 0, - Phrase::MAX_WORDSETS - ); - } - } - } - - // finally the current full phrase - $sPartial = $aWords[0].' '.$sPartial; - if ($oTokens->containsAny($sPartial)) { - $aSetCache[$i][] = array($sPartial); - } - } - - $this->aWordSets = $aSetCache[$iNumWords - 1]; - usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen')); - $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS); - } - - public function debugInfo() { return array(