X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/044bb6afa53f2c799490d8a95ca050c8b755ca4c..771aee8cd80f9dc5303331053e9b08a901a620e0:/lib-php/Phrase.php diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php index d14c842d..4307a230 100644 --- a/lib-php/Phrase.php +++ b/lib-php/Phrase.php @@ -9,29 +9,14 @@ namespace Nominatim; */ class Phrase { - const MAX_WORDSET_LEN = 20; - const MAX_WORDSETS = 100; - - // Complete phrase as a string. + // Complete phrase as a string (guaranteed to have no leading or trailing + // spaces). private $sPhrase; // Element type for structured searches. private $sPhraseType; // Possible segmentations of the phrase. private $aWordSets; - public static function cmpByArraylen($aA, $aB) - { - $iALen = count($aA); - $iBLen = count($aB); - - if ($iALen == $iBLen) { - return 0; - } - - return ($iALen < $iBLen) ? -1 : 1; - } - - public function __construct($sPhrase, $sPhraseType) { $this->sPhrase = trim($sPhrase); @@ -57,6 +42,11 @@ class Phrase return $this->sPhraseType; } + public function setWordSets($aWordSets) + { + $this->aWordSets = $aWordSets; + } + /** * Return the array of possible segmentations of the phrase. * @@ -80,61 +70,6 @@ class Phrase } } - public function computeWordSets($aWords, $oTokens) - { - $iNumWords = count($aWords); - - if ($iNumWords == 0) { - $this->aWordSets = null; - return; - } - - // Caches the word set for the partial phrase up to word i. - $aSetCache = array_fill(0, $iNumWords, array()); - - // Initialise first element of cache. There can only be the word. - if ($oTokens->containsAny($aWords[0])) { - $aSetCache[0][] = array($aWords[0]); - } - - // Now do the next elements using what we already have. - for ($i = 1; $i < $iNumWords; $i++) { - for ($j = $i; $j > 0; $j--) { - $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial; - if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) { - $aPartial = array($sPartial); - foreach ($aSetCache[$j - 1] as $aSet) { - if (count($aSet) < Phrase::MAX_WORDSET_LEN) { - $aSetCache[$i][] = array_merge($aSet, $aPartial); - } - } - if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) { - usort( - $aSetCache[$i], - array('\Nominatim\Phrase', 'cmpByArraylen') - ); - $aSetCache[$i] = array_slice( - $aSetCache[$i], - 0, - Phrase::MAX_WORDSETS - ); - } - } - } - - // finally the current full phrase - $sPartial = $aWords[0].' '.$sPartial; - if ($oTokens->containsAny($sPartial)) { - $aSetCache[$i][] = array($sPartial); - } - } - - $this->aWordSets = $aSetCache[$iNumWords - 1]; - usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen')); - $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS); - } - - public function debugInfo() { return array(