X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/db3ced17bbfff00411f506d8c84419c875959d5e..74be6828dd971abbf670435d8b6db4834778c28d:/lib-php/Phrase.php diff --git a/lib-php/Phrase.php b/lib-php/Phrase.php index e2643e87..4307a230 100644 --- a/lib-php/Phrase.php +++ b/lib-php/Phrase.php @@ -9,36 +9,26 @@ namespace Nominatim; */ class Phrase { - const MAX_WORDSET_LEN = 20; - const MAX_WORDSETS = 100; - - // Complete phrase as a string. + // Complete phrase as a string (guaranteed to have no leading or trailing + // spaces). private $sPhrase; // Element type for structured searches. private $sPhraseType; - // Space-separated words of the phrase. - private $aWords; // Possible segmentations of the phrase. private $aWordSets; - public static function cmpByArraylen($aA, $aB) - { - $iALen = count($aA); - $iBLen = count($aB); - - if ($iALen == $iBLen) { - return 0; - } - - return ($iALen < $iBLen) ? -1 : 1; - } - - public function __construct($sPhrase, $sPhraseType) { $this->sPhrase = trim($sPhrase); $this->sPhraseType = $sPhraseType; - $this->aWords = explode(' ', $this->sPhrase); + } + + /** + * Get the original phrase as a string. + */ + public function getPhrase() + { + return $this->sPhrase; } /** @@ -52,6 +42,11 @@ class Phrase return $this->sPhraseType; } + public function setWordSets($aWordSets) + { + $this->aWordSets = $aWordSets; + } + /** * Return the array of possible segmentations of the phrase. * @@ -63,30 +58,6 @@ class Phrase return $this->aWordSets; } - /** - * Add the tokens from this phrase to the given list of tokens. - * - * @param string[] $aTokens List of tokens to append. 
- * - * @return void - */ - public function addTokens(&$aTokens) - { - $iNumWords = count($this->aWords); - - for ($i = 0; $i < $iNumWords; $i++) { - $sPhrase = $this->aWords[$i]; - $aTokens[' '.$sPhrase] = ' '.$sPhrase; - $aTokens[$sPhrase] = $sPhrase; - - for ($j = $i + 1; $j < $iNumWords; $j++) { - $sPhrase .= ' '.$this->aWords[$j]; - $aTokens[' '.$sPhrase] = ' '.$sPhrase; - $aTokens[$sPhrase] = $sPhrase; - } - } - } - /** * Invert the set of possible segmentations. * @@ -99,61 +70,11 @@ class Phrase } } - public function computeWordSets($oTokens) - { - $iNumWords = count($this->aWords); - // Caches the word set for the partial phrase up to word i. - $aSetCache = array_fill(0, $iNumWords, array()); - - // Initialise first element of cache. There can only be the word. - if ($oTokens->containsAny($this->aWords[0])) { - $aSetCache[0][] = array($this->aWords[0]); - } - - // Now do the next elements using what we already have. - for ($i = 1; $i < $iNumWords; $i++) { - for ($j = $i; $j > 0; $j--) { - $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial; - if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) { - $aPartial = array($sPartial); - foreach ($aSetCache[$j - 1] as $aSet) { - if (count($aSet) < Phrase::MAX_WORDSET_LEN) { - $aSetCache[$i][] = array_merge($aSet, $aPartial); - } - } - if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) { - usort( - $aSetCache[$i], - array('\Nominatim\Phrase', 'cmpByArraylen') - ); - $aSetCache[$i] = array_slice( - $aSetCache[$i], - 0, - Phrase::MAX_WORDSETS - ); - } - } - } - - // finally the current full phrase - $sPartial = $this->aWords[0].' 
'.$sPartial; - if ($oTokens->containsAny($sPartial)) { - $aSetCache[$i][] = array($sPartial); - } - } - - $this->aWordSets = $aSetCache[$iNumWords - 1]; - usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen')); - $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS); - } - - public function debugInfo() { return array( 'Type' => $this->sPhraseType, 'Phrase' => $this->sPhrase, - 'Words' => $this->aWords, 'WordSets' => $this->aWordSets ); }