6 * Segment of a query string.
8 * The parts of a query string are usually separated by commas.
// Maximum number of terms in a single segmentation: computeWordSets() does
// not extend a segmentation that has already reached this length.
12 public const MAX_WORDSET_LEN = 20;
// Maximum number of segmentations computeWordSets() keeps for a phrase.
13 public const MAX_WORDSETS = 100;
15 // Complete phrase as a string.
17 // Element type for structured searches.
19 // Space-separated words of the phrase.
21 // Possible segmentations of the phrase.
/**
 * Compare two arrays by their number of elements.
 *
 * Intended as a usort() callback: arrays with fewer elements sort first.
 *
 * @param array $aA First array.
 * @param array $aB Second array.
 *
 * @return int -1 if $aA has fewer elements than $aB, 0 if both have the
 *             same number of elements, 1 otherwise.
 */
public static function cmpByArraylen($aA, $aB)
{
    return count($aA) <=> count($aB);
}
/**
 * Create a phrase from its text and its element type.
 *
 * @param string $sPhrase     Text of the phrase. Surrounding whitespace is
 *                            stripped before the text is stored.
 * @param string $sPhraseType Element type for structured searches.
 */
public function __construct($sPhrase, $sPhraseType)
{
    $this->sPhrase = trim($sPhrase);
    $this->sPhraseType = $sPhraseType;
    // The phrase is split on single spaces only, so consecutive spaces
    // inside the phrase yield empty-string words.
    $this->aWords = explode(' ', $this->sPhrase);
}
/**
 * Return the element type of the phrase.
 *
 * @return string Phrase type if the phrase comes from a structured query
 *                or empty string otherwise.
 */
public function getPhraseType()
{
    return $this->sPhraseType;
}
/**
 * Return the array of possible segmentations of the phrase.
 *
 * The segmentations are not assigned by the constructor; they are filled
 * in by computeWordSets().
 *
 * @return string[][] Array of segmentations, each consisting of an
 *                    array of terms.
 */
public function getWordSets()
{
    return $this->aWordSets;
}
/**
 * Add the tokens from this phrase to the given list of tokens.
 *
 * Every contiguous run of words of the phrase is added twice: once
 * prefixed with a single space and once as is. Each token is stored
 * under itself as array key, so tokens already present are overwritten
 * rather than duplicated.
 *
 * @param string[] $aTokens List of tokens to append to (modified in place).
 *
 * @return void
 */
public function addTokens(&$aTokens)
{
    $iNumWords = count($this->aWords);

    for ($i = 0; $i < $iNumWords; $i++) {
        // Start with the single word at position $i ...
        $sPhrase = $this->aWords[$i];
        $aTokens[' '.$sPhrase] = ' '.$sPhrase;
        $aTokens[$sPhrase] = $sPhrase;

        // ... then keep extending the run by one following word at a time.
        for ($j = $i + 1; $j < $iNumWords; $j++) {
            $sPhrase .= ' '.$this->aWords[$j];
            $aTokens[' '.$sPhrase] = ' '.$sPhrase;
            $aTokens[$sPhrase] = $sPhrase;
        }
    }
}
/**
 * Invert the set of possible segmentations.
 *
 * Reverses the order of the terms inside every segmentation. The order
 * of the segmentations themselves and their keys are left unchanged.
 *
 * @return void
 */
public function invertWordSets()
{
    foreach ($this->aWordSets as $i => $aSet) {
        $this->aWordSets[$i] = array_reverse($aSet);
    }
}
/**
 * Compute the possible segmentations of the phrase.
 *
 * A segmentation splits the words of the phrase into consecutive terms,
 * where each term is one or more words joined by a single space. A term
 * is only used when $oTokens->containsAny() accepts it. The result is
 * stored in the object, sorted by number of terms (fewest first) and
 * limited to MAX_WORDSETS entries.
 *
 * @param object $oTokens Token container. Only its containsAny() method
 *                        is used here, called with a term string.
 *
 * @return void
 */
public function computeWordSets($oTokens)
{
    $iNumWords = count($this->aWords);
    // Caches the word set for the partial phrase up to word i.
    $aSetCache = array_fill(0, $iNumWords, array());

    // Initialise first element of cache. There can only be the word.
    if ($oTokens->containsAny($this->aWords[0])) {
        $aSetCache[0][] = array($this->aWords[0]);
    }

    // Now do the next elements using what we already have.
    for ($i = 1; $i < $iNumWords; $i++) {
        // $j is the index of the first word of the last term; the term
        // grows to the left by one word per iteration.
        for ($j = $i; $j > 0; $j--) {
            $sPartial = $j == $i ? $this->aWords[$j] : $this->aWords[$j].' '.$sPartial;
            if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
                $aPartial = array($sPartial);
                // Append the term to every known segmentation of the
                // words before it, unless that one is already full.
                foreach ($aSetCache[$j - 1] as $aSet) {
                    if (count($aSet) < self::MAX_WORDSET_LEN) {
                        $aSetCache[$i][] = array_merge($aSet, $aPartial);
                    }
                }
                // Prune early so the cache cannot grow without bounds:
                // keep only the segmentations with the fewest terms.
                if (count($aSetCache[$i]) > 2 * self::MAX_WORDSETS) {
                    usort(
                        $aSetCache[$i],
                        array('\Nominatim\Phrase', 'cmpByArraylen')
                    );
                    $aSetCache[$i] = array_slice(
                        $aSetCache[$i],
                        0,
                        self::MAX_WORDSETS
                    );
                }
            }
        }

        // finally the current full phrase
        $sPartial = $this->aWords[0].' '.$sPartial;
        if ($oTokens->containsAny($sPartial)) {
            $aSetCache[$i][] = array($sPartial);
        }
    }

    $this->aWordSets = $aSetCache[$iNumWords - 1];
    usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
    $this->aWordSets = array_slice($this->aWordSets, 0, self::MAX_WORDSETS);
}
/**
 * Return the state of the phrase for debugging purposes.
 *
 * @return array Map with the keys 'Type', 'Phrase', 'Words' and
 *               'WordSets' holding the corresponding properties.
 */
public function debugInfo()
{
    return array(
        'Type' => $this->sPhraseType,
        'Phrase' => $this->sPhrase,
        'Words' => $this->aWords,
        'WordSets' => $this->aWordSets
    );
}