6 * Segment of a query string.
8 * The parts of a query string are usually separated by commas.
12 const MAX_WORDSET_LEN = 20;
13 const MAX_WORDSETS = 100;
15 // Complete phrase as a string.
17 // Element type for structured searches.
19 // Possible segmentations of the phrase.
22 public static function cmpByArraylen($aA, $aB)
27 if ($iALen == $iBLen) {
31 return ($iALen < $iBLen) ? -1 : 1;
35 public function __construct($sPhrase, $sPhraseType)
37 $this->sPhrase = trim($sPhrase);
38 $this->sPhraseType = $sPhraseType;
42 * Get the original phrase as a string.
44 public function getPhrase()
46 return $this->sPhrase;
50 * Return the element type of the phrase.
52 * @return string Phrase type if the phrase comes from a structured query
53 * or empty string otherwise.
55 public function getPhraseType()
57 return $this->sPhraseType;
61 * Return the array of possible segmentations of the phrase.
63 * @return string[][] Array of segmentations, each consisting of an
66 public function getWordSets()
68 return $this->aWordSets;
72 * Invert the word order within each of the possible segmentations.
76 public function invertWordSets()
78 foreach ($this->aWordSets as $i => $aSet) {
79 $this->aWordSets[$i] = array_reverse($aSet);
83 public function computeWordSets($aWords, $oTokens)
85 $iNumWords = count($aWords);
87 if ($iNumWords == 0) {
88 $this->aWordSets = null;
92 // Caches the word set for the partial phrase up to word i.
93 $aSetCache = array_fill(0, $iNumWords, array());
95 // Initialise first element of cache. It can only consist of the first word itself.
96 if ($oTokens->containsAny($aWords[0])) {
97 $aSetCache[0][] = array($aWords[0]);
100 // Now do the next elements using what we already have.
101 for ($i = 1; $i < $iNumWords; $i++) {
102 for ($j = $i; $j > 0; $j--) {
103 $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
104 if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
105 $aPartial = array($sPartial);
106 foreach ($aSetCache[$j - 1] as $aSet) {
107 if (count($aSet) < Phrase::MAX_WORDSET_LEN) {
108 $aSetCache[$i][] = array_merge($aSet, $aPartial);
111 if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) {
114 array('\Nominatim\Phrase', 'cmpByArraylen')
116 $aSetCache[$i] = array_slice(
125 // finally the current full phrase
126 $sPartial = $aWords[0].' '.$sPartial;
127 if ($oTokens->containsAny($sPartial)) {
128 $aSetCache[$i][] = array($sPartial);
132 $this->aWordSets = $aSetCache[$iNumWords - 1];
133 usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
134 $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS);
138 public function debugInfo()
141 'Type' => $this->sPhraseType,
142 'Phrase' => $this->sPhrase,
143 'WordSets' => $this->aWordSets