<?php
+/**
+ * SPDX-License-Identifier: GPL-2.0-only
+ *
+ * This file is part of Nominatim. (https://nominatim.org)
+ *
+ * Copyright (C) 2022 by the Nominatim developer community.
+ * For a full list of authors see the git log.
+ */
namespace Nominatim;
*/
class Phrase
{
- const MAX_WORDSET_LEN = 20;
- const MAX_WORDSETS = 100;
-
- // Complete phrase as a string.
+ // Complete phrase as a string (guaranteed to have no leading or trailing
+ // whitespace).
private $sPhrase;
// Element type for structured searches.
private $sPhraseType;
// Possible segmentations of the phrase.
private $aWordSets;
- public static function cmpByArraylen($aA, $aB)
- {
- $iALen = count($aA);
- $iBLen = count($aB);
-
- if ($iALen == $iBLen) {
- return 0;
- }
-
- return ($iALen < $iBLen) ? -1 : 1;
- }
-
-
public function __construct($sPhrase, $sPhraseType)
{
$this->sPhrase = trim($sPhrase);
}
/**
- * Get the orginal phrase of the string.
+ * Get the original phrase of the string.
*/
public function getPhrase()
{
return $this->sPhraseType;
}
+ /**
+ * Set the possible segmentations of the phrase.
+ *
+ * The given value is stored as-is and replaces any previously stored
+ * segmentations; no validation or copying logic is applied here.
+ *
+ * @param mixed $aWordSets Segmentations to store. Presumably an array of
+ *                         word lists (or null) - TODO confirm against callers.
+ */
+ public function setWordSets($aWordSets)
+ {
+ $this->aWordSets = $aWordSets;
+ }
+
/**
* Return the array of possible segmentations of the phrase.
*
}
}
- public function computeWordSets($aWords, $oTokens)
- {
- $iNumWords = count($aWords);
-
- if ($iNumWords == 0) {
- $this->aWordSets = null;
- return;
- }
-
- // Caches the word set for the partial phrase up to word i.
- $aSetCache = array_fill(0, $iNumWords, array());
-
- // Initialise first element of cache. There can only be the word.
- if ($oTokens->containsAny($aWords[0])) {
- $aSetCache[0][] = array($aWords[0]);
- }
-
- // Now do the next elements using what we already have.
- for ($i = 1; $i < $iNumWords; $i++) {
- for ($j = $i; $j > 0; $j--) {
- $sPartial = $j == $i ? $aWords[$j] : $aWords[$j].' '.$sPartial;
- if (!empty($aSetCache[$j - 1]) && $oTokens->containsAny($sPartial)) {
- $aPartial = array($sPartial);
- foreach ($aSetCache[$j - 1] as $aSet) {
- if (count($aSet) < Phrase::MAX_WORDSET_LEN) {
- $aSetCache[$i][] = array_merge($aSet, $aPartial);
- }
- }
- if (count($aSetCache[$i]) > 2 * Phrase::MAX_WORDSETS) {
- usort(
- $aSetCache[$i],
- array('\Nominatim\Phrase', 'cmpByArraylen')
- );
- $aSetCache[$i] = array_slice(
- $aSetCache[$i],
- 0,
- Phrase::MAX_WORDSETS
- );
- }
- }
- }
-
- // finally the current full phrase
- $sPartial = $aWords[0].' '.$sPartial;
- if ($oTokens->containsAny($sPartial)) {
- $aSetCache[$i][] = array($sPartial);
- }
- }
-
- $this->aWordSets = $aSetCache[$iNumWords - 1];
- usort($this->aWordSets, array('\Nominatim\Phrase', 'cmpByArraylen'));
- $this->aWordSets = array_slice($this->aWordSets, 0, Phrase::MAX_WORDSETS);
- }
-
-
public function debugInfo()
{
return array(