namespace Nominatim;
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
+require_once(CONST_BasePath.'/lib/Phrase.php');
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
require_once(CONST_BasePath.'/lib/SearchDescription.php');
require_once(CONST_BasePath.'/lib/SearchContext.php');
return $aSearchResults;
}
- public function getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery)
+ public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bIsStructured, $sNormQuery)
{
/*
Calculate all searches using aValidTokens i.e.
*/
$iGlobalRank = 0;
- foreach ($aPhrases as $iPhrase => $aPhrase) {
+ foreach ($aPhrases as $iPhrase => $oPhrase) {
$aNewPhraseSearches = array();
- if ($bStructuredPhrases) {
- $sPhraseType = $aPhraseTypes[$iPhrase];
- } else {
- $sPhraseType = '';
- }
+ $sPhraseType = $bIsStructured ? $oPhrase->getPhraseType() : '';
- foreach ($aPhrase['wordsets'] as $iWordSet => $aWordset) {
+ foreach ($oPhrase->getWordSets() as $iWordSet => $aWordset) {
// Too many permutations - too expensive
if ($iWordSet > 120) break;
foreach ($aValidTokens[$sToken] as $aSearchTerm) {
$aNewSearches = $oCurrentSearch->extendWithPartialTerm(
$aSearchTerm,
- $bStructuredPhrases,
+ $bIsStructured,
$iPhrase,
$aWordFrequencyScores,
isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array()
// Split query into phrases
// Commas are used to reduce the search space by indicating where phrases split
if ($this->aStructuredQuery) {
- $aPhrases = $this->aStructuredQuery;
+ $aInPhrases = $this->aStructuredQuery;
$bStructuredPhrases = true;
} else {
- $aPhrases = explode(',', $sQuery);
+ $aInPhrases = explode(',', $sQuery);
$bStructuredPhrases = false;
}
// Get all 'sets' of words
// Generate a complete list of all
$aTokens = array();
- foreach ($aPhrases as $iPhrase => $sPhrase) {
- $aPhrase = chksql(
- $this->oDB->getRow("SELECT make_standard_name('".pg_escape_string($sPhrase)."') as string"),
+ $aPhrases = array();
+ foreach ($aInPhrases as $iPhrase => $sPhrase) {
+ $sPhrase = chksql(
+ $this->oDB->getOne('SELECT make_standard_name('.getDBQuoted($sPhrase).')'),
"Cannot normalize query string (is it a UTF-8 string?)"
);
- if (trim($aPhrase['string'])) {
- $aPhrases[$iPhrase] = $aPhrase;
- $aPhrases[$iPhrase]['words'] = explode(' ', $aPhrases[$iPhrase]['string']);
- $aPhrases[$iPhrase]['wordsets'] = getWordSets($aPhrases[$iPhrase]['words'], 0);
- $aTokens = array_merge($aTokens, getTokensFromSets($aPhrases[$iPhrase]['wordsets']));
- } else {
- unset($aPhrases[$iPhrase]);
+ if (trim($sPhrase)) {
+ $oPhrase = new Phrase($sPhrase, is_string($iPhrase) ? $iPhrase : '');
+ $oPhrase->addTokens($aTokens);
+ $aPhrases[] = $oPhrase;
}
}
- // Reindex phrases - we make assumptions later on that they are numerically keyed in order
- $aPhraseTypes = array_keys($aPhrases);
- $aPhrases = array_values($aPhrases);
-
if (sizeof($aTokens)) {
// Check which tokens we have, get the ID numbers
$sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
// Any words that have failed completely?
// TODO: suggestions
- $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhraseTypes, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
+ $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, $bStructuredPhrases, $sNormQuery);
if ($this->bReverseInPlan) {
// Reverse phrase array and also reverse the order of the wordsets in
// the first and final phrase. Don't bother about phrases in the middle
// because order in the address doesn't matter.
$aPhrases = array_reverse($aPhrases);
- $aPhrases[0]['wordsets'] = getInverseWordSets($aPhrases[0]['words'], 0);
+ $aPhrases[0]->invertWordSets();
if (sizeof($aPhrases) > 1) {
- $aFinalPhrase = end($aPhrases);
- $aPhrases[sizeof($aPhrases)-1]['wordsets'] = getInverseWordSets($aFinalPhrase['words'], 0);
+ $aPhrases[sizeof($aPhrases)-1]->invertWordSets();
}
- $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, null, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
+ $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $aWordFrequencyScores, false, $sNormQuery);
foreach ($aGroupedSearches as $aSearches) {
foreach ($aSearches as $aSearch) {
--- /dev/null
+<?php
+
+namespace Nominatim;
+
+/**
+ * Segment of a query string.
+ *
+ * The parts of a query string are usually separated by commas.
+ */
+class Phrase
+{
+    // Recursion depth at which the remaining words are no longer split
+    // any further when word sets are created.
+    const MAX_DEPTH = 7;
+
+    // Complete phrase as a string.
+    private $sPhrase;
+    // Element type for structured searches.
+    private $sPhraseType;
+    // Space-separated words of the phrase.
+    private $aWords;
+    // Possible segmentations of the phrase.
+    private $aWordSets;
+
+
+    /**
+     * Create a phrase and compute its word sets.
+     *
+     * The phrase is trimmed, split at single spaces into words and the
+     * forward segmentations of these words are computed right away.
+     *
+     * @param string $sPhrase     Text of the phrase.
+     * @param string $sPhraseType Type of the phrase, handed back unchanged
+     *                            by getPhraseType().
+     */
+    public function __construct($sPhrase, $sPhraseType)
+    {
+        $this->sPhrase = trim($sPhrase);
+        $this->sPhraseType = $sPhraseType;
+        $this->aWords = explode(' ', $this->sPhrase);
+        $this->aWordSets = $this->createWordSets($this->aWords, 0);
+    }
+
+    /**
+     * Return the phrase type given to the constructor.
+     *
+     * @return string
+     */
+    public function getPhraseType()
+    {
+        return $this->sPhraseType;
+    }
+
+    /**
+     * Return the current segmentations of the phrase.
+     *
+     * @return array[] List of word sets, each one a list of terms.
+     */
+    public function getWordSets()
+    {
+        return $this->aWordSets;
+    }
+
+    /**
+     * Add every term of the current word sets to a token list.
+     *
+     * Each term is entered twice, once as is and once prefixed with a
+     * space. Key and value of an entry are identical, so terms that are
+     * already in the list are simply overwritten with the same value.
+     *
+     * @param array $aTokens Token list, modified in place.
+     *
+     * @return void
+     */
+    public function addTokens(&$aTokens)
+    {
+        foreach ($this->aWordSets as $aSet) {
+            foreach ($aSet as $sWord) {
+                $aTokens[' '.$sWord] = ' '.$sWord;
+                $aTokens[$sWord] = $sWord;
+            }
+        }
+    }
+
+    /**
+     * Replace the word sets with the inverse segmentations of the phrase.
+     *
+     * @return void
+     */
+    public function invertWordSets()
+    {
+        $this->aWordSets = $this->createInverseWordSets($this->aWords, 0);
+    }
+
+    /**
+     * Compute the segmentations of a word list, starting at the front.
+     *
+     * The first set returned always holds all words joined into a single
+     * term. The other sets consist of a leading term made of the first
+     * one or more words followed by a segmentation of the remaining words.
+     *
+     * @param string[] $aWords Words to segment.
+     * @param int      $iDepth Current recursion depth. No further splitting
+     *                         happens once MAX_DEPTH is reached.
+     *
+     * @return array[] List of word sets.
+     */
+    private function createWordSets($aWords, $iDepth)
+    {
+        $aResult = array(array(join(' ', $aWords)));
+        $sFirstToken = '';
+        if ($iDepth < self::MAX_DEPTH) {
+            while (sizeof($aWords) > 1) {
+                $sWord = array_shift($aWords);
+                // Compare against the empty string explicitly: the word '0'
+                // evaluates to false and would lose its separating space.
+                $sFirstToken .= ($sFirstToken !== '' ? ' ' : '').$sWord;
+                $aRest = $this->createWordSets($aWords, $iDepth + 1);
+                foreach ($aRest as $aSet) {
+                    $aResult[] = array_merge(array($sFirstToken), $aSet);
+                }
+            }
+        }
+
+        return $aResult;
+    }
+
+    /**
+     * Compute the segmentations of a word list, starting at the end.
+     *
+     * The first set returned always holds all words joined into a single
+     * term. The other sets start with a term made of the last one or more
+     * words (in their original order) followed by an inverse segmentation
+     * of the words in front of them.
+     *
+     * @param string[] $aWords Words to segment.
+     * @param int      $iDepth Current recursion depth. No further splitting
+     *                         happens once MAX_DEPTH is reached.
+     *
+     * @return array[] List of word sets.
+     */
+    public function createInverseWordSets($aWords, $iDepth)
+    {
+        $aResult = array(array(join(' ', $aWords)));
+        $sFirstToken = '';
+        if ($iDepth < self::MAX_DEPTH) {
+            while (sizeof($aWords) > 1) {
+                $sWord = array_pop($aWords);
+                // Compare against the empty string explicitly: the word '0'
+                // evaluates to false and would lose its separating space.
+                $sFirstToken = $sWord.($sFirstToken !== '' ? ' ' : '').$sFirstToken;
+                $aRest = $this->createInverseWordSets($aWords, $iDepth + 1);
+                foreach ($aRest as $aSet) {
+                    $aResult[] = array_merge(array($sFirstToken), $aSet);
+                }
+            }
+        }
+
+        return $aResult;
+    }
+}
}
-function getWordSets($aWords, $iDepth)
-{
- $aResult = array(array(join(' ', $aWords)));
- $sFirstToken = '';
- if ($iDepth < 7) {
- while (sizeof($aWords) > 1) {
- $sWord = array_shift($aWords);
- $sFirstToken .= ($sFirstToken?' ':'').$sWord;
- $aRest = getWordSets($aWords, $iDepth+1);
- foreach ($aRest as $aSet) {
- $aResult[] = array_merge(array($sFirstToken), $aSet);
- }
- }
- }
- return $aResult;
-}
-
-function getInverseWordSets($aWords, $iDepth)
-{
- $aResult = array(array(join(' ', $aWords)));
- $sFirstToken = '';
- if ($iDepth < 8) {
- while (sizeof($aWords) > 1) {
- $sWord = array_pop($aWords);
- $sFirstToken = $sWord.($sFirstToken?' ':'').$sFirstToken;
- $aRest = getInverseWordSets($aWords, $iDepth+1);
- foreach ($aRest as $aSet) {
- $aResult[] = array_merge(array($sFirstToken), $aSet);
- }
- }
- }
- return $aResult;
-}
-
-
-function getTokensFromSets($aSets)
-{
- $aTokens = array();
- foreach ($aSets as $aSet) {
- foreach ($aSet as $sWord) {
- $aTokens[' '.$sWord] = ' '.$sWord;
- $aTokens[$sWord] = $sWord;
- }
- }
- return $aTokens;
-}
-
-
function getClassTypes()
{
return array(