- // Open the log entry for this search request.
- $hLog = logStart($oDB, 'search', $sQuery, $aLangPrefOrder);
-
- // Hack: fold the usual spellings of "new york, ny" (matched case-insensitively)
- // into one canonical form.
- $aNewYorkVariants = array('New York, ny','new york, new york', 'New York ny','new york new york');
- $sQuery = str_ireplace($aNewYorkVariants, 'new york city, ny', $sQuery);
-
- // Only when English names are among the preferred languages: expand a few
- // two-letter state abbreviations that stand alone between commas (or at the end).
- if (isset($aLangPrefOrder['name:en']))
- {
-     $aStateExpansions = array(
-         '/,\s*il\s*(,|$)/' => ', illinois\1',
-         '/,\s*al\s*(,|$)/' => ', alabama\1',
-         '/,\s*la\s*(,|$)/' => ', louisiana\1',
-     );
-     // preg_replace applies the patterns one after another, in array order.
-     $sQuery = preg_replace(array_keys($aStateExpansions), array_values($aStateExpansions), $sQuery);
- }
-
- // If we have a view box create the SQL.
- // "Small" is the box exactly as given. "Large" is the same box with every edge
- // pushed outwards by the full extent of the box along that axis.
- // Sets $sViewboxSmallSQL / $sViewboxLargeSQL (false when no box was given) and
- // clears $bBoundingBoxSearch when there is no view box.
- $sViewboxCentreSQL = $sViewboxSmallSQL = $sViewboxLargeSQL = false;
- if (isset($_GET['viewboxlbrt']) && $_GET['viewboxlbrt'])
- {
-     // 'viewboxlbrt' carries the same four numbers as 'viewbox' with the 2nd and
-     // 4th swapped. Pad so a short list cannot raise undefined-offset errors.
-     $aCoOrdinatesLBRT = array_pad(explode(',',$_GET['viewboxlbrt']), 4, '');
-     $_GET['viewbox'] = $aCoOrdinatesLBRT[0].','.$aCoOrdinatesLBRT[3].','.$aCoOrdinatesLBRT[2].','.$aCoOrdinatesLBRT[1];
- }
- if (isset($_GET['viewbox']) && $_GET['viewbox'])
- {
-     // Normalise to (at least) four floats once. The values are request input:
-     // casting here keeps them safe to embed in SQL and avoids arithmetic on raw,
-     // possibly non-numeric strings (a TypeError on PHP 8).
-     $aCoOrdinates = array_map('floatval', array_pad(explode(',',$_GET['viewbox']), 4, 0));
-     $sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".$aCoOrdinates[0].",".$aCoOrdinates[1]."),ST_Point(".$aCoOrdinates[2].",".$aCoOrdinates[3].")),4326)";
-
-     // Extent along each axis (first point minus second point). The names are kept
-     // from the original code: elements 0/2 are the x arguments of ST_Point and
-     // 1/3 the y arguments, so "height" is really the x extent and "width" the y extent.
-     $fHeight = $aCoOrdinates[0]-$aCoOrdinates[2];
-     $fWidth = $aCoOrdinates[1]-$aCoOrdinates[3];
-     $aCoOrdinates[0] += $fHeight;
-     $aCoOrdinates[2] -= $fHeight;
-     $aCoOrdinates[1] += $fWidth;
-     $aCoOrdinates[3] -= $fWidth;
-     $sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".$aCoOrdinates[0].",".$aCoOrdinates[1]."),ST_Point(".$aCoOrdinates[2].",".$aCoOrdinates[3].")),4326)";
- } else {
-     $bBoundingBoxSearch = false;
- }
- // A route plus a route width replaces the view box: the route becomes a
- // LINESTRING ($sViewboxCentreSQL) and the small/large areas are buffers around it.
- // 'route' is a flat comma-separated list of coordinate pairs; an odd number of
- // values aborts the request.
- if (isset($_GET['route']) && $_GET['route'] && isset($_GET['routewidth']) && $_GET['routewidth'])
- {
-     $aPoints = explode(',',$_GET['route']);
-     if (sizeof($aPoints) % 2 != 0)
-     {
-         echo "Uneven number of points";
-         exit;
-     }
-
-     // Each input pair "a,b" is written to the line string as "b a". Both values
-     // are cast to float, so nothing from the request reaches the SQL verbatim.
-     $aLinePoints = array();
-     foreach(array_chunk($aPoints, 2) as $aPair)
-     {
-         $fPrevCoord = (float)$aPair[0];
-         $aLinePoints[] = ((float)$aPair[1]).' '.$fPrevCoord;
-     }
-     $sViewboxCentreSQL = "ST_SetSRID('LINESTRING(".join(',', $aLinePoints).")'::geometry,4326)";
-
-     // Cast BEFORE dividing: the raw request value may be non-numeric, and
-     // dividing such a string raises a warning (PHP 7) or a TypeError (PHP 8).
-     $fRouteWidth = (float)$_GET['routewidth'];
-
-     // Small area: route buffered by width/69.
-     $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".($fRouteWidth/69).")";
-     $sViewboxSmallSQL = $oDB->getOne($sSQL);
-     if (PEAR::isError($sViewboxSmallSQL))
-     {
-         failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL);
-     }
-     $sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry";
-
-     // Large area: route buffered by width/30.
-     $sSQL = "select st_buffer(".$sViewboxCentreSQL.",".($fRouteWidth/30).")";
-     $sViewboxLargeSQL = $oDB->getOne($sSQL);
-     if (PEAR::isError($sViewboxLargeSQL))
-     {
-         failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL);
-     }
-     $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry";
- }
-
- // Do we have anything that looks like a lat/lon pair?
- // Three notations are tried in order; the first one that matches wins:
- //   1. hemisphere letter first, degrees then optional minutes    e.g. "N 51 30.5, W 0 7.5"
- //   2. degrees and optional minutes first, hemisphere letter last e.g. "51 30.5 N, 0 7.5 W"
- //   3. two plain signed decimals, optionally in square brackets   e.g. "51.5,-0.12"
- // If the resulting pair is within range it is stored in $_GET['nearlat'] /
- // $_GET['nearlon'] (as floats) and the matched text is removed from $sQuery.
- $bLatLonFound = false;
- if (preg_match('/\\b([NS])[ ]+([0-9]+[0-9.]*)[ ]+([0-9.]+)?[, ]+([EW])[ ]+([0-9]+)[ ]+([0-9]+[0-9.]*)?\\b/', $sQuery, $aData))
- {
-     // An optional minutes group that did not match is either missing from $aData
-     // (when it is the last group) or an empty string; both count as 0 minutes.
-     $fLatMinutes = isset($aData[3]) ? (float)$aData[3] : 0.0;
-     $fLonMinutes = isset($aData[6]) ? (float)$aData[6] : 0.0;
-     $fQueryLat = ($aData[1]=='N'?1:-1) * ((float)$aData[2] + $fLatMinutes/60);
-     $fQueryLon = ($aData[4]=='E'?1:-1) * ((float)$aData[5] + $fLonMinutes/60);
-     $bLatLonFound = true;
- }
- elseif (preg_match('/\\b([0-9]+)[ ]+([0-9]+[0-9.]*)?[ ]+([NS])[, ]+([0-9]+)[ ]+([0-9]+[0-9.]*)?[ ]+([EW])\\b/', $sQuery, $aData))
- {
-     $fLatMinutes = isset($aData[2]) ? (float)$aData[2] : 0.0;
-     $fLonMinutes = isset($aData[5]) ? (float)$aData[5] : 0.0;
-     $fQueryLat = ($aData[3]=='N'?1:-1) * ((float)$aData[1] + $fLatMinutes/60);
-     $fQueryLon = ($aData[6]=='E'?1:-1) * ((float)$aData[4] + $fLonMinutes/60);
-     $bLatLonFound = true;
- }
- elseif (preg_match('/(\\[|^|\\b)(-?[0-9]+[0-9.]*)[, ]+(-?[0-9]+[0-9.]*)(\\]|$|\\b)/', $sQuery, $aData))
- {
-     // The pattern also accepts text such as "1.2.3"; the cast keeps only the
-     // leading numeric part so a malformed string is never stored.
-     $fQueryLat = (float)$aData[2];
-     $fQueryLon = (float)$aData[3];
-     $bLatLonFound = true;
- }
-
- // Accept the pair only when it lies (with 0.1 of slack) inside the valid range.
- if ($bLatLonFound && $fQueryLat <= 90.1 && $fQueryLat >= -90.1 && $fQueryLon <= 180.1 && $fQueryLon >= -180.1)
- {
-     $_GET['nearlat'] = $fQueryLat;
-     $_GET['nearlon'] = $fQueryLon;
-     $sQuery = trim(str_replace($aData[0], ' ', $sQuery));
- }
-
- if ($sQuery || $aStructuredQuery)
- {
- // Start with a single blank search; everything found in the query refines copies of it.
- $aSearches = array(
-     array('iSearchRank' => 0, 'iNamePhrase' => -1, 'sCountryCode' => false, 'aName'=>array(), 'aAddress'=>array(),
-         'sOperator'=>'', 'aFeatureName' => array(), 'sClass'=>'', 'sType'=>'', 'sHouseNumber'=>'', 'fLat'=>'', 'fLon'=>'', 'fRadius'=>'')
- );
-
- // Optional "near" point, taken from the request or from coordinates detected in
- // the query text. Both values are cast to float before they are embedded in SQL:
- // they are request input and must never be concatenated verbatim.
- $sNearPointSQL = false;
- if (isset($_GET['nearlat']) && isset($_GET['nearlon']))
- {
-     $fNearLat = (float)$_GET['nearlat'];
-     $fNearLon = (float)$_GET['nearlon'];
-     $sNearPointSQL = "ST_SetSRID(ST_Point(".$fNearLon.",".$fNearLat."),4326)";
-     $aSearches[0]['fLat'] = $fNearLat;
-     $aSearches[0]['fLon'] = $fNearLon;
-     $aSearches[0]['fRadius'] = 0.1;
- }
-
- // Special terms: "[key=value]" and "[word]" fragments are cut out of the query text.
- // Each "[word]" (plus the structured 'amenity' value, if any) is looked up in the
- // word table; every hit that has a class other than 'place', or a country code,
- // multiplies the current searches into country- and/or class-restricted copies.
- // $bSpecialTerms records whether any such restriction was applied.
- $bSpecialTerms = false;
- preg_match_all('/\\[(.*)=(.*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
- $aSpecialTerms = array();
- foreach($aSpecialTermsRaw as $aSpecialTerm)
- {
-     $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-     $aSpecialTerms[strtolower($aSpecialTerm[1])] = $aSpecialTerm[2];
- }
-
- preg_match_all('/\\[([a-zA-Z]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
- // NOTE(review): this reset discards the key=value pairs collected just above.
- // Kept unchanged because the readers of $aSpecialTerms are not visible here -
- // confirm whether the pairs are meant to survive.
- $aSpecialTerms = array();
- if (isset($aStructuredQuery['amenity']) && $aStructuredQuery['amenity'])
- {
-     $aSpecialTermsRaw[] = array('['.$aStructuredQuery['amenity'].']', $aStructuredQuery['amenity']);
-     unset($aStructuredQuery['amenity']);
- }
- foreach($aSpecialTermsRaw as $aSpecialTerm)
- {
-     $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-
-     // The term can come straight from the structured 'amenity' parameter, which is
-     // not restricted to letters, so it has to be escaped before it goes into SQL.
-     $sSQL = "select make_standard_name('".pg_escape_string($aSpecialTerm[1])."') as string";
-     $sToken = $oDB->getOne($sSQL);
-     if (PEAR::isError($sToken))
-     {
-         failInternalError("Could not standardise special term.", $sSQL, $sToken);
-     }
-
-     $sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator';
-     $sSQL .= ' from word where word_token in (\' '.pg_escape_string((string)$sToken).'\')) as x where (class is not null and class not in (\'place\')) or country_code is not null';
-     if (CONST_Debug) var_Dump($sSQL);
-     $aSearchWords = $oDB->getAll($sSQL);
-     if (PEAR::isError($aSearchWords))
-     {
-         failInternalError("Could not get special term words.", $sSQL, $aSearchWords);
-     }
-
-     // Cross every existing search with every matching word. A word carrying both
-     // a country code and a class yields two searches: country only, then country
-     // plus class. When no word matches, no search survives this term.
-     $aNewSearches = array();
-     foreach($aSearches as $aSearch)
-     {
-         foreach($aSearchWords as $aSearchTerm)
-         {
-             $aNewSearch = $aSearch;
-             if ($aSearchTerm['country_code'])
-             {
-                 $aNewSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
-                 $aNewSearches[] = $aNewSearch;
-                 $bSpecialTerms = true;
-             }
-             if ($aSearchTerm['class'])
-             {
-                 $aNewSearch['sClass'] = $aSearchTerm['class'];
-                 $aNewSearch['sType'] = $aSearchTerm['type'];
-                 $aNewSearches[] = $aNewSearch;
-                 $bSpecialTerms = true;
-             }
-         }
-     }
-     $aSearches = $aNewSearches;
- }
-
- // Decide where the phrases come from. A structured query already is a list of
- // phrases (keyed by field); otherwise the free text is cut at commas, which lets
- // the user narrow the search space by marking the phrase boundaries.
- $bStructuredPhrases = (sizeof($aStructuredQuery) > 0);
- $aPhrases = $bStructuredPhrases ? $aStructuredQuery : explode(',',$sQuery);
-
-
- // Bring every phrase into standard form (via the database function
- // make_standard_name), split it into words, derive the word sets for it and
- // collect the tokens of all word sets in $aTokens. Phrases that standardise to
- // nothing are dropped. A database error aborts the request.
- $aTokens = array();
- foreach($aPhrases as $iPhrase => $sPhrase)
- {
-     $aPhrase = $oDB->getRow("select make_standard_name('".pg_escape_string($sPhrase)."') as string");
-     if (PEAR::isError($aPhrase))
-     {
-         echo "Illegal query string (not an UTF-8 string): ".$sPhrase;
-         if (CONST_Debug) var_dump($aPhrase);
-         exit;
-     }
-
-     // Nothing left after standardisation: forget the phrase.
-     if (!trim($aPhrase['string']))
-     {
-         unset($aPhrases[$iPhrase]);
-         continue;
-     }
-
-     // Replace the raw phrase by its database row, extended with words and word sets.
-     $aPhrases[$iPhrase] = $aPhrase;
-     $aPhrases[$iPhrase]['words'] = explode(' ',$aPhrases[$iPhrase]['string']);
-     $aPhrases[$iPhrase]['wordsets'] = getWordSets($aPhrases[$iPhrase]['words']);
-     $aTokens = array_merge($aTokens, getTokensFromSets($aPhrases[$iPhrase]['wordsets']));
- }
-
- // Later code addresses phrases by position, so remember the original keys
- // (the field names of a structured query) and renumber from 0.
- $aPhraseTypes = array_keys($aPhrases);
- $aPhrases = array_values($aPhrases);
-
- if (sizeof($aTokens))
- {
-
- // Ask the word table which of the candidate tokens exist (ignoring words at or
- // above CONST_Max_Word_Frequency) and fetch their ids and attributes.
- $aTokenSQLParts = array(
-     'select word_id,word_token, word, class, type, location, country_code, operator',
-     ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')',
-     ' and search_name_count < '.CONST_Max_Word_Frequency,
- );
- $sSQL = implode('', $aTokenSQLParts);
-
- if (CONST_Debug) var_Dump($sSQL);
-
- // The enclosing condition guarantees $aTokens is not empty, so the query always runs.
- $aDatabaseWords = $oDB->getAll($sSQL);
- if (PEAR::IsError($aDatabaseWords))
- {
-     failInternalError("Could not get word tokens.", $sSQL, $aDatabaseWords);
- }
-
- // Group the rows by token. Rows whose token starts with a space and that carry
- // neither a class nor a country code are noted as possible main words.
- $aValidTokens = array();
- $aPossibleMainWordIDs = array();
- foreach($aDatabaseWords as $aToken)
- {
-     $aValidTokens[$aToken['word_token']][] = $aToken;
-
-     $bSpacePrefixed = ($aToken['word_token'][0]==' ');
-     if ($bSpacePrefixed && !$aToken['class'] && !$aToken['country_code'])
-     {
-         $aPossibleMainWordIDs[$aToken['word_id']] = 1;
-     }
- }
- if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
-
- // Build a "did you mean" string. For each phrase whose first token of its first
- // word set is unknown (under its space-prefixed key), ask getWordSuggestions for
- // an alternative; otherwise keep the phrase as standardised. $sSuggestion is only
- // set when at least one phrase was actually replaced.
- $aSuggestion = array();
- $bSuggestion = false;
- if (CONST_Suggestions_Enabled)
- {
-     foreach($aPhrases as $iPhrase => $aPhrase)
-     {
-         // Default: the phrase stays as it is.
-         $sSuggestedPhrase = $aPhrase['string'];
-         $sFirstToken = $aPhrase['wordsets'][0][0];
-         if (!isset($aValidTokens[' '.$sFirstToken]))
-         {
-             $aSuggestionWords = getWordSuggestions($oDB, $sFirstToken);
-             // There may be no suggestion at all (or no usable result); only read
-             // the first row when it really exists.
-             if (is_array($aSuggestionWords) && isset($aSuggestionWords[0]['word']) && $aSuggestionWords[0]['word'])
-             {
-                 $sSuggestedPhrase = $aSuggestionWords[0]['word'];
-                 $bSuggestion = true;
-             }
-         }
-         $aSuggestion[] = $sSuggestedPhrase;
-     }
- }
- if ($bSuggestion) $sSuggestion = join(', ',$aSuggestion);
-
- // Try and calculate GB postcodes we might be missing.
- // A token that looks like a GB postcode is split into the part up to and
- // including the first digit of the second half ($aData[1]) and the two final
- // letters ($aData[2]). gbPostcodeCalculate is always consulted (its source is
- // treated as definitive); a result replaces the token's entry in $aValidTokens.
- foreach($aTokens as $sToken)
- {
-     if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
-     {
-         // Written without the separating space ("SW1A1AA"): insert it in front of
-         // the last character of $aData[1], in both the full code and the first
-         // part. The split offset is the length of $aData[1] minus one.
-         if (substr($aData[1],-2,1) != ' ')
-         {
-             $iSplitAt = strlen($aData[1])-1;
-             $aData[0] = substr($aData[0],0,$iSplitAt).' '.substr($aData[0],$iSplitAt);
-             $aData[1] = substr($aData[1],0,-1).' '.substr($aData[1],-1,1);
-         }
-         $aGBPostcodeLocation = gbPostcodeCalculate($aData[0], $aData[1], $aData[2], $oDB);
-         if ($aGBPostcodeLocation)
-         {
-             $aValidTokens[$sToken] = $aGBPostcodeLocation;
-         }
-     }
- }
-
- // A single-word token that contains a digit and is not known under its
- // space-prefixed key is assumed to be a house number.
- foreach($aTokens as $sToken)
- {
-     if (isset($aValidTokens[' '.$sToken])) continue;   // already known
-     if (strpos($sToken,' ') !== false) continue;       // more than one word
-     if (!preg_match('/[0-9]/', $sToken)) continue;     // no digit in it
-
-     $aValidTokens[' '.$sToken] = array(array('class'=>'place','type'=>'house'));
- }
-
- // Any words that have failed completely?
- // TODO: suggestions
-
- // Start the search process
- $aResultPlaceIDs = array();
-
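- /*
- Calculate all possible searches using aValidTokens. Every phrase is expanded
- into its wordsets (the alternative ways of grouping the phrase's words into
- tokens), i.e.
-
- 'Wodsworth Road, Sheffield' =>
-
- Phrase Wordset
- 0 0 (wodsworth road)
- 0 1 (wodsworth)(road)
- 1 0 (sheffield)
-
- Each search is scored (iSearchRank) on how good it is, so that the searches can be ordered.
- */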
- foreach($aPhrases as $iPhrase => $sPhrase)
- {
- $aNewPhraseSearches = array();
- if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
- else $sPhraseType = '';
-
- foreach($aPhrases[$iPhrase]['wordsets'] as $aWordset) // expand the searches once per wordset of the current phrase
- {
- $aWordsetSearches = $aSearches; // each wordset starts again from the current contents of $aSearches
-
- /* Add all words from this wordset.
-    Tokens are consumed in order. For every token, each search built so far
-    ($aWordsetSearches) is extended once per matching entry in $aValidTokens.
-    After each token the extended searches are sorted with the bySearchRank
-    comparator (defined elsewhere - presumably ascending iSearchRank, confirm)
-    and the first 50 become the working list for the next token. An extended
-    search is only kept while its iSearchRank is below $iMaxRank. */
- foreach($aWordset as $iToken => $sToken)
- {
-//echo "<br><b>$sToken</b>";
- $aNewWordsetSearches = array(); // searches produced while consuming this token
-
- foreach($aWordsetSearches as $aCurrentSearch)
- {
-//echo "<i>";
-//var_dump($aCurrentSearch);
-//echo "</i>";
-
- /* If the token is valid under its space-prefixed key (presumably the
-    full-word form of the token - TODO confirm against the word table), try
-    every matching term. The first branch whose condition holds decides how
-    the term is used: country code, coordinates, house number, class/type,
-    or plain word id. */
- if (isset($aValidTokens[' '.$sToken]))
- {
- foreach($aValidTokens[' '.$sToken] as $aSearchTerm)
- {
- $aSearch = $aCurrentSearch;
- $aSearch['iSearchRank']++; // every term tried costs one rank point
- if (($sPhraseType == '' || $sPhraseType == 'country') && $aSearchTerm['country_code'] !== null && $aSearchTerm['country_code'] != '0') // country term; only in untyped or 'country' phrases
- {
- if ($aSearch['sCountryCode'] === false) // only when no country is set yet; otherwise the term yields no search
- {
- $aSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
- // Country is almost always at the end of the string - increase score for finding it anywhere else (optimisation)
- if ($iToken+1 != sizeof($aWordset) || $iPhrase+1 != sizeof($aPhrases)) $aSearch['iSearchRank'] += 5;
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
- }
- elseif (isset($aSearchTerm['lat']) && $aSearchTerm['lat'] !== '' && $aSearchTerm['lat'] !== null) // term carries its own lat/lon/radius
- {
- if ($aSearch['fLat'] === '') // only when the search has no position yet
- {
- $aSearch['fLat'] = $aSearchTerm['lat'];
- $aSearch['fLon'] = $aSearchTerm['lon'];
- $aSearch['fRadius'] = $aSearchTerm['radius'];
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
- }
- elseif (($sPhraseType == '' || $sPhraseType == 'street') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house') // house number; only in untyped or 'street' phrases
- {
- if ($aSearch['sHouseNumber'] === '') // at most one house number per search
- {
- $aSearch['sHouseNumber'] = $sToken;
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
-/*
- // Fall back to not searching for this item (better than nothing)
- $aSearch = $aCurrentSearch;
- $aSearch['iSearchRank'] += 1;
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
-*/
- }
- }
- elseif ($sPhraseType == '' && $aSearchTerm['class'] !== '' && $aSearchTerm['class'] !== null) // class/type term; only in untyped phrases
- {
- if ($aSearch['sClass'] === '') // at most one class/type per search
- {
- $aSearch['sOperator'] = $aSearchTerm['operator']; // NOTE(review): always overwritten by the if/else three lines below
- $aSearch['sClass'] = $aSearchTerm['class'];
- $aSearch['sType'] = $aSearchTerm['type'];
- if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name'; // the search already has name terms
- else $aSearch['sOperator'] = 'near'; // near = in for the moment
-
- // Do we have a shortcut id? (get_tagpair is a database function; a truthy result is used as a word id)
- if ($aSearch['sOperator'] == 'name')
- {
- $sSQL = "select get_tagpair('".$aSearch['sClass']."', '".$aSearch['sType']."')";
- if ($iAmenityID = $oDB->getOne($sSQL))
- {
- $aValidTokens[$aSearch['sClass'].':'.$aSearch['sType']] = array('word_id' => $iAmenityID);
- $aSearch['aName'][$iAmenityID] = $iAmenityID; // the id joins the name terms ...
- $aSearch['sClass'] = ''; // ... and the class/type restriction is cleared again
- $aSearch['sType'] = '';
- }
- }
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
- }
- elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) // plain word with an id
- {
- if (sizeof($aSearch['aName'])) // the search already has name terms: this word can only be an address term
- {
- if (($sPhraseType != 'street' && $sPhraseType != 'country') && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false)) // not in street/country phrases, and only if the un-prefixed token is unknown, shorter than 4 characters, or contains a space
- {
- $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
- }
- else
- {
- $aSearch['iSearchRank'] += 1000; // skip;
- }
- }
- else
- {
- $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; // first name term of this search
-// $aSearch['iNamePhrase'] = $iPhrase;
- }
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
- }
- }
- if (isset($aValidTokens[$sToken])) // the token is (also) known under its un-prefixed key
- {
- // Allow searching for a word - but at extra cost
- foreach($aValidTokens[$sToken] as $aSearchTerm)
- {
- if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
- {
- if (($sPhraseType != 'street') && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4) // as an address term (+1): not in street phrases, search has a name, token has 4+ characters
- {
- $aSearch = $aCurrentSearch;
- $aSearch['iSearchRank'] += 1;
- $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
-
- if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) // as a name term (+2): no name yet, or the name was started in this same phrase
- {
- $aSearch = $aCurrentSearch;
- $aSearch['iSearchRank'] += 2;
- if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; // purely numeric tokens cost 2 more
- $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
- $aSearch['iNamePhrase'] = $iPhrase;
- if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
- }
- }
- }
- }
- else
- {
- // Allow skipping a word - but at EXTREAM cost
- //$aSearch = $aCurrentSearch;
- //$aSearch['iSearchRank']+=100;
- //$aNewWordsetSearches[] = $aSearch;
- }
- }
- // Sort and cut: the first 50 searches become the working list for the next token
- usort($aNewWordsetSearches, 'bySearchRank');
- $aWordsetSearches = array_slice($aNewWordsetSearches, 0, 50);
- }
-// var_Dump('<hr>',sizeof($aWordsetSearches)); exit;
-
- $aNewPhraseSearches = array_merge($aNewPhraseSearches, $aNewWordsetSearches); // NOTE(review): merges the sorted but uncut list of the last token, not $aWordsetSearches
- usort($aNewPhraseSearches, 'bySearchRank');
-
- $aSearchHash = array(); // serialized form of every search seen so far, used to drop exact duplicates
- foreach($aNewPhraseSearches as $iSearch => $aSearch)
- {
- $sHash = serialize($aSearch);
- if (isset($aSearchHash[$sHash]))
- {
- unset($aNewPhraseSearches[$iSearch]);
- }
- else
- {
- $aSearchHash[$sHash] = 1;
- }
- }
-
- $aNewPhraseSearches = array_slice($aNewPhraseSearches, 0, 50); // keep the first 50 for this phrase (also renumbers the keys)
- }
-
- // Bucket this phrase's searches by rank, leaving out every search whose rank is
- // not below $iMaxRank, and order the buckets by ascending rank.
- $aGroupedSearches = array();
- foreach($aNewPhraseSearches as $aSearch)
- {
-     $iRank = $aSearch['iSearchRank'];
-     if (!($iRank < $iMaxRank)) continue;
-     $aGroupedSearches[$iRank][] = $aSearch;
- }
- ksort($aGroupedSearches);
-
- // Flatten the buckets back into $aSearches, best rank first. A bucket is always
- // taken whole; no further bucket is added once more than 50 searches are collected.
- $aSearches = array();
- $iSearchCount = 0;
- foreach($aGroupedSearches as $iScore => $aNewSearches)
- {
-     $aSearches = array_merge($aSearches, $aNewSearches);
-     $iSearchCount = $iSearchCount + sizeof($aNewSearches);
-     if ($iSearchCount > 50) break;
- }
-
-// if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
-
- }
- }
- else
- {
- // Re-group the searches by their score, junk anything over 20 as just not worth trying
- $aGroupedSearches = array();
- foreach($aSearches as $aSearch)
- {
- if ($aSearch['iSearchRank'] < $iMaxRank)
- {
- if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
- $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
- }
- }
- ksort($aGroupedSearches);