From: Brian Quinion Date: Thu, 31 Jan 2013 14:17:41 +0000 (+0000) Subject: avoid dropping tokens completely just because they are expensive. Use ' ' token... X-Git-Tag: v2.2.0~127 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/3852096c80d5461eab21ec8c7e2452a85d631d7d?hp=-c avoid dropping tokens completely just because they are expensive. Use ' ' token in preference to just dropping --- 3852096c80d5461eab21ec8c7e2452a85d631d7d diff --git a/lib/lib.php b/lib/lib.php index 2c335c49..9d1120cb 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -646,7 +646,7 @@ } } echo ""; - echo ""; + echo ""; foreach($aData as $iRank => $aRankedSet) { foreach($aRankedSet as $aRow) @@ -663,6 +663,15 @@ } echo ""; + echo ""; + echo ""; + echo ""; + echo ""; echo ""; diff --git a/website/search.php b/website/search.php index 3e8e5b79..2c1d2bef 100755 --- a/website/search.php +++ b/website/search.php @@ -638,10 +638,30 @@ $aSearch = $aCurrentSearch; $aSearch['iSearchRank'] += 1; if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) + { $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? 
+ { + foreach($aValidTokens[' '.$sToken] as $aSearchTermToken) + { + if (empty($aSearchTermToken['country_code']) + && empty($aSearchTermToken['lat']) + && empty($aSearchTermToken['class'])) + { + $aSearch = $aCurrentSearch; + $aSearch['iSearchRank'] += 1; + $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id']; + if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; + } + } + } else + { $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; - if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; + if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; + } } if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) @@ -900,6 +920,7 @@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; + if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address @@ -907,11 +928,12 @@ sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; + if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> 
ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
rank | Name Tokens | Address Tokens | country | operator | class | type | house# | Lat | Lon | Radius
rank | Name Tokens | Name Not | Address Tokens | Address Not | country | operator | class | type | house# | Lat | Lon | Radius
"; + $sSep = ''; + foreach($aRow['aNameNonSearch'] as $iWordID) + { + echo $sSep.'#'.$aWordsIDs[$iWordID].'#'; + $sSep = ', '; + } + echo ""; $sSep = ''; foreach($aRow['aAddress'] as $iWordID) @@ -672,6 +681,15 @@ } echo ""; + $sSep = ''; + foreach($aRow['aAddressNonSearch'] as $iWordID) + { + echo $sSep.'#'.$aWordsIDs[$iWordID].'#'; + $sSep = ', '; + } + echo "".$aRow['sCountryCode']."".$aRow['sOperator']."