git.openstreetmap.org Git - nominatim.git/commitdiff
avoid dropping tokens completely just because they are expensive. Use ' ' token...
author Brian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 31 Jan 2013 14:17:41 +0000 (14:17 +0000)
committer Brian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 31 Jan 2013 14:17:41 +0000 (14:17 +0000)
lib/lib.php
website/search.php

index 2c335c49eaf6a83fdb3feea45d6775d2e67e6d0e..9d1120cbed9639c2aede011adbdef8a8e382b61a 100644 (file)
                        }
                }
                echo "<table border=\"1\">";
-               echo "<tr><th>rank</th><th>Name Tokens</th><th>Address Tokens</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
+               echo "<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th><th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th><th>class</th><th>type</th><th>house#</th><th>Lat</th><th>Lon</th><th>Radius</th></tr>";
                foreach($aData as $iRank => $aRankedSet)
                {
                        foreach($aRankedSet as $aRow)
                                }
                                echo "</td>";
 
+                               echo "<td>";
+                               $sSep = '';
+                               foreach($aRow['aNameNonSearch'] as $iWordID)
+                               {
+                                       echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
+                                       $sSep = ', ';
+                               }
+                               echo "</td>";
+
                                echo "<td>";
                                $sSep = '';
                                foreach($aRow['aAddress'] as $iWordID)
                                }
                                echo "</td>";
 
+                               echo "<td>";
+                               $sSep = '';
+                               foreach($aRow['aAddressNonSearch'] as $iWordID)
+                               {
+                                       echo $sSep.'#'.$aWordsIDs[$iWordID].'#';
+                                       $sSep = ', ';
+                               }
+                               echo "</td>";
+
                                echo "<td>".$aRow['sCountryCode']."</td>";
 
                                echo "<td>".$aRow['sOperator']."</td>";
index 3e8e5b7954742728400b0f6a5eee65cda2148646..2c1d2befecce66307a15714e7254c70fd53d378c 100755 (executable)
                                                                                                $aSearch = $aCurrentSearch;
                                                                                                $aSearch['iSearchRank'] += 1;
                                                                                                if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+                                                                                               {
                                                                                                        $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                               }
+                                                                                               elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version?
+                                                                                               {
+                                                                                                       foreach($aValidTokens[' '.$sToken] as $aSearchTermToken)
+                                                                                                       {
+                                                                                                               if (empty($aSearchTermToken['country_code']) 
+                                                                                                                       && empty($aSearchTermToken['lat'])
+                                                                                                                       && empty($aSearchTermToken['class']))
+                                                                                                               {
+                                                                                                                       $aSearch = $aCurrentSearch;
+                                                                                                                       $aSearch['iSearchRank'] += 1;
+                                                                                                                       $aSearch['aAddress'][$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+                                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                                               }
+                                                                                                       }
+                                                                                               }
                                                                                                else
+                                                                                               {
                                                                                                        $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
-                                                                                               if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                                       if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                                                                               }
                                                                                        }
 
                                                                                        if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)
                                                        // TODO: filter out the pointless search terms (2 letter name tokens and less)
                                                        // they might be right - but they are just too darned expensive to run
                                                        if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
+                                                       if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
                                                        if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) 
                                                        {
                                                                // For infrequent name terms disable index usage for address
                                                                        sizeof($aSearch['aName']) == 1 && 
                                                                        $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
                                                                {
-                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
                                                                }
                                                                else
                                                                {
                                                                        $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                                       if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
                                                                }
                                                        }
                                                        if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";