git.openstreetmap.org Git - nominatim.git/commitdiff
performance: for low frequency name terms disable use of postgresql address index...
author: Brian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 20 Dec 2012 17:47:57 +0000 (17:47 +0000)
committer: Brian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 20 Dec 2012 17:47:57 +0000 (17:47 +0000)
settings/settings.php
website/search.php

index afa20d208aa106b9224b5bc1d5f802ad34baa3f3..6f6f843b929e830217e8e5b203121d0093c3ea9f 100644 (file)
@@ -60,6 +60,7 @@
        @define('CONST_Suggestions_Enabled', false);
 
        @define('CONST_Search_TryDroppedAddressTerms', false);
+       @define('CONST_Search_NameOnlySearchFrequencyThreshold', false);
 
        // Set to zero to disable polygon output
        @define('CONST_PolygonOutput_MaximumTypes', 1);
index 265f20115105825a4388f8a8465369ac1ebcc1af..5947fbc7140998a46d2a5457ca773f96a1295935 100755 (executable)
                                {
                                        foreach($aSearchWords as $aSearchTerm)
                                        {
-                                               $aNewSearch = $aSearch;                 
+                                               $aNewSearch = $aSearch;
                                                if ($aSearchTerm['country_code'])
                                                {
                                                        $aNewSearch['sCountryCode'] = strtolower($aSearchTerm['country_code']);
                        {
 
                        // Check which tokens we have, get the ID numbers                       
-                       $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator';
+                       $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator, search_name_count';
                        $sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')';
                        $sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
 //                     $sSQL .= ' group by word_token, word, class, type, location, country_code';
                                {
                                        $aValidTokens[$aToken['word_token']] = array($aToken);
                                }
-                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1;
+                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1 + $aToken['search_name_count'];
                        }
                        if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
                                                                        {
                                                                                $aSearch = $aCurrentSearch;
                                                                                $aSearch['iSearchRank']++;
-                                                                               if (($sPhraseType == '' || $sPhraseType == 'country') && $aSearchTerm['country_code'] !== null && $aSearchTerm['country_code'] != '0')
+                                                                               if (($sPhraseType == '' || $sPhraseType == 'country') && !empty($aSearchTerm['country_code']) && $aSearchTerm['country_code'] != '0')
                                                                                {
                                                                                        if ($aSearch['sCountryCode'] === false)
                                                                                        {
                                                        if (CONST_Debug) var_dump('<hr>',$aSearch);
                                                        if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);     
                                                        $aPlaceIDs = array();
-                                               
+
                                                        // First we need a position, either aName or fLat or both
                                                        $aTerms = array();
                                                        $aOrder = array();
                                                        // TODO: filter out the pointless search terms (2 letter name tokens and less)
                                                        // they might be right - but they are just too darned expensive to run
                                                        if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
-                                                       if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                       if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) 
+                                                       {
+                                                               // For a single infrequent name term, wrap nameaddress_vector in array_cat() so PostgreSQL does not use the address index
+                                                               if (CONST_Search_NameOnlySearchFrequencyThreshold && 
+                                                                       sizeof($aSearch['aName']) == 1 && 
+                                                                       $aPossibleMainWordIDs[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
+                                                               {
+                                                                       $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                               }
+                                                               else
+                                                               {
+                                                                       $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
+                                                               }
+                                                       }
                                                        if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
                                                        if ($aSearch['sHouseNumber']) $aTerms[] = "address_rank in (26,27)";
                                                        if ($aSearch['fLon'] && $aSearch['fLat'])