]> git.openstreetmap.org Git - nominatim.git/commitdiff
Change round how CONST_Max_Word_Frequency is applied. Was causing some valid combina...
authorBrian Quinion <openstreetmap@brian.quinion.co.uk>
Thu, 24 Jan 2013 18:00:29 +0000 (18:00 +0000)
committerBrian Quinion <openstreetmap@brian.quinion.co.uk>
Wed, 30 Jan 2013 13:40:04 +0000 (13:40 +0000)
website/search.php

index a1860f25ed7f59372dcdc06687328a4369748c1e..80a05a6442c142a1fcc969c34d046c6d60d9bf70 100755 (executable)
                        // Start with a blank search
                        $aSearches = array(
                                array('iSearchRank' => 0, 'iNamePhrase' => -1, 'sCountryCode' => false, 'aName'=>array(), 'aAddress'=>array(), 
+                                       'aNameNonSearch'=>array(), 'aAddressNonSearch'=>array(),
                                        'sOperator'=>'', 'aFeatureName' => array(), 'sClass'=>'', 'sType'=>'', 'sHouseNumber'=>'', 'fLat'=>'', 'fLon'=>'', 'fRadius'=>'')
                        );
 
                        // Check which tokens we have, get the ID numbers                       
                        $sSQL = 'select word_id,word_token, word, class, type, location, country_code, operator, search_name_count';
                        $sSQL .= ' from word where word_token in ('.join(',',array_map("getDBQuoted",$aTokens)).')';
-                       $sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
+//                     $sSQL .= ' and search_name_count < '.CONST_Max_Word_Frequency;
 //                     $sSQL .= ' group by word_token, word, class, type, location, country_code';
 
                        if (CONST_Debug) var_Dump($sSQL);
                                failInternalError("Could not get word tokens.", $sSQL, $aDatabaseWords);
                        }
                        $aPossibleMainWordIDs = array();
+                       $aWordFrequencyScores = array();
                        foreach($aDatabaseWords as $aToken)
                        {
                                if (isset($aValidTokens[$aToken['word_token']]))
                                {
                                        $aValidTokens[$aToken['word_token']] = array($aToken);
                                }
-                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1 + $aToken['search_name_count'];
+                               if ($aToken['word_token'][0]==' ' && !$aToken['class'] && !$aToken['country_code']) $aPossibleMainWordIDs[$aToken['word_id']] = 1;
+                               $aWordFrequencyScores[$aToken['word_id']] = $aToken['search_name_count'] + 1;
                        }
                        if (CONST_Debug) var_Dump($aPhrases, $aValidTokens);
 
                                                                                {
                                                                                        if (sizeof($aSearch['aName']))
                                                                                        {
-                                                                                               if (($sPhraseType != 'street' && $sPhraseType != 'country') && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
+                                                                                               if ((!$bStructuredPhrases || $iPhrase > 0) && $sPhraseType != 'country' && (!isset($aValidTokens[$sToken]) || strlen($sToken) < 4 || strpos($sToken, ' ') !== false))
                                                                                                {
                                                                                                        $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                }
                                                                        {
                                                                                if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
                                                                                {
-                                                                                       if (($sPhraseType != 'street') && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
+                                                                                       if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
                                                                                        {
                                                                                                $aSearch = $aCurrentSearch;
                                                                                                $aSearch['iSearchRank'] += 1;
-                                                                                               $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+                                                                                                       $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               else
+                                                                                                       $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                        }
 
                                                                                                $aSearch = $aCurrentSearch;
                                                                                                $aSearch['iSearchRank'] += 2;
                                                                                                if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2;
-                                                                                               $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency)
+                                                                                                       $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
+                                                                                               else
+                                                                                                       $aSearch['aNameNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id'];
                                                                                                $aSearch['iNamePhrase'] = $iPhrase;
                                                                                                if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                                                                        }
                                                                // For infrequent name terms disable index usage for address
                                                                if (CONST_Search_NameOnlySearchFrequencyThreshold && 
                                                                        sizeof($aSearch['aName']) == 1 && 
-                                                                       $aPossibleMainWordIDs[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
+                                                                       $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
                                                                {
                                                                        $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                                                                }