git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Fri, 6 Oct 2017 20:02:48 +0000 (22:02 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Fri, 6 Oct 2017 20:02:48 +0000 (22:02 +0200)
1  2 
lib/Geocode.php

diff --combined lib/Geocode.php
index ac92257f7782dc3fb9c9d8a612c8157f603446f4,fbb1b4e03f78df853a1e98c95915dd5e1512cb72..16fba050597f309cd023f53b0dccb3c75463a6a6
@@@ -25,7 -25,7 +25,7 @@@ class Geocod
  
      protected $aExcludePlaceIDs = array();
      protected $bDeDupe = true;
 -    protected $bReverseInPlan = false;
 +    protected $bReverseInPlan = true;
  
      protected $iLimit = 20;
      protected $iFinalLimit = 10;
  
               Score how good the search is so they can be ordered
           */
+         $iGlobalRank = 0;
          foreach ($aPhrases as $iPhrase => $aPhrase) {
              $aNewPhraseSearches = array();
              if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
                                              $aSearch['iSearchRank'] += 5;
                                          }
                                          if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                         // If it is at the beginning, we can be almost sure that this is the wrong order
+                                         // Increase score for all searches.
+                                         if ($iToken == 0 && $iPhrase == 0) {
+                                             $iGlobalRank++;
+                                         }
                                      }
                                  } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') {
                                      // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                     if (!$bHavePostcode && $aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                     if ($aSearch['sPostcode'] === '' &&
                                          isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                          // If we have structured search or this is the first term,
                                          // make the postcode the primary search element.
-                                         if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
+                                         if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                              $aNewSearch = $aSearch;
                                              $aNewSearch['sOperator'] = 'postcode';
                                              $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
                                          // sanity check: if the housenumber is not mainly made
                                          // up of numbers, add a penalty
                                          if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
-                                         // also housenumbers should appear in the first or second phrase
-                                         if ($iPhrase > 1) $aSearch['iSearchRank'] += 1;
+                                         // also must not appear in the middle of the address
+                                         if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1;
                                          if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                          /*
                                          // Fall back to not searching for this item (better than nothing)
                                          }
                                      }
  
-                                     if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) {
+                                     if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch'])
+                                         && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) {
                                          $aSearch = $aCurrentSearch;
                                          $aSearch['iSearchRank'] += 1;
                                          if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
                      continue;
                  }
              }
+             $aSearch['iSearchRank'] += $iGlobalRank;
              $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
          }
          ksort($aGroupedSearches);
                          // TODO: filter out the pointless search terms (2 letter name tokens and less)
                          // they might be right - but they are just too darned expensive to run
                          if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'], ",")."]";
 -                        if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
 +                        //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'], ",")."]";
                          if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) {
                              // For infrequent name terms disable index usage for address
                              if (CONST_Search_NameOnlySearchFrequencyThreshold
                                  && sizeof($aSearch['aName']) == 1
                                  && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold
                              ) {
 -                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
 +                                //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'], $aSearch['aAddressNonSearch']), ",")."]";
 +                                $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
                              } else {
                                  $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'], ",")."]";
 -                                if (sizeof($aSearch['aAddressNonSearch'])) {
 +                                /*if (sizeof($aSearch['aAddressNonSearch'])) {
                                      $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'], ",")."]";
 -                                }
 +                                }*/
                              }
                          }
                          if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";