git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
1  2 
lib/Geocode.php
lib/lib.php

diff --combined lib/Geocode.php
index e9b304d251abbe4c6c7cf6e09c5c130297a27953,6cc2e4cb193d4069d377a9cbf002a7359337e891..18526f4382531fd2a9fa3a3b246970934690f821
@@@ -7,6 -7,7 +7,7 @@@ require_once(CONST_BasePath.'/lib/Phras
  require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
  require_once(CONST_BasePath.'/lib/SearchDescription.php');
  require_once(CONST_BasePath.'/lib/SearchContext.php');
+ require_once(CONST_BasePath.'/lib/TokenList.php');
  
  class Geocode
  {
@@@ -19,7 -20,7 +20,7 @@@
      protected $bIncludeAddressDetails = false;
  
      protected $aExcludePlaceIDs = array();
 -    protected $bReverseInPlan = false;
 +    protected $bReverseInPlan = true;
  
      protected $iLimit = 20;
      protected $iFinalLimit = 10;
          return false;
      }
  
-     public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured)
+     public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
      {
          /*
-              Calculate all searches using aValidTokens i.e.
+              Calculate all searches using oValidTokens i.e.
               'Wodsworth Road, Sheffield' =>
  
               Phrase Wordset
                          //var_dump($oCurrentSearch);
                          //echo "</i>";
  
-                         // If the token is valid
-                         if (isset($aValidTokens[' '.$sToken])) {
-                             foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
-                                 $aNewSearches = $oCurrentSearch->extendWithFullTerm(
-                                     $aSearchTerm,
-                                     isset($aValidTokens[$sToken])
-                                       && strpos($sToken, ' ') === false,
-                                     $sPhraseType,
-                                     $iToken == 0 && $iPhrase == 0,
-                                     $iPhrase == 0,
-                                     $iToken + 1 == count($aWordset)
-                                       && $iPhrase + 1 == count($aPhrases)
-                                 );
-                                 foreach ($aNewSearches as $oSearch) {
-                                     if ($oSearch->getRank() < $this->iMaxRank) {
-                                         $aNewWordsetSearches[] = $oSearch;
-                                     }
+                         // Tokens with full name matches.
+                         foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
+                             $aNewSearches = $oCurrentSearch->extendWithFullTerm(
+                                 $oSearchTerm,
+                                 $oValidTokens->contains($sToken)
+                                   && strpos($sToken, ' ') === false,
+                                 $sPhraseType,
+                                 $iToken == 0 && $iPhrase == 0,
+                                 $iPhrase == 0,
+                                 $iToken + 1 == count($aWordset)
+                                   && $iPhrase + 1 == count($aPhrases)
+                             );
+                             foreach ($aNewSearches as $oSearch) {
+                                 if ($oSearch->getRank() < $this->iMaxRank) {
+                                     $aNewWordsetSearches[] = $oSearch;
                                  }
                              }
                          }
                          // Look for partial matches.
                          // Note that there is no point in adding country terms here
                          // because country is omitted in the address.
-                         if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') {
+                         if ($sPhraseType != 'country') {
                              // Allow searching for a word - but at extra cost
-                             foreach ($aValidTokens[$sToken] as $aSearchTerm) {
+                             foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
                                  $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
-                                     $aSearchTerm,
+                                     $sToken,
+                                     $oSearchTerm,
                                      $bIsStructured,
                                      $iPhrase,
-                                     isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array()
+                                     $oValidTokens->get(' '.$sToken)
                                  );
  
                                  foreach ($aNewSearches as $oSearch) {
              Debug::printDebugTable('Phrases', $aPhrases);
              Debug::printVar('Tokens', $aTokens);
  
+             $oValidTokens = new TokenList();
              if (!empty($aTokens)) {
-                 // Check which tokens we have, get the ID numbers
                  $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
                  $sSQL .= ' FROM word ';
                  $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
  
                  Debug::printSQL($sSQL);
  
-                 $aValidTokens = array();
-                 $aDatabaseWords = chksql(
-                     $this->oDB->getAll($sSQL),
-                     'Could not get word tokens.'
+                 $oValidTokens->addTokensFromDB(
+                     $this->oDB,
+                     $aTokens,
+                     $this->aCountryCodes,
+                     $sNormQuery,
+                     $this->oNormalizer
                  );
-                 foreach ($aDatabaseWords as $aToken) {
-                     // Filter country tokens that do not match restricted countries.
-                     if ($this->aCountryCodes
-                         && $aToken['country_code']
-                         && !in_array($aToken['country_code'], $this->aCountryCodes)
-                     ) {
-                         continue;
-                     }
-                     // Special terms need to appear in their normalized form.
-                     if ($aToken['word'] && $aToken['class']) {
-                         $sNormWord = $this->normTerm($aToken['word']);
-                         if (strpos($sNormQuery, $sNormWord) === false) {
-                             continue;
-                         }
-                     }
  
-                     if (isset($aValidTokens[$aToken['word_token']])) {
-                         $aValidTokens[$aToken['word_token']][] = $aToken;
-                     } else {
-                         $aValidTokens[$aToken['word_token']] = array($aToken);
-                     }
-                 }
-                 // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
+                 // Try more interpretations for Tokens that could not be matched.
                  foreach ($aTokens as $sToken) {
-                     if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                         if (isset($aValidTokens[$aData[1]])) {
-                             foreach ($aValidTokens[$aData[1]] as $aToken) {
-                                 if (!$aToken['class']) {
-                                     if (isset($aValidTokens[$sToken])) {
-                                         $aValidTokens[$sToken][] = $aToken;
-                                     } else {
-                                         $aValidTokens[$sToken] = array($aToken);
-                                     }
-                                 }
-                             }
+                     if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                         if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                             // US ZIP+4 codes - merge in the 5-digit ZIP code
+                             $oValidTokens->addToken(
+                                 $sToken,
+                                 new Token\Postcode(null, $aData[1], 'us')
+                             );
+                         } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                             // Unknown single word token with a number.
+                             // Assume it is a house number.
+                             $oValidTokens->addToken(
+                                 $sToken,
+                                 new Token\HouseNumber(null, trim($sToken))
+                             );
                          }
                      }
                  }
  
-                 foreach ($aTokens as $sToken) {
-                     // Unknown single word token with a number - assume it is a house number
-                     if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) {
-                         $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken));
-                     }
-                 }
-                 Debug::printGroupTable('Valid Tokens', $aValidTokens);
                  // Any words that have failed completely?
                  // TODO: suggestions
+                 Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
                  Debug::newSection('Search candidates');
  
-                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases);
+                 $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
  
                  if ($this->bReverseInPlan) {
                      // Reverse phrase array and also reverse the order of the wordsets in
                      if (count($aPhrases) > 1) {
                          $aPhrases[count($aPhrases)-1]->invertWordSets();
                      }
-                     $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false);
+                     $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
  
                      foreach ($aGroupedSearches as $aSearches) {
                          foreach ($aSearches as $aSearch) {
                  }
              }
  
-             if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
+             Debug::printGroupedSearch(
+                 $aGroupedSearches,
+                 $oValidTokens->debugTokenByWordIdList()
+             );
  
              // Start the search process
              $iGroupLoop = 0;
                  foreach ($aSearches as $oSearch) {
                      $iQueryLoop++;
  
-                     if (CONST_Debug) {
-                         echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
-                         _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens);
-                     }
+                     Debug::newSection("Search Loop, group $iGroupLoop, loop $iQueryLoop");
+                     Debug::printGroupedSearch(
+                         array($iGroupedRank => array($oSearch)),
+                         $oValidTokens->debugTokenByWordIdList()
+                     );
  
                      $aResults += $oSearch->query(
                          $this->oDB,
diff --combined lib/lib.php
index fa71d296277ef35a71f047c94673f870f8f34d86,0b939da47777f6b4dea4577ede5c25d81116abe2..317ba54997b66c3061440b0772904f004ec9542b
@@@ -426,32 -426,6 +426,6 @@@ function javascript_renderData($xVal, $
  }
  
  
- function _debugDumpGroupedSearches($aData, $aTokens)
- {
-     $aWordsIDs = array();
-     if ($aTokens) {
-         foreach ($aTokens as $sToken => $aWords) {
-             if ($aWords) {
-                 foreach ($aWords as $aToken) {
-                     $aWordsIDs[$aToken['word_id']] =
-                         '#'.$sToken.'('.$aToken['word_id'].')#';
-                 }
-             }
-         }
-     }
-     echo '<table border="1">';
-     echo '<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th>';
-     echo '<th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th>';
-     echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
-     foreach ($aData as $iRank => $aRankedSet) {
-         foreach ($aRankedSet as $aRow) {
-             $aRow->dumpAsHtmlTableRow($aWordsIDs);
-         }
-     }
-     echo '</table>';
- }
  function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $housenumber = -1, $bRaw = false)
  {
      $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, $housenumber)";
@@@ -585,10 -559,10 +559,10 @@@ function geometryText2Points($geometry_
          //
          preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER);
          //
 -    } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) {
 +/*    } elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#', $geometry_as_text, $aMatch)) {
          //
          preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/', $aMatch[1], $aPolyPoints, PREG_SET_ORDER);
 -        //
 +        */
      } elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#', $geometry_as_text, $aMatch)) {
          //
          $aPolyPoints = createPointsAroundCenter($aMatch[1], $aMatch[2], $fRadius);