Merge remote-tracking branch 'upstream/master'

author Sarah Hoffmann <lonvia@denofr.de>

Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)

committer Sarah Hoffmann <lonvia@denofr.de>

Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
author Sarah Hoffmann <lonvia@denofr.de>
Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
diff --git a/docs/api/Reverse.md b/docs/api/Reverse.md

index 999827a1aa3655e9c4f1684880a203872063732c..0c3c3d6500ca33a1597bd30c28b291daf9594439 100644 (file)
--- a/docs/api/Reverse.md
+++ b/docs/api/Reverse.md
@@ -57,6 +57,12 @@ https://nominatim.openstreetmap.org/reverse?<query>
  * `polygon_text=1`
      * Output geometry of results as a WKT.
  
+* `polygon_threshold=0.0`
+    * defaults to 0.0
+    * Simplify the output geometry before returning. The parameter is the
+      tolerance in degrees with which the geometry may differ from the original
+      geometry. Topology is preserved in the result.
+
  * `extratags=1`
      * Include additional information in the result if available, e.g. wikipedia link, opening hours.
  
diff --git a/docs/api/Search.md b/docs/api/Search.md

index e166abaf60a7f1f74e20c786d4ba32f10966ca39..17e745877e74e3bbe707ad3cae54d5672fae6abf 100644 (file)
--- a/docs/api/Search.md
+++ b/docs/api/Search.md
@@ -106,6 +106,12 @@ Structured requests are faster and require fewer server resources. **Do not comb
  * `polygon_text=1`
      * Output geometry of results as a WKT.
  
+* `polygon_threshold=0.0`
+    * defaults to 0.0
+    * Simplify the output geometry before returning. The parameter is the
+      tolerance in degrees with which the geometry may differ from the original
+      geometry. Topology is preserved in the result.
+
  * `extratags=1`
      * Include additional information in the result if available, e.g. wikipedia link, opening hours.
  
@@ -213,4 +219,4 @@ Structured requests are faster and require fewer server resources. **Do not comb
          "place_id": "1453068",
          "type": "bakery"
      }
-```
-\ No newline at end of file
+```
diff --git a/lib/DebugHtml.php b/lib/DebugHtml.php

index 0f5af241022c9fd8b5252e99635719b6cab85ce7..ff1724d2a61a76ec6f674fedeb456f1c6feba19d 100644 (file)
--- a/lib/DebugHtml.php
+++ b/lib/DebugHtml.php
@@ -71,6 +71,21 @@ class Debug
          echo "</table>\n";
      }
  
+    public static function printGroupedSearch($aSearches, $aWordsIDs)
+    {
+        echo '<table border="1">';
+        echo '<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th>';
+        echo '<th>Address Tokens</th><th>Address Not</th>';
+        echo '<th>country</th><th>operator</th>';
+        echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
+        foreach ($aSearches as $iRank => $aRankedSet) {
+            foreach ($aRankedSet as $aRow) {
+                $aRow->dumpAsHtmlTableRow($aWordsIDs);
+            }
+        }
+        echo '</table>';
+    }
+
      public static function printGroupTable($sHeading, $aVar)
      {
          echo '<b>'.$sHeading.":</b>\n";
diff --git a/lib/Geocode.php b/lib/Geocode.php

index e9b304d251abbe4c6c7cf6e09c5c130297a27953..18526f4382531fd2a9fa3a3b246970934690f821 100644 (file)
--- a/lib/Geocode.php
+++ b/lib/Geocode.php
@@ -7,6 +7,7 @@ require_once(CONST_BasePath.'/lib/Phrase.php');
  require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
  require_once(CONST_BasePath.'/lib/SearchDescription.php');
  require_once(CONST_BasePath.'/lib/SearchContext.php');
+require_once(CONST_BasePath.'/lib/TokenList.php');
  
  class Geocode
  {
@@ -332,10 +333,10 @@ class Geocode
          return false;
      }
  
-    public function getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bIsStructured)
+    public function getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bIsStructured)
      {
          /*
-             Calculate all searches using aValidTokens i.e.
+             Calculate all searches using oValidTokens i.e.
               'Wodsworth Road, Sheffield' =>
  
               Phrase Wordset
@@ -365,38 +366,37 @@ class Geocode
                          //var_dump($oCurrentSearch);
                          //echo "</i>";
  
-                        // If the token is valid
-                        if (isset($aValidTokens[' '.$sToken])) {
-                            foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
-                                $aNewSearches = $oCurrentSearch->extendWithFullTerm(
-                                    $aSearchTerm,
-                                    isset($aValidTokens[$sToken])
-                                      && strpos($sToken, ' ') === false,
-                                    $sPhraseType,
-                                    $iToken == 0 && $iPhrase == 0,
-                                    $iPhrase == 0,
-                                    $iToken + 1 == count($aWordset)
-                                      && $iPhrase + 1 == count($aPhrases)
-                                );
-
-                                foreach ($aNewSearches as $oSearch) {
-                                    if ($oSearch->getRank() < $this->iMaxRank) {
-                                        $aNewWordsetSearches[] = $oSearch;
-                                    }
+                        // Tokens with full name matches.
+                        foreach ($oValidTokens->get(' '.$sToken) as $oSearchTerm) {
+                            $aNewSearches = $oCurrentSearch->extendWithFullTerm(
+                                $oSearchTerm,
+                                $oValidTokens->contains($sToken)
+                                  && strpos($sToken, ' ') === false,
+                                $sPhraseType,
+                                $iToken == 0 && $iPhrase == 0,
+                                $iPhrase == 0,
+                                $iToken + 1 == count($aWordset)
+                                  && $iPhrase + 1 == count($aPhrases)
+                            );
+
+                            foreach ($aNewSearches as $oSearch) {
+                                if ($oSearch->getRank() < $this->iMaxRank) {
+                                    $aNewWordsetSearches[] = $oSearch;
                                  }
                              }
                          }
                          // Look for partial matches.
                          // Note that there is no point in adding country terms here
                          // because country is omitted in the address.
-                        if (isset($aValidTokens[$sToken]) && $sPhraseType != 'country') {
+                        if ($sPhraseType != 'country') {
                              // Allow searching for a word - but at extra cost
-                            foreach ($aValidTokens[$sToken] as $aSearchTerm) {
+                            foreach ($oValidTokens->get($sToken) as $oSearchTerm) {
                                  $aNewSearches = $oCurrentSearch->extendWithPartialTerm(
-                                    $aSearchTerm,
+                                    $sToken,
+                                    $oSearchTerm,
                                      $bIsStructured,
                                      $iPhrase,
-                                    isset($aValidTokens[' '.$sToken]) ? $aValidTokens[' '.$sToken] : array()
+                                    $oValidTokens->get(' '.$sToken)
                                  );
  
                                  foreach ($aNewSearches as $oSearch) {
@@ -645,73 +645,51 @@ class Geocode
              Debug::printDebugTable('Phrases', $aPhrases);
              Debug::printVar('Tokens', $aTokens);
  
+            $oValidTokens = new TokenList();
+
              if (!empty($aTokens)) {
-                // Check which tokens we have, get the ID numbers
                  $sSQL = 'SELECT word_id, word_token, word, class, type, country_code, operator, search_name_count';
                  $sSQL .= ' FROM word ';
                  $sSQL .= ' WHERE word_token in ('.join(',', array_map('getDBQuoted', $aTokens)).')';
  
                  Debug::printSQL($sSQL);
  
-                $aValidTokens = array();
-                $aDatabaseWords = chksql(
-                    $this->oDB->getAll($sSQL),
-                    'Could not get word tokens.'
+                $oValidTokens->addTokensFromDB(
+                    $this->oDB,
+                    $aTokens,
+                    $this->aCountryCodes,
+                    $sNormQuery,
+                    $this->oNormalizer
                  );
-                foreach ($aDatabaseWords as $aToken) {
-                    // Filter country tokens that do not match restricted countries.
-                    if ($this->aCountryCodes
-                        && $aToken['country_code']
-                        && !in_array($aToken['country_code'], $this->aCountryCodes)
-                    ) {
-                        continue;
-                    }
-
-                    // Special terms need to appear in their normalized form.
-                    if ($aToken['word'] && $aToken['class']) {
-                        $sNormWord = $this->normTerm($aToken['word']);
-                        if (strpos($sNormQuery, $sNormWord) === false) {
-                            continue;
-                        }
-                    }
  
-                    if (isset($aValidTokens[$aToken['word_token']])) {
-                        $aValidTokens[$aToken['word_token']][] = $aToken;
-                    } else {
-                        $aValidTokens[$aToken['word_token']] = array($aToken);
-                    }
-                }
-
-                // US ZIP+4 codes - if there is no token, merge in the 5-digit ZIP code
+                // Try more interpretations for Tokens that could not be matched.
                  foreach ($aTokens as $sToken) {
-                    if (!isset($aValidTokens[$sToken]) && preg_match('/^([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
-                        if (isset($aValidTokens[$aData[1]])) {
-                            foreach ($aValidTokens[$aData[1]] as $aToken) {
-                                if (!$aToken['class']) {
-                                    if (isset($aValidTokens[$sToken])) {
-                                        $aValidTokens[$sToken][] = $aToken;
-                                    } else {
-                                        $aValidTokens[$sToken] = array($aToken);
-                                    }
-                                }
-                            }
+                    if ($sToken[0] == ' ' && !$oValidTokens->contains($sToken)) {
+                        if (preg_match('/^ ([0-9]{5}) [0-9]{4}$/', $sToken, $aData)) {
+                            // US ZIP+4 code - fall back to a postcode token for the 5-digit ZIP
+                            $oValidTokens->addToken(
+                                $sToken,
+                                new Token\Postcode(null, $aData[1], 'us')
+                            );
+                        } elseif (preg_match('/^ [0-9]+$/', $sToken)) {
+                            // Unknown single-word token made up of digits only.
+                            // Assume it is a house number.
+                            $oValidTokens->addToken(
+                                $sToken,
+                                new Token\HouseNumber(null, trim($sToken))
+                            );
                          }
                      }
                  }
  
-                foreach ($aTokens as $sToken) {
-                    // Unknown single word token with a number - assume it is a house number
-                    if (!isset($aValidTokens[' '.$sToken]) && strpos($sToken, ' ') === false && preg_match('/^[0-9]+$/', $sToken)) {
-                        $aValidTokens[' '.$sToken] = array(array('class' => 'place', 'type' => 'house', 'word_token' => ' '.$sToken));
-                    }
-                }
-                Debug::printGroupTable('Valid Tokens', $aValidTokens);
-
                  // Any words that have failed completely?
                  // TODO: suggestions
+
+                Debug::printGroupTable('Valid Tokens', $oValidTokens->debugInfo());
+
                  Debug::newSection('Search candidates');
  
-                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, $bStructuredPhrases);
+                $aGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, $bStructuredPhrases);
  
                  if ($this->bReverseInPlan) {
                      // Reverse phrase array and also reverse the order of the wordsets in
@@ -722,7 +700,7 @@ class Geocode
                      if (count($aPhrases) > 1) {
                          $aPhrases[count($aPhrases)-1]->invertWordSets();
                      }
-                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $aValidTokens, false);
+                    $aReverseGroupedSearches = $this->getGroupedSearches($aSearches, $aPhrases, $oValidTokens, false);
  
                      foreach ($aGroupedSearches as $aSearches) {
                          foreach ($aSearches as $aSearch) {
@@ -762,7 +740,10 @@ class Geocode
                  }
              }
  
-            if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
+            Debug::printGroupedSearch(
+                $aGroupedSearches,
+                $oValidTokens->debugTokenByWordIdList()
+            );
  
              // Start the search process
              $iGroupLoop = 0;
@@ -772,10 +753,11 @@ class Geocode
                  foreach ($aSearches as $oSearch) {
                      $iQueryLoop++;
  
-                    if (CONST_Debug) {
-                        echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
-                        _debugDumpGroupedSearches(array($iGroupedRank => array($oSearch)), $aValidTokens);
-                    }
+                    Debug::newSection("Search Loop, group $iGroupLoop, loop $iQueryLoop");
+                    Debug::printGroupedSearch(
+                        array($iGroupedRank => array($oSearch)),
+                        $oValidTokens->debugTokenByWordIdList()
+                    );
  
                      $aResults += $oSearch->query(
                          $this->oDB,
diff --git a/lib/SearchDescription.php b/lib/SearchDescription.php

index 5f01e01bcf9bb78f3a2dee104aab8d8158a345d3..079cb8a6e65d1c72d8dd8693e40d740569206d45 100644 (file)
--- a/lib/SearchDescription.php
+++ b/lib/SearchDescription.php
@@ -166,30 +166,29 @@ class SearchDescription
      /**
       * Derive new searches by adding a full term to the existing search.
       *
-     * @param mixed[] $aSearchTerm  Description of the token.
-     * @param bool    $bHasPartial  True if there are also tokens of partial terms
-     *                              with the same name.
-     * @param string  $sPhraseType  Type of phrase the token is contained in.
-     * @param bool    $bFirstToken  True if the token is at the beginning of the
-     *                              query.
-     * @param bool    $bFirstPhrase True if the token is in the first phrase of
-     *                              the query.
-     * @param bool    $bLastToken   True if the token is at the end of the query.
+     * @param object $oSearchTerm  Description of the token.
+     * @param bool   $bHasPartial  True if there are also tokens of partial terms
+     *                             with the same name.
+     * @param string $sPhraseType  Type of phrase the token is contained in.
+     * @param bool   $bFirstToken  True if the token is at the beginning of the
+     *                             query.
+     * @param bool   $bFirstPhrase True if the token is in the first phrase of
+     *                             the query.
+     * @param bool   $bLastToken   True if the token is at the end of the query.
       *
       * @return SearchDescription[] List of derived search descriptions.
       */
-    public function extendWithFullTerm($aSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
+    public function extendWithFullTerm($oSearchTerm, $bHasPartial, $sPhraseType, $bFirstToken, $bFirstPhrase, $bLastToken)
      {
          $aNewSearches = array();
  
          if (($sPhraseType == '' || $sPhraseType == 'country')
-            && !empty($aSearchTerm['country_code'])
-            && $aSearchTerm['country_code'] != '0'
+            && is_a($oSearchTerm, '\Nominatim\Token\Country')
          ) {
              if (!$this->sCountryCode) {
                  $oSearch = clone $this;
                  $oSearch->iSearchRank++;
-                $oSearch->sCountryCode = $aSearchTerm['country_code'];
+                $oSearch->sCountryCode = $oSearchTerm->sCountryCode;
                  // Country is almost always at the end of the string
                  // - increase score for finding it anywhere else (optimisation)
                  if (!$bLastToken) {
@@ -198,15 +197,12 @@ class SearchDescription
                  $aNewSearches[] = $oSearch;
              }
          } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode')
-                  && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode'
+                  && is_a($oSearchTerm, '\Nominatim\Token\Postcode')
          ) {
              // We need to try the case where the postal code is the primary element
              // (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode)
              // so try both.
-            if (!$this->sPostcode
-                && $aSearchTerm['word']
-                && pg_escape_string($aSearchTerm['word']) == $aSearchTerm['word']
-            ) {
+            if (!$this->sPostcode) {
                  // If we have structured search or this is the first term,
                  // make the postcode the primary search element.
                  if ($this->iOperator == Operator::NONE
@@ -217,7 +213,7 @@ class SearchDescription
                      $oSearch->iOperator = Operator::POSTCODE;
                      $oSearch->aAddress = array_merge($this->aAddress, $this->aName);
                      $oSearch->aName =
-                        array($aSearchTerm['word_id'] => $aSearchTerm['word']);
+                        array($oSearchTerm->iId => $oSearchTerm->sPostcode);
                      $aNewSearches[] = $oSearch;
                  }
  
@@ -228,23 +224,23 @@ class SearchDescription
                  ) {
                      $oSearch = clone $this;
                      $oSearch->iSearchRank++;
-                    $oSearch->sPostcode = $aSearchTerm['word'];
+                    $oSearch->sPostcode = $oSearchTerm->sPostcode;
                      $aNewSearches[] = $oSearch;
                  }
              }
          } elseif (($sPhraseType == '' || $sPhraseType == 'street')
-                 && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'house'
+                 && is_a($oSearchTerm, '\Nominatim\Token\HouseNumber')
          ) {
              if (!$this->sHouseNumber && $this->iOperator != Operator::POSTCODE) {
                  $oSearch = clone $this;
                  $oSearch->iSearchRank++;
-                $oSearch->sHouseNumber = trim($aSearchTerm['word_token']);
+                $oSearch->sHouseNumber = $oSearchTerm->sToken;
                  // sanity check: if the housenumber is not mainly made
                  // up of numbers, add a penalty
                  if (preg_match_all('/[^0-9]/', $oSearch->sHouseNumber, $aMatches) > 2) {
                      $oSearch->iSearchRank++;
                  }
-                if (!isset($aSearchTerm['word_id'])) {
+                if (empty($oSearchTerm->iId)) {
                      $oSearch->iSearchRank++;
                  }
                  // also must not appear in the middle of the address
@@ -256,27 +252,34 @@ class SearchDescription
                  }
                  $aNewSearches[] = $oSearch;
              }
-        } elseif ($sPhraseType == '' && $aSearchTerm['class']) {
+        } elseif ($sPhraseType == ''
+                  && is_a($oSearchTerm, '\Nominatim\Token\SpecialTerm')
+        ) {
              if ($this->iOperator == Operator::NONE) {
                  $oSearch = clone $this;
                  $oSearch->iSearchRank++;
  
-                $iOp = Operator::NEAR; // near == in for the moment
-                if ($aSearchTerm['operator'] == '') {
+                $iOp = $oSearchTerm->iOperator;
+                if ($iOp == Operator::NONE) {
                      if (!empty($this->aName) || $this->oContext->isBoundedSearch()) {
                          $iOp = Operator::NAME;
+                    } else {
+                        $iOp = Operator::NEAR;
                      }
                      $oSearch->iSearchRank += 2;
                  }
  
-                $oSearch->setPoiSearch($iOp, $aSearchTerm['class'], $aSearchTerm['type']);
+                $oSearch->setPoiSearch(
+                    $iOp,
+                    $oSearchTerm->sClass,
+                    $oSearchTerm->sType
+                );
                  $aNewSearches[] = $oSearch;
              }
-        } elseif (isset($aSearchTerm['word_id'])
-                  && $aSearchTerm['word_id']
-                  && $sPhraseType != 'country'
+        } elseif ($sPhraseType != 'country'
+                  && is_a($oSearchTerm, '\Nominatim\Token\Word')
          ) {
-            $iWordID = $aSearchTerm['word_id'];
+            $iWordID = $oSearchTerm->iId;
              // Full words can only be a name if they appear at the beginning
              // of the phrase. In structured search the name must forcibly be in
              // the first phrase. In unstructured search it may be in a later
@@ -296,7 +299,7 @@ class SearchDescription
                  $oSearch->aName = array($iWordID => $iWordID);
                  if (CONST_Search_NameOnlySearchFrequencyThreshold) {
                      $oSearch->bRareName =
-                        $aSearchTerm['search_name_count'] + 1
+                        $oSearchTerm->iSearchNameCount
                            < CONST_Search_NameOnlySearchFrequencyThreshold;
                  }
                  $aNewSearches[] = $oSearch;
@@ -309,7 +312,8 @@ class SearchDescription
      /**
       * Derive new searches by adding a partial term to the existing search.
       *
-     * @param mixed[] $aSearchTerm        Description of the token.
+     * @param string  $sToken             Term for the token.
+     * @param object  $oSearchTerm        Description of the token.
       * @param bool    $bStructuredPhrases True if the search is structured.
       * @param integer $iPhrase            Number of the phrase the token is in.
       * @param array[] $aFullTokens        List of full term tokens with the
@@ -317,21 +321,21 @@ class SearchDescription
       *
       * @return SearchDescription[] List of derived search descriptions.
       */
-    public function extendWithPartialTerm($aSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
+    public function extendWithPartialTerm($sToken, $oSearchTerm, $bStructuredPhrases, $iPhrase, $aFullTokens)
      {
          // Only allow name terms.
-        if (!(isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])) {
+        if (!(is_a($oSearchTerm, '\Nominatim\Token\Word'))) {
              return array();
          }
  
          $aNewSearches = array();
-        $iWordID = $aSearchTerm['word_id'];
+        $iWordID = $oSearchTerm->iId;
  
          if ((!$bStructuredPhrases || $iPhrase > 0)
              && (!empty($this->aName))
-            && strpos($aSearchTerm['word_token'], ' ') === false
+            && strpos($sToken, ' ') === false
          ) {
-            if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
+            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
                  $oSearch = clone $this;
                  $oSearch->iSearchRank += 2;
                  $oSearch->aAddress[$iWordID] = $iWordID;
@@ -340,7 +344,7 @@ class SearchDescription
                  $oSearch = clone $this;
                  $oSearch->iSearchRank++;
                  $oSearch->aAddressNonSearch[$iWordID] = $iWordID;
-                if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+                if (preg_match('#^[0-9]+$#', $sToken)) {
                      $oSearch->iSearchRank += 2;
                  }
                  if (!empty($aFullTokens)) {
@@ -349,14 +353,12 @@ class SearchDescription
                  $aNewSearches[] = $oSearch;
  
                  // revert to the token version?
-                foreach ($aFullTokens as $aSearchTermToken) {
-                    if (empty($aSearchTermToken['country_code'])
-                        && empty($aSearchTermToken['lat'])
-                        && empty($aSearchTermToken['class'])
-                    ) {
+                foreach ($aFullTokens as $oSearchTermToken) {
+                    if (is_a($oSearchTermToken, '\Nominatim\Token\Word')) {
                          $oSearch = clone $this;
                          $oSearch->iSearchRank++;
-                        $oSearch->aAddress[$aSearchTermToken['word_id']] = $aSearchTermToken['word_id'];
+                        $oSearch->aAddress[$oSearchTermToken->iId]
+                            = $oSearchTermToken->iId;
                          $aNewSearches[] = $oSearch;
                      }
                  }
@@ -371,13 +373,15 @@ class SearchDescription
              if (empty($this->aName)) {
                  $oSearch->iSearchRank += 1;
              }
-            if (preg_match('#^[0-9]+$#', $aSearchTerm['word_token'])) {
+            if (preg_match('#^[0-9]+$#', $sToken)) {
                  $oSearch->iSearchRank += 2;
              }
-            if ($aSearchTerm['search_name_count'] + 1 < CONST_Max_Word_Frequency) {
-                if (empty($this->aName) && CONST_Search_NameOnlySearchFrequencyThreshold) {
+            if ($oSearchTerm->iSearchNameCount < CONST_Max_Word_Frequency) {
+                if (empty($this->aName)
+                    && CONST_Search_NameOnlySearchFrequencyThreshold
+                ) {
                      $oSearch->bRareName =
-                        $aSearchTerm['search_name_count'] + 1
+                        $oSearchTerm->iSearchNameCount
                            < CONST_Search_NameOnlySearchFrequencyThreshold;
                  } else {
                      $oSearch->bRareName = false;
diff --git a/lib/TokenCountry.php b/lib/TokenCountry.php

new file mode 100644 (file)

index 0000000..518c0a3
--- /dev/null
+++ b/lib/TokenCountry.php
@@ -0,0 +1,29 @@
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A country token.
+ */
+class Country
+{
+    /// Database word id, if available.
+    public $iId;
+    /// Two-letter country code (lower-cased).
+    public $sCountryCode;
+
+    public function __construct($iId, $sCountryCode)
+    {
+        $this->iId = $iId;
+        $this->sCountryCode = $sCountryCode;
+    }
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'country',
+                'Info' => $this->sCountryCode
+               );
+    }
+}
diff --git a/lib/TokenHousenumber.php b/lib/TokenHousenumber.php

new file mode 100644 (file)

index 0000000..5c7c6e9
--- /dev/null
+++ b/lib/TokenHousenumber.php
@@ -0,0 +1,29 @@
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A house number token.
+ */
+class HouseNumber
+{
+    /// Database word id, if available.
+    public $iId;
+    /// Normalized house number.
+    public $sToken;
+
+    public function __construct($iId, $sToken)
+    {
+        $this->iId = $iId;
+        $this->sToken = $sToken;
+    }
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'house number',
+                'Info' => array('nr' => $this->sToken)
+               );
+    }
+}
diff --git a/lib/TokenList.php b/lib/TokenList.php

new file mode 100644 (file)

index 0000000..96b756f
--- /dev/null
+++ b/lib/TokenList.php
@@ -0,0 +1,177 @@
+<?php
+
+namespace Nominatim;
+
+require_once(CONST_BasePath.'/lib/TokenCountry.php');
+require_once(CONST_BasePath.'/lib/TokenHousenumber.php');
+require_once(CONST_BasePath.'/lib/TokenPostcode.php');
+require_once(CONST_BasePath.'/lib/TokenSpecialTerm.php');
+require_once(CONST_BasePath.'/lib/TokenWord.php');
+require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
+
+/**
+ * Saves information about the tokens that appear in a search query.
+ *
+ * Tokens are sorted by their normalized form, the token word. There are different
+ * kinds of tokens, represented by different Token* classes. Note that
+ * tokens do not have a common base class. All tokens need to have a field
+ * with the word id that points to an entry in the `word` database table
+ * but otherwise the information saved about a token can be very different.
+ *
+ * There are two different kinds of token words: full words and partial terms.
+ *
+ * Full words start with a space. They represent a complete name of a place.
+ * All special tokens are normally full words.
+ *
+ * Partial terms have no space at the beginning. They may represent a part of
+ * a name of a place (e.g. in the name 'World Trade Center' a partial term
+ * would be 'Trade' or 'Trade Center'). They are only used in TokenWord.
+ */
+class TokenList
+{
+    // List of list of tokens indexed by their word_token.
+    private $aTokens = array();
+
+    /**
+     * Check if there are tokens for the given token word.
+     *
+     * @param string $sWord Token word to look for.
+     *
+     * @return bool True if there is one or more token for the token word.
+     */
+    public function contains($sWord)
+    {
+        return isset($this->aTokens[$sWord]);
+    }
+
+    /**
+     * Get the list of tokens for the given token word.
+     *
+     * @param string $sWord Token word to look for.
+     *
+     * @return object[] Array of tokens for the given token word or an
+     *                  empty array if no tokens could be found.
+     */
+    public function get($sWord)
+    {
+        return isset($this->aTokens[$sWord]) ? $this->aTokens[$sWord] : array();
+    }
+
+    /**
+     * Add token information from the word table in the database.
+     *
+     * @param object   $oDB           Database connection.
+     * @param string[] $aTokens       List of tokens to look up in the database.
+     * @param string[] $aCountryCodes List of country restrictions.
+     * @param string   $sNormQuery    Normalized query string.
+     * @param object   $oNormalizer   Normalizer function to use on tokens.
+     *
+     * @return void
+     */
+    public function addTokensFromDB(&$oDB, &$aTokens, &$aCountryCodes, $sNormQuery, $oNormalizer)
+    {
+        // Check which tokens we have, get the ID numbers
+        $sSQL = 'SELECT word_id, word_token, word, class, type, country_code,';
+        $sSQL .= ' operator, coalesce(search_name_count, 0) as count';
+        $sSQL .= ' FROM word WHERE word_token in (';
+        $sSQL .= join(',', array_map('getDBQuoted', $aTokens)).')';
+
+        Debug::printSQL($sSQL);
+
+        $aDBWords = chksql($oDB->getAll($sSQL), 'Could not get word tokens.');
+
+        foreach ($aDBWords as $aWord) {
+            $oToken = null;
+            $iId = (int) $aWord['word_id'];
+
+            if ($aWord['class']) {
+                // Special terms need to appear in their normalized form.
+                if ($aWord['word']) {
+                    $sNormWord = $aWord['word'];
+                    if ($oNormalizer != null) {
+                        $sNormWord = $oNormalizer->transliterate($aWord['word']);
+                    }
+                    if (strpos($sNormQuery, $sNormWord) === false) {
+                        continue;
+                    }
+                }
+
+                if ($aWord['class'] == 'place' && $aWord['type'] == 'house') {
+                    $oToken = new Token\HouseNumber($iId, trim($aWord['word_token']));
+                } elseif ($aWord['class'] == 'place' && $aWord['type'] == 'postcode') {
+                    if ($aWord['word']
+                        && pg_escape_string($aWord['word']) == $aWord['word']
+                    ) {
+                        $oToken = new Token\Postcode(
+                            $iId,
+                            $aWord['word'],
+                            $aWord['country_code']
+                        );
+                    }
+                } else {
+                    // 'near' and 'in' are treated the same for now: any operator means NEAR.
+                    $oToken = new Token\SpecialTerm(
+                        $iId,
+                        $aWord['class'],
+                        $aWord['type'],
+                        $aWord['operator'] ? Operator::NEAR : Operator::NONE
+                    );
+                }
+            } elseif ($aWord['country_code']) {
+                // Filter country tokens that do not match restricted countries.
+                if (!$aCountryCodes
+                    || in_array($aWord['country_code'], $aCountryCodes)
+                ) {
+                    $oToken = new Token\Country($iId, $aWord['country_code']);
+                }
+            } else {
+                $oToken = new Token\Word(
+                    $iId,
+                    $aWord['word_token'][0] != ' ', // partial terms have no leading space
+                    (int) $aWord['count']
+                );
+            }
+
+            if ($oToken) {
+                $this->addToken($aWord['word_token'], $oToken);
+            }
+        }
+    }
+
+    /**
+     * Add a new token for the given word.
+     *
+     * @param string $sWord  Word the token describes.
+     * @param object $oToken Token object to add.
+     *
+     * @return void
+     */
+    public function addToken($sWord, $oToken)
+    {
+        if (isset($this->aTokens[$sWord])) {
+            $this->aTokens[$sWord][] = $oToken;
+        } else {
+            $this->aTokens[$sWord] = array($oToken);
+        }
+    }
+
+    public function debugTokenByWordIdList()
+    {
+        $aWordsIDs = array();
+        foreach ($this->aTokens as $sToken => $aWords) {
+            foreach ($aWords as $aToken) {
+                if ($aToken->iId !== null) {
+                    $aWordsIDs[$aToken->iId] =
+                        '#'.$sToken.'('.$aToken->iId.')#';
+                }
+            }
+        }
+
+        return $aWordsIDs;
+    }
+
+    public function debugInfo()
+    {
+        return $this->aTokens;
+    }
+}
diff --git a/lib/TokenPostcode.php b/lib/TokenPostcode.php

new file mode 100644 (file)

index 0000000..8fa2ae8
--- /dev/null
+++ b/lib/TokenPostcode.php
@@ -0,0 +1,32 @@
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A postcode token.
+ */
+class Postcode
+{
+    /// Database word id, if available.
+    public $iId;
+    /// Full normalized postcode (upper cased).
+    public $sPostcode;
+    /// Optional country code the postcode belongs to (currently unused).
+    public $sCountryCode;
+
+    public function __construct($iId, $sPostcode, $sCountryCode = '')
+    {
+        $this->iId = $iId;
+        $this->sPostcode = $sPostcode;
+        $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
+    }
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'postcode',
+                'Info' => $this->sPostcode.'('.$this->sCountryCode.')'
+               );
+    }
+}
diff --git a/lib/TokenSpecialTerm.php b/lib/TokenSpecialTerm.php

new file mode 100644 (file)

index 0000000..46966a8
--- /dev/null
+++ b/lib/TokenSpecialTerm.php
@@ -0,0 +1,41 @@
+<?php
+
+namespace Nominatim\Token;
+
+require_once(CONST_BasePath.'/lib/SpecialSearchOperator.php');
+
+/**
+ * A word token describing a place type.
+ */
+class SpecialTerm
+{
+    /// Database word id, if applicable.
+    public $iId;
+    /// Class (or OSM tag key) of the place to look for.
+    public $sClass;
+    /// Type (or OSM tag value) of the place to look for.
+    public $sType;
+    /// Relationship of the operator to the object (see Operator class).
+    public $iOperator;
+
+    public function __construct($iID, $sClass, $sType, $iOperator)
+    {
+        $this->iId = $iID;
+        $this->sClass = $sClass;
+        $this->sType = $sType;
+        $this->iOperator = $iOperator;
+    }
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'special term',
+                'Info' => array(
+                           'class' => $this->sClass,
+                           'type' => $this->sType,
+                           'operator' => Operator::toString($this->iOperator)
+                          )
+               );
+    }
+}
diff --git a/lib/TokenWord.php b/lib/TokenWord.php

new file mode 100644 (file)

index 0000000..54622cb
--- /dev/null
+++ b/lib/TokenWord.php
@@ -0,0 +1,35 @@
+<?php
+
+namespace Nominatim\Token;
+
+/**
+ * A standard word token.
+ */
+class Word
+{
+    /// Database word id, if applicable.
+    public $iId;
+    /// If true, the word may represent only part of a place name.
+    public $bPartial;
+    /// Number of appearances in the database.
+    public $iSearchNameCount;
+
+    public function __construct($iId, $bPartial, $iSearchNameCount)
+    {
+        $this->iId = $iId;
+        $this->bPartial = $bPartial;
+        $this->iSearchNameCount = $iSearchNameCount;
+    }
+
+    public function debugInfo()
+    {
+        return array(
+                'ID' => $this->iId,
+                'Type' => 'word',
+                'Info' => array(
+                           'partial' => $this->bPartial,
+                           'count' => $this->iSearchNameCount
+                          )
+               );
+    }
+}
diff --git a/lib/lib.php b/lib/lib.php

index fa71d296277ef35a71f047c94673f870f8f34d86..317ba54997b66c3061440b0772904f004ec9542b 100644 (file)
--- a/lib/lib.php
+++ b/lib/lib.php
@@ -426,32 +426,6 @@ function javascript_renderData($xVal, $iOptions = 0)
  }
  
  
-function _debugDumpGroupedSearches($aData, $aTokens)
-{
-    $aWordsIDs = array();
-    if ($aTokens) {
-        foreach ($aTokens as $sToken => $aWords) {
-            if ($aWords) {
-                foreach ($aWords as $aToken) {
-                    $aWordsIDs[$aToken['word_id']] =
-                        '#'.$sToken.'('.$aToken['word_id'].')#';
-                }
-            }
-        }
-    }
-    echo '<table border="1">';
-    echo '<tr><th>rank</th><th>Name Tokens</th><th>Name Not</th>';
-    echo '<th>Address Tokens</th><th>Address Not</th><th>country</th><th>operator</th>';
-    echo '<th>class</th><th>type</th><th>postcode</th><th>housenumber</th></tr>';
-    foreach ($aData as $iRank => $aRankedSet) {
-        foreach ($aRankedSet as $aRow) {
-            $aRow->dumpAsHtmlTableRow($aWordsIDs);
-        }
-    }
-    echo '</table>';
-}
-
-
  function getAddressDetails(&$oDB, $sLanguagePrefArraySQL, $iPlaceID, $sCountryCode = false, $housenumber = -1, $bRaw = false)
  {
      $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID, $housenumber)";
diff --git a/lib/template/details-json.php b/lib/template/details-json.php

index 288c914c10294c9bc0c2d7817f97ba8c879e7b08..9b4237c70720331a82b20160f2570c1b7bc3c0df 100644 (file)
--- a/lib/template/details-json.php
+++ b/lib/template/details-json.php
@@ -33,33 +33,33 @@ $aPlaceDetails['rank_search'] = (int) $aPointDetails['rank_search'];
  
  $aPlaceDetails['isarea'] = ($aPointDetails['isarea'] == 't');
  $aPlaceDetails['centroid'] = array(
-    'type' => 'Point',
-    'coordinates' => array( (float) $aPointDetails['lon'], (float) $aPointDetails['lat'] )
-);
+                              'type' => 'Point',
+                              'coordinates' => array( (float) $aPointDetails['lon'], (float) $aPointDetails['lat'] )
+                             );
  
  $aPlaceDetails['geometry'] = json_decode($aPointDetails['asgeojson']);
  
  $funcMapAddressLine = function ($aFull) {
      $aMapped = array(
-        'localname' => $aFull['localname'],
-        'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
-        'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
-        'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
-        'class' => $aFull['class'],
-        'type' => $aFull['type'],
-        'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
-        'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
-        'distance' => (float) $aFull['distance']
-    );
+                'localname' => $aFull['localname'],
+                'place_id' => isset($aFull['place_id']) ? (int) $aFull['place_id'] : null,
+                'osm_id' => isset($aFull['osm_id']) ? (int) $aFull['osm_id'] : null,
+                'osm_type' => isset($aFull['osm_type']) ? $aFull['osm_type'] : null,
+                'class' => $aFull['class'],
+                'type' => $aFull['type'],
+                'admin_level' => isset($aFull['admin_level']) ? (int) $aFull['admin_level'] : null,
+                'rank_address' => $aFull['rank_address'] ? (int) $aFull['rank_address'] : null,
+                'distance' => (float) $aFull['distance']
+               );
  
      return $aMapped;
  };
  
  $funcMapKeyword = function ($aFull) {
      $aMapped = array(
-        'id' => (int) $aFull['word_id'],
-        'token' => $aFull['word_token']
-    );
+                'id' => (int) $aFull['word_id'],
+                'token' => $aFull['word_token']
+               );
      return $aMapped;
  };
  
diff --git a/phpcs.xml b/phpcs.xml

index 09360731510b48b7ecdf8ebe8827897029b5a3db..ab9d3969284b0961f0b4f31c9d4243de05556e5d 100644 (file)
--- a/phpcs.xml
+++ b/phpcs.xml
@@ -93,6 +93,8 @@
         INDENTATION, SPACING
         ************************************************************** -->
  
+  <rule ref="Squiz.Arrays.ArrayDeclaration.KeyNotAligned" />
+
    <!-- Aligned looks nicer, but causes too many warnings currently -->
    <rule ref="Squiz.Arrays.ArrayDeclaration.DoubleArrowNotAligned">
      <severity>0</severity>
@@ -103,7 +105,6 @@
  
  
  
-
    <!-- **************************************************************
         VARIABLES
         ************************************************************** -->
diff --git a/test/bdd/steps/queries.py b/test/bdd/steps/queries.py

index 7266be55913fc3cff9962819e4cf781b725de823..caefb661487692a45c6fb8d2930496eb2ebfb911 100644 (file)
--- a/test/bdd/steps/queries.py
+++ b/test/bdd/steps/queries.py
@@ -321,12 +321,15 @@ def send_api_query(endpoint, params, fmt, context):
  
      (outp, err) = proc.communicate()
      outp = outp.decode('utf-8')
+    err = err.decode("utf-8")
  
      logger.debug("Result: \n===============================\n"
                   + outp + "\n===============================\n")
  
      assert_equals(0, proc.returncode,
-                  "%s failed with message: %s\noutput: %s" % (env['SCRIPT_FILENAME'], err, outp))
+                  "%s failed with message: %s" % (
+                      os.path.basename(env['SCRIPT_FILENAME']),
+                      err))
  
      assert_equals(0, len(err), "Unexpected PHP error: %s" % (err))
  
diff --git a/test/php/Nominatim/DebugTest.php b/test/php/Nominatim/DebugTest.php

index 7ed08122f8e32db2270c497e90c553d4deabaab2..6e0c25f3a22a150b4b4838c5ffeaba03713f09e5 100644 (file)
--- a/test/php/Nominatim/DebugTest.php
+++ b/test/php/Nominatim/DebugTest.php
@@ -179,19 +179,19 @@ EOT
  
          // header are taken from first group item, thus no key3 gets printed
          $aGroups = array(
-            'group1' => array(
-                array('key1' => 'val1', 'key2' => 'val2'),
-                array('key1' => 'one', 'key2' => 'two', 'unknown' => 1),
-            ),
-            'group2' => array(
-                array('key1' => 'val1', 'key2' => 'val2', 'key3' => 'val3'),
-            )
-        );
+                    'group1' => array(
+                                 array('key1' => 'val1', 'key2' => 'val2'),
+                                 array('key1' => 'one', 'key2' => 'two', 'unknown' => 1),
+                                ),
+                    'group2' => array(
+                                 array('key1' => 'val1', 'key2' => 'val2', 'key3' => 'val3'),
+                                )
+                   );
          Debug::printGroupTable('Table3', $aGroups);
  
          $aGroups = array(
-            'group1' => array($this->oWithDebuginfo, $this->oWithDebuginfo),
-        );
+                    'group1' => array($this->oWithDebuginfo, $this->oWithDebuginfo),
+                   );
          Debug::printGroupTable('Table4', $aGroups);
      }
  }
diff --git a/website/status.php b/website/status.php

index 262fe8e0159cd59e8148f8c26038fb4a223d048d..c0c379d0a983e6dd6d331d5a0089a75866570952 100644 (file)
--- a/website/status.php
+++ b/website/status.php
@@ -24,9 +24,9 @@ try {
  } catch (Exception $oErr) {
      if ($sOutputFormat == 'json') {
          $aResponse = array(
-                  'status' => $oErr->getCode(),
-                  'message' => $oErr->getMessage()
-                 );
+                      'status' => $oErr->getCode(),
+                      'message' => $oErr->getMessage()
+                     );
          javascript_renderData($aResponse);
      } else {
          header('HTTP/1.0 500 Internal Server Error');
@@ -39,10 +39,10 @@ try {
  if ($sOutputFormat == 'json') {
      $epoch = $oStatus->dataDate();
      $aResponse = array(
-              'status' => 0,
-              'message' => 'OK',
-              'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339)
-             );
+                  'status' => 0,
+                  'message' => 'OK',
+                  'data_updated' => (new DateTime('@'.$epoch))->format(DateTime::RFC3339)
+                 );
      javascript_renderData($aResponse);
  } else {
      echo 'OK';
author	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
committer	Sarah Hoffmann <lonvia@denofr.de>
	Mon, 21 May 2018 10:01:56 +0000 (12:01 +0200)
docs/api/Reverse.md		patch \| blob \| history
docs/api/Search.md		patch \| blob \| history
lib/DebugHtml.php		patch \| blob \| history
lib/Geocode.php		patch \| blob \| history
lib/SearchDescription.php		patch \| blob \| history
lib/TokenCountry.php	[new file with mode: 0644]	patch \| blob
lib/TokenHousenumber.php	[new file with mode: 0644]	patch \| blob
lib/TokenList.php	[new file with mode: 0644]	patch \| blob
lib/TokenPostcode.php	[new file with mode: 0644]	patch \| blob
lib/TokenSpecialTerm.php	[new file with mode: 0644]	patch \| blob
lib/TokenWord.php	[new file with mode: 0644]	patch \| blob
lib/lib.php		patch \| blob \| history
lib/template/details-json.php		patch \| blob \| history
phpcs.xml		patch \| blob \| history
test/bdd/steps/queries.py		patch \| blob \| history
test/php/Nominatim/DebugTest.php		patch \| blob \| history
website/status.php		patch \| blob \| history