git.openstreetmap.org Git - nominatim.git/blobdiff - lib/Geocode.php
move initial search setup to new class type
[nominatim.git] / lib / Geocode.php
index a7987d2400940b5fe07c8c529366eb25df4d0cd1..88a969a54b93a6e324703e01fc4e033af5abb403 100644 (file)
@@ -313,12 +313,7 @@ class Geocode
                 if (count($aViewbox) != 4) {
                     userError("Bad parmater 'viewbox'. Expected 4 coordinates.");
                 }
-                $this->setViewBox(array(
-                                   $aViewbox[0],
-                                   $aViewbox[3],
-                                   $aViewbox[2],
-                                   $aViewbox[1]
-                                  ));
+                $this->setViewBox($aViewbox);
             } else {
                 $aRoute = $oParams->getStringList('route');
                 $fRouteWidth = $oParams->getFloat('routewidth');
@@ -714,6 +709,8 @@ class Geocode
 
              Score how good the search is so they can be ordered
          */
+        $iGlobalRank = 0;
+
         foreach ($aPhrases as $iPhrase => $aPhrase) {
             $aNewPhraseSearches = array();
             if ($bStructuredPhrases) $sPhraseType = $aPhraseTypes[$iPhrase];
@@ -737,6 +734,8 @@ class Geocode
 
                         // If the token is valid
                         if (isset($aValidTokens[' '.$sToken])) {
+                            // TODO variable should go into aCurrentSearch
+                            $bHavePostcode = false;
                             foreach ($aValidTokens[' '.$sToken] as $aSearchTerm) {
                                 $aSearch = $aCurrentSearch;
                                 $aSearch['iSearchRank']++;
@@ -748,19 +747,25 @@ class Geocode
                                             $aSearch['iSearchRank'] += 5;
                                         }
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
+                                        // If it is at the beginning, we can be almost sure that this is the wrong order
+                                        // Increase score for all searches.
+                                        if ($iToken == 0 && $iPhrase == 0) {
+                                            $iGlobalRank++;
+                                        }
                                     }
-                                } elseif ($sPhraseType == 'postalcode' || ($aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode')) {
+                                } elseif (($sPhraseType == '' || $sPhraseType == 'postalcode') && $aSearchTerm['class'] == 'place' && $aSearchTerm['type'] == 'postcode') {
                                     // We need to try the case where the postal code is the primary element (i.e. no way to tell if it is (postalcode, city) OR (city, postalcode) so try both
-                                    if ($aSearch['sPostcode'] === '' && $aSearch['sHouseNumber'] === '' &&
+                                    if ($aSearch['sPostcode'] === '' &&
                                         isset($aSearchTerm['word']) && $aSearchTerm['word'] && strpos($sNormQuery, $this->normTerm($aSearchTerm['word'])) !== false) {
                                         // If we have structured search or this is the first term,
                                         // make the postcode the primary search element.
-                                        if ($aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
+                                        if (!$bHavePostcode && $aSearch['sOperator'] === '' && ($sPhraseType == 'postalcode' || ($iToken == 0 && $iPhrase == 0))) {
                                             $aNewSearch = $aSearch;
                                             $aNewSearch['sOperator'] = 'postcode';
                                             $aNewSearch['aAddress'] = array_merge($aNewSearch['aAddress'], $aNewSearch['aName']);
-                                            $aNewSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word'];
+                                            $aNewSearch['aName'] = array($aSearchTerm['word_id'] => $aSearchTerm['word']);
                                             if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aNewSearch;
+                                            $bHavePostcode = true;
                                         }
 
                                         // If we have a structured search or this is not the first term,
@@ -776,8 +781,8 @@ class Geocode
                                         // sanity check: if the housenumber is not mainly made
                                         // up of numbers, add a penalty
                                         if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
-                                        // also housenumbers should appear in the first or second phrase
-                                        if ($iPhrase > 1) $aSearch['iSearchRank'] += 1;
+                                        // also must not appear in the middle of the address
+                                        if ($aSearch['aAddress'] || $aSearch['aAddressNonSearch']) $aSearch['iSearchRank'] += 1;
                                         if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
                                         /*
                                         // Fall back to not searching for this item (better than nothing)
@@ -790,7 +795,7 @@ class Geocode
                                     // require a normalized exact match of the term
                                     // if we have the normalizer version of the query
                                     // available
-                                    if ($aSearch['sClass'] === ''
+                                    if ($aSearch['sOperator'] === ''
                                         && ($sNormQuery === null || !($aSearchTerm['word'] && strpos($sNormQuery, $aSearchTerm['word']) === false))) {
                                         $aSearch['sClass'] = $aSearchTerm['class'];
                                         $aSearch['sType'] = $aSearchTerm['type'];
@@ -854,7 +859,8 @@ class Geocode
                                         }
                                     }
 
-                                    if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) {
+                                    if ((!$aCurrentSearch['sPostcode'] && !$aCurrentSearch['aAddress'] && !$aCurrentSearch['aAddressNonSearch'])
+                                        && (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase)) {
                                         $aSearch = $aCurrentSearch;
                                         $aSearch['iSearchRank'] += 1;
                                         if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1;
@@ -915,6 +921,25 @@ class Geocode
 
             //if (CONST_Debug) _debugDumpGroupedSearches($aGroupedSearches, $aValidTokens);
         }
+
+        // Revisit searches, drop bad searches and give penalty to unlikely combinations.
+        $aGroupedSearches = array();
+        foreach ($aSearches as $aSearch) {
+            if (!$aSearch['aName']) {
+                if ($aSearch['sHouseNumber']) {
+                    continue;
+                }
+            }
+            if ($this->aCountryCodes && $aSearch['sCountryCode']
+                && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
+                continue;
+            }
+
+            $aSearch['iSearchRank'] += $iGlobalRank;
+            $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
+        }
+        ksort($aGroupedSearches);
+
         return $aGroupedSearches;
     }
 
@@ -998,76 +1023,59 @@ class Geocode
 
         $aSearchResults = array();
         if ($sQuery || $this->aStructuredQuery) {
-            // Start with a blank search
-            $aSearches = array(
-                          array(
-                           'iSearchRank' => 0,
-                           'iNamePhrase' => -1,
-                           'sCountryCode' => false,
-                           'aName' => array(),
-                           'aAddress' => array(),
-                           'aFullNameAddress' => array(),
-                           'aNameNonSearch' => array(),
-                           'aAddressNonSearch' => array(),
-                           'sOperator' => '',
-                           'aFeatureName' => array(),
-                           'sClass' => '',
-                           'sType' => '',
-                           'sHouseNumber' => '',
-                           'sPostcode' => '',
-                           'oNear' => $oNearPoint
-                          )
-                         );
-
-            // Any 'special' terms in the search?
-            $bSpecialTerms = false;
-            preg_match_all('/\\[([\\w_]*)=([\\w_]*)\\]/', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
-            foreach ($aSpecialTermsRaw as $aSpecialTerm) {
-                $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-                if (!$bSpecialTerms) {
-                    $aNewSearches = array();
-                    foreach ($aSearches as $aSearch) {
-                        $aNewSearch = $aSearch;
-                        $aNewSearch['sClass'] = $aSpecialTerm[1];
-                        $aNewSearch['sType'] = $aSpecialTerm[2];
-                        $aNewSearches[] = $aNewSearch;
-                    }
+            // Start with a single blank search
+            $aSearches = array(new SearchDescription());
 
-                    $aSearches = $aNewSearches;
-                    $bSpecialTerms = true;
-                }
+            if ($oNearPoint) {
+                $aSearches[0]->setNear($oNearPoint);
             }
 
-            preg_match_all('/\\[([\\w ]*)\\]/u', $sQuery, $aSpecialTermsRaw, PREG_SET_ORDER);
-            if (isset($this->aStructuredQuery['amenity']) && $this->aStructuredQuery['amenity']) {
-                $aSpecialTermsRaw[] = array('['.$this->aStructuredQuery['amenity'].']', $this->aStructuredQuery['amenity']);
-                unset($this->aStructuredQuery['amenity']);
+            if ($sQuery) {
+                $sQuery = $aSearches[0]->extractKeyValuePairs($sQuery);
             }
 
-            foreach ($aSpecialTermsRaw as $aSpecialTerm) {
-                $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
-                if ($bSpecialTerms) {
-                    continue;
+            $sSpecialTerm = '';
+            if ($sQuery) {
+                preg_match_all(
+                    '/\\[([\\w ]*)\\]/u',
+                    $sQuery,
+                    $aSpecialTermsRaw,
+                    PREG_SET_ORDER
+                );
+                foreach ($aSpecialTermsRaw as $aSpecialTerm) {
+                    $sQuery = str_replace($aSpecialTerm[0], ' ', $sQuery);
+                    if (!$sSpecialTerm) {
+                        $sSpecialTerm = $aSpecialTerm[1];
+                    }
                 }
+            }
+            if (!$sSpecialTerm && $this->aStructuredQuery
+                && isset($this->aStructuredQuery['amenity'])) {
+                $sSpecialTerm = $this->aStructuredQuery['amenity'];
+                unset($this->aStructuredQuery['amenity']);
+            }
 
-                $sToken = chksql($this->oDB->getOne("SELECT make_standard_name('".$aSpecialTerm[1]."') AS string"));
-                $sSQL = 'SELECT * ';
-                $sSQL .= 'FROM ( ';
-                $sSQL .= '   SELECT word_id, word_token, word, class, type, country_code, operator';
-                $sSQL .= '   FROM word ';
+            if ($sSpecialTerm && !$aSearches[0]->hasOperator()) {
+                $sSpecialTerm = pg_escape_string($sSpecialTerm);
+                $sToken = chksql(
+                    $this->oDB->getOne("SELECT make_standard_name('$sSpecialTerm')"),
+                    "Cannot decode query. Wrong encoding?"
+                );
+                $sSQL = 'SELECT class, type FROM word ';
                 $sSQL .= '   WHERE word_token in (\' '.$sToken.'\')';
-                $sSQL .= ') AS x ';
-                $sSQL .= ' WHERE (class is not null AND class not in (\'place\'))';
+                $sSQL .= '   AND class is not null AND class not in (\'place\')';
                 if (CONST_Debug) var_Dump($sSQL);
                 $aSearchWords = chksql($this->oDB->getAll($sSQL));
                 $aNewSearches = array();
-                foreach ($aSearches as $aSearch) {
+                foreach ($aSearches as $oSearch) {
                     foreach ($aSearchWords as $aSearchTerm) {
-                        $aNewSearch = $aSearch;
-                        $aNewSearch['sClass'] = $aSearchTerm['class'];
-                        $aNewSearch['sType'] = $aSearchTerm['type'];
-                        $aNewSearches[] = $aNewSearch;
-                        $bSpecialTerms = true;
+                        $oNewSearch = clone $oSearch;
+                        $oNewSearch->setPoiSearch(
+                            Operator::TYPE,
+                            $aSearchTerm['class'],
+                            $aSearchTerm['type'],
+                        );
+                        $aNewSearches[] = $oNewSearch;
                     }
                 }
                 $aSearches = $aNewSearches;
@@ -1116,14 +1124,10 @@ class Geocode
                 if (CONST_Debug) var_Dump($sSQL);
 
                 $aValidTokens = array();
-                if (sizeof($aTokens)) {
-                    $aDatabaseWords = chksql(
-                        $this->oDB->getAll($sSQL),
-                        "Could not get word tokens."
-                    );
-                } else {
-                    $aDatabaseWords = array();
-                }
+                $aDatabaseWords = chksql(
+                    $this->oDB->getAll($sSQL),
+                    "Could not get word tokens."
+                );
                 $aPossibleMainWordIDs = array();
                 $aWordFrequencyScores = array();
                 foreach ($aDatabaseWords as $aToken) {
@@ -1191,10 +1195,10 @@ class Geocode
 
                     foreach ($aGroupedSearches as $aSearches) {
                         foreach ($aSearches as $aSearch) {
-                            if ($aSearch['iSearchRank'] < $this->iMaxRank) {
-                                if (!isset($aReverseGroupedSearches[$aSearch['iSearchRank']])) $aReverseGroupedSearches[$aSearch['iSearchRank']] = array();
-                                $aReverseGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
+                            if (!isset($aReverseGroupedSearches[$aSearch->getRank()])) {
+                                $aReverseGroupedSearches[$aSearch->getRank()] = array();
                             }
+                            $aReverseGroupedSearches[$aSearch->getRank()][] = $aSearch;
                         }
                     }
 
@@ -1205,38 +1209,9 @@ class Geocode
                 // Re-group the searches by their score, junk anything over 20 as just not worth trying
                 $aGroupedSearches = array();
                 foreach ($aSearches as $aSearch) {
-                    if ($aSearch['iSearchRank'] < $this->iMaxRank) {
-                        if (!isset($aGroupedSearches[$aSearch['iSearchRank']])) $aGroupedSearches[$aSearch['iSearchRank']] = array();
-                        $aGroupedSearches[$aSearch['iSearchRank']][] = $aSearch;
-                    }
-                }
-                ksort($aGroupedSearches);
-            }
-
-            if (CONST_Search_TryDroppedAddressTerms && sizeof($this->aStructuredQuery) > 0) {
-                $aCopyGroupedSearches = $aGroupedSearches;
-                foreach ($aCopyGroupedSearches as $iGroup => $aSearches) {
-                    foreach ($aSearches as $iSearch => $aSearch) {
-                        $aReductionsList = array($aSearch['aAddress']);
-                        $iSearchRank = $aSearch['iSearchRank'];
-                        while (sizeof($aReductionsList) > 0) {
-                            $iSearchRank += 5;
-                            if ($iSearchRank > iMaxRank) break 3;
-                            $aNewReductionsList = array();
-                            foreach ($aReductionsList as $aReductionsWordList) {
-                                for ($iReductionWord = 0; $iReductionWord < sizeof($aReductionsWordList); $iReductionWord++) {
-                                    $aReductionsWordListResult = array_merge(array_slice($aReductionsWordList, 0, $iReductionWord), array_slice($aReductionsWordList, $iReductionWord+1));
-                                    $aReverseSearch = $aSearch;
-                                    $aSearch['aAddress'] = $aReductionsWordListResult;
-                                    $aSearch['iSearchRank'] = $iSearchRank;
-                                    $aGroupedSearches[$iSearchRank][] = $aReverseSearch;
-                                    if (sizeof($aReductionsWordListResult) > 0) {
-                                        $aNewReductionsList[] = $aReductionsWordListResult;
-                                    }
-                                }
-                            }
-                            $aReductionsList = $aNewReductionsList;
-                        }
+                    if ($aSearch->getRank() < $this->iMaxRank) {
+                        if (!isset($aGroupedSearches[$aSearch->getRank()])) $aGroupedSearches[$aSearch->getRank()] = array();
+                        $aGroupedSearches[$aSearch->getRank()][] = $aSearch;
                     }
                 }
                 ksort($aGroupedSearches);
@@ -1269,10 +1244,6 @@ class Geocode
                     if (CONST_Debug) echo "<hr><b>Search Loop, group $iGroupLoop, loop $iQueryLoop</b>";
                     if (CONST_Debug) _debugDumpGroupedSearches(array($iGroupedRank => array($aSearch)), $aValidTokens);
 
-                    if ($sCountryCodesSQL && $aSearch['sCountryCode'] && !in_array($aSearch['sCountryCode'], $this->aCountryCodes)) {
-                        continue;
-                    }
-
                     // No location term?
                     if (!sizeof($aSearch['aName']) && !sizeof($aSearch['aAddress'])) {
                         if ($aSearch['sCountryCode'] && !$aSearch['sClass'] && !$aSearch['sHouseNumber'] && !$aSearch['oNear']) {
@@ -1568,7 +1539,8 @@ class Geocode
                             }
 
                             // Fallback to the road (if no housenumber was found)
-                            if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])) {
+                            if (!sizeof($aPlaceIDs) && preg_match('/[0-9]+/', $aSearch['sHouseNumber'])
+                                && ($aSearch['aAddress'] || $aSearch['sCountryCode'])) {
                                 $aPlaceIDs = $aRoadPlaceIDs;
                                 //set to -1, if no housenumbers were found
                                 $searchedHousenumber = -1;
@@ -1733,9 +1705,11 @@ class Geocode
                     // Need to verify passes rank limits before dropping out of the loop (yuk!)
                     // reduces the number of place ids, like a filter
                     // rank_address is 30 for interpolated housenumbers
+                    $sWherePlaceId = 'WHERE place_id in (';
+                    $sWherePlaceId .= join(',', array_keys($aResultPlaceIDs)).') ';
+
                     $sSQL = "SELECT place_id ";
-                    $sSQL .= "FROM placex ";
-                    $sSQL .= "WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") ";
+                    $sSQL .= "FROM placex ".$sWherePlaceId;
                     $sSQL .= "  AND (";
                     $sSQL .= "         placex.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
                     if (14 >= $this->iMinAddressRank && 14 <= $this->iMaxAddressRank) {
@@ -1744,20 +1718,23 @@ class Geocode
                     if ($this->aAddressRankList) {
                         $sSQL .= "     OR placex.rank_address in (".join(',', $this->aAddressRankList).")";
                     }
-                    if (CONST_Use_US_Tiger_Data) {
-                        $sSQL .= "  ) ";
+                    $sSQL .= "  ) UNION ";
+                    $sSQL .= " SELECT place_id FROM location_postcode lp ".$sWherePlaceId;
+                    $sSQL .= "  AND (lp.rank_address between $this->iMinAddressRank and $this->iMaxAddressRank ";
+                    if ($this->aAddressRankList) {
+                        $sSQL .= "     OR lp.rank_address in (".join(',', $this->aAddressRankList).")";
+                    }
+                    $sSQL .= ") ";
+                    if (CONST_Use_US_Tiger_Data && $this->iMaxAddressRank == 30) {
+                        $sSQL .= "UNION ";
+                        $sSQL .= "  SELECT place_id ";
+                        $sSQL .= "  FROM location_property_tiger ".$sWherePlaceId;
+                    }
+                    if ($this->iMaxAddressRank == 30) {
                         $sSQL .= "UNION ";
                         $sSQL .= "  SELECT place_id ";
-                        $sSQL .= "  FROM location_property_tiger ";
-                        $sSQL .= "  WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).") ";
-                        $sSQL .= "    AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank ";
-                        if ($this->aAddressRankList) $sSQL .= " OR 30 in (".join(',', $this->aAddressRankList).")";
+                        $sSQL .= "  FROM location_property_osmline ".$sWherePlaceId;
                     }
-                    $sSQL .= ") UNION ";
-                    $sSQL .= "  SELECT place_id ";
-                    $sSQL .= "  FROM location_property_osmline ";
-                    $sSQL .= "  WHERE place_id in (".join(',', array_keys($aResultPlaceIDs)).")";
-                    $sSQL .= "    AND startnumber is not NULL AND (30 between $this->iMinAddressRank and $this->iMaxAddressRank)";
                     if (CONST_Debug) var_dump($sSQL);
                     $aFilteredPlaceIDs = chksql($this->oDB->getCol($sSQL));
                     $tempIDs = array();