git.openstreetmap.org Git - nominatim.git/blobdiff - lib-php/tokenizer/legacy_tokenizer.php
take Tiger housenumbers into account when ranking street results
[nominatim.git] / lib-php / tokenizer / legacy_tokenizer.php
index e5ffbe025f05aabb268b886bc6c055174eb831e5..d5686f6464eaddf0c4a8aa8cd2f6bc0024d4b887 100644 (file)
@@ -48,6 +48,14 @@ class Tokenizer
     }
 
 
+    /**
+     * Fetch the words with the highest search_name_count from the word table.
+     *
+     * Rows whose word column is NULL are skipped.
+     *
+     * @param int $iNum Maximum number of words to return.
+     *
+     * @return mixed Whatever the DB wrapper's getCol() yields for the query
+     *               (presumably the list of words, most frequent first).
+     */
+    public function mostFrequentWords($iNum)
+    {
+        // $iNum is concatenated into the statement rather than bound as a
+        // parameter, so force it to an integer to keep arbitrary input
+        // out of the SQL.
+        $sSQL = 'SELECT word FROM word WHERE word is not null ';
+        $sSQL .= 'ORDER BY search_name_count DESC LIMIT '.(int) $iNum;
+        return $this->oDB->getCol($sSQL);
+    }
+
+
     public function tokensForSpecialTerm($sTerm)
     {
         $aResults = array();
@@ -87,6 +95,23 @@ class Tokenizer
             $sNormQuery .= ','.$this->normalizeString($oPhrase->getPhrase());
             $sSQL .= 'make_standard_name(:' .$iPhrase.') as p'.$iPhrase.',';
             $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+
+            // Conflicts between US state abbreviations and various words
+            // for 'the' in different languages
+            switch (strtolower($oPhrase->getPhrase())) {
+                case 'il':
+                    $aParams[':'.$iPhrase] = 'illinois';
+                    break;
+                case 'al':
+                    $aParams[':'.$iPhrase] = 'alabama';
+                    break;
+                case 'la':
+                    $aParams[':'.$iPhrase] = 'louisiana';
+                    break;
+                default:
+                    $aParams[':'.$iPhrase] = $oPhrase->getPhrase();
+                    break;
+            }
         }
         $sSQL = substr($sSQL, 0, -1);