git.openstreetmap.org Git - nominatim.git/commitdiff
Merge pull request #2517 from lonvia/transliteration-special-chars
author Sarah Hoffmann <lonvia@denofr.de>
Thu, 11 Nov 2021 06:42:42 +0000 (07:42 +0100)
committer GitHub <noreply@github.com>
Thu, 11 Nov 2021 06:42:42 +0000 (07:42 +0100)
ICU: avoid non-alphanumerical characters in transliteration

lib-php/SearchDescription.php
settings/icu_tokenizer.yaml

index e13a0e9daccd2b511b8c0dcb3664e05776643e44..4962d9ff8c91a0e31142924422f8d081346a8057 100644 (file)
@@ -584,11 +584,11 @@ class SearchDescription
         // will be narrowed down by an address. Remember that with ordering
         // every single result has to be checked.
         if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) {
-            $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
+            $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
 
             // Housenumbers on streets and places.
             $sChildHnr = 'SELECT * FROM placex WHERE parent_place_id = search_name.place_id';
-            $sChildHnr .= "    AND housenumber ~* E'".$sHouseNumberRegex."'";
+            $sChildHnr .= '    AND housenumber ~* E'.$sHouseNumberRegex;
             // Interpolations on streets and places.
             if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) {
                 $sIpolHnr = 'SELECT * FROM location_property_osmline ';
@@ -601,7 +601,7 @@ class SearchDescription
             }
             // Housenumbers on the object iteself for unlisted places.
             $sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id';
-            $sSelfHnr .= "    AND housenumber ~* E'".$sHouseNumberRegex."'";
+            $sSelfHnr .= '    AND housenumber ~* E'.$sHouseNumberRegex;
 
             $sSql = '(CASE WHEN address_rank = 30 THEN EXISTS('.$sSelfHnr.') ';
             $sSql .= ' ELSE EXISTS('.$sChildHnr.') ';
@@ -739,9 +739,9 @@ class SearchDescription
             return $aResults;
         }
 
-        $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M';
+        $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M');
         $sSQL = 'SELECT place_id FROM placex WHERE';
-        $sSQL .= "  housenumber ~* E'".$sHouseNumberRegex."'";
+        $sSQL .= '  housenumber ~* E'.$sHouseNumberRegex;
         $sSQL .= ' AND ('.join(' OR ', $aIDCondition).')';
         $sSQL .= $this->oContext->excludeSQL(' AND place_id');
 
index 41760c49e0fbd2122d2f1e7fd1966fc4278d1975..e5cbeb6f54ffcef3c3c0340e078c76f6084123e0 100644 (file)
@@ -21,8 +21,8 @@ transliteration:
     - !include icu-rules/extended-unicode-to-asccii.yaml
     - ":: Ascii ()"
     - ":: NFD ()"
-    - "[^[:Ascii:]] >"
     - ":: lower ()"
+    - "[^a-z0-9[:Space:]] >"
     - ":: NFC ()"
 sanitizers:
     - step: split-name-list