From: Sarah Hoffmann Date: Thu, 11 Nov 2021 06:42:42 +0000 (+0100) Subject: Merge pull request #2517 from lonvia/transliteration-special-chars X-Git-Tag: v4.0.1~4 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/8dc14416358027de7033e94a86be324db1e328dd?hp=7326b246b7ce97cbfe76da1d3c600128e15a46d5 Merge pull request #2517 from lonvia/transliteration-special-chars ICU: avoid non-alphanumerical characters in transliteration --- diff --git a/lib-php/SearchDescription.php b/lib-php/SearchDescription.php index e13a0e9d..4962d9ff 100644 --- a/lib-php/SearchDescription.php +++ b/lib-php/SearchDescription.php @@ -584,11 +584,11 @@ class SearchDescription // will be narrowed down by an address. Remember that with ordering // every single result has to be checked. if ($this->sHouseNumber && ($this->bRareName || !empty($this->aAddress) || $this->sPostcode)) { - $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; + $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M'); // Housenumbers on streets and places. $sChildHnr = 'SELECT * FROM placex WHERE parent_place_id = search_name.place_id'; - $sChildHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'"; + $sChildHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex; // Interpolations on streets and places. if (preg_match('/^[0-9]+$/', $this->sHouseNumber)) { $sIpolHnr = 'SELECT * FROM location_property_osmline '; @@ -601,7 +601,7 @@ class SearchDescription } // Housenumbers on the object iteself for unlisted places. $sSelfHnr = 'SELECT * FROM placex WHERE place_id = search_name.place_id'; - $sSelfHnr .= " AND housenumber ~* E'".$sHouseNumberRegex."'"; + $sSelfHnr .= ' AND housenumber ~* E'.$sHouseNumberRegex; $sSql = '(CASE WHEN address_rank = 30 THEN EXISTS('.$sSelfHnr.') '; $sSql .= ' ELSE EXISTS('.$sChildHnr.') '; @@ -739,9 +739,9 @@ class SearchDescription return $aResults; } - $sHouseNumberRegex = '\\\\m'.$this->sHouseNumber.'\\\\M'; + $sHouseNumberRegex = $oDB->getDBQuoted('\\\\m'.$this->sHouseNumber.'\\\\M'); $sSQL = 'SELECT place_id FROM placex WHERE'; - $sSQL .= " housenumber ~* E'".$sHouseNumberRegex."'"; + $sSQL .= ' housenumber ~* E'.$sHouseNumberRegex; $sSQL .= ' AND ('.join(' OR ', $aIDCondition).')'; $sSQL .= $this->oContext->excludeSQL(' AND place_id'); diff --git a/settings/icu_tokenizer.yaml b/settings/icu_tokenizer.yaml index 41760c49..e5cbeb6f 100644 --- a/settings/icu_tokenizer.yaml +++ b/settings/icu_tokenizer.yaml @@ -21,8 +21,8 @@ transliteration: - !include icu-rules/extended-unicode-to-asccii.yaml - ":: Ascii ()" - ":: NFD ()" - - "[^[:Ascii:]] >" - ":: lower ()" + - "[^a-z0-9[:Space:]] >" - ":: NFC ()" sanitizers: - step: split-name-list