From: Sarah Hoffmann
Date: Fri, 2 Dec 2022 09:15:02 +0000 (+0100)
Subject: contract duplicate spaces in transliteration string
X-Git-Tag: v4.3.0~129^2
X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/0e186835b914074e1784ccce398a6eca3e78bfb7?ds=inline;hp=--cc

contract duplicate spaces in transliteration string

There are some pathological cases where an isolated letter may be
deleted because it is in itself meaningless. If this happens in the
middle of a sentence, then the transliteration contains two consecutive
spaces. Add a final rule to fix this.

See #2909.
---

0e186835b914074e1784ccce398a6eca3e78bfb7
diff --git a/settings/icu_tokenizer.yaml b/settings/icu_tokenizer.yaml
index 16339970..f30578a2 100644
--- a/settings/icu_tokenizer.yaml
+++ b/settings/icu_tokenizer.yaml
@@ -24,6 +24,7 @@ transliteration:
     - ":: lower ()"
     - "[^a-z0-9[:Space:]] >"
     - ":: NFC ()"
+    - "[:Space:]+ > ' '"
 sanitizers:
     - step: clean-housenumbers
       filter-kind: