X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/d984100e23d28253b3659e7b628cde2c8b436cf8..86ad9efa8abb1fb478b3be5b6c469877aad05a51:/src/nominatim_db/tokenizer/icu_token_analysis.py diff --git a/src/nominatim_db/tokenizer/icu_token_analysis.py b/src/nominatim_db/tokenizer/icu_token_analysis.py index a3cdcb7a..c1ba106c 100644 --- a/src/nominatim_db/tokenizer/icu_token_analysis.py +++ b/src/nominatim_db/tokenizer/icu_token_analysis.py @@ -25,6 +25,8 @@ class ICUTokenAnalysis: def __init__(self, norm_rules: str, trans_rules: str, analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']): + # additional break signs are not relevant during name analysis + norm_rules += ";[[:Space:][-:]]+ > ' ';" self.normalizer = Transliterator.createFromRules("icu_normalization", norm_rules) trans_rules += ";[:Space:]+ > ' '"