git.openstreetmap.org Git - nominatim.git/blobdiff - src/nominatim_db/tokenizer/icu_token_analysis.py
keep break indicators [:-] during normalisation
[nominatim.git] / src / nominatim_db / tokenizer / icu_token_analysis.py
index a3cdcb7afdb9a008ee4c19f642b18542da369ac0..c1ba106c48775498ad7b2596ef460be8f85bc299 100644 (file)
@@ -25,6 +25,8 @@ class ICUTokenAnalysis:
 
     def __init__(self, norm_rules: str, trans_rules: str,
                  analysis_rules: Mapping[Optional[str], 'TokenAnalyzerRule']):
+        # additional break signs are not relevant during name analysis
+        norm_rules += ";[[:Space:][-:]]+ > ' ';"
         self.normalizer = Transliterator.createFromRules("icu_normalization",
                                                          norm_rules)
         trans_rules += ";[:Space:]+ > ' '"