From 7e70e5f50310b7bdf79e39b4a5a4964f9a6d051b Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Tue, 10 May 2022 15:36:29 +0200 Subject: [PATCH] always state encoding when opening files in text mode Also applies to Path.write_text(). --- nominatim/config.py | 2 +- nominatim/tokenizer/icu_tokenizer.py | 2 +- nominatim/tokenizer/legacy_tokenizer.py | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/nominatim/config.py b/nominatim/config.py index 13d9cd8a..a3f91055 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -187,7 +187,7 @@ class Configuration: if configfile.suffix in ('.yaml', '.yml'): result = self._load_from_yaml(configfile) elif configfile.suffix == '.json': - with configfile.open('r') as cfg: + with configfile.open('r', encoding='utf-8') as cfg: result = json.load(cfg) else: raise UsageError(f"Config file '{configfile}' has unknown format.") diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index b553dbc6..9c7138ce 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -187,7 +187,7 @@ class LegacyICUTokenizer(AbstractTokenizer): @define('CONST_Max_Word_Frequency', 10000000); @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}"); @define('CONST_Transliteration', "{self.loader.get_search_rules()}"); - require_once('{phpdir}/tokenizer/icu_tokenizer.php');""")) + require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8') def _save_config(self): diff --git a/nominatim/tokenizer/legacy_tokenizer.py b/nominatim/tokenizer/legacy_tokenizer.py index 3b8f7569..97ce6d16 100644 --- a/nominatim/tokenizer/legacy_tokenizer.py +++ b/nominatim/tokenizer/legacy_tokenizer.py @@ -255,7 +255,7 @@ class LegacyTokenizer(AbstractTokenizer): @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY}); @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}"); require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php'); - """.format(config))) + """.format(config)), encoding='utf-8') def _init_db_tables(self, config): -- 2.39.5