git.openstreetmap.org Git - nominatim.git/commitdiff
always state encoding when opening files in text mode
author Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 May 2022 13:36:29 +0000 (15:36 +0200)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 10 May 2022 13:36:29 +0000 (15:36 +0200)
Also applies to Path.write_text().

nominatim/config.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py

index 13d9cd8a0d502e4b1f1b5ab58ca7e0761772cfaf..a3f91055fc76b37bf338291c6b7aa2350afb4d21 100644 (file)
@@ -187,7 +187,7 @@ class Configuration:
         if configfile.suffix in ('.yaml', '.yml'):
             result = self._load_from_yaml(configfile)
         elif configfile.suffix == '.json':
         if configfile.suffix in ('.yaml', '.yml'):
             result = self._load_from_yaml(configfile)
         elif configfile.suffix == '.json':
-            with configfile.open('r') as cfg:
+            with configfile.open('r', encoding='utf-8') as cfg:
                 result = json.load(cfg)
         else:
             raise UsageError(f"Config file '{configfile}' has unknown format.")
                 result = json.load(cfg)
         else:
             raise UsageError(f"Config file '{configfile}' has unknown format.")
index b553dbc641d708175e8f7281f05cf14cf4673484..9c7138ce67fa5174d0e947c72bf7a71313fe3435 100644 (file)
@@ -187,7 +187,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
                 @define('CONST_Max_Word_Frequency', 10000000);
                 @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
                 @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
                 @define('CONST_Max_Word_Frequency', 10000000);
                 @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
                 @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
 
 
     def _save_config(self):
 
 
     def _save_config(self):
index 3b8f75692f964e9c2e84dc3ada92b156dd0afb7b..97ce6d16644cff6a19369c9db5a7a12af8387078 100644 (file)
@@ -255,7 +255,7 @@ class LegacyTokenizer(AbstractTokenizer):
                 @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
                 @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
                 require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
                 @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
                 @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
                 require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-                """.format(config)))
+                """.format(config)), encoding='utf-8')
 
 
     def _init_db_tables(self, config):
 
 
     def _init_db_tables(self, config):