git.openstreetmap.org Git - nominatim.git/blobdiff - nominatim/tokenizer/icu_tokenizer.py
remove redundant 'u' prefixes for unicode strings
[nominatim.git] / nominatim / tokenizer / icu_tokenizer.py
index 1799ae86d0330ee61c2fc5fe05118ff00e0ef162..9c7138ce67fa5174d0e947c72bf7a71313fe3435 100644 (file)
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
         """
         self.loader = ICURuleLoader(config)
 
-        self._install_php(config.lib_dir.php)
+        self._install_php(config.lib_dir.php, overwrite=True)
         self._save_config()
 
         if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.load_config_from_db(conn)
 
+        self._install_php(config.lib_dir.php, overwrite=False)
+
 
     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
                                      self.loader.make_token_analysis())
 
 
-    def _install_php(self, phpdir):
+    def _install_php(self, phpdir, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent(f"""\
-            <?php
-            @define('CONST_Max_Word_Frequency', 10000000);
-            @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-            @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-            require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent(f"""\
+                <?php
+                @define('CONST_Max_Word_Frequency', 10000000);
+                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""), encoding='utf-8')
 
 
     def _save_config(self):