git.openstreetmap.org Git - nominatim.git/commitdiff
restore the tokenizer directory when missing
author    Sarah Hoffmann <lonvia@denofr.de>
          Sun, 20 Mar 2022 10:31:42 +0000 (11:31 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
          Sun, 20 Mar 2022 10:31:42 +0000 (11:31 +0100)
Automatically repopulate the tokenizer/ directory with the PHP stub
and the postgresql module when the directory is missing. This makes it
possible to switch working directories and, in particular, to run the
service from a different machine than the one where it was installed.
Users still need to make sure that .env files are set up correctly
or they will shoot themselves in the foot.

See #2515.
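In outline, the loading path introduced here recreates the tokenizer/
directory and lets the selected tokenizer restore its own files. A
paraphrased sketch of the flow, not the actual code; read_db_property
and load_tokenizer_module are hypothetical stand-ins for the helpers
visible in the diffs below:

    from pathlib import Path

    def get_tokenizer_for_db(config):
        # Sketch of the repopulation flow added by this commit.
        basedir = Path(config.project_dir) / 'tokenizer'
        if not basedir.is_dir():
            # Previously a fatal error; now the directory is recreated
            # and the tokenizer rewrites its stub files on load.
            basedir.mkdir()

        name = read_db_property(config, 'tokenizer')   # hypothetical helper
        tokenizer = load_tokenizer_module(name)        # hypothetical helper
        tokenizer.init_from_project(config)            # restores PHP stub/module
        return tokenizer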

nominatim/db/properties.py
nominatim/tokenizer/factory.py
nominatim/tokenizer/icu_tokenizer.py
nominatim/tokenizer/legacy_tokenizer.py
test/bdd/steps/nominatim_environment.py
test/python/tokenizer/test_factory.py

nominatim/db/properties.py
index 19c090069ac9ab9ccaf33bd20caff1f53088fcd7..270204872dd56459691c4105156fe8073d6aa815 100644 (file)
@@ -27,6 +27,9 @@ def get_property(conn, name):
     """ Return the current value of the given propery or None if the property
         is not set.
     """
     """ Return the current value of the given propery or None if the property
         is not set.
     """
+    if not conn.table_exists('nominatim_properties'):
+        return None
+
     with conn.cursor() as cur:
         cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                     (name, ))
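The new guard makes get_property() usable against a database that has
not been imported yet: it reports "not set" instead of failing with an
SQL error on the missing table. For illustration, the function after
this change with the existence check written out against pg_tables — a
sketch that mirrors Nominatim's connection wrapper, not its exact code:

    def table_exists(conn, table):
        # Roughly what Nominatim's Connection.table_exists() checks.
        with conn.cursor() as cur:
            cur.execute('SELECT count(*) FROM pg_tables WHERE tablename = %s',
                        (table, ))
            return cur.fetchone()[0] > 0

    def get_property(conn, name):
        if not table_exists(conn, 'nominatim_properties'):
            return None   # fresh/empty database behaves like 'property not set'
        with conn.cursor() as cur:
            cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                        (name, ))
            result = cur.fetchone()
            return result[0] if result is not None else None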
nominatim/tokenizer/factory.py
index fbda246238f16bebb6b75806a735975e564ca815..108c7841e0c7c3e4f8bf6bd25b3aa8d9c35bba42 100644 (file)
@@ -78,8 +78,8 @@ def get_tokenizer_for_db(config):
     """
     basedir = config.project_dir / 'tokenizer'
     if not basedir.is_dir():
     """
     basedir = config.project_dir / 'tokenizer'
     if not basedir.is_dir():
-        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
-        raise UsageError('Cannot initialize tokenizer.')
+        # Directory will be repopulated by tokenizer below.
+        basedir.mkdir()
 
     with connect(config.get_libpq_dsn()) as conn:
         name = properties.get_property(conn, 'tokenizer')
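Note that basedir.mkdir() is called without parents=True, so the
project directory itself must still exist; only the tokenizer/
subdirectory is recreated. A stray regular file named tokenizer would
also still fail, since is_dir() is false for it but mkdir() then
raises. In isolation (the path is a placeholder):

    from pathlib import Path

    basedir = Path('/srv/nominatim-project') / 'tokenizer'   # placeholder path
    if not basedir.is_dir():
        basedir.mkdir()   # raises FileNotFoundError if the project dir is gone,
                          # FileExistsError if 'tokenizer' exists as a plain file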
nominatim/tokenizer/icu_tokenizer.py
index 1799ae86d0330ee61c2fc5fe05118ff00e0ef162..b553dbc641d708175e8f7281f05cf14cf4673484 100644 (file)
@@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer):
         """
         self.loader = ICURuleLoader(config)
 
         """
         self.loader = ICURuleLoader(config)
 
-        self._install_php(config.lib_dir.php)
+        self._install_php(config.lib_dir.php, overwrite=True)
         self._save_config()
 
         if init_db:
@@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer):
         with connect(self.dsn) as conn:
             self.loader.load_config_from_db(conn)
 
+        self._install_php(config.lib_dir.php, overwrite=False)
+
 
     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer):
                                      self.loader.make_token_analysis())
 
 
-    def _install_php(self, phpdir):
+    def _install_php(self, phpdir, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent(f"""\
-            <?php
-            @define('CONST_Max_Word_Frequency', 10000000);
-            @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
-            @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
-            require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent(f"""\
+                <?php
+                @define('CONST_Max_Word_Frequency', 10000000);
+                @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+                @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+                require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
 
 
     def _save_config(self):
nominatim/tokenizer/legacy_tokenizer.py
index 28f4b32756c0756ea172ca3aa16a458ac6ce929d..3b8f75692f964e9c2e84dc3ada92b156dd0afb7b 100644 (file)
@@ -107,7 +107,7 @@ class LegacyTokenizer(AbstractTokenizer):
 
         self.normalization = config.TERM_NORMALIZATION
 
-        self._install_php(config)
+        self._install_php(config, overwrite=True)
 
         with connect(self.dsn) as conn:
             _check_module(module_dir, conn)
@@ -119,12 +119,18 @@ class LegacyTokenizer(AbstractTokenizer):
             self._init_db_tables(config)
 
 
-    def init_from_project(self, _):
+    def init_from_project(self, config):
         """ Initialise the tokenizer from the project directory.
         """
         with connect(self.dsn) as conn:
             self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
 
         """ Initialise the tokenizer from the project directory.
         """
         with connect(self.dsn) as conn:
             self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
 
+        if not (config.project_dir / 'module' / 'nominatim.so').exists():
+            _install_module(config.DATABASE_MODULE_PATH,
+                            config.lib_dir.module,
+                            config.project_dir / 'module')
+
+        self._install_php(config, overwrite=False)
 
     def finalize_import(self, config):
         """ Do any required postprocessing to make the tokenizer data ready
@@ -238,16 +244,18 @@ class LegacyTokenizer(AbstractTokenizer):
         return LegacyNameAnalyzer(self.dsn, normalizer)
 
 
-    def _install_php(self, config):
+    def _install_php(self, config, overwrite=True):
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
         """ Install the php script for the tokenizer.
         """
         php_file = self.data_dir / "tokenizer.php"
-        php_file.write_text(dedent("""\
-            <?php
-            @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
-            @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
-            require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
-            """.format(config)))
+
+        if not php_file.exists() or overwrite:
+            php_file.write_text(dedent("""\
+                <?php
+                @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+                @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+                require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+                """.format(config)))
 
 
     def _init_db_tables(self, config):
test/bdd/steps/nominatim_environment.py
index 6f4f14a71b8a0f44fb190ea41d2c7722af70f8a4..7de32e484f50f1f645c79137a45541b2820538ca 100644 (file)
@@ -217,7 +217,7 @@ class NominatimEnvironment:
                     self.db_drop_database(self.api_test_db)
                     raise
 
-        tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+        tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
 
 
     def setup_unknown_db(self):
test/python/tokenizer/test_factory.py
index aa763e28fd835f3b0a7119b254b52ac5fd059b4f..166e6ba6388f424dbbd2347751398294eec45a96 100644 (file)
@@ -63,13 +63,13 @@ class TestFactory:
         assert tokenizer.init_state == "loaded"
 
 
-    def test_load_no_tokenizer_dir(self):
+    def test_load_repopulate_tokenizer_dir(self):
         factory.create_tokenizer(self.config)
 
-        self.config.project_dir = self.config.project_dir / 'foo'
+        self.config.project_dir = self.config.project_dir
 
-        with pytest.raises(UsageError):
-            factory.get_tokenizer_for_db(self.config)
+        factory.get_tokenizer_for_db(self.config)
+        assert (self.config.project_dir / 'tokenizer').exists()
 
 
     def test_load_missing_property(self, temp_db_cursor):
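To reproduce the repopulation behaviour outside the test suite, one can
delete tokenizer/ from an imported project and load the tokenizer
again. A hypothetical snippet, where config stands for a fully set-up
project configuration with a reachable database:

    import shutil
    from pathlib import Path

    from nominatim.tokenizer import factory

    tok_dir = Path(config.project_dir) / 'tokenizer'
    shutil.rmtree(tok_dir)                    # simulate a fresh checkout
    factory.get_tokenizer_for_db(config)      # recreates and repopulates it
    assert tok_dir.exists()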