From a0ed80d821bd3f7938a2e3d4e38357f0c03b919e Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Sun, 20 Mar 2022 11:31:42 +0100 Subject: [PATCH] restore the tokenizer directory when missing Automatically repopulate the tokenizer/ directory with the PHP stub and the postgresql module, when the directory is missing. This allows switching working directories and in particular running the service from a different machine than where it was installed. Users still need to make sure that .env files are set up correctly or they will shoot themselves in the foot. See #2515. --- nominatim/db/properties.py | 3 +++ nominatim/tokenizer/factory.py | 4 ++-- nominatim/tokenizer/icu_tokenizer.py | 20 +++++++++++-------- nominatim/tokenizer/legacy_tokenizer.py | 26 ++++++++++++++++--------- test/bdd/steps/nominatim_environment.py | 2 +- test/python/tokenizer/test_factory.py | 8 ++++---- 6 files changed, 39 insertions(+), 24 deletions(-) diff --git a/nominatim/db/properties.py b/nominatim/db/properties.py index 19c09006..27020487 100644 --- a/nominatim/db/properties.py +++ b/nominatim/db/properties.py @@ -27,6 +27,9 @@ def get_property(conn, name): """ Return the current value of the given propery or None if the property is not set. """ + if not conn.table_exists('nominatim_properties'): + return None + with conn.cursor() as cur: cur.execute('SELECT value FROM nominatim_properties WHERE property = %s', (name, )) diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py index fbda2462..108c7841 100644 --- a/nominatim/tokenizer/factory.py +++ b/nominatim/tokenizer/factory.py @@ -78,8 +78,8 @@ def get_tokenizer_for_db(config): """ basedir = config.project_dir / 'tokenizer' if not basedir.is_dir(): - LOG.fatal("Cannot find tokenizer data in '%s'.", basedir) - raise UsageError('Cannot initialize tokenizer.') + # Directory will be repopulated by tokenizer below. 
+ basedir.mkdir() with connect(config.get_libpq_dsn()) as conn: name = properties.get_property(conn, 'tokenizer') diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 1799ae86..b553dbc6 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer): """ self.loader = ICURuleLoader(config) - self._install_php(config.lib_dir.php) + self._install_php(config.lib_dir.php, overwrite=True) self._save_config() if init_db: @@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer): with connect(self.dsn) as conn: self.loader.load_config_from_db(conn) + self._install_php(config.lib_dir.php, overwrite=False) + def finalize_import(self, config): """ Do any required postprocessing to make the tokenizer data ready @@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer): self.loader.make_token_analysis()) - def _install_php(self, phpdir): + def _install_php(self, phpdir, overwrite=True): """ Install the php script for the tokenizer. """ php_file = self.data_dir / "tokenizer.php" - php_file.write_text(dedent(f"""\ -