From: Sarah Hoffmann Date: Sun, 20 Mar 2022 20:46:07 +0000 (+0100) Subject: Merge pull request #2641 from lonvia/reinit-tokenizer-dir X-Git-Tag: v4.1.0~68 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/d33c82cb66a5d7edf7202e7e4ede8e2496011580?hp=2f266d946be66b6a0166856f83b813152e5c0dd3 Merge pull request #2641 from lonvia/reinit-tokenizer-dir Transparently reinitialize tokenizer directory when necessary --- diff --git a/docs/admin/Advanced-Installations.md b/docs/admin/Advanced-Installations.md index ff267cee..aeb2fa5b 100644 --- a/docs/admin/Advanced-Installations.md +++ b/docs/admin/Advanced-Installations.md @@ -198,11 +198,10 @@ target machine. of a full database. Next install Nominatim on the target machine by following the standard installation -instructions. Again make sure to use the same version as the source machine. +instructions. Again, make sure to use the same version as the source machine. -You can now copy the project directory from the source machine to the new machine. -If necessary, edit the `.env` file to point it to the restored database. -Finally run +Create a project directory on your destination machine and set up the `.env` +file to match the configuration on the source machine. Finally run nominatim refresh --website @@ -210,6 +209,8 @@ to make sure that the local installation of Nominatim will be used. If you are using the legacy tokenizer you might also have to switch to the PostgreSQL module that was compiled on your target machine. If you get errors -that PostgreSQL cannot find or access `nominatim.so` then copy the installed -version into the `module` directory of your project directory. The installed -copy can usually be found under `/usr/local/lib/nominatim/module/nominatim.so`. +that PostgreSQL cannot find or access `nominatim.so` then rerun + + nominatim refresh --functions + +on the target machine to update the location of the module. 
diff --git a/nominatim/clicmd/refresh.py b/nominatim/clicmd/refresh.py index b8a88b6d..3c245cd4 100644 --- a/nominatim/clicmd/refresh.py +++ b/nominatim/clicmd/refresh.py @@ -117,6 +117,10 @@ class UpdateRefresh: if args.website: webdir = args.project_dir / 'website' LOG.warning('Setting up website directory at %s', webdir) + # This is a little bit hacky: call the tokenizer setup, so that + # the tokenizer directory gets repopulated as well, in case it + # wasn't there yet. + self._get_tokenizer(args.config) with connect(args.config.get_libpq_dsn()) as conn: refresh.setup_website(webdir, args.config, conn) diff --git a/nominatim/config.py b/nominatim/config.py index 785f4acd..13d9cd8a 100644 --- a/nominatim/config.py +++ b/nominatim/config.py @@ -18,7 +18,7 @@ from dotenv import dotenv_values from nominatim.errors import UsageError LOG = logging.getLogger() - +CONFIG_CACHE = {} def flatten_config_list(content, section=''): """ Flatten YAML configuration lists that contain include sections @@ -181,14 +181,19 @@ class Configuration: """ configfile = self.find_config_file(filename, config) - if configfile.suffix in ('.yaml', '.yml'): - return self._load_from_yaml(configfile) + if str(configfile) in CONFIG_CACHE: + return CONFIG_CACHE[str(configfile)] - if configfile.suffix == '.json': + if configfile.suffix in ('.yaml', '.yml'): + result = self._load_from_yaml(configfile) + elif configfile.suffix == '.json': with configfile.open('r') as cfg: - return json.load(cfg) + result = json.load(cfg) + else: + raise UsageError(f"Config file '{configfile}' has unknown format.") - raise UsageError(f"Config file '{configfile}' has unknown format.") + CONFIG_CACHE[str(configfile)] = result + return result def find_config_file(self, filename, config=None): diff --git a/nominatim/db/properties.py b/nominatim/db/properties.py index 19c09006..27020487 100644 --- a/nominatim/db/properties.py +++ b/nominatim/db/properties.py @@ -27,6 +27,9 @@ def get_property(conn, name): """ Return the 
current value of the given propery or None if the property is not set. """ + if not conn.table_exists('nominatim_properties'): + return None + with conn.cursor() as cur: cur.execute('SELECT value FROM nominatim_properties WHERE property = %s', (name, )) diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py index fbda2462..108c7841 100644 --- a/nominatim/tokenizer/factory.py +++ b/nominatim/tokenizer/factory.py @@ -78,8 +78,8 @@ def get_tokenizer_for_db(config): """ basedir = config.project_dir / 'tokenizer' if not basedir.is_dir(): - LOG.fatal("Cannot find tokenizer data in '%s'.", basedir) - raise UsageError('Cannot initialize tokenizer.') + # Directory will be repopulated by tokenizer below. + basedir.mkdir() with connect(config.get_libpq_dsn()) as conn: name = properties.get_property(conn, 'tokenizer') diff --git a/nominatim/tokenizer/icu_tokenizer.py b/nominatim/tokenizer/icu_tokenizer.py index 1799ae86..b553dbc6 100644 --- a/nominatim/tokenizer/icu_tokenizer.py +++ b/nominatim/tokenizer/icu_tokenizer.py @@ -51,7 +51,7 @@ class LegacyICUTokenizer(AbstractTokenizer): """ self.loader = ICURuleLoader(config) - self._install_php(config.lib_dir.php) + self._install_php(config.lib_dir.php, overwrite=True) self._save_config() if init_db: @@ -67,6 +67,8 @@ class LegacyICUTokenizer(AbstractTokenizer): with connect(self.dsn) as conn: self.loader.load_config_from_db(conn) + self._install_php(config.lib_dir.php, overwrite=False) + def finalize_import(self, config): """ Do any required postprocessing to make the tokenizer data ready @@ -174,16 +176,18 @@ class LegacyICUTokenizer(AbstractTokenizer): self.loader.make_token_analysis()) - def _install_php(self, phpdir): + def _install_php(self, phpdir, overwrite=True): """ Install the php script for the tokenizer. """ php_file = self.data_dir / "tokenizer.php" - php_file.write_text(dedent(f"""\ -