X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/b5540dc35c35c7fa8f01979e972ca429b0b521fb..4da1f0da6fc42f00b2a9aeeacd2bb1d41e664510:/nominatim/tokenizer/factory.py

diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py
index 5f03ba58..67e22194 100644
--- a/nominatim/tokenizer/factory.py
+++ b/nominatim/tokenizer/factory.py
@@ -1,3 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
 """
 Functions for creating a tokenizer or initialising the right one for an
 existing database.
@@ -13,27 +19,33 @@ database.
 A tokenizer usually also includes PHP code for querying. The appropriate PHP
 normalizer module is installed, when the tokenizer is created.
 """
+from typing import Optional
 import logging
 import importlib
+from pathlib import Path
 
-from ..errors import UsageError
-from ..db import properties
-from ..db.connection import connect
+from nominatim.errors import UsageError
+from nominatim.db import properties
+from nominatim.db.connection import connect
+from nominatim.config import Configuration
+from nominatim.tokenizer.base import AbstractTokenizer, TokenizerModule
 
 LOG = logging.getLogger()
 
-def _import_tokenizer(name):
+def _import_tokenizer(name: str) -> TokenizerModule:
     """ Load the tokenizer.py module from project directory.
     """
-    try:
-        return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
-    except ModuleNotFoundError as exp:
+    src_file = Path(__file__).parent / (name + '_tokenizer.py')
+    if not src_file.is_file():
         LOG.fatal("No tokenizer named '%s' available. "
                   "Check the setting of NOMINATIM_TOKENIZER.", name)
-        raise UsageError('Tokenizer not found') from exp
+        raise UsageError('Tokenizer not found')
 
+    return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
 
-def create_tokenizer(config, init_db=True, module_name=None):
+
+def create_tokenizer(config: Configuration, init_db: bool = True,
+                     module_name: Optional[str] = None) -> AbstractTokenizer:
     """ Create a new tokenizer as defined by the given configuration.
 
         The tokenizer data and code is copied into the 'tokenizer' directory
@@ -54,8 +66,7 @@ def create_tokenizer(config, init_db=True, module_name=None):
     tokenizer_module = _import_tokenizer(module_name)
 
     tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
-    if init_db:
-        tokenizer.init_new_db(config)
+    tokenizer.init_new_db(config, init_db=init_db)
 
     with connect(config.get_libpq_dsn()) as conn:
         properties.set_property(conn, 'tokenizer', module_name)
@@ -63,7 +74,7 @@ def create_tokenizer(config, init_db=True, module_name=None):
     return tokenizer
 
 
-def get_tokenizer_for_db(config):
+def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer:
     """ Instantiate a tokenizer for an existing database.
 
         The function looks up the appropriate tokenizer in the database
@@ -71,8 +82,8 @@ def get_tokenizer_for_db(config):
     """
     basedir = config.project_dir / 'tokenizer'
     if not basedir.is_dir():
-        LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
-        raise UsageError('Cannot initialize tokenizer.')
+        # Directory will be repopulated by tokenizer below.
+        basedir.mkdir()
 
     with connect(config.get_libpq_dsn()) as conn:
         name = properties.get_property(conn, 'tokenizer')
@@ -84,6 +95,6 @@ def get_tokenizer_for_db(config):
     tokenizer_module = _import_tokenizer(name)
 
     tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
-    tokenizer.init_from_project()
+    tokenizer.init_from_project(config)
 
     return tokenizer
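
Usage note (not part of the upstream diff): the change reshapes the factory API in two ways, create_tokenizer() now always calls init_new_db() and passes the init_db flag through to the tokenizer, and init_from_project() receives the configuration. A minimal sketch of how the two entry points are driven, assuming `config` is an already-loaded nominatim.config.Configuration; how that object is constructed is an assumption outside this diff:

    from nominatim.tokenizer import factory

    # 'config' is assumed to be a loaded nominatim.config.Configuration;
    # its construction is not shown in the diff above.

    # Import phase: pick the tokenizer named by NOMINATIM_TOKENIZER, copy its
    # data into <project_dir>/tokenizer, and initialise the new database.
    # Passing init_db=False defers the database setup to the tokenizer.
    tokenizer = factory.create_tokenizer(config, init_db=True)

    # Against an existing database: the tokenizer name is read back from the
    # database properties; since this change, a missing project 'tokenizer'
    # directory is recreated via basedir.mkdir() instead of aborting.
    tokenizer = factory.get_tokenizer_for_db(config)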