X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/16daa57e4757e4daeffec1e61630f989727dc563..cdfc8628f22a5be2d8ad402adeb9bd6bfcaeface:/nominatim/tokenizer/factory.py diff --git a/nominatim/tokenizer/factory.py b/nominatim/tokenizer/factory.py index dc3e7411..67e22194 100644 --- a/nominatim/tokenizer/factory.py +++ b/nominatim/tokenizer/factory.py @@ -1,3 +1,9 @@ +# SPDX-License-Identifier: GPL-2.0-only +# +# This file is part of Nominatim. (https://nominatim.org) +# +# Copyright (C) 2022 by the Nominatim developer community. +# For a full list of authors see the git log. """ Functions for creating a tokenizer or initialising the right one for an existing database. @@ -13,17 +19,20 @@ database. A tokenizer usually also includes PHP code for querying. The appropriate PHP normalizer module is installed, when the tokenizer is created. """ +from typing import Optional import logging import importlib from pathlib import Path -from ..errors import UsageError -from ..db import properties -from ..db.connection import connect +from nominatim.errors import UsageError +from nominatim.db import properties +from nominatim.db.connection import connect +from nominatim.config import Configuration +from nominatim.tokenizer.base import AbstractTokenizer, TokenizerModule LOG = logging.getLogger() -def _import_tokenizer(name): +def _import_tokenizer(name: str) -> TokenizerModule: """ Load the tokenizer.py module from project directory. """ src_file = Path(__file__).parent / (name + '_tokenizer.py') @@ -35,7 +44,8 @@ def _import_tokenizer(name): return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer') -def create_tokenizer(config, init_db=True, module_name=None): +def create_tokenizer(config: Configuration, init_db: bool = True, + module_name: Optional[str] = None) -> AbstractTokenizer: """ Create a new tokenizer as defined by the given configuration. The tokenizer data and code is copied into the 'tokenizer' directory @@ -64,7 +74,7 @@ def create_tokenizer(config, init_db=True, module_name=None): return tokenizer -def get_tokenizer_for_db(config): +def get_tokenizer_for_db(config: Configuration) -> AbstractTokenizer: """ Instantiate a tokenizer for an existing database. The function looks up the appropriate tokenizer in the database @@ -72,8 +82,8 @@ def get_tokenizer_for_db(config): """ basedir = config.project_dir / 'tokenizer' if not basedir.is_dir(): - LOG.fatal("Cannot find tokenizer data in '%s'.", basedir) - raise UsageError('Cannot initialize tokenizer.') + # Directory will be repopulated by tokenizer below. + basedir.mkdir() with connect(config.get_libpq_dsn()) as conn: name = properties.get_property(conn, 'tokenizer')