# SPDX-License-Identifier: GPL-2.0-only
#
# This file is part of Nominatim. (https://nominatim.org)
#
# Copyright (C) 2022 by the Nominatim developer community.
# For a full list of authors see the git log.
"""
Functions for creating a tokenizer or initialising the right one for an
existing database.
"""
import logging
import importlib
from pathlib import Path

from ..errors import UsageError
from ..db import properties
def _import_tokenizer(name):
    """ Load the tokenizer module with the given name.

        The module is expected as '<name>_tokenizer.py' in the directory
        of this file and is imported as
        'nominatim.tokenizer.<name>_tokenizer'.

        Raises a UsageError when no such source file exists.
    """
    # Existence is checked on the source file instead of catching
    # ModuleNotFoundError around the import, so that import errors raised
    # from inside the tokenizer module itself propagate unchanged.
    src_file = Path(__file__).parent / (name + '_tokenizer.py')
    if not src_file.is_file():
        LOG.fatal("No tokenizer named '%s' available. "
                  "Check the setting of NOMINATIM_TOKENIZER.", name)
        raise UsageError('Tokenizer not found')

    return importlib.import_module('nominatim.tokenizer.' + name + '_tokenizer')
def create_tokenizer(config, init_db=True, module_name=None):
tokenizer_module = _import_tokenizer(module_name)
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
- if init_db:
- tokenizer.init_new_db(config)
+ tokenizer.init_new_db(config, init_db=init_db)
with connect(config.get_libpq_dsn()) as conn:
properties.set_property(conn, 'tokenizer', module_name)
"""
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
- LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
- raise UsageError('Cannot initialize tokenizer.')
+ # Directory will be repopulated by tokenizer below.
+ basedir.mkdir()
with connect(config.get_libpq_dsn()) as conn:
name = properties.get_property(conn, 'tokenizer')
tokenizer_module = _import_tokenizer(name)
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
- tokenizer.init_from_project()
+ tokenizer.init_from_project(config)
return tokenizer