raise UsageError('Tokenizer not found') from exp
-def create_tokenizer(config):
+def create_tokenizer(config, init_db=True, module_name=None):
""" Create a new tokenizer as defined by the given configuration.
The tokenizer data and code are copied into the 'tokenizer' directory
of the project directory and the tokenizer is loaded from its new location.
"""
+ if module_name is None:
+ module_name = config.TOKENIZER
+
# Create the directory for the tokenizer data
basedir = config.project_dir / 'tokenizer'
if not basedir.exists():
    basedir.mkdir()
elif not basedir.is_dir():
    LOG.fatal("Tokenizer directory '%s' cannot be created.", basedir)
    raise UsageError("Tokenizer setup failed.")
- tokenizer_module = _import_tokenizer(config.TOKENIZER)
+ # Import and initialize the tokenizer.
+ tokenizer_module = _import_tokenizer(module_name)
tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
- tokenizer.init_new_db(config)
+ if init_db:
+ tokenizer.init_new_db(config)
with connect(config.get_libpq_dsn()) as conn:
- properties.set_property(conn, 'tokenizer', config.TOKENIZER)
+ properties.set_property(conn, 'tokenizer', module_name)
return tokenizer
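
A quick illustration of how the two new parameters are meant to be used (a
sketch only; config stands for any fully loaded project configuration):

    # Standard setup: pick the tokenizer named in config.TOKENIZER and
    # initialise the freshly created database.
    tokenizer = create_tokenizer(config)

    # Migration path: force the legacy tokenizer and skip init_new_db(),
    # because the database already contains its data.
    tokenizer = create_tokenizer(config, init_db=False, module_name='legacy')
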
return LegacyTokenizer(dsn, data_dir)
-def _install_module(src_dir, module_dir):
+def _install_module(config_module_path, src_dir, module_dir):
""" Copies the PostgreSQL normalisation module into the project
directory if necessary. For historical reasons the module is
saved in the '/module' subdirectory and not with the other tokenizer
data.

The function detects when the installation is run from the
build directory. It doesn't touch the module in that case.
"""
+ # Custom module locations are simply used as is.
+ if config_module_path:
+ LOG.info("Using custom path for database module at '%s'", config_module_path)
+ return config_module_path
+
+ # Compatibility mode for builddir installations.
if module_dir.exists() and src_dir.samefile(module_dir):
LOG.info('Running from build directory. Leaving database module as is.')
- return
+ return module_dir
+ # In any other case install the module in the project directory.
if not module_dir.exists():
module_dir.mkdir()
LOG.info('Database module installed at %s', str(destfile))
+ return module_dir
+
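
A short sketch of the three cases the reworked _install_module() now
distinguishes and the path it returns for each; the directories below are
purely illustrative:

    from pathlib import Path

    project_dir = Path('/srv/nominatim-project')    # illustrative only
    build_dir = Path('/home/user/Nominatim/build')  # illustrative only

    # 1. A custom DATABASE_MODULE_PATH is used and returned unchanged.
    module_dir = _install_module('/opt/nominatim/module',
                                 build_dir / 'module', project_dir / 'module')

    # 2. src_dir and module_dir are the same directory (build-dir run):
    #    nothing is copied and the existing module_dir is returned.
    module_dir = _install_module('', build_dir / 'module', build_dir / 'module')

    # 3. In all other cases the module is copied into the project directory
    #    and project_dir / 'module' is returned.
    module_dir = _install_module('', build_dir / 'module', project_dir / 'module')
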
def _check_module(module_dir, conn):
with conn.cursor() as cur:
This copies all necessary data into the project directory to make
sure the tokenizer remains stable even over updates.
"""
- # Find and optionally install the PsotgreSQL normalization module.
- if config.DATABASE_MODULE_PATH:
- LOG.info("Using custom path for database module at '%s'",
- config.DATABASE_MODULE_PATH)
- module_dir = config.DATABASE_MODULE_PATH
- else:
- _install_module(config.lib_dir.module, config.project_dir / 'module')
- module_dir = config.project_dir / 'module'
+ module_dir = _install_module(config.DATABASE_MODULE_PATH,
+ config.lib_dir.module,
+ config.project_dir / 'module')
self.normalization = config.TERM_NORMALIZATION
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
-
- # Stable configuration is saved in the database.
- properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
-
- conn.commit()
+ self._save_config(conn)
def init_from_project(self):
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+
+
+ def migrate_database(self, config):
+ """ Initialise the project directory of an existing database for
+ use with this tokenizer.
+
+ This is a special migration function for updating existing databases
+ to new software versions.
+ """
+ self.normalization = config.TERM_NORMALIZATION
+ module_dir = _install_module(config.DATABASE_MODULE_PATH,
+ config.lib_dir.module,
+ config.project_dir / 'module')
+
+ with connect(self.dsn) as conn:
+ _check_module(module_dir, conn)
+ self._save_config(conn)
+
+
+ def _save_config(self, conn):
+ """ Save the configuration that needs to remain stable for the given
+ database as database properties.
+ """
+ properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
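
With _save_config() as the single place that persists the stable settings,
init_from_project() reads them back symmetrically. A sketch of the round trip
(the dsn and data_dir are illustrative):

    tokenizer = LegacyTokenizer('dbname=nominatim', data_dir)
    tokenizer.migrate_database(config)   # installs the module, persists settings

    restored = LegacyTokenizer('dbname=nominatim', data_dir)
    restored.init_from_project()         # reads DBCFG_NORMALIZATION back
    assert restored.normalization == config.TERM_NORMALIZATION
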
from nominatim.db import properties
from nominatim.db.connection import connect
from nominatim.version import NOMINATIM_VERSION
-from nominatim.tools import refresh, database_import
+from nominatim.tools import refresh
+from nominatim.tokenizer import factory as tokenizer_factory
from nominatim.errors import UsageError
LOG = logging.getLogger()
TYPE timestamp with time zone;""")
-@_migration(3, 5, 0, 99)
-def install_database_module_in_project_directory(conn, config, paths, **_):
- """ Install database module in project directory.
-
- The database module needs to be present in the project directory
- since those were introduced.
- """
- database_import.install_module(paths.module_dir, paths.project_dir,
- config.DATABASE_MODULE_PATH, conn=conn)
-
-
@_migration(3, 5, 0, 99)
def add_nominatim_property_table(conn, config, **_):
""" Add nominatim_property table.
and class = 'place' and type != 'postcode'
and linked_place_id is null""")
cur.execute(""" DROP INDEX IF EXISTS idx_placex_adminname """)
+
+
+@_migration(3, 7, 0, 1)
+def install_legacy_tokenizer(conn, config, **_):
+ """ Setup legacy tokenizer.
+
+ If no other tokenizer has been configured yet, then create the
+ configuration for the backwards-compatible legacy tokenizer
+ """
+ if properties.get_property(conn, 'tokenizer') is None:
+ tokenizer = tokenizer_factory.create_tokenizer(config, init_db=False,
+ module_name='legacy')
+
+ tokenizer.migrate_database(config)
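
For clarity, a sketch of the state this migration leaves behind on a database
that had no tokenizer configured (property name taken from create_tokenizer()
above):

    with connect(config.get_libpq_dsn()) as conn:
        # Written by create_tokenizer() with module_name='legacy'.
        assert properties.get_property(conn, 'tokenizer') == 'legacy'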