args.no_partitions,
rouser=args.config.DATABASE_WEBUSER)
- LOG.warning('Installing database module')
- with connect(args.config.get_libpq_dsn()) as conn:
- database_import.install_module(args.module_dir, args.project_dir,
- args.config.DATABASE_MODULE_PATH,
- conn=conn)
-
LOG.warning('Importing OSM data file')
database_import.import_osm_data(Path(args.osm_file),
args.osm2pgsql_options(0, 1),
"""
Tokenizer implementing normalisation as used before Nominatim 4.
"""
+import logging
+import shutil
+
+import psycopg2
+
from nominatim.db.connection import connect
from nominatim.db import properties
+from nominatim.errors import UsageError
DBCFG_NORMALIZATION = "tokenizer_normalization"
+LOG = logging.getLogger()
+
def create(dsn, data_dir):
""" Create a new instance of the tokenizer provided by this module.
"""
return LegacyTokenizer(dsn, data_dir)
+
+def _install_module(src_dir, module_dir):
+    """ Make the PostgreSQL normalisation module available in `module_dir`.
+
+        The file 'nominatim.so' is copied from `src_dir` into `module_dir`
+        and its permissions are set to 0755. `module_dir` is created first
+        when it does not exist yet. For historical reasons the module is
+        saved in the '/module' subdirectory and not with the other
+        tokenizer data.
+
+        When `src_dir` and `module_dir` refer to the same directory, the
+        installation is run from the build directory. The module is left
+        untouched in that case.
+    """
+    if module_dir.exists():
+        if src_dir.samefile(module_dir):
+            LOG.info('Running from build directory. Leaving database module as is.')
+            return
+    else:
+        module_dir.mkdir()
+
+    installed_module = module_dir / 'nominatim.so'
+    shutil.copy(str(src_dir / 'nominatim.so'), str(installed_module))
+    # rwxr-xr-x
+    installed_module.chmod(0o755)
+
+    LOG.info('Database module installed at %s', str(installed_module))
+
+
+def _check_module(module_dir, conn):
+    """ Check that the database can load the normalisation module.
+
+        A throw-away C function backed by 'nominatim.so' in `module_dir`
+        is created through the connection `conn` and dropped again right
+        away.
+
+        Raises a UsageError when the database reports an error while
+        doing so.
+    """
+    module_file = '{}/nominatim.so'.format(module_dir)
+
+    with conn.cursor() as cur:
+        try:
+            # The library path is handed over as a query parameter, so that
+            # the driver quotes it correctly even when it contains a "'".
+            cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
+                           RETURNS text AS %s, 'transliteration'
+                           LANGUAGE c IMMUTABLE STRICT;
+                           DROP FUNCTION nominatim_test_import_func(text)
+                        """, (module_file, ))
+        except psycopg2.DatabaseError as err:
+            LOG.fatal("Error accessing database module: %s", err)
+            raise UsageError("Database module cannot be accessed.") from err
+
+
class LegacyTokenizer:
""" The legacy tokenizer uses a special PostgreSQL module to normalize
names and queries. The tokenizer thus implements normalization through
This copies all necessary data in the project directory to make
sure the tokenizer remains stable even over updates.
"""
+ # Find and optionally install the PostgreSQL normalization module.
+ if config.DATABASE_MODULE_PATH:
+ LOG.info("Using custom path for database module at '%s'",
+ config.DATABASE_MODULE_PATH)
+ module_dir = config.DATABASE_MODULE_PATH
+ else:
+ _install_module(config.lib_dir.module, config.project_dir / 'module')
+ module_dir = config.project_dir / 'module'
+
self.normalization = config.TERM_NORMALIZATION
- # Stable configuration is saved in the database.
with connect(self.dsn) as conn:
- properties.set_property(conn, DBCFG_NORMALIZATION,
- self.normalization)
+ _check_module(module_dir, conn)
+
+ # Stable configuration is saved in the database.
+ properties.set_property(conn, DBCFG_NORMALIZATION, self.normalization)
+
+ conn.commit()
def init_from_project(self):
import os
import selectors
import subprocess
-import shutil
from pathlib import Path
import psutil
-import psycopg2
from nominatim.db.connection import connect, get_pg_env
from nominatim.db import utils as db_utils
raise UsageError('PostGIS version is too old.')
-def install_module(src_dir, project_dir, module_dir, conn=None):
- """ Copy the normalization module from src_dir into the project
- directory under the '/module' directory. If 'module_dir' is set, then
- use the module from there instead and check that it is accessible
- for Postgresql.
-
- The function detects when the installation is run from the
- build directory. It doesn't touch the module in that case.
-
- If 'conn' is given, then the function also tests if the module
- can be access via the given database.
- """
- if not module_dir:
- module_dir = project_dir / 'module'
-
- if not module_dir.exists() or not src_dir.samefile(module_dir):
-
- if not module_dir.exists():
- module_dir.mkdir()
-
- destfile = module_dir / 'nominatim.so'
- shutil.copy(str(src_dir / 'nominatim.so'), str(destfile))
- destfile.chmod(0o755)
-
- LOG.info('Database module installed at %s', str(destfile))
- else:
- LOG.info('Running from build directory. Leaving database module as is.')
- else:
- LOG.info("Using custom path for database module at '%s'", module_dir)
-
- if conn is not None:
- with conn.cursor() as cur:
- try:
- cur.execute("""CREATE FUNCTION nominatim_test_import_func(text)
- RETURNS text AS '{}/nominatim.so', 'transliteration'
- LANGUAGE c IMMUTABLE STRICT;
- DROP FUNCTION nominatim_test_import_func(text)
- """.format(module_dir))
- except psycopg2.DatabaseError as err:
- LOG.fatal("Error accessing database module: %s", err)
- raise UsageError("Database module cannot be accessed.") from err
-
-
def import_base_data(dsn, sql_dir, ignore_partitions=False):
""" Create and populate the tables with basic static data that provides
the background for geocoding. Data is assumed to not yet exist.
def test_import_full(temp_db, mock_func_factory):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
- mock_func_factory(nominatim.tools.database_import, 'install_module'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.refresh, 'import_wikipedia_articles'),
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
from nominatim.tokenizer import legacy_tokenizer
from nominatim.db import properties
+from nominatim.errors import UsageError
@pytest.fixture
-def tokenizer(dsn, tmp_path, def_config, property_table):
- tok = legacy_tokenizer.create(dsn, tmp_path)
- tok.init_new_db(def_config)
+def test_config(def_config, tmp_path):
+ def_config.project_dir = tmp_path / 'project'
+ def_config.project_dir.mkdir()
- return tok
+ module_dir = tmp_path / 'module_src'
+ module_dir.mkdir()
+ (module_dir / 'nominatim.so').write_text('TEST nomiantim.so')
-def test_init_new(dsn, tmp_path, def_config, property_table, monkeypatch, temp_db_conn):
+ def_config.lib_dir.module = module_dir
+
+ return def_config
+
+
+@pytest.fixture
+def tokenizer_factory(dsn, tmp_path, monkeypatch):
+    """ Provide a function that creates a fresh legacy tokenizer for the
+        test database with its data directory at '<tmp_path>/tokenizer'.
+    """
+    def _new_tokenizer():
+        data_dir = tmp_path / 'tokenizer'
+        return legacy_tokenizer.create(dsn, data_dir)
+
+    return _new_tokenizer
+
+@pytest.fixture
+def tokenizer_setup(tokenizer_factory, test_config, property_table, monkeypatch):
+    """ Initialise a new tokenizer database with the test configuration.
+        The check that the database can load the module is disabled.
+    """
+    monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
+    tokenizer_factory().init_new_db(test_config)
+
+
+def test_init_new(tokenizer_factory, test_config, property_table, monkeypatch, temp_db_conn):
monkeypatch.setenv('NOMINATIM_TERM_NORMALIZATION', 'xxvv')
+ monkeypatch.setattr(legacy_tokenizer, '_check_module' , lambda m, c: None)
- tok = legacy_tokenizer.create(dsn, tmp_path)
- tok.init_new_db(def_config)
+ tok = tokenizer_factory()
+ tok.init_new_db(test_config)
assert properties.get_property(temp_db_conn, legacy_tokenizer.DBCFG_NORMALIZATION) == 'xxvv'
+ outfile = test_config.project_dir / 'module' / 'nominatim.so'
+
+ assert outfile.exists()
+ assert outfile.read_text() == 'TEST nomiantim.so'
+ assert outfile.stat().st_mode == 33261
+
+
+def test_init_module_load_failed(tokenizer_factory, test_config, property_table, monkeypatch, temp_db_conn):
+    """ Initialisation must fail with a UsageError when the module check
+        is left in place and the installed file is only the text dummy
+        from the test configuration.
+    """
+    tokenizer = tokenizer_factory()
+
+    with pytest.raises(UsageError):
+        tokenizer.init_new_db(test_config)
+
+
+def test_init_module_custom(tokenizer_factory, test_config, property_table,
+                            monkeypatch, tmp_path):
+    """ When NOMINATIM_DATABASE_MODULE_PATH is set, no 'module' directory
+        must be created in the project directory.
+    """
+    custom_dir = (tmp_path / 'custom').resolve()
+    custom_dir.mkdir()
+    custom_module = custom_dir / 'nominatim.so'
+    custom_module.write_text('CUSTOM nomiantim.so')
+
+    monkeypatch.setenv('NOMINATIM_DATABASE_MODULE_PATH', str(custom_dir))
+    monkeypatch.setattr(legacy_tokenizer, '_check_module', lambda m, c: None)
+
+    tokenizer_factory().init_new_db(test_config)
+
+    project_module_dir = test_config.project_dir / 'module'
+    assert not project_module_dir.exists()
+
+
+def test_init_from_project(tokenizer_setup, tokenizer_factory):
+ tok = tokenizer_factory()
-def test_init_from_project(tokenizer):
- tokenizer.init_from_project()
+ tok.init_from_project()
- assert tokenizer.normalization is not None
+ assert tok.normalization is not None
database_import.setup_extensions(temp_db_conn)
-def test_install_module(tmp_path):
- src_dir = tmp_path / 'source'
- src_dir.mkdir()
- (src_dir / 'nominatim.so').write_text('TEST nomiantim.so')
-
- project_dir = tmp_path / 'project'
- project_dir.mkdir()
-
- database_import.install_module(src_dir, project_dir, '')
-
- outfile = project_dir / 'module' / 'nominatim.so'
-
- assert outfile.exists()
- assert outfile.read_text() == 'TEST nomiantim.so'
- assert outfile.stat().st_mode == 33261
-
-
-def test_install_module_custom(tmp_path):
- (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
- database_import.install_module(tmp_path, tmp_path, str(tmp_path.resolve()))
-
- assert not (tmp_path / 'module').exists()
-
-
-def test_install_module_fail_access(temp_db_conn, tmp_path):
- (tmp_path / 'nominatim.so').write_text('TEST nomiantim.so')
-
- with pytest.raises(UsageError, match='.*module cannot be accessed.*'):
- database_import.install_module(tmp_path, tmp_path, '',
- conn=temp_db_conn)
-
-
def test_import_base_data(src_dir, temp_db, temp_db_cursor):
temp_db_cursor.execute('CREATE EXTENSION hstore')
temp_db_cursor.execute('CREATE EXTENSION postgis')