Automatically repopulate the tokenizer/ directory with the PHP stub
and the PostgreSQL module when the directory is missing. This makes it
possible to switch working directories and, in particular, to run the
service from a different machine than the one where it was installed.
Users still need to make sure that .env files are set up correctly
or they will shoot themselves in the foot.
See #2515.
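In caller terms: a database imported elsewhere can now be opened even though
the project's tokenizer/ directory is gone; it is recreated on first access.
A minimal sketch, assuming the usual factory entry point and a Configuration
already loaded from the project's .env:

    # Only the project directory (with its .env) was copied to the new machine;
    # tokenizer/ is missing and is repopulated on the first lookup.
    from nominatim.tokenizer import factory

    tokenizer = factory.get_tokenizer_for_db(config)  # recreates project_dir/tokenizer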
""" Return the current value of the given propery or None if the property
is not set.
"""
""" Return the current value of the given propery or None if the property
is not set.
"""
+ if not conn.table_exists('nominatim_properties'):
+ return None
+
with conn.cursor() as cur:
cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
(name, ))
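For context, the complete helper with the new guard would read roughly as
below; everything after the execute() call lies outside the hunk and is an
assumption:

    def get_property(conn, name):
        """ Return the current value of the given property or None if the property
            is not set.
        """
        # New guard: on a database that was never imported, the properties
        # table does not exist yet, so there is nothing to look up.
        if not conn.table_exists('nominatim_properties'):
            return None

        with conn.cursor() as cur:
            cur.execute('SELECT value FROM nominatim_properties WHERE property = %s',
                        (name, ))
            row = cur.fetchone()  # assumed single-row fetch

        return row[0] if row is not None else None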
"""
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
"""
basedir = config.project_dir / 'tokenizer'
if not basedir.is_dir():
- LOG.fatal("Cannot find tokenizer data in '%s'.", basedir)
- raise UsageError('Cannot initialize tokenizer.')
+ # Directory will be repopulated by tokenizer below.
+ basedir.mkdir()
with connect(config.get_libpq_dsn()) as conn:
name = properties.get_property(conn, 'tokenizer')
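Pieced together, the repaired factory lookup then runs as sketched below; the
_import_tokenizer() step and the error branch are assumptions beyond the
hunks shown here:

    def get_tokenizer_for_db(config):
        basedir = config.project_dir / 'tokenizer'
        if not basedir.is_dir():
            # Directory will be repopulated by tokenizer below.
            basedir.mkdir()

        with connect(config.get_libpq_dsn()) as conn:
            name = properties.get_property(conn, 'tokenizer')

        if name is None:
            # Together with the table_exists() guard this also catches
            # databases that were never imported.
            raise UsageError('Cannot initialize tokenizer.')

        tokenizer_module = _import_tokenizer(name)  # assumed dynamic import helper

        tokenizer = tokenizer_module.create(config.get_libpq_dsn(), basedir)
        tokenizer.init_from_project(config)
        return tokenizer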
"""
self.loader = ICURuleLoader(config)
"""
self.loader = ICURuleLoader(config)
- self._install_php(config.lib_dir.php)
+ self._install_php(config.lib_dir.php, overwrite=True)
self._save_config()
if init_db:
with connect(self.dsn) as conn:
self.loader.load_config_from_db(conn)
+ self._install_php(config.lib_dir.php, overwrite=False)
+
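The overwrite flag keeps the two entry points apart: a fresh import always
regenerates the PHP stub, while loading an existing project only restores a
missing one. As a usage sketch (names as in the factory sketch above):

    tok = tokenizer_module.create(config.get_libpq_dsn(), basedir)
    # Either a fresh import, which always rewrites the stub ...
    tok.init_new_db(config)            # _install_php(..., overwrite=True)
    # ... or opening an existing database, which fills gaps only.
    tok.init_from_project(config)      # _install_php(..., overwrite=False)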
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
self.loader.make_token_analysis())
- def _install_php(self, phpdir):
+ def _install_php(self, phpdir, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
- php_file.write_text(dedent(f"""\
- <?php
- @define('CONST_Max_Word_Frequency', 10000000);
- @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
- @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
- require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
+
+ if not php_file.exists() or overwrite:
+ php_file.write_text(dedent(f"""\
+ <?php
+ @define('CONST_Max_Word_Frequency', 10000000);
+ @define('CONST_Term_Normalization_Rules', "{self.loader.normalization_rules}");
+ @define('CONST_Transliteration', "{self.loader.get_search_rules()}");
+ require_once('{phpdir}/tokenizer/icu_tokenizer.php');"""))
self.normalization = config.TERM_NORMALIZATION
- self._install_php(config)
+ self._install_php(config, overwrite=True)
with connect(self.dsn) as conn:
_check_module(module_dir, conn)
self._init_db_tables(config)
- def init_from_project(self, _):
+ def init_from_project(self, config):
""" Initialise the tokenizer from the project directory.
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
""" Initialise the tokenizer from the project directory.
"""
with connect(self.dsn) as conn:
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+ if not (config.project_dir / 'module' / 'nominatim.so').exists():
+ _install_module(config.DATABASE_MODULE_PATH,
+ config.lib_dir.module,
+ config.project_dir / 'module')
+
+ self._install_php(config, overwrite=False)
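Assembled from the hunks above, the legacy tokenizer's load path now reads:

    def init_from_project(self, config):
        """ Initialise the tokenizer from the project directory.
        """
        with connect(self.dsn) as conn:
            self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)

        # Restore the PostgreSQL module stub if the project directory lost it.
        if not (config.project_dir / 'module' / 'nominatim.so').exists():
            _install_module(config.DATABASE_MODULE_PATH,
                            config.lib_dir.module,
                            config.project_dir / 'module')

        self._install_php(config, overwrite=False)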
def finalize_import(self, config):
""" Do any required postprocessing to make the tokenizer data ready
return LegacyNameAnalyzer(self.dsn, normalizer)
- def _install_php(self, config):
+ def _install_php(self, config, overwrite=True):
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
""" Install the php script for the tokenizer.
"""
php_file = self.data_dir / "tokenizer.php"
- php_file.write_text(dedent("""\
- <?php
- @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
- @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
- require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
- """.format(config)))
+
+ if not php_file.exists() or overwrite:
+ php_file.write_text(dedent("""\
+ <?php
+ @define('CONST_Max_Word_Frequency', {0.MAX_WORD_FREQUENCY});
+ @define('CONST_Term_Normalization_Rules', "{0.TERM_NORMALIZATION}");
+ require_once('{0.lib_dir.php}/tokenizer/legacy_tokenizer.php');
+ """.format(config)))
def _init_db_tables(self, config):
self.db_drop_database(self.api_test_db)
raise
- tokenizer_factory.create_tokenizer(self.get_test_config(), init_db=False)
+ tokenizer_factory.get_tokenizer_for_db(self.get_test_config())
def setup_unknown_db(self):
assert tokenizer.init_state == "loaded"
- def test_load_no_tokenizer_dir(self):
+ def test_load_repopulate_tokenizer_dir(self):
factory.create_tokenizer(self.config)
- self.config.project_dir = self.config.project_dir / 'foo'
+ # Move to a fresh project directory that has no tokenizer data yet.
+ self.config.project_dir = self.config.project_dir / 'foo'
+ self.config.project_dir.mkdir()
- with pytest.raises(UsageError):
- factory.get_tokenizer_for_db(self.config)
+ factory.get_tokenizer_for_db(self.config)
+ assert (self.config.project_dir / 'tokenizer').exists()
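If the dummy tokenizer used by the test suite writes its stub files too, the
test could additionally verify that the PHP stub came back; this extra
assertion is hypothetical:

    # Hypothetical: holds only if the test tokenizer writes a PHP stub.
    assert (self.config.project_dir / 'tokenizer' / 'tokenizer.php').exists()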
def test_load_missing_property(self, temp_db_cursor):