This introduces a finalization routine for the tokenizer,
in which it can post-process the import if necessary.
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
-CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
- ON word USING BTREE (word_id) {{db.tablespace.search_index}};
-
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
CREATE INDEX {{sql.if_index_not_exists}} idx_place_addressline_address_place_id
ON place_addressline USING BTREE (address_place_id) {{db.tablespace.search_index}};
--- /dev/null
+CREATE INDEX {{sql.if_index_not_exists}} idx_word_word_id
+ ON word USING BTREE (word_id) {{db.tablespace.search_index}};
LOG.warning('Create search index for default country names.')
database_import.create_country_names(conn, tokenizer,
args.config.LANGUAGES)
LOG.warning('Create search index for default country names.')
database_import.create_country_names(conn, tokenizer,
args.config.LANGUAGES)
+ tokenizer.finalize_import(args.config)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
webdir = args.project_dir / 'website'
LOG.warning('Setup website at %s', webdir)
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
self.normalization = properties.get_property(conn, DBCFG_NORMALIZATION)
+ def finalize_import(self, config):
+ """ Do any required postprocessing to make the tokenizer data ready
+ for use.
+ """
+ with connect(self.dsn) as conn:
+ sqlp = SQLPreprocessor(conn, config)
+ sqlp.run_sql_file(conn, 'tokenizer/legacy_tokenizer_indices.sql')
+
+
def update_sql_functions(self, config):
""" Reimport the SQL functions for this tokenizer.
"""
def update_sql_functions(self, config):
""" Reimport the SQL functions for this tokenizer.
"""
self.init_state = "loaded"
self.init_state = "loaded"
+ def finalize_import(self, _):
+ pass
+
+
def name_analyzer(self):
return DummyNameAnalyzer(self.analyser_cache)
def name_analyzer(self):
return DummyNameAnalyzer(self.analyser_cache)
class DummyTokenizer:
def __init__(self, *args, **kwargs):
self.update_sql_functions_called = False
class DummyTokenizer:
def __init__(self, *args, **kwargs):
self.update_sql_functions_called = False
+ self.finalize_import_called = False
def update_sql_functions(self, *args):
self.update_sql_functions_called = True
def update_sql_functions(self, *args):
self.update_sql_functions_called = True
+ def finalize_import(self, *args):
+ self.finalize_import_called = True
+
tok = DummyTokenizer()
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
lambda *args: tok)
tok = DummyTokenizer()
monkeypatch.setattr(nominatim.tokenizer.factory, 'get_tokenizer_for_db' ,
lambda *args: tok)
+ monkeypatch.setattr(nominatim.tokenizer.factory, 'create_tokenizer' ,
+ lambda *args: tok)
assert 1 == call_nominatim('import', '--osm-file', '.')
assert 1 == call_nominatim('import', '--osm-file', '.')
-def test_import_full(temp_db, mock_func_factory):
+def test_import_full(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mocks = [
mock_func_factory(nominatim.tools.database_import, 'setup_database_skeleton'),
mock_func_factory(nominatim.tools.database_import, 'import_osm_data'),
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.tools.database_import, 'create_partition_tables'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
- mock_func_factory(nominatim.tokenizer.factory, 'create_tokenizer'),
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'load_address_levels_from_file'),
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
assert 0 == call_nominatim('import', '--osm-file', __file__)
cf_mock = mock_func_factory(nominatim.tools.refresh, 'create_functions')
assert 0 == call_nominatim('import', '--osm-file', __file__)
+ assert tokenizer_mock.finalize_import_called
assert cf_mock.called > 1
assert cf_mock.called > 1
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
-def test_import_continue_load_data(temp_db, mock_func_factory):
+def test_import_continue_load_data(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mocks = [
mock_func_factory(nominatim.tools.database_import, 'truncate_data_tables'),
mock_func_factory(nominatim.tools.database_import, 'load_data'),
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
- mock_func_factory(nominatim.tokenizer.factory, 'create_tokenizer'),
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.tools.postcodes, 'import_postcodes'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
]
assert 0 == call_nominatim('import', '--continue', 'load-data')
]
assert 0 == call_nominatim('import', '--continue', 'load-data')
+ assert tokenizer_mock.finalize_import_called
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
-def test_import_continue_indexing(temp_db, mock_func_factory, placex_table, temp_db_conn):
+def test_import_continue_indexing(temp_db, mock_func_factory, placex_table,
+ temp_db_conn, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.indexer.indexer.Indexer, 'index_full'),
- mock_func_factory(nominatim.tokenizer.factory, 'get_tokenizer_for_db'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property')
]
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mock_func_factory(nominatim.db.properties, 'set_property')
]
assert temp_db_conn.index_exists('idx_placex_pendingsector')
assert temp_db_conn.index_exists('idx_placex_pendingsector')
-def test_import_continue_postprocess(temp_db, mock_func_factory):
+def test_import_continue_postprocess(temp_db, mock_func_factory, tokenizer_mock):
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
mocks = [
mock_func_factory(nominatim.tools.database_import, 'create_search_indices'),
mock_func_factory(nominatim.tools.database_import, 'create_country_names'),
mock_func_factory(nominatim.tools.refresh, 'setup_website'),
- mock_func_factory(nominatim.tokenizer.factory, 'get_tokenizer_for_db'),
mock_func_factory(nominatim.db.properties, 'set_property')
]
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
mock_func_factory(nominatim.db.properties, 'set_property')
]
assert 0 == call_nominatim('import', '--continue', 'db-postprocess')
+ assert tokenizer_mock.finalize_import_called
+
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)
for mock in mocks:
assert mock.called == 1, "Mock '{}' not called".format(mock.func_name)