1 # SPDX-License-Identifier: GPL-3.0-or-later
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2025 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Tests for creating new tokenizers.
12 from nominatim_db.db import properties
13 from nominatim_db.tokenizer import factory
14 from nominatim_db.errors import UsageError
15 from dummy_tokenizer import DummyTokenizer
def test_setup_bad_tokenizer_name(project_env, monkeypatch):
    """ Creating a tokenizer must raise a UsageError when the configured
        tokenizer name is 'dummy' and the tokenizer_mock fixture has not
        been requested.
    """
    env_name, env_value = 'NOMINATIM_TOKENIZER', 'dummy'
    monkeypatch.setenv(env_name, env_value)

    with pytest.raises(UsageError):
        factory.create_tokenizer(project_env)
@pytest.fixture(autouse=True)
def init_env(self, project_env, property_table, tokenizer_mock):
    """ Keep the project configuration on the test instance.

        As an autouse fixture this runs for every test in scope. Requesting
        property_table and tokenizer_mock makes sure those fixtures are set
        up as well, even though their values are not used here.
    """
    self.config = project_env
def test_setup_dummy_tokenizer(self, temp_db_conn):
    """ A newly created tokenizer is a DummyTokenizer in state "new",
        its 'tokenizer' directory exists in the project directory and
        the 'tokenizer' property is set to 'dummy'.
    """
    created = factory.create_tokenizer(self.config)

    assert isinstance(created, DummyTokenizer)
    assert created.init_state == "new"

    tokenizer_dir = self.config.project_dir / 'tokenizer'
    assert tokenizer_dir.is_dir()

    stored_name = properties.get_property(temp_db_conn, 'tokenizer')
    assert stored_name == 'dummy'
def test_setup_tokenizer_dir_exists(self):
    """ Creating a tokenizer succeeds when the 'tokenizer' directory
        is already present in the project directory.
    """
    tokenizer_dir = self.config.project_dir / 'tokenizer'
    tokenizer_dir.mkdir()

    created = factory.create_tokenizer(self.config)

    assert isinstance(created, DummyTokenizer)
    assert created.init_state == "new"
def test_setup_tokenizer_dir_failure(self):
    """ Creating a tokenizer raises a UsageError when a regular file
        occupies the path of the 'tokenizer' directory.
    """
    blocking_file = self.config.project_dir / 'tokenizer'
    blocking_file.write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(self.config)
def test_load_tokenizer(self):
    """ After a tokenizer has been created, loading it for the database
        yields a DummyTokenizer in state "loaded".
    """
    # Set up the tokenizer first; only the loaded instance is inspected.
    factory.create_tokenizer(self.config)

    loaded = factory.get_tokenizer_for_db(self.config)

    assert isinstance(loaded, DummyTokenizer)
    assert loaded.init_state == "loaded"
def test_load_repopulate_tokenizer_dir(self):
    """ Loading the tokenizer for an existing database must bring back
        the 'tokenizer' directory when it is missing from the project
        directory.
    """
    import shutil

    factory.create_tokenizer(self.config)

    # Remove the directory again, so that the final assertion can only
    # pass when loading the tokenizer really recreates it.
    tokenizer_dir = self.config.project_dir / 'tokenizer'
    shutil.rmtree(tokenizer_dir)
    assert not tokenizer_dir.exists()

    factory.get_tokenizer_for_db(self.config)

    assert tokenizer_dir.exists()
def test_load_missing_property(self, temp_db_cursor):
    """ Loading the tokenizer raises a UsageError after the
        nominatim_properties table has been emptied.
    """
    factory.create_tokenizer(self.config)

    # Wipe all stored properties, including the tokenizer name.
    truncate_sql = "TRUNCATE TABLE nominatim_properties"
    temp_db_cursor.execute(truncate_sql)

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(self.config)