git.openstreetmap.org Git - nominatim.git/blob - test/python/test_tokenizer_factory.py
make word count computation part of the import
[nominatim.git] / test / python / test_tokenizer_factory.py
1 """
2 Tests for creating new tokenizers.
3 """
4 import pytest
5
6 from nominatim.db import properties
7 from nominatim.tokenizer import factory
8 from nominatim.errors import UsageError
9 from dummy_tokenizer import DummyTokenizer
10
@pytest.fixture
def test_config(def_config, tmp_path, property_table, tokenizer_mock):
    """Provide the default configuration bound to a temporary project dir.

    The configuration object handed in by ``def_config`` is modified in
    place: its ``project_dir`` is set to ``tmp_path`` and the same object
    is returned.

    ``property_table`` and ``tokenizer_mock`` are not used in the body.
    NOTE(review): presumably they are requested only for their setup
    side effects -- confirm against the fixture definitions.
    """
    config = def_config
    config.project_dir = tmp_path

    return config
15
16
def test_setup_dummy_tokenizer(temp_db_conn, test_config):
    """Check the result of creating a tokenizer in a fresh project dir.

    The created object must be a ``DummyTokenizer`` in state "new", a
    'tokenizer' directory must exist below the project directory and the
    'tokenizer' property read through ``temp_db_conn`` must be 'dummy'.
    """
    created = factory.create_tokenizer(test_config)
    tokenizer_dir = test_config.project_dir / 'tokenizer'

    assert isinstance(created, DummyTokenizer)
    assert created.init_state == "new"
    assert tokenizer_dir.is_dir()

    stored_name = properties.get_property(temp_db_conn, 'tokenizer')
    assert stored_name == 'dummy'
25
26
def test_setup_tokenizer_dir_exists(test_config):
    """Check that creation succeeds when 'tokenizer' already is a directory.

    The directory is created up front; the factory must still return a
    ``DummyTokenizer`` in state "new".
    """
    existing_dir = test_config.project_dir / 'tokenizer'
    existing_dir.mkdir()

    created = factory.create_tokenizer(test_config)

    assert isinstance(created, DummyTokenizer)
    assert created.init_state == "new"
34
35
def test_setup_tokenizer_dir_failure(test_config):
    """Check that creation fails when 'tokenizer' is a regular file.

    A file named 'tokenizer' is written into the project directory
    first; creating the tokenizer must then raise ``UsageError``.
    """
    blocking_file = test_config.project_dir / 'tokenizer'
    blocking_file.write_text("foo")

    with pytest.raises(UsageError):
        factory.create_tokenizer(test_config)
41
42
def test_setup_bad_tokenizer_name(def_config, tmp_path, monkeypatch):
    """Check that creation fails with ``UsageError`` for a bad tokenizer name.

    This test works on ``def_config`` directly instead of the
    ``test_config`` fixture, so ``tokenizer_mock`` and ``property_table``
    are not requested here.
    NOTE(review): presumably that is what makes 'dummy' an unknown
    tokenizer name in this test -- confirm against the fixtures.
    """
    config = def_config
    config.project_dir = tmp_path
    monkeypatch.setenv('NOMINATIM_TOKENIZER', 'dummy')

    with pytest.raises(UsageError):
        factory.create_tokenizer(config)
49
50
def test_load_tokenizer(test_config):
    """Check that a previously created tokenizer can be loaded again.

    After ``create_tokenizer`` has run, ``get_tokenizer_for_db`` must
    return a ``DummyTokenizer`` whose state is "loaded".
    """
    factory.create_tokenizer(test_config)

    loaded = factory.get_tokenizer_for_db(test_config)

    assert isinstance(loaded, DummyTokenizer)
    assert loaded.init_state == "loaded"
58
59
def test_load_no_tokenizer_dir(test_config):
    """Check that loading fails after the project directory was changed.

    The tokenizer is created in the original project directory, then the
    configuration is pointed at its 'foo' subpath, which this test never
    creates. Loading must raise ``UsageError``.
    """
    factory.create_tokenizer(test_config)

    other_dir = test_config.project_dir / 'foo'
    test_config.project_dir = other_dir

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)
67
68
def test_load_missing_propoerty(temp_db_cursor, test_config):
    """Check that loading fails once the properties table is emptied.

    The tokenizer is created first, then ``nominatim_properties`` is
    truncated through ``temp_db_cursor``. Loading must raise
    ``UsageError``.
    """
    factory.create_tokenizer(test_config)

    truncate_sql = "TRUNCATE TABLE nominatim_properties"
    temp_db_cursor.execute(truncate_sql)

    with pytest.raises(UsageError):
        factory.get_tokenizer_for_db(test_config)