git.openstreetmap.org Git - nominatim.git/blob - test/python/tokenizer/token_analysis/test_analysis_postcodes.py
add query analyser for legacy tokenizer
[nominatim.git] / test / python / tokenizer / token_analysis / test_analysis_postcodes.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Tests for special postcode analysis and variant generation.
9 """
10 import pytest
11
12 from icu import Transliterator
13
14 import nominatim.tokenizer.token_analysis.postcodes as module
15 from nominatim.data.place_name import PlaceName
16 from nominatim.errors import UsageError
17
# ICU transform rules used as the normalization step in these tests.
# The same rule string is handed to module.configure() and compiled into
# the "test_norm" transliterator.
18 DEFAULT_NORMALIZATION = """ :: NFD ();
19                             '🜳' > ' ';
20                             [[:Nonspacing Mark:] [:Cf:]] >;
21                             :: lower ();
22                             [[:Punctuation:][:Space:]]+ > ' ';
23                             :: NFC ();
24                         """
25
# ICU transform rules used as the transliteration step in these tests.
# They are compiled into the "test_trans" transliterator by the analyser fixture.
26 DEFAULT_TRANSLITERATION = """ ::  Latin ();
27                               '🜵' > ' ';
28                           """
29
@pytest.fixture
def analyser():
    """Provide a postcode analyser built from the default test rules.

    The analysis module is configured with the 'postcodes' analyzer and the
    default normalization rules, then instantiated with freshly compiled
    normalization and transliteration transliterators.
    """
    analysis_config = module.configure({'analyzer': 'postcodes'},
                                       DEFAULT_NORMALIZATION)

    transliterator = Transliterator.createFromRules("test_trans",
                                                    DEFAULT_TRANSLITERATION)
    normalizer = Transliterator.createFromRules("test_norm",
                                                DEFAULT_NORMALIZATION)

    return module.create(normalizer, transliterator, analysis_config)
39
40
def get_normalized_variants(proc, name):
    """Return the variants `proc` computes for the normalized form of `name`.

    The name is run through a transliterator compiled from the default
    normalization rules and stripped of surrounding whitespace before it is
    passed to `proc.compute_variants()`.
    """
    normalizer = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
    normalized_name = normalizer.transliterate(name).strip()

    return proc.compute_variants(normalized_name)
44
45
@pytest.mark.parametrize('name,norm', [
    ('12', '12'),
    ('A 34 ', 'A 34'),
    ('34-av', '34-AV'),
])
def test_get_canonical_id(analyser, name, norm):
    """Check the canonical ID the analyser derives for a postcode name.

    The name is wrapped in a PlaceName with empty kind and suffix and the
    result is compared against the expected string `norm`.
    """
    place_name = PlaceName(name=name, kind='', suffix='')

    assert analyser.get_canonical_id(place_name) == norm
51
52
@pytest.mark.parametrize('postcode,variants', [
    ('12345', {'12345'}),
    ('AB-998', {'ab 998', 'ab998'}),
    ('23 FGH D3', {'23 fgh d3', '23fgh d3', '23 fghd3', '23fghd3'}),
])
def test_compute_variants(analyser, postcode, variants):
    """Check that the analyser yields exactly the expected variants.

    The result must contain no duplicates and, taken as a set, must equal
    the expected `variants`.
    """
    computed = analyser.compute_variants(postcode)
    unique = set(computed)

    # Equal sizes mean no variant was emitted more than once.
    assert len(computed) == len(unique)
    assert unique == variants