1 # SPDX-License-Identifier: GPL-2.0-only
3 # This file is part of Nominatim. (https://nominatim.org)
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
8 Tests for special postcode analysis and variant generation.
12 from icu import Transliterator
14 import nominatim.tokenizer.token_analysis.postcodes as module
15 from nominatim.errors import UsageError
17 DEFAULT_NORMALIZATION = """ :: NFD ();
19 [[:Nonspacing Mark:] [:Cf:]] >;
21 [[:Punctuation:][:Space:]]+ > ' ';
25 DEFAULT_TRANSLITERATION = """ :: Latin ();
31 rules = { 'analyzer': 'postcodes'}
32 config = module.configure(rules, DEFAULT_NORMALIZATION)
34 trans = Transliterator.createFromRules("test_trans", DEFAULT_TRANSLITERATION)
35 norm = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
37 return module.create(norm, trans, config)
def get_normalized_variants(proc, name):
    """ Transliterate `name` with the DEFAULT_NORMALIZATION rules, strip
        surrounding whitespace and return what `proc.get_variants_ascii()`
        yields for the resulting string.
    """
    normalizer = Transliterator.createFromRules("test_norm", DEFAULT_NORMALIZATION)
    normalized_name = normalizer.transliterate(name).strip()

    return proc.get_variants_ascii(normalized_name)
45 @pytest.mark.parametrize('name,norm', [('12', '12'),
def test_normalize(analyser, name, norm):
    """ Check that `analyser.normalize()` turns `name` into `norm`.
    """
    normalized = analyser.normalize(name)

    assert normalized == norm
@pytest.mark.parametrize('postcode,variants', [
    ('12345', {'12345'}),
    ('AB-998', {'ab 998', 'ab998'}),
    ('23 FGH D3', {'23 fgh d3', '23fgh d3', '23 fghd3', '23fghd3'}),
])
def test_get_variants_ascii(analyser, postcode, variants):
    """ Check that `analyser.get_variants_ascii()` returns exactly the
        expected set of variants for `postcode` and lists none of them twice.
    """
    result = analyser.get_variants_ascii(postcode)
    unique_results = set(result)

    # Equal sizes mean the returned collection contains no duplicates.
    assert len(result) == len(unique_results)
    assert unique_results == variants