]> git.openstreetmap.org Git - nominatim.git/blob - nominatim/tokenizer/token_analysis/postcodes.py
introduce and use analyzer for postcodes
[nominatim.git] / nominatim / tokenizer / token_analysis / postcodes.py
1 # SPDX-License-Identifier: GPL-2.0-only
2 #
3 # This file is part of Nominatim. (https://nominatim.org)
4 #
5 # Copyright (C) 2022 by the Nominatim developer community.
6 # For a full list of authors see the git log.
7 """
8 Specialized processor for postcodes. Supports a 'lookup' variant of the
9 token, which produces variants with optional spaces.
10 """
11
12 from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
13
14 ### Configuration section
15
def configure(rules, normalization_rules): # pylint: disable=W0613
    """ Return the configuration for the postcode analysis.

        Both parameters are accepted but ignored because the behaviour
        is hard-coded. The result is therefore always `None`.
    """
    config = None  # nothing to configure
    return config
20
21 ### Analysis section
22
def create(normalizer, transliterator, config): # pylint: disable=W0613
    """ Build the token analysis object provided by this module.

        The normalizer and transliterator are handed on to the new
        `PostcodeTokenAnalysis` instance. `config` is accepted but not used.
    """
    analysis = PostcodeTokenAnalysis(normalizer, transliterator)
    return analysis
27
class PostcodeTokenAnalysis:
    """ Normalization and variant generation for postcodes.

        `norm` and `trans` must provide a `transliterate(str)` method.
        They are used by `get_variants_ascii()` to bring the postcode
        variants into the standard normalized and transliterated form.
    """
    def __init__(self, norm, trans):
        self.norm = norm
        self.trans = trans

        # Mutation on the space character with the replacements ' ' and '',
        # i.e. presumably every space may be kept or dropped in a variant.
        self.mutator = MutationVariantGenerator(' ', (' ', ''))


    def normalize(self, name):
        """ Return the standard form of the postcode: the name with
            surrounding whitespace removed and converted to upper case.
        """
        return name.strip().upper()


    def get_variants_ascii(self, norm_name):
        """ Compute the spelling variants for the given normalized postcode.

            The postcode is first run through the normalizer, then the
            mutator creates the variants with optional spaces and finally
            each variant is transliterated. Empty variants are dropped,
            so the returned list may be empty.
        """
        # Postcodes follow their own transliteration rules.
        # Make sure at this point, that the terms are normalized in a way
        # that they are searchable with the standard transliteration rules.
        # Empty terms (e.g. from an empty postcode) cannot be searched for
        # and are therefore skipped.
        return [self.trans.transliterate(term) for term in
                self.mutator.generate([self.norm.transliterate(norm_name)])
                if term]