nominatim/tokenizer/token_analysis/postcodes.py

   1 # SPDX-License-Identifier: GPL-2.0-only
   2 #
   3 # This file is part of Nominatim. (https://nominatim.org)
   4 #
   5 # Copyright (C) 2022 by the Nominatim developer community.
   6 # For a full list of authors see the git log.
   7 """
   8 Specialized processor for postcodes. Supports a 'lookup' variant of the
   9 token, which produces variants with optional spaces.
  10 """
  11
  12 from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
  13
  14 ### Configuration section
  15
  16 def configure(rules, normalization_rules): # pylint: disable=W0613
  17     """ All behaviour is currently hard-coded.
  18     """
  19     return None
  20
  21 ### Analysis section
  22
  23 def create(normalizer, transliterator, config): # pylint: disable=W0613
  24     """ Create a new token analysis instance for this module.
  25     """
  26     return PostcodeTokenAnalysis(normalizer, transliterator)
  27
  28 class PostcodeTokenAnalysis:
  29     """ Detects common housenumber patterns and normalizes them.
  30     """
  31     def __init__(self, norm, trans):
  32         self.norm = norm
  33         self.trans = trans
  34
  35         self.mutator = MutationVariantGenerator(' ', (' ', ''))
  36
  37
  38     def normalize(self, name):
  39         """ Return the standard form of the postcode.
  40         """
  41         return name.strip().upper()
  42
  43
  44     def get_variants_ascii(self, norm_name):
  45         """ Compute the spelling variants for the given normalized postcode.
  46
  47             The official form creates one variant. If a 'lookup version' is
  48             given, then it will create variants with optional spaces.
  49         """
  50         # Postcodes follow their own transliteration rules.
  51         # Make sure at this point, that the terms are normalized in a way
  52         # that they are searchable with the standard transliteration rules.
  53         return [self.trans.transliterate(term) for term in
  54                 self.mutator.generate([self.norm.transliterate(norm_name)])]