X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/13ed184efd71c76fc0c69d9ab800ae44d82f9994..38f467bae3d9a6128180367c0e46ffd0bcad0961:/nominatim/tokenizer/token_analysis/housenumbers.py diff --git a/nominatim/tokenizer/token_analysis/housenumbers.py b/nominatim/tokenizer/token_analysis/housenumbers.py index 96e86b28..a8ad3ecb 100644 --- a/nominatim/tokenizer/token_analysis/housenumbers.py +++ b/nominatim/tokenizer/token_analysis/housenumbers.py @@ -8,8 +8,10 @@ Specialized processor for housenumbers. Analyses common housenumber patterns and creates variants for them. """ +from typing import Any, List, cast import re +from nominatim.data.place_name import PlaceName from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator RE_NON_DIGIT = re.compile('[^0-9]') @@ -19,14 +21,14 @@ RE_NAMED_PART = re.compile(r'[a-z]{4}') ### Configuration section -def configure(rules, normalization_rules): # pylint: disable=W0613 +def configure(*_: Any) -> None: """ All behaviour is currently hard-coded. """ return None ### Analysis section -def create(normalizer, transliterator, config): # pylint: disable=W0613 +def create(normalizer: Any, transliterator: Any, config: None) -> 'HousenumberTokenAnalysis': # pylint: disable=W0613 """ Create a new token analysis instance for this module. """ return HousenumberTokenAnalysis(normalizer, transliterator) @@ -35,20 +37,20 @@ def create(normalizer, transliterator, config): # pylint: disable=W0613 class HousenumberTokenAnalysis: """ Detects common housenumber patterns and normalizes them. """ - def __init__(self, norm, trans): + def __init__(self, norm: Any, trans: Any) -> None: self.norm = norm self.trans = trans self.mutator = MutationVariantGenerator('␣', (' ', '')) - def normalize(self, name): + def get_canonical_id(self, name: PlaceName) -> str: """ Return the normalized form of the housenumber. """ # shortcut for number-only numbers, which make up 90% of the data. - if RE_NON_DIGIT.search(name) is None: - return name + if RE_NON_DIGIT.search(name.name) is None: + return name.name - norm = self.trans.transliterate(self.norm.transliterate(name)) + norm = cast(str, self.trans.transliterate(self.norm.transliterate(name.name))) # If there is a significant non-numeric part, use as is. if RE_NAMED_PART.search(norm) is None: # Otherwise add optional spaces between digits and letters. @@ -60,7 +62,7 @@ class HousenumberTokenAnalysis: return norm - def get_variants_ascii(self, norm_name): + def compute_variants(self, norm_name: str) -> List[str]: """ Compute the spelling variants for the given normalized housenumber. Generates variants for optional spaces (marked with '␣').