X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/cdfc8628f22a5be2d8ad402adeb9bd6bfcaeface..6b55ab5581a3f6541d16fb7544ce8fd57dc2a673:/nominatim/tokenizer/token_analysis/housenumbers.py?ds=sidebyside diff --git a/nominatim/tokenizer/token_analysis/housenumbers.py b/nominatim/tokenizer/token_analysis/housenumbers.py index a0f4214d..a8ad3ecb 100644 --- a/nominatim/tokenizer/token_analysis/housenumbers.py +++ b/nominatim/tokenizer/token_analysis/housenumbers.py @@ -8,9 +8,10 @@ Specialized processor for housenumbers. Analyses common housenumber patterns and creates variants for them. """ -from typing import Mapping, Any, List, cast +from typing import Any, List, cast import re +from nominatim.data.place_name import PlaceName from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator RE_NON_DIGIT = re.compile('[^0-9]') @@ -20,7 +21,7 @@ RE_NAMED_PART = re.compile(r'[a-z]{4}') ### Configuration section -def configure(rules: Mapping[str, Any], normalization_rules: str) -> None: # pylint: disable=W0613 +def configure(*_: Any) -> None: """ All behaviour is currently hard-coded. """ return None @@ -42,14 +43,14 @@ class HousenumberTokenAnalysis: self.mutator = MutationVariantGenerator('␣', (' ', '')) - def normalize(self, name: str) -> str: + def get_canonical_id(self, name: PlaceName) -> str: """ Return the normalized form of the housenumber. """ # shortcut for number-only numbers, which make up 90% of the data. - if RE_NON_DIGIT.search(name) is None: - return name + if RE_NON_DIGIT.search(name.name) is None: + return name.name - norm = cast(str, self.trans.transliterate(self.norm.transliterate(name))) + norm = cast(str, self.trans.transliterate(self.norm.transliterate(name.name))) # If there is a significant non-numeric part, use as is. if RE_NAMED_PART.search(norm) is None: # Otherwise add optional spaces between digits and letters. @@ -61,7 +62,7 @@ class HousenumberTokenAnalysis: return norm - def get_variants_ascii(self, norm_name: str) -> List[str]: + def compute_variants(self, norm_name: str) -> List[str]: """ Compute the spelling variants for the given normalized housenumber. Generates variants for optional spaces (marked with '␣').