--- /dev/null
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Specialized processor for housenumbers. Analyses common housenumber patterns
+and creates variants for them.
+"""
+import re
+
+from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
+
+RE_NON_DIGIT = re.compile('[^0-9]')
+RE_DIGIT_ALPHA = re.compile(r'(\d)\s*([^\d\s␣])')
+RE_ALPHA_DIGIT = re.compile(r'([^\s\d␣])\s*(\d)')
+
+### Configuration section
+
+def configure(rules, normalization_rules):
+ """ All behaviour is currently hard-coded.
+ """
+ return None
+
+### Analysis section
+
+def create(normalizer, transliterator, config):
+ """ Create a new token analysis instance for this module.
+ """
+ return HousenumberTokenAnalysis(normalizer, transliterator)
+
+
+class HousenumberTokenAnalysis:
+ """ Detects common housenumber patterns and normalizes them.
+ """
+ def __init__(self, norm, trans):
+ self.norm = norm
+ self.trans = trans
+
+ self.mutator = MutationVariantGenerator('␣', (' ', ''))
+
+ def normalize(self, name):
+ """ Return the normalized form of the housenumber.
+ """
+ # shortcut for number-only numbers, which make up 90% of the data.
+ if RE_NON_DIGIT.search(name) is None:
+ return name
+
+ norm = self.trans.transliterate(self.norm.transliterate(name))
+ norm = RE_DIGIT_ALPHA.sub(r'\1␣\2', norm)
+ norm = RE_ALPHA_DIGIT.sub(r'\1␣\2', norm)
+
+ return norm
+
+ def get_variants_ascii(self, norm_name):
+ """ Compute the spelling variants for the given normalized housenumber.
+
+ Generates variants for optional spaces (marked with '␣').
+ """
+ return list(self.mutator.generate([norm_name]))
| | | | | 4 |
- Scenario: A simple numeral housenumber is found
+ Scenario: A simple ascii digit housenumber is found
Given the places
- | osm | class | type | housenr | geometry |
- | N1 | building | yes | 45 | 9 |
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | 45 | 9 |
And the places
| osm | class | type | name | geometry |
| W10 | highway | path | North Road | 1,2,3 |
| N1 |
+ Scenario Outline: Numeral housenumbers in any script are found
+ # The housenumber is imported once per spelling listed under Examples.
+ # Each of the three search queries below is expected to return N1.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <number> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | North Road | 1,2,3 |
+ When importing
+ And sending search query "45, North Road"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "North Road ④⑤"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "North Road 𑁪𑁫"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | number |
+ | 45 |
+ | ④⑤ |
+ | 𑁪𑁫 |
+
+
Scenario Outline: Each housenumber in a list is found
Given the places
| osm | class | type | housenr | geometry |
| 2, 4, 12 |
+ Scenario Outline: Housenumber - letter combinations are found
+ # A number followed by a single letter is imported in each spelling
+ # listed under Examples (different case and separator). Every query
+ # below is expected to return N1.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Multistr | 1,2,3 |
+ When importing
+ When sending search query "2A Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "2 a Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "2-A Multistr"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Multistr 2 A"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 2a |
+ | 2 A |
+ | 2-a |
+ | 2/A |
+
+
+ Scenario Outline: Number - Number combinations as a housenumber are found
+ # Two numbers joined by '-', ' ' or '/' are imported as one housenumber.
+ # Queries with any of these separators are expected to return N1. The
+ # last query, which writes both numbers as the single number 3410, is
+ # expected to return the street W10.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Chester St | 1,2,3 |
+ When importing
+ When sending search query "34-10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "34/10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "34 10 Chester St"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "3410 Chester St"
+ Then results contain
+ | osm |
+ | W10 |
+
+ Examples:
+ | hnr |
+ | 34-10 |
+ | 34 10 |
+ | 34/10 |
+
+
+ Scenario Outline: a bis housenumber is found
+ # A number with the suffix 'bis' is imported in each spelling listed
+ # under Examples (upper/lower case, with and without a space). Every
+ # query below is expected to return N1.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Rue Paris | 1,2,3 |
+ When importing
+ When sending search query "Rue Paris 45bis"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45 BIS"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45BIS"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue Paris 45 bis"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 45bis |
+ | 45BIS |
+ | 45 BIS |
+ | 45 bis |
+
+ Scenario Outline: a ter housenumber is found
+ # A number with the suffix 'ter' is imported in each spelling listed
+ # under Examples (upper/lower case, with and without a space). Every
+ # query below is expected to return N1.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Rue du Berger | 1,2,3 |
+ When importing
+ When sending search query "Rue du Berger 45ter"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45 TER"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45TER"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Rue du Berger 45 ter"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 45ter |
+ | 45TER |
+ | 45 ter |
+ | 45 TER |
+
+
+ Scenario Outline: a number - letter - number combination housenumber is found
+ # A housenumber of the form number, letter, number is imported in each
+ # spelling listed under Examples. Queries that place the separators
+ # differently are all expected to return N1.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Herengracht | 1,2,3 |
+ When importing
+ When sending search query "501-H 1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501H-1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501H1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "501-H1 Herengracht"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 501 H1 |
+ | 501H 1 |
+ | 501/H/1 |
+ | 501h1 |
+
+
+ Scenario Outline: Russian housenumbers are found
+ # A housenumber with a letter between two numbers is imported with and
+ # without a space after the first number. Every query below is expected
+ # to return N1.
+ # NOTE(review): the second query spells the letter as 'k' where the
+ # imported data has 'к' - presumably deliberate; confirm.
+ Given the places
+ | osm | class | type | housenr | geometry |
+ | N1 | building | yes | <hnr> | 9 |
+ And the places
+ | osm | class | type | name | geometry |
+ | W10 | highway | path | Голубинская улица | 1,2,3 |
+ When importing
+ When sending search query "Голубинская улица 55к3"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Голубинская улица 55 k3"
+ Then results contain
+ | osm |
+ | N1 |
+ When sending search query "Голубинская улица 55 к-3"
+ Then results contain
+ | osm |
+ | N1 |
+
+ Examples:
+ | hnr |
+ | 55к3 |
+ | 55 к3 |
+
+
Scenario: A name mapped as a housenumber is found
Given the places
| osm | class | type | housenr | geometry |