git.openstreetmap.org Git - nominatim.git/commitdiff
add new analyser for housenumbers
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 16 Feb 2022 16:18:23 +0000 (17:18 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Tue, 1 Mar 2022 08:34:32 +0000 (09:34 +0100)
This analyser makes spaces optional.

nominatim/tokenizer/token_analysis/housenumbers.py [new file with mode: 0644]
settings/icu_tokenizer.yaml
test/bdd/db/query/housenumbers.feature

diff --git a/nominatim/tokenizer/token_analysis/housenumbers.py b/nominatim/tokenizer/token_analysis/housenumbers.py
new file mode 100644 (file)
index 0000000..6a838e0
--- /dev/null
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only
+#
+# This file is part of Nominatim. (https://nominatim.org)
+#
+# Copyright (C) 2022 by the Nominatim developer community.
+# For a full list of authors see the git log.
+"""
+Specialized processor for housenumbers. Analyses common housenumber patterns
+and creates variants for them.
+"""
+import re
+
+from nominatim.tokenizer.token_analysis.generic_mutation import MutationVariantGenerator
+
+RE_NON_DIGIT = re.compile('[^0-9]')
+RE_DIGIT_ALPHA = re.compile(r'(\d)\s*([^\d\s␣])')
+RE_ALPHA_DIGIT = re.compile(r'([^\s\d␣])\s*(\d)')
+
+### Configuration section
+
+def configure(rules, normalization_rules):
+    """ All behaviour is currently hard-coded.
+    """
+    return None
+
+### Analysis section
+
+def create(normalizer, transliterator, config):
+    """ Create a new token analysis instance for this module.
+    """
+    return HousenumberTokenAnalysis(normalizer, transliterator)
+
+
+class HousenumberTokenAnalysis:
+    """ Detects common housenumber patterns and normalizes them.
+    """
+    def __init__(self, norm, trans):
+        self.norm = norm
+        self.trans = trans
+
+        self.mutator = MutationVariantGenerator('␣', (' ', ''))
+
+    def normalize(self, name):
+        """ Return the normalized form of the housenumber.
+        """
+        # shortcut for number-only numbers, which make up 90% of the data.
+        if RE_NON_DIGIT.search(name) is None:
+            return name
+
+        norm = self.trans.transliterate(self.norm.transliterate(name))
+        norm = RE_DIGIT_ALPHA.sub(r'\1␣\2', norm)
+        norm = RE_ALPHA_DIGIT.sub(r'\1␣\2', norm)
+
+        return norm
+
+    def get_variants_ascii(self, norm_name):
+        """ Compute the spelling variants for the given normalized housenumber.
+
+            Generates variants for optional spaces (marked with '␣').
+        """
+        return list(self.mutator.generate([norm_name]))
index 50bb72d2eed9c03b9a9d76823b684a9413c88277..bebd49e924bcf08fa3da5cbb04c5cf4d3c486533 100644 (file)
@@ -41,6 +41,8 @@ sanitizers:
       mode: append
 token-analysis:
     - analyzer: generic
+    - id: "@housenumber"
+      analyzer: housenumbers
     - id: bg
       analyzer: generic
       mode: variant-only
index 4d42da9f072d8d9abea3e4171bb137f39ace0b96..62963af435ade81cce95ac2ea5d5d2c4d1560a5a 100644 (file)
@@ -9,10 +9,10 @@ Feature: Searching of house numbers
          |   |   |   |   | 4 |
 
 
-    Scenario: A simple numeral housenumber is found
+    Scenario: A simple ascii digit housenumber is found
         Given the places
-         | osm | class    | type | housenr | geometry |
-         | N1  | building | yes  | 45      | 9        |
+         | osm | class    | type | housenr  | geometry |
+         | N1  | building | yes  | 45       | 9        |
         And the places
          | osm | class   | type | name       | geometry |
          | W10 | highway | path | North Road | 1,2,3    |
@@ -27,6 +27,34 @@ Feature: Searching of house numbers
          | N1  |
 
 
+    Scenario Outline: Numeral housenumbers in any script are found
+        Given the places
+         | osm | class    | type | housenr  | geometry |
+         | N1  | building | yes  | <number> | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | North Road | 1,2,3    |
+        When importing
+        And sending search query "45, North Road"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "North Road ④⑤"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "North Road 𑁪𑁫"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | number |
+        | 45     |
+        | ④⑤     |
+        | 𑁪𑁫     |
+
+
     Scenario Outline: Each housenumber in a list is found
         Given the places
          | osm | class    | type | housenr | geometry |
@@ -55,6 +83,196 @@ Feature: Searching of house numbers
         | 2, 4, 12 |
 
 
+    Scenario Outline: Housenumber - letter combinations are found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name     | geometry |
+         | W10 | highway | path | Multistr | 1,2,3    |
+        When importing
+        When sending search query "2A Multistr"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "2 a Multistr"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "2-A Multistr"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Multistr 2 A"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | hnr |
+        | 2a  |
+        | 2 A |
+        | 2-a |
+        | 2/A |
+
+
+    Scenario Outline: Number - Number combinations as a housenumber are found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | Chester St | 1,2,3    |
+        When importing
+        When sending search query "34-10 Chester St"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "34/10 Chester St"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "34 10 Chester St"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "3410 Chester St"
+        Then results contain
+         | osm |
+         | W10 |
+
+    Examples:
+        | hnr   |
+        | 34-10 |
+        | 34 10 |
+        | 34/10 |
+
+
+    Scenario Outline: a bis housenumber is found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | Rue Paris | 1,2,3    |
+        When importing
+        When sending search query "Rue Paris 45bis"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue Paris 45 BIS"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue Paris 45BIS"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue Paris 45 bis"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | hnr   |
+        | 45bis |
+        | 45BIS |
+        | 45 BIS |
+        | 45 bis |
+
+    Scenario Outline: a ter housenumber is found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | Rue du Berger | 1,2,3    |
+        When importing
+        When sending search query "Rue du Berger 45ter"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue du Berger 45 TER"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue du Berger 45TER"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Rue du Berger 45 ter"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | hnr   |
+        | 45ter |
+        | 45TER |
+        | 45 ter |
+        | 45 TER |
+
+
+    Scenario Outline: a number - letter - number combination housenumber is found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | Herengracht | 1,2,3    |
+        When importing
+        When sending search query "501-H 1 Herengracht"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "501H-1 Herengracht"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "501H1 Herengracht"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "501-H1 Herengracht"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | hnr |
+        | 501 H1 |
+        | 501H 1 |
+        | 501/H/1 |
+        | 501h1 |
+
+
+    Scenario Outline: Russian housenumbers are found
+        Given the places
+         | osm | class    | type | housenr | geometry |
+         | N1  | building | yes  | <hnr>   | 9        |
+        And the places
+         | osm | class   | type | name       | geometry |
+         | W10 | highway | path | Голубинская улица | 1,2,3    |
+        When importing
+        When sending search query "Голубинская улица 55к3"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Голубинская улица 55 k3"
+        Then results contain
+         | osm |
+         | N1  |
+        When sending search query "Голубинская улица 55 к-3"
+        Then results contain
+         | osm |
+         | N1  |
+
+    Examples:
+        | hnr |
+        | 55к3 |
+        | 55 к3 |
+
+
     Scenario: A name mapped as a housenumber is found
         Given the places
          | osm | class    | type | housenr | geometry |