]> git.openstreetmap.org Git - nominatim.git/commitdiff
adapt search algorithm to new postcode format in word
authorSarah Hoffmann <lonvia@denofr.de>
Wed, 22 Jun 2022 07:54:47 +0000 (09:54 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Thu, 23 Jun 2022 21:42:31 +0000 (23:42 +0200)
lib-php/TokenPostcode.php
lib-php/tokenizer/icu_tokenizer.php
test/bdd/db/import/postcodes.feature
test/bdd/db/query/postcodes.feature [new file with mode: 0644]

index f0dbd4571676ac59b0030b91a5bd9700ad9b3860..0ff92929cb58f2b496275fe23d016ac0d4dfdef2 100644 (file)
@@ -25,7 +25,12 @@ class Postcode
     public function __construct($iId, $sPostcode, $sCountryCode = '')
     {
         $this->iId = $iId;
-        $this->sPostcode = $sPostcode;
+        $iSplitPos = strpos($sPostcode, '@');
+        if ($iSplitPos === false) {
+            $this->sPostcode = $sPostcode;
+        } else {
+            $this->sPostcode = substr($sPostcode, 0, $iSplitPos);
+        }
         $this->sCountryCode = empty($sCountryCode) ? '' : $sCountryCode;
     }
 
index ccce99ca1330d7a42a6976d7fb7c9eaf3d8a84d7..e45d07654843b9c1c6127f40f3d064441ea4bc79 100644 (file)
@@ -190,13 +190,17 @@ class Tokenizer
                     if ($aWord['word'] !== null
                         && pg_escape_string($aWord['word']) == $aWord['word']
                     ) {
-                        $sNormPostcode = $this->normalizeString($aWord['word']);
-                        if (strpos($sNormQuery, $sNormPostcode) !== false) {
-                            $oValidTokens->addToken(
-                                $sTok,
-                                new Token\Postcode($iId, $aWord['word'], null)
-                            );
+                        $iSplitPos = strpos($aWord['word'], '@');
+                        if ($iSplitPos === false) {
+                            $sPostcode = $aWord['word'];
+                        } else {
+                            $sPostcode = substr($aWord['word'], 0, $iSplitPos);
                         }
+
+                        $oValidTokens->addToken(
+                            $sTok,
+                            new Token\Postcode($iId, $sPostcode, null)
+                        );
                     }
                     break;
                 case 'S':  // tokens for classification terms (special phrases)
index 7636aea7eeecafd602ee8adc0e50f5a4eded9fae..4d146d18c191e95a4cad025bf5c823818072b7f6 100644 (file)
@@ -163,24 +163,6 @@ Feature: Import of postcodes
            | de      | 01982    | country:de |
         And there are word tokens for postcodes 01982
 
-    Scenario: Different postcodes with the same normalization can both be found
-        Given the places
-           | osm | class | type  | addr+postcode | addr+housenumber | geometry |
-           | N34 | place | house | EH4 7EA       | 111              | country:gb |
-           | N35 | place | house | E4 7EA        | 111              | country:gb |
-        When importing
-        Then location_postcode contains exactly
-           | country | postcode | geometry |
-           | gb      | EH4 7EA  | country:gb |
-           | gb      | E4 7EA   | country:gb |
-        When sending search query "EH4 7EA"
-        Then results contain
-           | type     | display_name |
-           | postcode | EH4 7EA      |
-        When sending search query "E4 7EA"
-        Then results contain
-           | type     | display_name |
-           | postcode | E4 7EA       |
 
     @Fail
     Scenario: search and address ranks for GB post codes correctly assigned
diff --git a/test/bdd/db/query/postcodes.feature b/test/bdd/db/query/postcodes.feature
new file mode 100644 (file)
index 0000000..c399b63
--- /dev/null
@@ -0,0 +1,95 @@
+@DB
+@fail-legacy
+Feature: Querying fo postcode variants
+
+    Scenario: Postcodes in Singapore (6-digit postcode)
+        Given the grid with origin SG
+            | 10 |   |   |   | 11 |
+        And the places
+            | osm | class   | type | name   | addr+postcode | geometry |
+            | W1  | highway | path | Lorang | 399174        | 10,11    |
+        When importing
+        When sending search query "399174"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | 399174       |
+
+
+    Scenario Outline: Postcodes in the Netherlands (mixed postcode with spaces)
+        Given the grid with origin NL
+            | 10 |   |   |   | 11 |
+        And the places
+            | osm | class   | type | name     | addr+postcode | geometry |
+            | W1  | highway | path | De Weide | 3993 DX       | 10,11    |
+        When importing
+        When sending search query "3993 DX"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | 3993 DX      |
+        When sending search query "3993dx"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | 3993 DX      |
+
+        Examples:
+            | postcode |
+            | 3993 DX  |
+            | 3993DX   |
+            | 3993 dx  |
+
+
+    Scenario: Postcodes in Singapore (6-digit postcode)
+        Given the grid with origin SG
+            | 10 |   |   |   | 11 |
+        And the places
+            | osm | class   | type | name   | addr+postcode | geometry |
+            | W1  | highway | path | Lorang | 399174        | 10,11    |
+        When importing
+        When sending search query "399174"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | 399174       |
+
+
+    Scenario Outline: Postcodes in Andorra (with country code)
+        Given the grid with origin AD
+            | 10 |   |   |   | 11 |
+        And the places
+            | osm | class   | type | name   | addr+postcode | geometry |
+            | W1  | highway | path | Lorang | <postcode>    | 10,11    |
+        When importing
+        When sending search query "675"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | AD675        |
+        When sending search query "AD675"
+        Then results contain
+            | ID | type     | display_name |
+            | 0  | postcode | AD675        |
+
+        Examples:
+            | postcode |
+            | 675      |
+            | AD 675   |
+            | AD675    |
+
+
+    Scenario: Different postcodes with the same normalization can both be found
+        Given the places
+           | osm | class | type  | addr+postcode | addr+housenumber | geometry |
+           | N34 | place | house | EH4 7EA       | 111              | country:gb |
+           | N35 | place | house | E4 7EA        | 111              | country:gb |
+        When importing
+        Then location_postcode contains exactly
+           | country | postcode | geometry |
+           | gb      | EH4 7EA  | country:gb |
+           | gb      | E4 7EA   | country:gb |
+        When sending search query "EH4 7EA"
+        Then results contain
+           | type     | display_name |
+           | postcode | EH4 7EA      |
+        When sending search query "E4 7EA"
+        Then results contain
+           | type     | display_name |
+           | postcode | E4 7EA       |
+