From 6eb90443530e31025802e27527faaa7da99b02b6 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Wed, 22 Jun 2022 09:54:47 +0200 Subject: [PATCH] adapt search algorithm to new postcode format in word --- lib-php/TokenPostcode.php | 7 +- lib-php/tokenizer/icu_tokenizer.php | 16 +++-- test/bdd/db/import/postcodes.feature | 18 ------ test/bdd/db/query/postcodes.feature | 95 ++++++++++++++++++++++++++++ 4 files changed, 111 insertions(+), 25 deletions(-) create mode 100644 test/bdd/db/query/postcodes.feature diff --git a/lib-php/TokenPostcode.php b/lib-php/TokenPostcode.php index f0dbd457..0ff92929 100644 --- a/lib-php/TokenPostcode.php +++ b/lib-php/TokenPostcode.php @@ -25,7 +25,12 @@ class Postcode public function __construct($iId, $sPostcode, $sCountryCode = '') { $this->iId = $iId; - $this->sPostcode = $sPostcode; + $iSplitPos = strpos($sPostcode, '@'); + if ($iSplitPos === false) { + $this->sPostcode = $sPostcode; + } else { + $this->sPostcode = substr($sPostcode, 0, $iSplitPos); + } $this->sCountryCode = empty($sCountryCode) ? 
'' : $sCountryCode; } diff --git a/lib-php/tokenizer/icu_tokenizer.php b/lib-php/tokenizer/icu_tokenizer.php index ccce99ca..e45d0765 100644 --- a/lib-php/tokenizer/icu_tokenizer.php +++ b/lib-php/tokenizer/icu_tokenizer.php @@ -190,13 +190,17 @@ class Tokenizer if ($aWord['word'] !== null && pg_escape_string($aWord['word']) == $aWord['word'] ) { - $sNormPostcode = $this->normalizeString($aWord['word']); - if (strpos($sNormQuery, $sNormPostcode) !== false) { - $oValidTokens->addToken( - $sTok, - new Token\Postcode($iId, $aWord['word'], null) - ); + $iSplitPos = strpos($aWord['word'], '@'); + if ($iSplitPos === false) { + $sPostcode = $aWord['word']; + } else { + $sPostcode = substr($aWord['word'], 0, $iSplitPos); } + + $oValidTokens->addToken( + $sTok, + new Token\Postcode($iId, $sPostcode, null) + ); } break; case 'S': // tokens for classification terms (special phrases) diff --git a/test/bdd/db/import/postcodes.feature b/test/bdd/db/import/postcodes.feature index 7636aea7..4d146d18 100644 --- a/test/bdd/db/import/postcodes.feature +++ b/test/bdd/db/import/postcodes.feature @@ -163,24 +163,6 @@ Feature: Import of postcodes | de | 01982 | country:de | And there are word tokens for postcodes 01982 - Scenario: Different postcodes with the same normalization can both be found - Given the places - | osm | class | type | addr+postcode | addr+housenumber | geometry | - | N34 | place | house | EH4 7EA | 111 | country:gb | - | N35 | place | house | E4 7EA | 111 | country:gb | - When importing - Then location_postcode contains exactly - | country | postcode | geometry | - | gb | EH4 7EA | country:gb | - | gb | E4 7EA | country:gb | - When sending search query "EH4 7EA" - Then results contain - | type | display_name | - | postcode | EH4 7EA | - When sending search query "E4 7EA" - Then results contain - | type | display_name | - | postcode | E4 7EA | @Fail Scenario: search and address ranks for GB post codes correctly assigned diff --git a/test/bdd/db/query/postcodes.feature 
b/test/bdd/db/query/postcodes.feature new file mode 100644 index 00000000..c399b63b --- /dev/null +++ b/test/bdd/db/query/postcodes.feature @@ -0,0 +1,95 @@ +@DB +@fail-legacy +Feature: Querying for postcode variants + + Scenario: Postcodes in Singapore (6-digit postcode) + Given the grid with origin SG + | 10 | | | | 11 | + And the places + | osm | class | type | name | addr+postcode | geometry | + | W1 | highway | path | Lorang | 399174 | 10,11 | + When importing + When sending search query "399174" + Then results contain + | ID | type | display_name | + | 0 | postcode | 399174 | + + + Scenario Outline: Postcodes in the Netherlands (mixed postcode with spaces) + Given the grid with origin NL + | 10 | | | | 11 | + And the places + | osm | class | type | name | addr+postcode | geometry | + | W1 | highway | path | De Weide | 3993 DX | 10,11 | + When importing + When sending search query "3993 DX" + Then results contain + | ID | type | display_name | + | 0 | postcode | 3993 DX | + When sending search query "3993dx" + Then results contain + | ID | type | display_name | + | 0 | postcode | 3993 DX | + + Examples: + | postcode | + | 3993 DX | + | 3993DX | + | 3993 dx | + + + Scenario: Postcodes in Singapore (6-digit postcode) + Given the grid with origin SG + | 10 | | | | 11 | + And the places + | osm | class | type | name | addr+postcode | geometry | + | W1 | highway | path | Lorang | 399174 | 10,11 | + When importing + When sending search query "399174" + Then results contain + | ID | type | display_name | + | 0 | postcode | 399174 | + + + Scenario Outline: Postcodes in Andorra (with country code) + Given the grid with origin AD + | 10 | | | | 11 | + And the places + | osm | class | type | name | addr+postcode | geometry | + | W1 | highway | path | Lorang | <postcode> | 10,11 | + When importing + When sending search query "675" + Then results contain + | ID | type | display_name | + | 0 | postcode | AD675 | + When sending search query "AD675" + Then results contain + | ID | type |
display_name | + | 0 | postcode | AD675 | + + Examples: + | postcode | + | 675 | + | AD 675 | + | AD675 | + + + Scenario: Different postcodes with the same normalization can both be found + Given the places + | osm | class | type | addr+postcode | addr+housenumber | geometry | + | N34 | place | house | EH4 7EA | 111 | country:gb | + | N35 | place | house | E4 7EA | 111 | country:gb | + When importing + Then location_postcode contains exactly + | country | postcode | geometry | + | gb | EH4 7EA | country:gb | + | gb | E4 7EA | country:gb | + When sending search query "EH4 7EA" + Then results contain + | type | display_name | + | postcode | EH4 7EA | + When sending search query "E4 7EA" + Then results contain + | type | display_name | + | postcode | E4 7EA | + -- 2.39.5