X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/8e439d3dd9b74752594f5ff9e32f4e70b00d35ff..38369ca3cfe6e52bb6f7589c714a04294497520e:/lib-php/tokenizer/icu_tokenizer.php?ds=sidebyside diff --git a/lib-php/tokenizer/icu_tokenizer.php b/lib-php/tokenizer/icu_tokenizer.php index f4dd3aeb..e45d0765 100644 --- a/lib-php/tokenizer/icu_tokenizer.php +++ b/lib-php/tokenizer/icu_tokenizer.php @@ -1,4 +1,12 @@ >'op' as operator,"; $sSQL .= " info->>'class' as class, info->>'type' as ctype,"; - $sSQL .= " info->>'count' as count"; + $sSQL .= " info->>'count' as count,"; + $sSQL .= " info->>'lookup' as lookup"; $sSQL .= ' FROM word WHERE word_token in ('; $sSQL .= join(',', $this->oDB->getDBQuotedList($aTokens)).')'; @@ -171,7 +180,8 @@ class Tokenizer } break; case 'H': // house number tokens - $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $aWord['word_token'])); + $sLookup = $aWord['lookup'] ?? $aWord['word_token']; + $oValidTokens->addToken($sTok, new Token\HouseNumber($iId, $sLookup)); break; case 'P': // postcode tokens // Postcodes are not normalized, so they may have content @@ -180,13 +190,17 @@ class Tokenizer if ($aWord['word'] !== null && pg_escape_string($aWord['word']) == $aWord['word'] ) { - $sNormPostcode = $this->normalizeString($aWord['word']); - if (strpos($sNormQuery, $sNormPostcode) !== false) { - $oValidTokens->addToken( - $sTok, - new Token\Postcode($iId, $aWord['word'], null) - ); + $iSplitPos = strpos($aWord['word'], '@'); + if ($iSplitPos === false) { + $sPostcode = $aWord['word']; + } else { + $sPostcode = substr($aWord['word'], 0, $iSplitPos); } + + $oValidTokens->addToken( + $sTok, + new Token\Postcode($iId, $sPostcode, null) + ); } break; case 'S': // tokens for classification terms (special phrases)