From ac70726591928de57cf5d9cb6a1d9623de7d1838 Mon Sep 17 00:00:00 2001
From: Sarah Hoffmann
Date: Thu, 1 May 2014 09:18:10 +0200
Subject: [PATCH] sanity check of house number format

Add a penalty for house number search terms when they contain too many
non-numeric characters in an attempt to filter tokens that ended up as
house numbers in the word table as a result of mapping errors.
---
 lib/Geocode.php | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/lib/Geocode.php b/lib/Geocode.php
index 92e1a0b3..847439c7 100644
--- a/lib/Geocode.php
+++ b/lib/Geocode.php
@@ -890,6 +890,9 @@
 if ($aSearch['sHouseNumber'] === '')
 {
     $aSearch['sHouseNumber'] = $sToken;
+    // sanity check: if the housenumber is not mainly made
+    // up of numbers, add a penalty
+    if (preg_match_all("/[^0-9]/", $sToken, $aMatches) > 2) $aSearch['iSearchRank']++;
     if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch;
     /*
     // Fall back to not searching for this item (better than nothing)
-- 
2.39.5