From: Brian Quinion Date: Mon, 26 Mar 2012 22:57:24 +0000 (+0100) Subject: Merge branch 'master' of github.com:twain47/Nominatim X-Git-Tag: v2.0.0~97 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/80cf5df1cd5c249a547f877c73fd6feba9b763b0?hp=-c Merge branch 'master' of github.com:twain47/Nominatim --- 80cf5df1cd5c249a547f877c73fd6feba9b763b0 diff --combined lib/lib.php index 069f3e25,d8ff2f64..1564a83c --- a/lib/lib.php +++ b/lib/lib.php @@@ -1,5 -1,30 +1,30 @@@

Internal Server Error

"; + echo '

Nominatim has encountered an internal error while processing your request. This is most likely because of a bug in the software.

'; + echo "

Details: ".$sError,"

"; + echo '

Feel free to report the bug in the OSM bug database. Please include the error message above and the URL you used.

'; + if (CONST_Debug) + { + echo "

Debugging Information


"; + if ($sSQL) { + echo "

SQL query

".$sSQL.""; + } + if ($vDumpVar) { + echo "

Result

"; + var_dump($vDumpVar); + echo ""; + } + } + echo "\n\n"; + exit; + + } + function fail($sError, $sUserError = false) { if (!$sUserError) $sUserError = $sError; @@@ -40,9 -65,9 +65,9 @@@ function byImportance($a, $b) { -/* if ($a['importance'] != $b['importance']) return ($a['importance'] > $b['importance']?-1:1); +/* if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures']) return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1); if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area']) @@@ -173,12 -198,60 +198,12 @@@ exit; } - if (sizeof($aNearPostcodes)) { return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005)); } return false; - - /* partial search disabled because it sequentially scans placex - - $sSQL = 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from placex where country_code::text = \'gb\'::text AND substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\' and class=\'place\' and type=\'postcode\' '; - $sSQL .= ' union '; - $sSQL .= 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? 
[0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\''; - $aNearPostcodes = $oDB->getAll($sSQL); - if (PEAR::IsError($aNearPostcodes)) - { - var_dump($sSQL, $aNearPostcodes); - exit; - } - - if (!sizeof($aNearPostcodes)) - { - return false; - } - - $fTotalLat = 0; - $fTotalLon = 0; - $fTotalFac = 0; - foreach($aNearPostcodes as $aPostcode) - { - $iDiff = gbPostcodeAlphaDifference($sPostcodeEnd, $aPostcode['substring'])*2 + 1; - if ($iDiff == 0) - $fFac = 1; - else - $fFac = 1/($iDiff*$iDiff); - - $fTotalFac += $fFac; - $fTotalLat += $aPostcode['lat'] * $fFac; - $fTotalLon += $aPostcode['lon'] * $fFac; - } - if ($fTotalFac) - { - $fLat = $fTotalLat / $fTotalFac; - $fLon = $fTotalLon / $fTotalFac; - $fRadius = min(0.1 / $fTotalFac, 0.02); - return array(array('lat' => $fLat, 'lon' => $fLon, 'radius' => $fRadius)); - } - return false; - */ - /* - $fTotalFac is a suprisingly good indicator of accuracy - $iZoom = 18 + round(log($fTotalFac,32)); - $iZoom = max(13,min(18,$iZoom)); - */ } function usPostcodeCalculate($sPostcode, &$oDB) diff --combined sql/functions.sql index 9a613152,a44fee9e..7e9f957b --- a/sql/functions.sql +++ b/sql/functions.sql @@@ -940,50 -940,7 +940,50 @@@ BEGI NEW.rank_address := NEW.rank_search; -- By doing in postgres we have the country available to us - currently only used for postcode - IF NEW.class = 'place' THEN + IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN + + NEW.name := 'ref'=>NEW.postcode; + + IF NEW.country_code = 'gb' THEN + + IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN + NEW.rank_search := 25; + NEW.rank_address := 5; + ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? 
[0-9])$' THEN + NEW.rank_search := 23; + NEW.rank_address := 5; + ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN + NEW.rank_search := 21; + NEW.rank_address := 5; + END IF; + + ELSEIF NEW.country_code = 'de' THEN + + IF NEW.postcode ~ '^([0-9]{5})$' THEN + NEW.rank_search := 21; + NEW.rank_address := 11; + END IF; + + ELSE + -- Guess at the postcode format and coverage (!) + IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local + NEW.rank_search := 21; + NEW.rank_address := 11; + ELSE + -- Does it look splitable into and area and local code? + postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); + + IF postcode IS NOT NULL THEN + NEW.rank_search := 25; + NEW.rank_address := 11; + ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN + NEW.rank_search := 21; + NEW.rank_address := 11; + END IF; + END IF; + END IF; + + ELSEIF NEW.class = 'place' THEN IF NEW.type in ('continent') THEN NEW.rank_search := 2; NEW.rank_address := NEW.rank_search; @@@ -1035,6 -992,49 +1035,6 @@@ ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN NEW.rank_search := 22; NEW.rank_address := 22; - ELSEIF NEW.type in ('postcode') THEN - - NEW.name := 'ref'=>NEW.postcode; - - IF NEW.country_code = 'gb' THEN - - IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN - NEW.rank_search := 25; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN - NEW.rank_search := 23; - NEW.rank_address := 5; - ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN - NEW.rank_search := 21; - NEW.rank_address := 5; - END IF; - - ELSEIF NEW.country_code = 'de' THEN - - IF NEW.postcode ~ '^([0-9]{5})$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - - ELSE - -- Guess at the postcode format and coverage (!) 
- IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local - NEW.rank_search := 21; - NEW.rank_address := 11; - ELSE - -- Does it look splitable into and area and local code? - postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$'); - - IF postcode IS NOT NULL THEN - NEW.rank_search := 25; - NEW.rank_address := 11; - ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN - NEW.rank_search := 21; - NEW.rank_address := 11; - END IF; - END IF; - END IF; - ELSEIF NEW.type in ('airport','street') THEN NEW.rank_search := 26; NEW.rank_address := NEW.rank_search; @@@ -1115,12 -1115,11 +1115,12 @@@ IF st_area(NEW.geometry) < 1 THEN -- mark items within the geometry for re-indexing -- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type; --- work around bug in postgis + + -- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547) update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) - AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point'; + AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null); update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) - AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point'; + AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null); END IF; ELSE -- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :( @@@ -1145,7 -1144,7 +1145,7 @@@ END IF; IF diameter > 0 THEN -- RAISE WARNING 'placex point insert: % 
% % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter; - update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter); + update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter) and (rank_search < 28 or name is not null); END IF; END IF; @@@ -1204,7 -1203,6 +1204,7 @@@ DECLAR tagpairid INTEGER; + default_language TEXT; name_vector INTEGER[]; nameaddress_vector INTEGER[]; @@@ -1258,19 -1256,6 +1258,19 @@@ BEGI -- cheaper but less acurate place_centroid := ST_Centroid(NEW.geometry); + -- Thought this wasn't needed but when we add new languages to the country_name table + -- we need to update the existing names + IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN + default_language := get_country_language_code(NEW.country_code); + IF default_language IS NOT NULL THEN + IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN + NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name')); + ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 
'name' THEN + NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language)); + END IF; + END IF; + END IF; + -- Initialise the name vector using our name name_vector := make_keywords(NEW.name); nameaddress_vector := '{}'::int[]; @@@ -1807,12 -1792,12 +1807,12 @@@ BEGI update placex set indexed_status = 2 where indexed_status = 0 and (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) AND NOT (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry)) - AND rank_search > existingplacex.rank_search; + AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null); update placex set indexed_status = 2 where indexed_status = 0 and (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry)) AND NOT (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry)) - AND rank_search > existingplacex.rank_search; + AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null); END IF; @@@ -1832,7 -1817,8 +1832,8 @@@ IF st_area(NEW.geometry) < 0.5 THEN UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = existingplacex.place_id - and placex.place_id = place_addressline.place_id and indexed_status = 0; + and placex.place_id = place_addressline.place_id and indexed_status = 0 + and (rank_search < 28 or name is not null); END IF; END IF; @@@ -2408,13 -2394,6 +2409,6 @@@ END $$ LANGUAGE plpgsql; - CREATE AGGREGATE array_agg(INT[]) - ( - sfunc = array_cat, - stype = INT[], - initcond = '{}' - ); - CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_startnumber INTEGER, in_endnumber INTEGER, interpolationtype TEXT, in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER diff --combined utils/specialphrases.php index 81d240a6,b8c49da0..28d55010 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php 
@@@ -15,39 -15,8 +15,8 @@@ ); getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true); - $aLanguageIn = array( - 'af', - 'ar', - 'br', - 'ca', - 'cs', - 'de', - 'en', - 'es', - 'et', - 'eu', - 'fa', - 'fi', - 'fr', - 'gl', - 'hr', - 'hu', - 'ia', - 'is', - 'it', - 'ja', - 'mk', - 'nl', - 'no', - 'pl', - 'ps', - 'pt', - 'ru', - 'sk', - 'sv', - 'uk', - 'vi', - ); + include(CONST_BasePath.'/settings/phrase_settings.php'); + if ($aCMDResult['countries']) { echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n"; @@@ -84,7 -53,17 +53,17 @@@ preg_match('/^\\w+$/', $sType) < 1) { trigger_error("Bad class/type for language $sLanguage: $sClass=$sType"); exit; - } + } + # blacklisting: disallow certain class/type combinations + if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) { + # fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n"); + continue; + } + # whitelisting: if class is in whitelist, allow only tags in the list + if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) { + # fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n"); + continue; + } $aPairs[$sClass.'|'.$sType] = array($sClass, $sType); switch(trim($aMatch[4])) @@@ -107,9 -86,6 +86,9 @@@ foreach($aPairs as $aPair) { + if ($aPair[0] == 'yes') continue; + if ($aPair[1] == 'yes') continue; + if ($aPair[0] == 'highway') continue; if ($aPair[1] == 'highway') continue; echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." 
as "; diff --combined website/search.php index 64cec021,50d009e1..ae2c3963 --- a/website/search.php +++ b/website/search.php @@@ -163,8 -163,7 +163,7 @@@ $sViewboxSmallSQL = $oDB->getOne($sSQL); if (PEAR::isError($sViewboxSmallSQL)) { - var_dump($sViewboxSmallSQL); - exit; + failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL); } $sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry"; @@@ -172,8 -171,7 +171,7 @@@ $sViewboxLargeSQL = $oDB->getOne($sSQL); if (PEAR::isError($sViewboxLargeSQL)) { - var_dump($sViewboxLargeSQL); - exit; + failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL); } $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry"; } @@@ -247,7 -245,6 +245,7 @@@ $sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string"); $sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator'; $sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null'; + if (CONST_Debug) var_Dump($sSQL); $aSearchWords = $oDB->getAll($sSQL); $aNewSearches = array(); foreach($aSearches as $aSearch) @@@ -325,8 -322,7 +323,7 @@@ $aDatabaseWords = array(); if (PEAR::IsError($aDatabaseWords)) { - var_dump($sSQL, $aDatabaseWords); - exit; + failInternalError("Could not get word tokens.", $sSQL, $aDatabaseWords); } $aPossibleMainWordIDs = array(); foreach($aDatabaseWords as $aToken) @@@ -375,8 -371,7 +372,8 @@@ // Try and calculate GB postcodes we might be missing foreach($aTokens as $sToken) { - if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) + // Source of gb postcodes is now definitive - always use + if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? 
?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData)) { if (substr($aData[1],-2,1) != ' ') { @@@ -418,6 -413,7 +415,6 @@@ Score how good the search is so they can be ordered */ - foreach($aPhrases as $iPhrase => $sPhrase) { $aNewPhraseSearches = array(); @@@ -504,7 -500,7 +501,7 @@@ if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; } } - else + elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { if (sizeof($aSearch['aName'])) { @@@ -531,8 -527,6 +528,8 @@@ // Allow searching for a word - but at extra cost foreach($aValidTokens[$sToken] as $aSearchTerm) { + if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) + { //var_Dump('
',$aSearch['aName']); if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4) @@@ -552,7 -546,6 +549,7 @@@ $aSearch['iNamePhrase'] = $iPhrase; if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch; } + } } } else @@@ -749,9 -742,6 +746,9 @@@ // First we need a position, either aName or fLat or both $aTerms = array(); $aOrder = array(); + + // TODO: filter out the pointless search terms (2 letter name tokens and less) + // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@@ -796,8 -786,7 +793,7 @@@ $aViewBoxPlaceIDs = $oDB->getAll($sSQL); if (PEAR::IsError($aViewBoxPlaceIDs)) { - var_dump($sSQL, $aViewBoxPlaceIDs); - exit; + failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs); } //var_dump($aViewBoxPlaceIDs); // Did we have an viewbox matches? @@@ -978,8 -967,7 +974,7 @@@ if (PEAR::IsError($aPlaceIDs)) { - var_dump($sSQL, $aPlaceIDs); - exit; + failInternalError("Could not get place IDs from tokens." 
,$sSQL, $aPlaceIDs); } if (CONST_Debug) var_Dump($aPlaceIDs); @@@ -1055,8 -1043,7 +1050,7 @@@ if (PEAR::IsError($aSearchResults)) { - var_dump($sSQL, $aSearchResults); - exit; + failInternalError("Could not get details for place.", $sSQL, $aSearchResults); } } } // end if ($sQuery) @@@ -1124,8 -1111,7 +1118,7 @@@ if (PEAR::IsError($aSearchResults)) { - var_dump($sSQL, $aSearchResults); - exit; + failInternalError("Could not get details for place (near).", $sSQL, $aSearchResults); } } } @@@ -1139,11 -1125,6 +1132,11 @@@ //var_Dump($aSearchResults); //exit; $aClassType = getClassTypesWithImportance(); + $aRecheckWords = preg_split('/\b/',$sQuery); + foreach($aRecheckWords as $i => $sWord) + { + if (!$sWord) unset($aRecheckWords[$i]); + } foreach($aSearchResults as $iResNum => $aResult) { if (CONST_Search_AreaPolygons || true) @@@ -1161,8 -1142,7 +1154,7 @@@ $aPointPolygon = $oDB->getRow($sSQL); if (PEAR::IsError($aPointPolygon)) { - var_dump($sSQL, $aPointPolygon); - exit; + failInternalError("Could not get outline.", $sSQL, $aPointPolygon); } if ($aPointPolygon['place_id']) { @@@ -1261,16 -1241,6 +1253,16 @@@ //exit; } + // Adjust importance for the number of exact string matches in the result + $aResult['importance'] = max(0.001,$aResult['importance']); + $iCountWords = 0; + $sAddress = $aResult['langaddress']; + foreach($aRecheckWords as $i => $sWord) + { + if (stripos($sAddress, $sWord)!==false) $iCountWords++; + } + $aResult['importance'] = $aResult['importance'] + $iCountWords; + //if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']); /* if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance']) @@@ -1292,6 -1262,7 +1284,6 @@@ $aResult['foundorder'] = $iResNum; $aSearchResults[$iResNum] = $aResult; } - uasort($aSearchResults, 'byImportance'); //var_dump($aSearchResults);exit;