<?php
+ /**
+  * Abort the current request with an HTTP 500 error page and stop execution.
+  *
+  * Sends the 500 status and an HTML content type (only if headers can still be
+  * sent), prints a short explanation for the user and, when CONST_Debug is
+  * truthy, appends the failing SQL statement and a dump of the offending value.
+  *
+  * Every dynamic value is HTML-escaped before output: the SQL text and the
+  * dumped value can contain fragments of the user's query, so printing them
+  * verbatim would allow markup injection into the error page.
+  *
+  * @param string      $sError   Short plain-text description of what failed.
+  * @param string|bool $sSQL     SQL statement being executed, or false if none.
+  * @param mixed       $vDumpVar Value to dump for debugging (e.g. the error
+  *                               object returned by the database layer), or false.
+  *
+  * @return void This function never returns; it always terminates via exit.
+  */
+ function failInternalError($sError, $sSQL = false, $vDumpVar = false)
+ {
+     // ENT_SUBSTITUTE keeps output non-empty on invalid UTF-8, but the constant
+     // only exists on newer PHP versions, so fall back gracefully.
+     $iEscapeFlags = defined('ENT_SUBSTITUTE') ? (ENT_QUOTES | ENT_SUBSTITUTE) : ENT_QUOTES;
+
+     // Callers may already have produced debug output; in that case the status
+     // line can no longer be changed and header() would only raise warnings.
+     if (!headers_sent())
+     {
+         header('HTTP/1.0 500 Internal Server Error');
+         header('Content-type: text/html; charset=utf-8');
+     }
+
+     echo "<html><body><h1>Internal Server Error</h1>";
+     echo '<p>Nominatim has encountered an internal error while processing your request. This is most likely because of a bug in the software.</p>';
+     echo "<p><b>Details:</b> ".htmlspecialchars((string)$sError, $iEscapeFlags, 'UTF-8')."</p>";
+     echo '<p>Feel free to report the bug in the <a href="http://trac.openstreetmap.org">OSM bug database</a>. Please include the error message above and the URL you used.</p>';
+
+     if (CONST_Debug)
+     {
+         echo "<hr><h2>Debugging Information</h2><br>";
+         if ($sSQL)
+         {
+             echo "<h3>SQL query</h3><code>".htmlspecialchars((string)$sSQL, $iEscapeFlags, 'UTF-8')."</code>";
+         }
+         if ($vDumpVar)
+         {
+             // Capture the dump so it can be escaped instead of being written
+             // straight into the page.
+             ob_start();
+             var_dump($vDumpVar);
+             $sDump = ob_get_clean();
+
+             echo "<h3>Result</h3> <code>";
+             echo htmlspecialchars((string)$sDump, $iEscapeFlags, 'UTF-8');
+             echo "</code>";
+         }
+     }
+
+     echo "\n</body></html>\n";
+     exit;
+ }
+
function fail($sError, $sUserError = false)
{
if (!$sUserError) $sUserError = $sError;
function byImportance($a, $b)
{
-/*
if ($a['importance'] != $b['importance'])
return ($a['importance'] > $b['importance']?-1:1);
+/*
if ($a['aPointPolygon']['numfeatures'] != $b['aPointPolygon']['numfeatures'])
return ($a['aPointPolygon']['numfeatures'] > $b['aPointPolygon']['numfeatures']?-1:1);
if ($a['aPointPolygon']['area'] != $b['aPointPolygon']['area'])
exit;
}
-
if (sizeof($aNearPostcodes))
{
return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005));
}
return false;
-
- /* partial search disabled because it sequentially scans placex
-
- $sSQL = 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from placex where country_code::text = \'gb\'::text AND substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\' and class=\'place\' and type=\'postcode\' ';
- $sSQL .= ' union ';
- $sSQL .= 'select substring(upper(postcode) from \'^[A-Z][A-Z]?[0-9][0-9A-Z]? [0-9]([A-Z][A-Z])$\'),ST_X(ST_Centroid(geometry)) as lon,ST_Y(ST_Centroid(geometry)) as lat from gb_postcode where substring(postcode from \'^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])[A-Z][A-Z]$\') = \''.$sPostcodeSector.'\'';
- $aNearPostcodes = $oDB->getAll($sSQL);
- if (PEAR::IsError($aNearPostcodes))
- {
- var_dump($sSQL, $aNearPostcodes);
- exit;
- }
-
- if (!sizeof($aNearPostcodes))
- {
- return false;
- }
-
- $fTotalLat = 0;
- $fTotalLon = 0;
- $fTotalFac = 0;
- foreach($aNearPostcodes as $aPostcode)
- {
- $iDiff = gbPostcodeAlphaDifference($sPostcodeEnd, $aPostcode['substring'])*2 + 1;
- if ($iDiff == 0)
- $fFac = 1;
- else
- $fFac = 1/($iDiff*$iDiff);
-
- $fTotalFac += $fFac;
- $fTotalLat += $aPostcode['lat'] * $fFac;
- $fTotalLon += $aPostcode['lon'] * $fFac;
- }
- if ($fTotalFac)
- {
- $fLat = $fTotalLat / $fTotalFac;
- $fLon = $fTotalLon / $fTotalFac;
- $fRadius = min(0.1 / $fTotalFac, 0.02);
- return array(array('lat' => $fLat, 'lon' => $fLon, 'radius' => $fRadius));
- }
- return false;
- */
- /*
- $fTotalFac is a suprisingly good indicator of accuracy
- $iZoom = 18 + round(log($fTotalFac,32));
- $iZoom = max(13,min(18,$iZoom));
- */
}
function usPostcodeCalculate($sPostcode, &$oDB)
NEW.rank_address := NEW.rank_search;
-- By doing in postgres we have the country available to us - currently only used for postcode
- IF NEW.class = 'place' THEN
+ IF NEW.class in ('place','boundary') AND NEW.type in ('postcode','postal_code') THEN
+
+ NEW.name := 'ref'=>NEW.postcode;
+
+ IF NEW.country_code = 'gb' THEN
+
+ IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
+ NEW.rank_search := 25;
+ NEW.rank_address := 5;
+ ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
+ NEW.rank_search := 23;
+ NEW.rank_address := 5;
+ ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
+ NEW.rank_search := 21;
+ NEW.rank_address := 5;
+ END IF;
+
+ ELSEIF NEW.country_code = 'de' THEN
+
+ IF NEW.postcode ~ '^([0-9]{5})$' THEN
+ NEW.rank_search := 21;
+ NEW.rank_address := 11;
+ END IF;
+
+ ELSE
+ -- Guess at the postcode format and coverage (!)
+ IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
+ NEW.rank_search := 21;
+ NEW.rank_address := 11;
+ ELSE
+ -- Does it look splitable into and area and local code?
+ postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
+
+ IF postcode IS NOT NULL THEN
+ NEW.rank_search := 25;
+ NEW.rank_address := 11;
+ ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
+ NEW.rank_search := 21;
+ NEW.rank_address := 11;
+ END IF;
+ END IF;
+ END IF;
+
+ ELSEIF NEW.class = 'place' THEN
IF NEW.type in ('continent') THEN
NEW.rank_search := 2;
NEW.rank_address := NEW.rank_search;
ELSEIF NEW.type in ('hall_of_residence','neighbourhood','housing_estate','nature_reserve') THEN
NEW.rank_search := 22;
NEW.rank_address := 22;
- ELSEIF NEW.type in ('postcode') THEN
-
- NEW.name := 'ref'=>NEW.postcode;
-
- IF NEW.country_code = 'gb' THEN
-
- IF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9][A-Z][A-Z])$' THEN
- NEW.rank_search := 25;
- NEW.rank_address := 5;
- ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z]? [0-9])$' THEN
- NEW.rank_search := 23;
- NEW.rank_address := 5;
- ELSEIF NEW.postcode ~ '^([A-Z][A-Z]?[0-9][0-9A-Z])$' THEN
- NEW.rank_search := 21;
- NEW.rank_address := 5;
- END IF;
-
- ELSEIF NEW.country_code = 'de' THEN
-
- IF NEW.postcode ~ '^([0-9]{5})$' THEN
- NEW.rank_search := 21;
- NEW.rank_address := 11;
- END IF;
-
- ELSE
- -- Guess at the postcode format and coverage (!)
- IF upper(NEW.postcode) ~ '^[A-Z0-9]{1,5}$' THEN -- Probably too short to be very local
- NEW.rank_search := 21;
- NEW.rank_address := 11;
- ELSE
- -- Does it look splitable into and area and local code?
- postcode := substring(upper(NEW.postcode) from '^([- :A-Z0-9]+)([- :][A-Z0-9]+)$');
-
- IF postcode IS NOT NULL THEN
- NEW.rank_search := 25;
- NEW.rank_address := 11;
- ELSEIF NEW.postcode ~ '^[- :A-Z0-9]{6,}$' THEN
- NEW.rank_search := 21;
- NEW.rank_address := 11;
- END IF;
- END IF;
- END IF;
-
ELSEIF NEW.type in ('airport','street') THEN
NEW.rank_search := 26;
NEW.rank_address := NEW.rank_search;
IF st_area(NEW.geometry) < 1 THEN
-- mark items within the geometry for re-indexing
-- RAISE WARNING 'placex poly insert: % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type;
--- work around bug in postgis
+
+ -- work around bug in postgis, this may have been fixed in 2.0.0 (see http://trac.osgeo.org/postgis/ticket/547)
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
- AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point';
+ AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) = 'ST_Point' and (rank_search < 28 or name is not null);
update placex set indexed_status = 2 where (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
- AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point';
+ AND rank_search > NEW.rank_search and indexed_status = 0 and ST_geometrytype(placex.geometry) != 'ST_Point' and (rank_search < 28 or name is not null);
END IF;
ELSE
-- mark nearby items for re-indexing, where 'nearby' depends on the features rank_search and is a complete guess :(
END IF;
IF diameter > 0 THEN
-- RAISE WARNING 'placex point insert: % % % % %',NEW.osm_type,NEW.osm_id,NEW.class,NEW.type,diameter;
- update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter);
+ update placex set indexed_status = 2 where indexed_status = 0 and rank_search > NEW.rank_search and ST_DWithin(placex.geometry, NEW.geometry, diameter) and (rank_search < 28 or name is not null);
END IF;
END IF;
tagpairid INTEGER;
+ default_language TEXT;
name_vector INTEGER[];
nameaddress_vector INTEGER[];
-- cheaper but less acurate
place_centroid := ST_Centroid(NEW.geometry);
+ -- Thought this wasn't needed but when we add new languages to the country_name table
+ -- we need to update the existing names
+ IF NEW.name is not null AND array_upper(%#NEW.name,1) > 1 THEN
+ default_language := get_country_language_code(NEW.country_code);
+ IF default_language IS NOT NULL THEN
+ IF NEW.name ? 'name' AND NOT NEW.name ? ('name:'||default_language) THEN
+ NEW.name := NEW.name || (('name:'||default_language) => (NEW.name -> 'name'));
+ ELSEIF NEW.name ? ('name:'||default_language) AND NOT NEW.name ? 'name' THEN
+ NEW.name := NEW.name || ('name' => (NEW.name -> 'name:'||default_language));
+ END IF;
+ END IF;
+ END IF;
+
-- Initialise the name vector using our name
name_vector := make_keywords(NEW.name);
nameaddress_vector := '{}'::int[];
update placex set indexed_status = 2 where indexed_status = 0 and
(ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
AND NOT (ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
- AND rank_search > existingplacex.rank_search;
+ AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
update placex set indexed_status = 2 where indexed_status = 0 and
(ST_Contains(existinggeometry, placex.geometry) OR ST_Intersects(existinggeometry, placex.geometry))
AND NOT (ST_Contains(NEW.geometry, placex.geometry) OR ST_Intersects(NEW.geometry, placex.geometry))
- AND rank_search > existingplacex.rank_search;
+ AND rank_search > existingplacex.rank_search AND (rank_search < 28 or name is not null);
END IF;
IF st_area(NEW.geometry) < 0.5 THEN
UPDATE placex set indexed_status = 2 from place_addressline where address_place_id = existingplacex.place_id
- and placex.place_id = place_addressline.place_id and indexed_status = 0;
+ and placex.place_id = place_addressline.place_id and indexed_status = 0
+ and (rank_search < 28 or name is not null);
END IF;
END IF;
$$
LANGUAGE plpgsql;
- CREATE AGGREGATE array_agg(INT[])
- (
- sfunc = array_cat,
- stype = INT[],
- initcond = '{}'
- );
-
CREATE OR REPLACE FUNCTION tigger_create_interpolation(linegeo GEOMETRY, in_startnumber INTEGER,
in_endnumber INTEGER, interpolationtype TEXT,
in_street TEXT, in_isin TEXT, in_postcode TEXT) RETURNS INTEGER
);
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);
- $aLanguageIn = array(
- 'af',
- 'ar',
- 'br',
- 'ca',
- 'cs',
- 'de',
- 'en',
- 'es',
- 'et',
- 'eu',
- 'fa',
- 'fi',
- 'fr',
- 'gl',
- 'hr',
- 'hu',
- 'ia',
- 'is',
- 'it',
- 'ja',
- 'mk',
- 'nl',
- 'no',
- 'pl',
- 'ps',
- 'pt',
- 'ru',
- 'sk',
- 'sv',
- 'uk',
- 'vi',
- );
+ include(CONST_BasePath.'/settings/phrase_settings.php');
+
if ($aCMDResult['countries']) {
echo "select getorcreate_country(make_standard_name('uk'), 'gb');\n";
preg_match('/^\\w+$/', $sType) < 1) {
trigger_error("Bad class/type for language $sLanguage: $sClass=$sType");
exit;
- }
+ }
+ # blacklisting: disallow certain class/type combinations
+ if (isset($aTagsBlacklist[$sClass]) && in_array($sType, $aTagsBlacklist[$sClass])) {
+ # fwrite(STDERR, "Blacklisted: ".$sClass."/".$sType."\n");
+ continue;
+ }
+ # whitelisting: if class is in whitelist, allow only tags in the list
+ if (isset($aTagsWhitelist[$sClass]) && !in_array($sType, $aTagsWhitelist[$sClass])) {
+ # fwrite(STDERR, "Non-Whitelisted: ".$sClass."/".$sType."\n");
+ continue;
+ }
$aPairs[$sClass.'|'.$sType] = array($sClass, $sType);
switch(trim($aMatch[4]))
foreach($aPairs as $aPair)
{
+ if ($aPair[0] == 'yes') continue;
+ if ($aPair[1] == 'yes') continue;
+ if ($aPair[0] == 'highway') continue;
if ($aPair[1] == 'highway') continue;
echo "create table place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." as ";
$sViewboxSmallSQL = $oDB->getOne($sSQL);
if (PEAR::isError($sViewboxSmallSQL))
{
- var_dump($sViewboxSmallSQL);
- exit;
+ failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL);
}
$sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry";
$sViewboxLargeSQL = $oDB->getOne($sSQL);
if (PEAR::isError($sViewboxLargeSQL))
{
- var_dump($sViewboxLargeSQL);
- exit;
+ failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL);
}
$sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry";
}
$sToken = $oDB->getOne("select make_standard_name('".$aSpecialTerm[1]."') as string");
$sSQL = 'select * from (select word_id,word_token, word, class, type, location, country_code, operator';
$sSQL .= ' from word where word_token in (\' '.$sToken.'\')) as x where (class is not null and class not in (\'place\',\'highway\')) or country_code is not null';
+ if (CONST_Debug) var_Dump($sSQL);
$aSearchWords = $oDB->getAll($sSQL);
$aNewSearches = array();
foreach($aSearches as $aSearch)
$aDatabaseWords = array();
if (PEAR::IsError($aDatabaseWords))
{
- var_dump($sSQL, $aDatabaseWords);
- exit;
+ failInternalError("Could not get word tokens.", $sSQL, $aDatabaseWords);
}
$aPossibleMainWordIDs = array();
foreach($aDatabaseWords as $aToken)
// Try and calculate GB postcodes we might be missing
foreach($aTokens as $sToken)
{
- if (!isset($aValidTokens[$sToken]) && !isset($aValidTokens[' '.$sToken]) && preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
+ // Source of gb postcodes is now definitive - always use
+ if (preg_match('/^([A-Z][A-Z]?[0-9][0-9A-Z]? ?[0-9])([A-Z][A-Z])$/', strtoupper(trim($sToken)), $aData))
{
if (substr($aData[1],-2,1) != ' ')
{
Score how good the search is so they can be ordered
*/
-
foreach($aPhrases as $iPhrase => $sPhrase)
{
$aNewPhraseSearches = array();
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
}
- else
+ elseif (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
{
if (sizeof($aSearch['aName']))
{
// Allow searching for a word - but at extra cost
foreach($aValidTokens[$sToken] as $aSearchTerm)
{
+ if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id'])
+ {
//var_Dump('<hr>',$aSearch['aName']);
if (sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4)
$aSearch['iNamePhrase'] = $iPhrase;
if ($aSearch['iSearchRank'] < $iMaxRank) $aNewWordsetSearches[] = $aSearch;
}
+ }
}
}
else
// First we need a position, either aName or fLat or both
$aTerms = array();
$aOrder = array();
+
+ // TODO: filter out the pointless search terms (2 letter name tokens and less)
+ // they might be right - but they are just too darned expensive to run
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
$aViewBoxPlaceIDs = $oDB->getAll($sSQL);
if (PEAR::IsError($aViewBoxPlaceIDs))
{
- var_dump($sSQL, $aViewBoxPlaceIDs);
- exit;
+ failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
}
//var_dump($aViewBoxPlaceIDs);
// Did we have an viewbox matches?
if (PEAR::IsError($aPlaceIDs))
{
- var_dump($sSQL, $aPlaceIDs);
- exit;
+ failInternalError("Could not get place IDs from tokens." ,$sSQL, $aPlaceIDs);
}
if (CONST_Debug) var_Dump($aPlaceIDs);
if (PEAR::IsError($aSearchResults))
{
- var_dump($sSQL, $aSearchResults);
- exit;
+ failInternalError("Could not get details for place.", $sSQL, $aSearchResults);
}
}
} // end if ($sQuery)
if (PEAR::IsError($aSearchResults))
{
- var_dump($sSQL, $aSearchResults);
- exit;
+ failInternalError("Could not get details for place (near).", $sSQL, $aSearchResults);
}
}
}
//var_Dump($aSearchResults);
//exit;
$aClassType = getClassTypesWithImportance();
+ $aRecheckWords = preg_split('/\b/',$sQuery);
+ foreach($aRecheckWords as $i => $sWord)
+ {
+ if (!$sWord) unset($aRecheckWords[$i]);
+ }
foreach($aSearchResults as $iResNum => $aResult)
{
if (CONST_Search_AreaPolygons || true)
$aPointPolygon = $oDB->getRow($sSQL);
if (PEAR::IsError($aPointPolygon))
{
- var_dump($sSQL, $aPointPolygon);
- exit;
+ failInternalError("Could not get outline.", $sSQL, $aPointPolygon);
}
if ($aPointPolygon['place_id'])
{
//exit;
}
+ // Adjust importance for the number of exact string matches in the result
+ $aResult['importance'] = max(0.001,$aResult['importance']);
+ $iCountWords = 0;
+ $sAddress = $aResult['langaddress'];
+ foreach($aRecheckWords as $i => $sWord)
+ {
+ if (stripos($sAddress, $sWord)!==false) $iCountWords++;
+ }
+ $aResult['importance'] = $aResult['importance'] + $iCountWords;
+
//if (CONST_Debug) var_dump($aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']);
/*
if (isset($aClassType[$aResult['class'].':'.$aResult['type'].':'.$aResult['admin_level']]['importance'])
$aResult['foundorder'] = $iResNum;
$aSearchResults[$iResNum] = $aResult;
}
-
uasort($aSearchResults, 'byImportance');
//var_dump($aSearchResults);exit;