From: Sarah Hoffmann Date: Wed, 12 Dec 2012 17:55:03 +0000 (+0100) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~663 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/7c56f24c1f83e9d19c9df723bb86335598add902?hp=fb141e027a459e4e32d0ad71803ba2ac90d1d6f1 Merge remote-tracking branch 'upstream/master' Conflicts: lib/init-website.php --- diff --git a/lib/init-website.php b/lib/init-website.php index 6db2ac37..ef8237fb 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -8,27 +8,44 @@ strpos(CONST_WhitelistedIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false) { - if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) - { - header('HTTP/1.0 403 Forbidden'); - header('Content-type: text/html; charset=utf-8'); - echo "

Access blocked

"; - echo "Your IP has been blocked for overusing OpenStreetMap's volunteer-run servers.
\n"; - echo 'Please consult the Nominatim usage policy for more information.'; - echo "\n\n"; - exit; - } + $aBucketKeys = array(); - $sTempBlockedIP = file_get_contents(CONST_IPBanFile); - if (preg_match('/\b'.$_SERVER["REMOTE_ADDR"].'\b/', $sTempBlockedIP)) - { - header('HTTP/1.0 503 Service Temporarily Unavailable'); - header('Content-type: text/html; charset=utf-8'); - echo "

Access blocked

"; - echo "Your IP has been blocked temporarily for overusing OpenStreetMap's volunteer-run servers. This ban will be lifted automatically in a while. To avoid further blocks, please read the
\n"; - echo 'Nominatim usage policy carefully before you continue to use this service.'; - echo "\n\n"; - exit; - } + if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST))); + if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"]; + if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"]; - } + $fBucketVal = doBucket($aBucketKeys, + (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(), + CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); + + if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit) + { + $m = getBucketMemcache(); + $iCurrentSleeping = $m->increment('sleepCounter'); + if (false === $iCurrentSleeping) + { + $m->add('sleepCounter', 0); + $iCurrentSleeping = $m->increment('sleepCounter'); + } + if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys)) + { + // Too many threads sleeping already. This becomes a hard block. + $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); + } + else + { + setBucketSleeping($aBucketKeys, true); + sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate); + $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); + setBucketSleeping($aBucketKeys, false); + } + $m->decrement('sleepCounter'); + } + + if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit) + { + echo "Your IP has been blocked. \n"; + echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n"; + echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n"; + exit; + } diff --git a/lib/init.php b/lib/init.php index 15e38a5b..67efdbfd 100644 --- a/lib/init.php +++ b/lib/init.php @@ -4,6 +4,7 @@ require_once(CONST_BasePath.'/settings/settings.php'); require_once(CONST_BasePath.'/lib/lib.php'); + require_once(CONST_BasePath.'/lib/leakybucket.php'); require_once(CONST_BasePath.'/lib/db.php'); if (get_magic_quotes_gpc()) diff --git a/lib/leakybucket.php b/lib/leakybucket.php new file mode 100644 index 00000000..6d4e8f29 --- /dev/null +++ b/lib/leakybucket.php @@ -0,0 +1,168 @@ +addServer(CONST_ConnectionBucket_MemcacheServerAddress, CONST_ConnectionBucket_MemcacheServerPort); + } + return $m; + } + + function doBucket($asKey, $iRequestCost, $iLeakPerSecond, $iThreshold) + { + $m = getBucketMemcache(); + if (!$m) return 0; + + $iMaxVal = 0; + $t = time(); + + foreach($asKey as $sKey) + { + $aCurrentBlock = $m->get($sKey); + if (!$aCurrentBlock) + { + $aCurrentBlock = array($iRequestCost, $t, false); + } + else + { + // add RequestCost + // remove leak * the time since the last request + $aCurrentBlock[0] += $iRequestCost - ($t - $aCurrentBlock[1])*$iLeakPerSecond; + $aCurrentBlock[1] = $t; + } + + if ($aCurrentBlock[0] <= 0) + { + $m->delete($sKey); + } + else + { + // If we have hit the threshold stop and record this to the block list + if ($aCurrentBlock[0] >= $iThreshold) + { + $aCurrentBlock[0] = $iThreshold; + + // Make up to 10 attempts to record this to memcache (with locking to prevent conflicts) + $i = 10; + for($i = 0; $i < 10; $i++) + { + $aBlockedList = $m->get('blockedList', null, $hCasToken); + if (!$aBlockedList) + { + $aBlockedList = array(); + $m->add('blockedList', $aBlockedList); + $aBlockedList = $m->get('blockedList', null, $hCasToken); + } + if (!isset($aBlockedList[$sKey])) + { + $aBlockedList[$sKey] = array(1, $t); + } + else + { + $aBlockedList[$sKey][0]++; + $aBlockedList[$sKey][1] = $t; + } + if (sizeof($aBlockedList) > CONST_ConnectionBucket_MaxBlockList) + { + uasort($aBlockedList, 'byValue1'); + $aBlockedList = array_slice($aBlockedList, 0, CONST_ConnectionBucket_MaxBlockList); + } + $x = $m->cas($hCasToken, 'blockedList', $aBlockedList); + if ($x) break; + } + } + // Only keep in memcache until the time it would have expired (to avoid clutering memcache) + $m->set($sKey, $aCurrentBlock, $t + 1 + $aCurrentBlock[0]/$iLeakPerSecond); + } + + // Bucket result in the largest bucket we find + $iMaxVal = max($iMaxVal, $aCurrentBlock[0]); + } + + return $iMaxVal; + } + + function isBucketSleeping($asKey) + { + $m = getBucketMemcache(); + if (!$m) return false; + + foreach($asKey as $sKey) + { + $aCurrentBlock = $m->get($sKey); + if ($aCurrentBlock[2]) return true; + } + return false; + } + + function setBucketSleeping($asKey, $bVal) + { + $m = getBucketMemcache(); + if (!$m) return false; + + $iMaxVal = 0; + $t = time(); + + foreach($asKey as $sKey) + { + $aCurrentBlock = $m->get($sKey); + $aCurrentBlock[2] = $bVal; + $m->set($sKey, $aCurrentBlock, $t + 1 + $aCurrentBlock[0]/CONST_ConnectionBucket_LeakRate); + } + return true; + } + + function byValue1($a, $b) + { + if ($a[1] == $b[1]) + { + return 0; + } + return ($a[1] > $b[1]) ? -1 : 1; + } + + function byLastBlockTime($a, $b) + { + if ($a['lastBlockTimestamp'] == $b['lastBlockTimestamp']) + { + return 0; + } + return ($a['lastBlockTimestamp'] > $b['lastBlockTimestamp']) ? -1 : 1; + } + + function getBucketBlocks() + { + $m = getBucketMemcache(); + if (!$m) return null; + $t = time(); + $aBlockedList = $m->get('blockedList', null, $hCasToken); + if (!$aBlockedList) $aBlockedList = array(); + foreach($aBlockedList as $sKey => $aDetails) + { + $aCurrentBlock = $m->get($sKey); + if (!$aCurrentBlock) $aCurrentBlock = array(0, $t); + $iCurrentBucketSize = max(0, $aCurrentBlock[0] - ($t - $aCurrentBlock[1])*CONST_ConnectionBucket_LeakRate); + $aBlockedList[$sKey] = array( + 'totalBlocks' => $aDetails[0], + 'lastBlockTimestamp' => $aDetails[1], + 'isSleeping' => (isset($aCurrentBlock[2])?$aCurrentBlock[2]:false), + 'currentBucketSize' => $iCurrentBucketSize, + 'currentlyBlocked' => $iCurrentBucketSize + (CONST_ConnectionBucket_Cost_Reverse) >= CONST_ConnectionBucket_BlockLimit, + ); + } + uasort($aBlockedList, 'byLastBlockTime'); + return $aBlockedList; + } + + function clearBucketBlocks() + { + $m = getBucketMemcache(); + if (!$m) return false; + $m->delete('blockedList'); + return true; + } diff --git a/lib/lib.php b/lib/lib.php index f34b09b7..f0f7eceb 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -60,7 +60,7 @@ { $sLoadAverage = file_get_contents('/proc/loadavg'); $aLoadAverage = explode(' ',$sLoadAverage); - return (int)$aLoadAverage[0]; + return (float)$aLoadAverage[0]; } function getProcessorCount() @@ -116,15 +116,17 @@ } $aLanguages = array(); - if (preg_match_all('/(([a-z]{1,8})(-[a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $aLanguagesParse, PREG_SET_ORDER)) - { - foreach($aLanguagesParse as $iLang => $aLanguage) + if (isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])) { + if (preg_match_all('/(([a-z]{1,8})(-[a-z]{1,8})?)\s*(;\s*q\s*=\s*(1|0\.[0-9]+))?/i', $_SERVER['HTTP_ACCEPT_LANGUAGE'], $aLanguagesParse, PREG_SET_ORDER)) { - $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100); - if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10; + foreach($aLanguagesParse as $iLang => $aLanguage) + { + $aLanguages[$aLanguage[1]] = isset($aLanguage[5])?(float)$aLanguage[5]:1 - ($iLang/100); + if (!isset($aLanguages[$aLanguage[2]])) $aLanguages[$aLanguage[2]] = $aLanguages[$aLanguage[1]]/10; + } + arsort($aLanguages); } - arsort($aLanguages); - } + } if (!sizeof($aLanguages)) $aLanguages = array(CONST_Default_Language=>1); foreach($aLanguages as $sLangauge => $fLangauagePref) { @@ -295,19 +297,22 @@ function getClassTypes() { return array( + 'boundary:administrative:1' => array('label'=>'Continent','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'boundary:administrative:2' => array('label'=>'Country','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'place:country' => array('label'=>'Country','frequency'=>0,'icon'=>'poi_boundary_administrative','defzoom'=>6, 'defdiameter' => 15,), + 'boundary:administrative:3' => array('label'=>'State','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'boundary:administrative:4' => array('label'=>'State','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'place:state' => array('label'=>'State','frequency'=>0,'icon'=>'poi_boundary_administrative','defzoom'=>8, 'defdiameter' => 5.12,), 'boundary:administrative:5' => array('label'=>'State District','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'boundary:administrative:6' => array('label'=>'County','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), + 'boundary:administrative:7' => array('label'=>'County','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'place:county' => array('label'=>'County','frequency'=>108,'icon'=>'poi_boundary_administrative','defzoom'=>10, 'defdiameter' => 1.28,), 'boundary:administrative:8' => array('label'=>'City','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'place:city' => array('label'=>'City','frequency'=>66,'icon'=>'poi_place_city','defzoom'=>12, 'defdiameter' => 0.32,), 'boundary:administrative:9' => array('label'=>'City District','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'boundary:administrative:10' => array('label'=>'Suburb','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'boundary:administrative:11' => array('label'=>'Neighbourhood','frequency'=>0,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), - 'place:region' => array('label'=>'Region','frequency'=>0,'icon'=>'poi_boundary_administrative','defzoom'=>8, 'defdiameter' => 5.12,), + 'place:region' => array('label'=>'Region','frequency'=>0,'icon'=>'poi_boundary_administrative','defzoom'=>8, 'defdiameter' => 0.04,), 'place:island' => array('label'=>'Island','frequency'=>288,'icon'=>'','defzoom'=>11, 'defdiameter' => 0.64,), 'boundary:administrative' => array('label'=>'Administrative','frequency'=>413,'icon'=>'poi_boundary_administrative', 'defdiameter' => 0.32,), 'place:town' => array('label'=>'Town','frequency'=>1497,'icon'=>'poi_place_town','defzoom'=>14, 'defdiameter' => 0.08,), @@ -460,7 +465,7 @@ 'leisure:pitch' => array('label'=>'Pitch','frequency'=>762,'icon'=>'',), 'highway:unsurfaced' => array('label'=>'Unsurfaced','frequency'=>492,'icon'=>'',), - 'historic:ruins' => array('label'=>'Ruins','frequency'=>483,'icon'=>'shopping_jewelry',), + 'historic:ruins' => array('label'=>'Ruins','frequency'=>483,'icon'=>'tourist_ruin',), 'amenity:college' => array('label'=>'College','frequency'=>473,'icon'=>'education_school',), 'historic:monument' => array('label'=>'Monument','frequency'=>470,'icon'=>'tourist_monument',), 'railway:subway' => array('label'=>'Subway','frequency'=>385,'icon'=>'',), @@ -590,9 +595,9 @@ 'railway:disused_station' => array('label'=>'Disused Station','frequency'=>114,'icon'=>'',), 'railway:abandoned' => array('label'=>'Abandoned','frequency'=>641,'icon'=>'',), 'railway:disused' => array('label'=>'Disused','frequency'=>72,'icon'=>'',), - ); + ); } - + function getClassTypesWithImportance() { $aOrders = getClassTypes(); @@ -603,8 +608,6 @@ } return $aOrders; } - - function javascript_renderData($xVal) { @@ -692,6 +695,7 @@ $sSQL = "select *,get_name_by_language(name,$sLanguagePrefArraySQL) as localname from get_addressdata($iPlaceID)"; IF (!$bRaw) $sSQL .= " WHERE isaddress OR type = 'country_code'"; $sSQL .= " order by rank_address desc,isaddress desc"; + $aAddressLines = $oDB->getAll($sSQL); if (PEAR::IsError($aAddressLines)) { @@ -708,16 +712,16 @@ $aTypeLabel = false; if (isset($aClassType[$aLine['class'].':'.$aLine['type'].':'.$aLine['admin_level']])) $aTypeLabel = $aClassType[$aLine['class'].':'.$aLine['type'].':'.$aLine['admin_level']]; elseif (isset($aClassType[$aLine['class'].':'.$aLine['type']])) $aTypeLabel = $aClassType[$aLine['class'].':'.$aLine['type']]; + elseif (isset($aClassType['boundary:administrative:'.((int)($aLine['rank_address']/2))])) $aTypeLabel = $aClassType['boundary:administrative:'.((int)($aLine['rank_address']/2))]; else $aTypeLabel = array('simplelabel'=>$aLine['class']); - if ($aTypeLabel && ($aLine['localname'] || $aLine['housenumber'])) + if ($aTypeLabel && ((isset($aLine['localname']) && $aLine['localname']) || (isset($aLine['housenumber']) && $aLine['housenumber']))) { $sTypeLabel = strtolower(isset($aTypeLabel['simplelabel'])?$aTypeLabel['simplelabel']:$aTypeLabel['label']); $sTypeLabel = str_replace(' ','_',$sTypeLabel); if (!isset($aAddress[$sTypeLabel]) && $aLine['localname']) $aAddress[$sTypeLabel] = $aLine['localname']?$aLine['localname']:$aLine['housenumber']; } } -//var_dump($aAddress); -//exit; + return $aAddress; $aHouseNumber = $oDB->getRow('select housenumber, get_name_by_language(name,ARRAY[\'addr:housename\']) as housename,rank_search,postcode from placex where place_id = '.$iPlaceID); @@ -916,4 +920,3 @@ } return true; } - diff --git a/lib/template/address-json.php b/lib/template/address-json.php index c05bce29..0a1d074a 100644 --- a/lib/template/address-json.php +++ b/lib/template/address-json.php @@ -3,7 +3,7 @@ if (!sizeof($aPlace)) { - if ($sError) + if (isset($sError)) $aFilteredPlaces['error'] = $sError; else $aFilteredPlaces['error'] = 'Unable to geocode'; diff --git a/lib/template/address-jsonv2.php b/lib/template/address-jsonv2.php index 4e751a0d..00f8750d 100644 --- a/lib/template/address-jsonv2.php +++ b/lib/template/address-jsonv2.php @@ -3,7 +3,7 @@ if (!sizeof($aPlace)) { - if ($sError) + if (isset($sError)) $aFilteredPlaces['error'] = $sError; else $aFilteredPlaces['error'] = 'Unable to geocode'; diff --git a/lib/template/address-xml.php b/lib/template/address-xml.php index 75f69e15..fcd90b85 100644 --- a/lib/template/address-xml.php +++ b/lib/template/address-xml.php @@ -14,7 +14,7 @@ if (!sizeof($aPlace)) { - if ($sError) + if (isset($sError)) echo "$sError"; else echo "Unable to geocode"; diff --git a/lib/template/details-html.php b/lib/template/details-html.php index 4cacb307..e2799539 100644 --- a/lib/template/details-html.php +++ b/lib/template/details-html.php @@ -9,7 +9,7 @@