git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
author Sarah Hoffmann <lonvia@denofr.de>
Wed, 12 Dec 2012 17:55:03 +0000 (18:55 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Wed, 12 Dec 2012 17:55:03 +0000 (18:55 +0100)
Conflicts:
lib/init-website.php

1  2 
lib/init-website.php
settings/settings.php
website/reverse.php
website/search.php

diff --combined lib/init-website.php
index 6db2ac374d995837b2aac2dcb9861ec6922945b6,8603e309f426f888bb8bd1834ee8a6b86be75736..ef8237fbfa7d4fbc6d968fc243d3978ff18c169b
@@@ -1,34 -1,53 +1,51 @@@
  <?php
 -      require_once('init.php');
 +    require_once('init.php');
  
 -      if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
 -      {
 -              echo "Closed for re-indexing...";
 -              exit;
 -      }
 +    header('Content-type: text/html; charset=utf-8');
 +
 +    // check blocks in place for external servers
 +    if (strpos($_SERVER["REMOTE_ADDR"],'193.63.75.') !== 0 &&
 +        strpos(CONST_WhitelistedIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
 +    {
  
-         if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
-         {
-             header('HTTP/1.0 403 Forbidden');
-             header('Content-type: text/html; charset=utf-8');
-             echo "<html><body><h1>Access blocked</h1>";
-             echo "Your IP has been blocked for overusing OpenStreetMap's volunteer-run servers.<br> \n";
-             echo 'Please consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">Nominatim usage policy</a> for more information.';
-             echo "\n</body></html>\n";
-             exit;
-         }
+       $aBucketKeys = array();
  
-         $sTempBlockedIP = file_get_contents(CONST_IPBanFile);
-         if (preg_match('/\b'.$_SERVER["REMOTE_ADDR"].'\b/', $sTempBlockedIP))
-         {
-             header('HTTP/1.0 503 Service Temporarily Unavailable');
-             header('Content-type: text/html; charset=utf-8');
-             echo "<html><body><h1>Access blocked</h1>";
-             echo "Your IP has been blocked temporarily for overusing OpenStreetMap's volunteer-run servers. This ban will be lifted automatically in a while. To avoid further blocks, please read the<br> \n";
-             echo '<a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">Nominatim usage policy</a> carefully before you continue to use this service.';
-             echo "\n</body></html>\n";
-             exit;
-         }
+       if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST)));
+       if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"];
+       if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"];
  
-     }
+       $fBucketVal = doBucket($aBucketKeys, 
+                       (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(),
+                       CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
+       if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit)
+       {
+               $m = getBucketMemcache();
+               $iCurrentSleeping = $m->increment('sleepCounter');
+               if (false === $iCurrentSleeping)
+               {
+                       $m->add('sleepCounter', 0);
+                       $iCurrentSleeping = $m->increment('sleepCounter');
+               }
+               if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys))
+               {
+                       // Too many threads sleeping already.  This becomes a hard block.
+                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
+               }
+               else
+               {
+                       setBucketSleeping($aBucketKeys, true);
+                       sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate);
+                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
+                       setBucketSleeping($aBucketKeys, false);
+               }
+               $m->decrement('sleepCounter');
+       }
+       if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit)
+       {
+               echo "Your IP has been blocked. \n";
+               echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n";
+               echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n";
+               exit;
+       }
 -
 -      header('Content-type: text/html; charset=utf-8');
 -
diff --combined settings/settings.php
index 8ebd81ee6c326b84f8b85999a975e1d2b8653840,58463a516c2a28cbe147a92e7f92e56a186f1abd..d82a33376f11c315f6ff4baf57580b5b17775c04
        @define('CONST_Osm2pgsql_Binary', CONST_BasePath.'/osm2pgsql/osm2pgsql');
        @define('CONST_Osmosis_Binary', '/usr/bin/osmosis');
  
+       // Connection buckets to rate limit people being nasty
+       @define('CONST_ConnectionBucket_MemcacheServerAddress', false);
+       @define('CONST_ConnectionBucket_MemcacheServerPort', 11211);
+       @define('CONST_ConnectionBucket_MaxBlockList', 100);
+       @define('CONST_ConnectionBucket_LeakRate', 1);
+       @define('CONST_ConnectionBucket_BlockLimit', 10);
+       @define('CONST_ConnectionBucket_WaitLimit', 6);
+       @define('CONST_ConnectionBucket_MaxSleeping', 10);
+       @define('CONST_ConnectionBucket_Cost_Reverse', 1);
+       @define('CONST_ConnectionBucket_Cost_Search', 2);
+       @define('CONST_ConnectionBucket_Cost_Details', 3);
+       // Override this function to add an adjustment factor to the cost
+       // based on server load. e.g. getBlockingProcesses
+       if (!function_exists('user_busy_cost'))
+       {
+               function user_busy_cost()
+               {
+                       return 0;
+               }
+       }
        // Website settings
 -      @define('CONST_ClosedForIndexing', false);
 -      @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
 +      @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
 +      @define('CONST_WhitelistedIPs', '');
 +      @define('CONST_BlockedUserAgents', '');
 +      @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
  
 -      @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
 +      @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
  
 -      @define('CONST_Default_Language', 'xx');
 +      @define('CONST_Default_Language', 'en');
        @define('CONST_Default_Lat', 20.0);
        @define('CONST_Default_Lon', 0.0);
        @define('CONST_Default_Zoom', 2);
  
        @define('CONST_Search_AreaPolygons_Enabled', true);
        @define('CONST_Search_AreaPolygons', true);
-       @define('CONST_Search_TryDroppedAddressTerms', false);
  
        @define('CONST_Suggestions_Enabled', false);
  
+       @define('CONST_Search_TryDroppedAddressTerms', false);
        // Set to zero to disable polygon output
        @define('CONST_PolygonOutput_MaximumTypes', 1);
  
diff --combined website/reverse.php
index 810be3c491fd2c96a8de4415a0b64c1c93b94a90,d5a36998efd4143a7513dc6f5f8a30797f9e4aab..2c62948fbb7a20d1d89612ee28ba0a52b266a5cc
@@@ -1,23 -1,9 +1,25 @@@
  <?php
+       @define('CONST_ConnectionBucket_PageType', 'Reverse');
        require_once(dirname(dirname(__FILE__)).'/lib/init-website.php');
        require_once(CONST_BasePath.'/lib/log.php');
  
 +    if (preg_match(CONST_BlockedUserAgents, $_SERVER["HTTP_USER_AGENT"]) > 0)
 +    {
 +        $fLoadAvg = getLoadAverage();
 +        if ($fLoadAvg >= CONST_BlockReverseMaxLoad) {
 +            header('HTTP/1.0 403 Forbidden');
 +            header('Content-type: text/html; charset=utf-8');
 +              echo "<html><body><h1>App temporarily blocked</h1>";
 +            echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim ";
 +            echo "geolocation service due to high server load.";
 +            echo "\n</body></html>\n";
 +            exit;
 +        }
 +
 +    }
 +
 +
          if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
          {
                  $fLoadAvg = getLoadAverage();
diff --combined website/search.php
index b7a59613e3073f75de6b4fea6ee8776c341ef402,5ba2571bb5a221236db542b92312abd6e3d2951d..24f21e12f31d1690aca25e4d3064d3e9b7c136a2
@@@ -1,8 -1,11 +1,11 @@@
  <?php
+       @define('CONST_ConnectionBucket_PageType', 'Search');
        require_once(dirname(dirname(__FILE__)).'/lib/init-website.php');
        require_once(CONST_BasePath.'/lib/log.php');
  
        ini_set('memory_limit', '200M');
        $oDB =& getDB();
  
        // Display defaults
        $iOffset = isset($_GET['offset'])?(int)$_GET['offset']:0;
        $iMaxRank = 20;
        if ($iFinalLimit > 50) $iFinalLimit = 50;
-     $iLimit = $iFinalLimit + min($iFinalLimit, 10);
+       $iLimit = $iFinalLimit + min($iFinalLimit, 10);
        $iMinAddressRank = 0;
        $iMaxAddressRank = 30;
+       $sAllowedTypesSQLList = false;
  
        // Format for output
        if (isset($_GET['format']) && ($_GET['format'] == 'html' || $_GET['format'] == 'xml' || $_GET['format'] == 'json' ||  $_GET['format'] == 'jsonv2'))
@@@ -67,7 -71,6 +71,7 @@@
        if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
 +      if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true;
  
        $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
  
                                array('postalcode', 16, 25),
                                );
        $aStructuredQuery = array();
+       $sAllowedTypesSQLList = '';
        foreach($aStructuredOptions as $aStructuredOption)
        {
                loadStructuredAddressElement($aStructuredQuery, $iMinAddressRank, $iMaxAddressRank, $_GET, $aStructuredOption[0], $aStructuredOption[1], $aStructuredOption[2]);
        }
        if (sizeof($aStructuredQuery) > 0) {
                $sQuery = join(', ', $aStructuredQuery);
+               $sAllowedTypesSQLList = '(\'place\',\'boundary\')';
        }
  
        if ($sQuery)
                                                        if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL";
                                                        if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc";
  
-                                                       $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.92-(search_rank::float/33) else importance end';
+                                                       $sImportanceSQL = 'case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end';
  
                                                        if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
                                                        if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
                                                                else
                                                                        $sSQL .= " limit ".$iLimit;
  
 -                                                              if (CONST_Debug) { var_dump($sSQL); }
 +                                                              if (CONST_Debug) var_dump($sSQL);
 +                                                              $iStartTime = time();
                                                                $aViewBoxPlaceIDs = $oDB->getAll($sSQL);
                                                                if (PEAR::IsError($aViewBoxPlaceIDs))
                                                                {
                                                                        failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
                                                                }
 +                                                              if (time() - $iStartTime > 60) {
 +                                                                      file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND);
 +                                                              }
 +
  //var_dump($aViewBoxPlaceIDs);
                                                                // Did we have an viewbox matches?
                                                                $aPlaceIDs = array();
                                        $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
                                        $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
  //                                    $sSQL .= $sOrderSQL." as porder, ";
-                                       $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+                                       $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
                                        $sSQL .= "from placex where place_id in ($sPlaceIDs) ";
                                        $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
+                                       if ($sAllowedTypesSQLList) $sSQL .= "and placex.class in $sAllowedTypesSQLList ";
                                        $sSQL .= "and linked_place_id is null ";
                                        $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
                                        if (!$bDeDupe) $sSQL .= ",place_id";
                                        $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
                                        $sSQL .= "avg(ST_X(ST_Centroid(geometry))) as lon,avg(ST_Y(ST_Centroid(geometry))) as lat, ";
  //                                    $sSQL .= $sOrderSQL." as porder, ";
-                                       $sSQL .= "coalesce(importance,0.9-(rank_search::float/30)) as importance ";
+                                       $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance ";
                                        $sSQL .= "from placex where place_id in ($sPlaceIDs) ";
                                        $sSQL .= "and placex.rank_address between $iMinAddressRank and $iMaxAddressRank ";
                                        $sSQL .= "group by osm_type,osm_id,class,type,admin_level,rank_search,rank_address,country_code,importance";
        }
        uasort($aSearchResults, 'byImportance');
  
- //var_dump($aSearchResults);exit;
-       
        $aOSMIDDone = array();
        $aClassTypeNameDone = array();
        $aToFilter = $aSearchResults;
                logEnd($oDB, $hLog, sizeof($aToFilter));
        }
        $sMoreURL = CONST_Website_BaseURL.'search?format='.urlencode($sOutputFormat).'&exclude_place_ids='.join(',',$aExcludePlaceIDs);
-       $sMoreURL .= '&accept-language='.$_SERVER["HTTP_ACCEPT_LANGUAGE"];
+       if (isset($_SERVER["HTTP_ACCEPT_LANGUAGE"])) $sMoreURL .= '&accept-language='.$_SERVER["HTTP_ACCEPT_LANGUAGE"];
        if ($bShowPolygons) $sMoreURL .= '&polygon=1';
        if ($bShowAddressDetails) $sMoreURL .= '&addressdetails=1';
        if (isset($_GET['viewbox']) && $_GET['viewbox']) $sMoreURL .= '&viewbox='.urlencode($_GET['viewbox']);