git.openstreetmap.org Git - nominatim.git/blob - lib/leakybucket.php
script for scraper blocking using apache log files
[nominatim.git] / lib / leakybucket.php
1 <?php
2
3         function getBucketMemcache()
4         {
5                 static $m;
6
7                 if (!CONST_ConnectionBucket_MemcacheServerAddress) return null;
8                 if (!isset($m))
9                 {
10                         $m = new Memcached();
11                         $m->addServer(CONST_ConnectionBucket_MemcacheServerAddress, CONST_ConnectionBucket_MemcacheServerPort);
12                 }
13                 return $m;
14         }
15
16         function doBucket($asKey, $iRequestCost, $iLeakPerSecond, $iThreshold)
17         {
18                 $m = getBucketMemcache();
19                 if (!$m) return 0;
20
21                 $iMaxVal = 0;
22                 $t = time();
23
24                 foreach($asKey as $sKey)
25                 {
26                         $aCurrentBlock = $m->get($sKey);
27                         if (!$aCurrentBlock)
28                         {
29                                 $aCurrentBlock = array($iRequestCost, $t, false);
30                         }
31                         else
32                         {
33                                 // add RequestCost
34                                 // remove leak * the time since the last request 
35                                 $aCurrentBlock[0] += $iRequestCost - ($t - $aCurrentBlock[1])*$iLeakPerSecond;
36                                 $aCurrentBlock[1] = $t;
37                         }
38
39                         if ($aCurrentBlock[0] <= 0)
40                         {
41                                 $m->delete($sKey);
42                         }
43                         else
44                         {
45                                 // If we have hit the threshold stop and record this to the block list
46                                 if ($aCurrentBlock[0] >= $iThreshold)
47                                 {
48                                         $aCurrentBlock[0] = $iThreshold;
49
50                                         // Make up to 10 attempts to record this to memcache (with locking to prevent conflicts)
51                                         $i = 10;
52                                         for($i = 0; $i < 10; $i++)
53                                         {
54                                                 $aBlockedList = $m->get('blockedList', null, $hCasToken);
55                                                 if (!$aBlockedList)
56                                                 {
57                                                         $aBlockedList = array();
58                                                         $m->add('blockedList', $aBlockedList);
59                                                         $aBlockedList = $m->get('blockedList', null, $hCasToken);
60                                                 }
61                                                 if (!isset($aBlockedList[$sKey]))
62                                                 {
63                                                         $aBlockedList[$sKey] = array(1, $t);
64                                                 }
65                                                 else
66                                                 {
67                                                         $aBlockedList[$sKey][0]++;
68                                                         $aBlockedList[$sKey][1] = $t;
69                                                 }
70                                                 if (sizeof($aBlockedList) > CONST_ConnectionBucket_MaxBlockList)
71                                                 {
72                                                         uasort($aBlockedList, 'byValue1');
73                                                         $aBlockedList = array_slice($aBlockedList, 0, CONST_ConnectionBucket_MaxBlockList);
74                                                 }
75                                                 $x = $m->cas($hCasToken, 'blockedList', $aBlockedList);
76                                                 if ($x) break;
77                                         }
78                                 }
79                                 // Only keep in memcache until the time it would have expired (to avoid clutering memcache)
80                                 $m->set($sKey, $aCurrentBlock, $t + 1 + $aCurrentBlock[0]/$iLeakPerSecond);
81                         }
82
83                         // Bucket result in the largest bucket we find
84                         $iMaxVal = max($iMaxVal, $aCurrentBlock[0]);
85                 }
86
87                 return $iMaxVal;
88         }
89
90         function isBucketSleeping($asKey)
91         {
92                 $m = getBucketMemcache();
93                 if (!$m) return false;
94
95                 foreach($asKey as $sKey)
96                 {
97                         $aCurrentBlock = $m->get($sKey);
98                         if ($aCurrentBlock[2]) return true;
99                 }
100                 return false;
101         }
102
103         function setBucketSleeping($asKey, $bVal)
104         {
105                 $m = getBucketMemcache();
106                 if (!$m) return false;
107
108                 $iMaxVal = 0;
109                 $t = time();
110
111                 foreach($asKey as $sKey)
112                 {
113                         $aCurrentBlock = $m->get($sKey);
114                         $aCurrentBlock[2] = $bVal;
115                         $m->set($sKey, $aCurrentBlock, $t + 1 + $aCurrentBlock[0]/CONST_ConnectionBucket_LeakRate);
116                 }
117                 return true;
118         }
119
120         function byValue1($a, $b)
121         {
122                 if ($a[1] == $b[1])
123                 {
124                         return 0;
125                 }
126                 return ($a[1] > $b[1]) ? -1 : 1;
127         }
128
129         function byLastBlockTime($a, $b)
130         {
131                 if ($a['lastBlockTimestamp'] == $b['lastBlockTimestamp'])
132                 {
133                         return 0;
134                 }
135                 return ($a['lastBlockTimestamp'] > $b['lastBlockTimestamp']) ? -1 : 1;
136         }
137
138         function getBucketBlocks()
139         {
140                 $m = getBucketMemcache();
141                 if (!$m) return null;
142                 $t = time();
143                 $aBlockedList = $m->get('blockedList', null, $hCasToken);
144                 if (!$aBlockedList) $aBlockedList = array();
145                 foreach($aBlockedList as $sKey => $aDetails)
146                 {
147                         $aCurrentBlock = $m->get($sKey);
148                         if (!$aCurrentBlock) $aCurrentBlock = array(0, $t);
149                         $iCurrentBucketSize = max(0, $aCurrentBlock[0] - ($t - $aCurrentBlock[1])*CONST_ConnectionBucket_LeakRate);
150                         $aBlockedList[$sKey] = array(
151                                 'totalBlocks' => $aDetails[0],
152                                 'lastBlockTimestamp' => $aDetails[1],
153                                 'isSleeping' => (isset($aCurrentBlock[2])?$aCurrentBlock[2]:false),
154                                 'currentBucketSize' => $iCurrentBucketSize,
155                                 'currentlyBlocked' => $iCurrentBucketSize + (CONST_ConnectionBucket_Cost_Reverse) >= CONST_ConnectionBucket_BlockLimit,
156                                 );
157                 }
158                 uasort($aBlockedList, 'byLastBlockTime');
159                 return $aBlockedList;
160         }
161
162         function clearBucketBlocks()
163         {
164                 $m = getBucketMemcache();
165                 if (!$m) return false;
166                 $m->delete('blockedList');
167                 return true;
168         }