WHITELIST = ''
# space-separated list of IPs manually blocked
BLACKLIST = ''
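+# user-agent prefixes that get blocked right away instead of being
+# moved to bulk mode (matched with startswith against the first 30
+# characters of the logged user agent, so longer entries never match)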
+UA_BLOCKLIST = ()
# time before a automatically blocked IP is allowed back
BLOCKCOOLOFF_PERIOD='1 hour'
BULKSHORT_LIMIT=2000
BLOCK_UPPER=19000
BLOCK_LOWER=4000
-BLOCK_LOADFAC=300
-BULK_LOADFAC=100
+BLOCK_LOADFAC=380
+BULK_LOADFAC=160
BULK_LOWER=1500
+MAX_BULK_IPS=85
#
# END OF DEFAULT SETTINGS
except IOError:
pass
-# determine current load
-fd = open("/proc/loadavg")
-avgload = int(float(fd.readline().split()[2]))
-fd.close()
-
# read the previous blocklist
WHITELIST = set(WHITELIST.split()) if WHITELIST else set()
prevblocks = []
except IOError:
pass #ignore non-existing file
-# current number of bulks
-numbulks = len(prevbulks)
-
-BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (numbulks - 27))
-BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
-
+# determine current system load (third field of /proc/loadavg, i.e. the 15-minute average)
+fd = open("/proc/loadavg")
+avgload = int(float(fd.readline().split()[2]))
+fd.close()
+# DB load: number of queries logged during the last minute divided by 60
+# (average queries per second)
conn = psycopg2.connect('dbname=nominatim')
cur = conn.cursor()
+cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'")
+dbload = int(cur.fetchone()[0])
+
+BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
+BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
+if len(prevbulks) > MAX_BULK_IPS:
+ BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10)
# get the new block candidates
cur.execute("""
- SELECT ipaddress, max(count) FROM
+ SELECT ipaddress, max(count), max(ua) FROM
((SELECT * FROM
- (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log
+ (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i
WHERE count > %s)
UNION
- (SELECT ipaddress, count * 4 FROM
- (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log
+ (SELECT ipaddress, count * 3, ua FROM
+ (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i
WHERE count > %s)) as o
GROUP BY ipaddress
bulkips = {}
emergencyblocks = []
+useragentblocks = []
for c in cur:
if c[0] not in WHITELIST and c[0] not in BLACKLIST:
- if c[1] > BLOCK_UPPER and c[0] not in prevbulks:
+ # check for user agents that receive an immediate block:
+ # an empty user agent or one starting with an entry of UA_BLOCKLIST
+ missing_agent = not c[2]
+ if not missing_agent:
+ for ua in UA_BLOCKLIST:
+ if c[2].startswith(ua):
+ missing_agent = True
+ break
+ if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks:
newblocks.add(c[0])
- if c[0] not in prevblocks:
+ if missing_agent:
+ useragentblocks.append(c[0])
+ else:
emergencyblocks.append(c[0])
else:
bulkips[c[0]] = c[1]
deblockcandidates.add(ip)
for ip in prevbulks:
- if ip in bulkips:
- if bulkips[ip] > BLOCK_LIMIT:
- newblocks.add(ip)
- newlyblocked.append(ip)
+ if ip not in newblocks:
+ if ip in bulkips:
+ if bulkips[ip] > BLOCK_LIMIT:
+ newblocks.add(ip)
+ newlyblocked.append(ip)
+ else:
+ newbulks.add(ip)
+ del bulkips[ip]
else:
- newbulks.add(ip)
- del bulkips[ip]
- else:
- debulkcandidates.add(ip)
+ debulkcandidates.add(ip)
# cross-check deblock candidates
if deblockcandidates:
fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys())))
if emergencyblocks:
fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks)))
+if useragentblocks:
+ fd.write(logstr % (' ua block:', ', '.join(useragentblocks)))
if newlyblocked:
fd.write(logstr % ('new block:', ', '.join(newlyblocked)))
fd.close()