X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/ca11fc90e6e646335045e087f03d824236c798f2..ccaf7fa47fc7b02927ca4fe0f717e9e5b725ab9d:/utils/cron_banip.py diff --git a/utils/cron_banip.py b/utils/cron_banip.py index 55a7b295..53f5e5f1 100755 --- a/utils/cron_banip.py +++ b/utils/cron_banip.py @@ -52,6 +52,9 @@ LOGFILE= BASEDIR + '/log/restricted_ip.log' WHITELIST = '' # space-separated list of IPs manually blocked BLACKLIST = '' +# user-agents that should be blocked from bulk mode +# (matched with startswith) +UA_BLOCKLIST = () # time before a automatically blocked IP is allowed back BLOCKCOOLOFF_PERIOD='1 hour' @@ -65,6 +68,7 @@ BLOCK_LOWER=4000 BLOCK_LOADFAC=380 BULK_LOADFAC=160 BULK_LOWER=1500 +MAX_BULK_IPS=85 # # END OF DEFAULT SETTINGS @@ -106,19 +110,26 @@ cur = conn.cursor() cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") dbload = int(cur.fetchone()[0]) -BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 70)) +BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) +if len(prevbulks) > MAX_BULK_IPS: + BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10) +# if the bulk pool is still empty, clients will be faster, avoid having +# them blocked in this case +if len(prevbulks) < 10: + BLOCK_LIMIT = 2*BLOCK_UPPER + # get the new block candidates cur.execute(""" - SELECT ipaddress, max(count) FROM + SELECT ipaddress, max(count), max(ua) FROM ((SELECT * FROM - (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i WHERE count > %s) UNION - (SELECT ipaddress, count * 4 FROM - (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log + (SELECT ipaddress, count * 3, ua FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i WHERE count > %s)) as o GROUP BY ipaddress @@ -126,12 +137,22 @@ cur.execute(""" bulkips = {} emergencyblocks = [] +useragentblocks = [] for c in cur: if c[0] not in WHITELIST and c[0] not in BLACKLIST: - if c[1] > BLOCK_UPPER and c[0] not in prevbulks: + # check for user agents that receive an immediate block + missing_agent = not c[2] + if not missing_agent: + for ua in UA_BLOCKLIST: + if c[2].startswith(ua): + missing_agent = True + break + if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks: newblocks.add(c[0]) - if c[0] not in prevblocks: + if missing_agent: + useragentblocks.append(c[0]) + else: emergencyblocks.append(c[0]) else: bulkips[c[0]] = c[1] @@ -152,15 +173,16 @@ for ip in prevblocks: deblockcandidates.add(ip) for ip in prevbulks: - if ip in bulkips: - if bulkips[ip] > BLOCK_LIMIT: - newblocks.add(ip) - newlyblocked.append(ip) + if ip not in newblocks: + if ip in bulkips: + if bulkips[ip] > BLOCK_LIMIT: + newblocks.add(ip) + newlyblocked.append(ip) + else: + newbulks.add(ip) + del bulkips[ip] else: - newbulks.add(ip) - del bulkips[ip] - else: - debulkcandidates.add(ip) + debulkcandidates.add(ip) # cross-check deblock candidates if deblockcandidates: @@ -214,6 +236,8 @@ if bulkips: fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) if emergencyblocks: fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) +if useragentblocks: + fd.write(logstr % (' ua block:', ', '.join(useragentblocks))) if newlyblocked: fd.write(logstr % ('new block:', ', '.join(newlyblocked))) fd.close()