X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/3b1f9b8cf9330da559fc37459b3ecdb00ab20e19..2cb5e7b0cc7225daa81ca872449370727bbef07b:/utils/cron_ipanalyse.py diff --git a/utils/cron_ipanalyse.py b/utils/cron_ipanalyse.py index b344a849..1ca2267f 100755 --- a/utils/cron_ipanalyse.py +++ b/utils/cron_ipanalyse.py @@ -9,6 +9,7 @@ import re import os import sys +import subprocess from datetime import datetime, timedelta from collections import defaultdict @@ -33,6 +34,8 @@ UA_BLOCKLIST = () BLOCKCOOLOFF_DELTA=timedelta(hours=1) # quiet time before an IP is released from the bulk pool BULKCOOLOFF_DELTA=timedelta(minutes=15) +# time to check if new accesses appear despite being blocked +BLOCKCHECK_DELTA=timedelta(minutes=1) BULKLONG_LIMIT=8000 BULKSHORT_LIMIT=2000 @@ -84,12 +87,16 @@ class LogEntry: if qp[0] == 'OPTIONS': self.request = None else: - if '/search' in qp[1]: + if '/?' in qp[1]: + self.request = 'S' + elif '/search' in qp[1]: self.request = 'S' elif '/reverse' in qp[1]: self.request = 'R' elif '/details' in qp[1]: self.request = 'D' + elif '/lookup' in qp[1]: + self.request = 'L' else: self.request = None self.query = e['query'] @@ -220,6 +227,7 @@ class IPstats: self.short_api = 0 self.long_total = 0 self.long_api = 0 + self.block_total = 0 self.bad_ua = False def add_long(self, logentry): @@ -236,16 +244,22 @@ class IPstats: self.short_api += 1 self.add_long(logentry) + def add_block(self, logentry): + self.block_total += 1 + + def ignores_warnings(self): + return self.block_total > 5 + def new_state(self, was_blocked, was_bulked): if was_blocked: # deblock only if the IP has been really quiet # (properly catches the ones that simply ignore the HTTP error) - return None if self.long_total < 5 else 'block' + return None if self.long_total < 20 else 'block' if self.long_api > BLOCK_UPPER or self.short_api > BLOCK_UPPER / 3: # client totally overdoing it return 'block' if was_bulked: - if self.short_total < 5: + if self.short_total < 20: # client has stopped, debulk return None if self.long_api > BLOCK_LIMIT or self.short_api > BLOCK_LIMIT / 3: @@ -254,8 +268,8 @@ class IPstats: return 'bulk' if self.long_api > BULKLONG_LIMIT or self.short_api > BULKSHORT_LIMIT: - if self.bad_ua: - return 'uablock' # bad useragent + #if self.bad_ua: + # return 'uablock' # bad useragent return 'bulk' return None @@ -282,6 +296,7 @@ if __name__ == '__main__': bl = BlockList() shortstart = dt + BLOCKCOOLOFF_DELTA - BULKCOOLOFF_DELTA + blockstart = dt + BLOCKCOOLOFF_DELTA - BLOCKCHECK_DELTA notlogged = bl.whitelist | bl.blacklist stats = defaultdict(IPstats) @@ -298,6 +313,8 @@ if __name__ == '__main__': stats[l.ip].add_short(l) if l.request is not None and l.retcode == 200: total200 += 1 + if l.date > blockstart and l.retcode in (403, 429): + stats[l.ip].add_block(l) # adapt limits according to CPU and DB load fd = open("/proc/loadavg") @@ -316,7 +333,8 @@ if __name__ == '__main__': # if the bulk pool is still empty, clients will be faster, avoid having # them blocked in this case if numbulks < 10: - BLOCK_LIMIT = 2*BLOCK_UPPER + BLOCK_UPPER *= 2 + BLOCK_LIMIT = BLOCK_UPPER # collecting statistics @@ -356,7 +374,7 @@ if __name__ == '__main__': fd.close() # TODO write logs (need to collect some statistics) - logstr = datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' + logstr = datetime.now().strftime('%d/%b/%Y:%H:%M:%S') + ' %s %s\n' fd = open(LOGFILE, 'a') if unblocked: fd.write(logstr % ('unblocked:', ', '.join(unblocked))) @@ -370,4 +388,7 @@ if __name__ == '__main__': fd.write(logstr % (' ua block:', ', '.join(uablocked))) if blocked: fd.write(logstr % ('new block:', ', '.join(blocked))) + for k,v in stats.items(): + if v.ignores_warnings() and k not in notlogged and ':' not in k: + fd.write(logstr % ('Warning ignored:', k)) fd.close()