X-Git-Url: https://git.openstreetmap.org./nominatim.git/blobdiff_plain/a16075d34895a947bbcbf60f22dc81df92cf4640..b2871268cde63d2dd4d92c1313c396afb2807933:/utils/cron_ipanalyse.py diff --git a/utils/cron_ipanalyse.py b/utils/cron_ipanalyse.py index aba5e510..be5e6956 100755 --- a/utils/cron_ipanalyse.py +++ b/utils/cron_ipanalyse.py @@ -9,6 +9,7 @@ import re import os import sys +import subprocess from datetime import datetime, timedelta from collections import defaultdict @@ -58,8 +59,8 @@ BLOCK_LIMIT = BLOCK_LOWER time_regex = r'(?P\d\d)/(?P[A-Za-z]+)/(?P\d\d\d\d):(?P\d\d):(?P\d\d):(?P\d\d) [+-]\d\d\d\d' -format_pat= re.compile(r'(?P[(\d\.)]+) - - \['+ time_regex + r'] "(?P.*?)" (?P\d+) (?P\d+) "(?P.*?)" "(?P.*?)"') -time_pat= re.compile(r'[(\d\.)]+ - - \[' + time_regex + '\] ') +format_pat= re.compile(r'(?P[a-f\d\.:]+) - - \['+ time_regex + r'] "(?P.*?)" (?P\d+) (?P\d+) "(?P.*?)" "(?P.*?)"') +time_pat= re.compile(r'[a-f\d:\.]+ - - \[' + time_regex + '\] ') logtime_pat = "%d/%b/%Y:%H:%M:%S %z" @@ -84,12 +85,16 @@ class LogEntry: if qp[0] == 'OPTIONS': self.request = None else: - if '/search' in qp[1]: + if '/?' in qp[1]: + self.request = 'S' + elif '/search' in qp[1]: self.request = 'S' elif '/reverse' in qp[1]: self.request = 'R' elif '/details' in qp[1]: self.request = 'D' + elif '/lookup' in qp[1]: + self.request = 'L' else: self.request = None self.query = e['query'] @@ -240,12 +245,12 @@ class IPstats: if was_blocked: # deblock only if the IP has been really quiet # (properly catches the ones that simply ignore the HTTP error) - return None if self.long_total < 5 else 'block' + return None if self.long_total < 20 else 'block' if self.long_api > BLOCK_UPPER or self.short_api > BLOCK_UPPER / 3: # client totally overdoing it return 'block' if was_bulked: - if self.short_total < 5: + if self.short_total < 20: # client has stopped, debulk return None if self.long_api > BLOCK_LIMIT or self.short_api > BLOCK_LIMIT / 3: @@ -254,8 +259,8 @@ class IPstats: return 'bulk' if self.long_api > BULKLONG_LIMIT or self.short_api > BULKSHORT_LIMIT: - if self.bad_ua: - return 'uablock' # bad useragent + #if self.bad_ua: + # return 'uablock' # bad useragent return 'bulk' return None @@ -303,17 +308,21 @@ if __name__ == '__main__': fd = open("/proc/loadavg") cpuload = int(float(fd.readline().split()[2])) fd.close() - dbload = total200 / BULKCOOLOFF_DELTA.total_seconds() + # check the number of excess connections to apache + dbcons = int(subprocess.check_output("netstat -s | grep 'connections established' | sed 's:^\s*::;s: .*::'", shell=True)) + fpms = int(subprocess.check_output('ps -Af | grep php-fpm | wc -l', shell=True)) + dbload = max(0, dbcons - fpms) numbulks = len(bl.prevbulks) - BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) - BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (cpuload - 14)) + BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * dbload) + BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * cpuload) if numbulks > MAX_BULK_IPS: BLOCK_LIMIT = max(3600, BLOCK_LOWER - (numbulks - MAX_BULK_IPS)*10) # if the bulk pool is still empty, clients will be faster, avoid having # them blocked in this case if numbulks < 10: - BLOCK_LIMIT = 2*BLOCK_UPPER + BLOCK_UPPER *= 2 + BLOCK_LIMIT = BLOCK_UPPER # collecting statistics @@ -349,7 +358,7 @@ if __name__ == '__main__': elif wasbulked: debulked.append(k) for i in bl.blacklist: - fd.write("%s ban\n" % k) + fd.write("%s ban\n" % i) fd.close() # TODO write logs (need to collect some statistics)