]> git.openstreetmap.org Git - nominatim.git/blobdiff - utils/cron_ipanalyse.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / utils / cron_ipanalyse.py
index aba5e510b3b50cee2ff867755f6e3251527f6f2a..333dbed55b4409da28081d24a92c22debad49e1f 100755 (executable)
@@ -9,6 +9,7 @@
 import re
 import os
 import sys
+import subprocess
 from datetime import datetime, timedelta
 from collections import defaultdict
 
@@ -58,8 +59,8 @@ BLOCK_LIMIT = BLOCK_LOWER
 
 time_regex = r'(?P<t_day>\d\d)/(?P<t_month>[A-Za-z]+)/(?P<t_year>\d\d\d\d):(?P<t_hour>\d\d):(?P<t_min>\d\d):(?P<t_sec>\d\d) [+-]\d\d\d\d'
 
-format_pat= re.compile(r'(?P<ip>[(\d\.)]+) - - \['+ time_regex + r'] "(?P<query>.*?)" (?P<return>\d+) (?P<bytes>\d+) "(?P<referer>.*?)" "(?P<ua>.*?)"')
-time_pat= re.compile(r'[(\d\.)]+ - - \[' + time_regex + '\] ')
+format_pat= re.compile(r'(?P<ip>[a-f\d\.:]+) - - \['+ time_regex + r'] "(?P<query>.*?)" (?P<return>\d+) (?P<bytes>\d+) "(?P<referer>.*?)" "(?P<ua>.*?)"')
+time_pat= re.compile(r'[a-f\d:\.]+ - - \[' + time_regex + '\] ')
 
 logtime_pat = "%d/%b/%Y:%H:%M:%S %z"
 
@@ -303,11 +304,14 @@ if __name__ == '__main__':
     fd = open("/proc/loadavg")
     cpuload = int(float(fd.readline().split()[2]))
     fd.close()
-    dbload = total200 / BULKCOOLOFF_DELTA.total_seconds()
+    # check the number of excess connections to apache
+    dbcons = int(subprocess.check_output("netstat -s | grep 'connections established' | sed 's:^\s*::;s: .*::'", shell=True))
+    fpms = int(subprocess.check_output('ps -Af | grep php-fpm | wc -l', shell=True))
+    dbload = max(0, dbcons - fpms)
 
     numbulks = len(bl.prevbulks)
-    BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
-    BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (cpuload - 14))
+    BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * dbload)
+    BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * cpuload)
     if numbulks > MAX_BULK_IPS:
         BLOCK_LIMIT = max(3600, BLOCK_LOWER - (numbulks - MAX_BULK_IPS)*10)
     # if the bulk pool is still empty, clients will be faster, avoid having
@@ -349,7 +353,7 @@ if __name__ == '__main__':
             elif wasbulked:
                 debulked.append(k)
     for i in bl.blacklist:
-        fd.write("%s ban\n" % k)
+        fd.write("%s ban\n" % i)
     fd.close()
 
     # TODO write logs (need to collect some statistics)