git.openstreetmap.org Git - nominatim.git/commitdiff
replace slow strptime with regex, fix typos
author Sarah Hoffmann <lonvia@denofr.de>
Fri, 12 Dec 2014 21:56:42 +0000 (22:56 +0100)
committer Sarah Hoffmann <lonvia@denofr.de>
Fri, 12 Dec 2014 21:56:42 +0000 (22:56 +0100)
utils/cron_ipanalyse.py

index 40afa2b8eb2d90cf9a89e4f7a956bc610191c26d..2d0738af0eade18aa19f4b4b61618e59d87f561b 100755 (executable)
@@ -30,9 +30,9 @@ BLACKLIST = ''
 UA_BLOCKLIST = ()
 
 # time before a automatically blocked IP is allowed back
-BLOCKCOOLOFF_PERIOD=timedelta(hours=1)
+BLOCKCOOLOFF_DELTA=timedelta(hours=1)
 # quiet time before an IP is released from the bulk pool
-BULKCOOLOFF_PERIOD=timedelta(minutes=15)
+BULKCOOLOFF_DELTA=timedelta(minutes=15)
 
 BULKLONG_LIMIT=8000
 BULKSHORT_LIMIT=2000
@@ -50,18 +50,22 @@ MAX_BULK_IPS=85
 try:
     with open(BASEDIR + "/settings/ip_blocks.conf") as f:
         code = compile(f.read(), BASEDIR + "/settings/ip_blocks.conf", 'exec')
-        exec(code, global_vars, local_vars)
+        exec(code)
 except IOError:
     pass
 
 BLOCK_LIMIT = BLOCK_LOWER
 
+time_regex = r'(?P<t_day>\d\d)/(?P<t_month>[A-Za-z]+)/(?P<t_year>\d\d\d\d):(?P<t_hour>\d\d):(?P<t_min>\d\d):(?P<t_sec>\d\d) [+-]\d\d\d\d'
 
-format_pat= re.compile('(?P<ip>[(\d\.)]+) - - \[(?P<time>.*?)\] "(?P<query>.*?)" (?P<return>\d+) (?P<bytes>\d+) "(?P<referer>.*?)" "(?P<ua>.*?)"')
-time_pat= re.compile('[(\d\.)]+ - - \[(?P<time>[^\]]*?)\] ')
+format_pat= re.compile(r'(?P<ip>[(\d\.)]+) - - \['+ time_regex + r'] "(?P<query>.*?)" (?P<return>\d+) (?P<bytes>\d+) "(?P<referer>.*?)" "(?P<ua>.*?)"')
+time_pat= re.compile(r'[(\d\.)]+ - - \[' + time_regex + '\] ')
 
 logtime_pat = "%d/%b/%Y:%H:%M:%S %z"
 
+MONTHS = { 'Jan' : 1, 'Feb' : 2, 'Mar' : 3, 'Apr' : 4, 'May' : 5, 'Jun' : 6,
+           'Jul' : 7, 'Aug' : 8, 'Sep' : 9, 'Oct' : 10, 'Nov' : 11, 'Dec' : 12 }
+
 class LogEntry:
     def __init__(self, logline):
         e = format_pat.match(logline)
@@ -69,7 +73,8 @@ class LogEntry:
             raise ValueError("Invalid log line:", logline)
         e = e.groupdict()
         self.ip = e['ip']
-        self.date = datetime.strptime(e['time'], logtime_pat).replace(tzinfo=None)
+        self.date = datetime(int(e['t_year']), MONTHS[e['t_month']], int(e['t_day']),
+                             int(e['t_hour']), int(e['t_min']), int(e['t_sec']))
         qp = e['query'].split(' ', 2) 
         if len(qp) < 2:
             self.request = None
@@ -97,7 +102,9 @@ class LogEntry:
         if e is None:
             return None
         e = e.groupdict()
-        return datetime.strptime(e['time'], logtime_pat).replace(tzinfo=None)
+        #return datetime.strptime(e['time'], logtime_pat).replace(tzinfo=None)
+        return datetime(int(e['t_year']), MONTHS[e['t_month']], int(e['t_day']),
+                             int(e['t_hour']), int(e['t_min']), int(e['t_sec']))
 
 
 class LogFile:
@@ -124,7 +131,7 @@ class LogFile:
         curdate = self.seek_next(curseek)
         if curdate is None:
             raise RuntimeError("Cannot seek to %d" % curseek)
-        while abs((target - curdate).total_seconds()) > 1.0:
+        while target < curdate or (target - curdate).total_seconds() > 1:
             bps = curseek / ((curdate - date1).total_seconds())
             curseek += (target - curdate).total_seconds() * bps
             if curseek < 0:
@@ -154,9 +161,9 @@ class BlockList:
                 ip, typ = line.strip().split(' ')
                 if ip not in self.blacklist:
                     if typ == 'block':
-                        prevblocks.add(ip)
+                        self.prevblocks.add(ip)
                     elif typ == 'bulk':
-                        prevbulks.add(ip)
+                        self.prevbulks.add(ip)
             fd.close()
         except IOError:
             pass #ignore non-existing file
@@ -217,7 +224,7 @@ if __name__ == '__main__':
         sys.exit(-1)
 
     if len(sys.argv) == 2:
-        dt = datetime.now() - BLOCKCOOLOFF_PERIOD
+        dt = datetime.now() - BLOCKCOOLOFF_DELTA
     else:
         dt = datetime.strptime(sys.argv[2], "%Y-%m-%d %H:%M:%S")
 
@@ -230,7 +237,7 @@ if __name__ == '__main__':
 
     bl = BlockList()
 
-    shortstart = dt + BLOCKCOOLOFF_PERIOD - BULKCOOLOFF_PERIOD
+    shortstart = dt + BLOCKCOOLOFF_DELTA - BULKCOOLOFF_DELTA
     notlogged = bl.whitelist | bl.blacklist
 
     stats = defaultdict(IPstats)
@@ -252,7 +259,7 @@ if __name__ == '__main__':
     fd = open("/proc/loadavg")
     cpuload = int(float(fd.readline().split()[2]))
     fd.close()
-    dbload = total200 / BULKCOOLOFF_PERIOD.total_seconds()
+    dbload = total200 / BULKCOOLOFF_DELTA.total_seconds()
 
     numbulks = len(bl.prevbulks)
     BLOCK_LIMIT = max(BLOCK_LIMIT, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))