]> git.openstreetmap.org Git - nominatim.git/blobdiff - utils/cron_ipanalyse.py
Merge remote-tracking branch 'upstream/master'
[nominatim.git] / utils / cron_ipanalyse.py
index 05b0b7f9e18382e05c33ba6a0c67d69bbc8050d0..1ca2267fa4144081020db88545538fb01e76559c 100755 (executable)
@@ -34,6 +34,8 @@ UA_BLOCKLIST = ()
 BLOCKCOOLOFF_DELTA=timedelta(hours=1)
 # quiet time before an IP is released from the bulk pool
 BULKCOOLOFF_DELTA=timedelta(minutes=15)
 BLOCKCOOLOFF_DELTA=timedelta(hours=1)
 # quiet time before an IP is released from the bulk pool
 BULKCOOLOFF_DELTA=timedelta(minutes=15)
+# time to check if new accesses appear despite being blocked
+BLOCKCHECK_DELTA=timedelta(minutes=1)
 
 BULKLONG_LIMIT=8000
 BULKSHORT_LIMIT=2000
 
 BULKLONG_LIMIT=8000
 BULKSHORT_LIMIT=2000
@@ -85,12 +87,16 @@ class LogEntry:
             if qp[0] == 'OPTIONS':
                 self.request = None
             else:
             if qp[0] == 'OPTIONS':
                 self.request = None
             else:
-                if '/search' in qp[1]:
+                if '/?' in qp[1]:
+                    self.request = 'S'
+                elif '/search' in qp[1]:
                     self.request = 'S'
                 elif '/reverse' in qp[1]:
                     self.request = 'R'
                 elif '/details' in qp[1]:
                     self.request = 'D'
                     self.request = 'S'
                 elif '/reverse' in qp[1]:
                     self.request = 'R'
                 elif '/details' in qp[1]:
                     self.request = 'D'
+                elif '/lookup' in qp[1]:
+                    self.request = 'L'
                 else:
                     self.request = None
         self.query = e['query']
                 else:
                     self.request = None
         self.query = e['query']
@@ -221,6 +227,7 @@ class IPstats:
         self.short_api = 0
         self.long_total = 0
         self.long_api = 0
         self.short_api = 0
         self.long_total = 0
         self.long_api = 0
+        self.block_total = 0
         self.bad_ua = False
 
     def add_long(self, logentry):
         self.bad_ua = False
 
     def add_long(self, logentry):
@@ -237,6 +244,12 @@ class IPstats:
             self.short_api += 1
         self.add_long(logentry)
 
             self.short_api += 1
         self.add_long(logentry)
 
+    def add_block(self, logentry):
+        self.block_total += 1
+
+    def ignores_warnings(self):
+        return self.block_total > 5
+
     def new_state(self, was_blocked, was_bulked):
         if was_blocked:
             # deblock only if the IP has been really quiet
     def new_state(self, was_blocked, was_bulked):
         if was_blocked:
             # deblock only if the IP has been really quiet
@@ -283,6 +296,7 @@ if __name__ == '__main__':
     bl = BlockList()
 
     shortstart = dt + BLOCKCOOLOFF_DELTA - BULKCOOLOFF_DELTA
     bl = BlockList()
 
     shortstart = dt + BLOCKCOOLOFF_DELTA - BULKCOOLOFF_DELTA
+    blockstart = dt + BLOCKCOOLOFF_DELTA - BLOCKCHECK_DELTA
     notlogged = bl.whitelist | bl.blacklist
 
     stats = defaultdict(IPstats)
     notlogged = bl.whitelist | bl.blacklist
 
     stats = defaultdict(IPstats)
@@ -299,6 +313,8 @@ if __name__ == '__main__':
             stats[l.ip].add_short(l)
         if l.request is not None and l.retcode == 200:
             total200 += 1
             stats[l.ip].add_short(l)
         if l.request is not None and l.retcode == 200:
             total200 += 1
+        if l.date > blockstart and l.retcode in (403, 429):
+            stats[l.ip].add_block(l)
 
     # adapt limits according to CPU and DB load
     fd = open("/proc/loadavg")
 
     # adapt limits according to CPU and DB load
     fd = open("/proc/loadavg")
@@ -358,7 +374,7 @@ if __name__ == '__main__':
     fd.close()
 
     # TODO write logs (need to collect some statistics)
     fd.close()
 
     # TODO write logs (need to collect some statistics)
-    logstr = datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n'
+    logstr = datetime.now().strftime('%d/%b/%Y:%H:%M:%S') + ' %s %s\n'
     fd = open(LOGFILE, 'a')
     if unblocked:
         fd.write(logstr % ('unblocked:', ', '.join(unblocked)))
     fd = open(LOGFILE, 'a')
     if unblocked:
         fd.write(logstr % ('unblocked:', ', '.join(unblocked)))
@@ -372,4 +388,7 @@ if __name__ == '__main__':
         fd.write(logstr % (' ua block:', ', '.join(uablocked)))
     if blocked:
         fd.write(logstr % ('new block:', ', '.join(blocked)))
         fd.write(logstr % (' ua block:', ', '.join(uablocked)))
     if blocked:
         fd.write(logstr % ('new block:', ', '.join(blocked)))
+    for k,v in stats.items():
+        if v.ignores_warnings() and k not in notlogged and ':' not in k:
+            fd.write(logstr % ('Warning ignored:', k))
     fd.close()
     fd.close()