From 2aca37f677121831beebf2276d60b8dd7a413c60 Mon Sep 17 00:00:00 2001 From: Sarah Hoffmann Date: Mon, 21 May 2018 12:01:28 +0200 Subject: [PATCH 1/1] remove unused cron scripts --- utils/cron_banip.py | 243 ---------------------------------------- utils/cron_logrotate.sh | 20 ---- utils/cron_vacuum.sh | 14 --- 3 files changed, 277 deletions(-) delete mode 100755 utils/cron_banip.py delete mode 100755 utils/cron_logrotate.sh delete mode 100755 utils/cron_vacuum.sh diff --git a/utils/cron_banip.py b/utils/cron_banip.py deleted file mode 100755 index 53f5e5f1..00000000 --- a/utils/cron_banip.py +++ /dev/null @@ -1,243 +0,0 @@ -#!/usr/bin/python -# -# Search logs for high-bandwith users and create a list of suspicious IPs. -# There are three states: bulk, block, ban. The first are bulk requesters -# that need throtteling, the second bulk requesters that have overdone it -# and the last manually banned IPs. -# -# The list can then be used in apache using rewrite rules to -# direct bulk users to smaller thread pools or block them. A -# typical apache config that uses php-fpm pools would look -# like this: -# -# Alias /nominatim-www/ "/var/www/nominatim/" -# Alias /nominatim-bulk/ "/var/www/nominatim/" -# -# Options MultiViews FollowSymLinks -# AddType text/html .php -# -# -# -# AddHandler fcgi:/var/run/php5-fpm-www.sock .php -# -# -# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php -# -# -# Redirect 509 /nominatim-block/ -# ErrorDocument 509 "Bandwidth limit exceeded." -# Redirect 403 /nominatim-ban/ -# ErrorDocument 403 "Access blocked." -# -# RewriteEngine On -# RewriteMap bulklist txt:/home/wherever/ip-block.map -# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT] -# - -import os -import psycopg2 -import datetime - -BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..')) - -# -# DEFAULT SETTINGS -# -# Copy into settings/ip_blcoks.conf and adapt as required. -# -BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map' -LOGFILE= BASEDIR + '/log/restricted_ip.log' - -# space-separated list of IPs that are never banned -WHITELIST = '' -# space-separated list of IPs manually blocked -BLACKLIST = '' -# user-agents that should be blocked from bulk mode -# (matched with startswith) -UA_BLOCKLIST = () - -# time before a automatically blocked IP is allowed back -BLOCKCOOLOFF_PERIOD='1 hour' -# quiet time before an IP is released from the bulk pool -BULKCOOLOFF_PERIOD='15 min' - -BULKLONG_LIMIT=8000 -BULKSHORT_LIMIT=2000 -BLOCK_UPPER=19000 -BLOCK_LOWER=4000 -BLOCK_LOADFAC=380 -BULK_LOADFAC=160 -BULK_LOWER=1500 -MAX_BULK_IPS=85 - -# -# END OF DEFAULT SETTINGS -# - -try: - execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf")) -except IOError: - pass - -# read the previous blocklist -WHITELIST = set(WHITELIST.split()) if WHITELIST else set() -prevblocks = [] -prevbulks = [] -BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set() -newblocks = set() -newbulks = set() - -try: - fd = open(BLOCKEDFILE) - for line in fd: - ip, typ = line.strip().split(' ') - if ip not in BLACKLIST: - if typ == 'block': - prevblocks.append(ip) - elif typ == 'bulk': - prevbulks.append(ip) - fd.close() -except IOError: - pass #ignore non-existing file - -# determine current load -fd = open("/proc/loadavg") -avgload = int(float(fd.readline().split()[2])) -fd.close() -# DB load -conn = psycopg2.connect('dbname=nominatim') -cur = conn.cursor() -cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") -dbload = int(cur.fetchone()[0]) - -BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) -BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) -if len(prevbulks) > MAX_BULK_IPS: - BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10) -# if the bulk pool is still empty, clients will be faster, avoid having -# them blocked in this case -if len(prevbulks) < 10: - BLOCK_LIMIT = 2*BLOCK_UPPER - - -# get the new block candidates -cur.execute(""" - SELECT ipaddress, max(count), max(ua) FROM - ((SELECT * FROM - (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log - WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i - WHERE count > %s) - UNION - (SELECT ipaddress, count * 3, ua FROM - (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log - WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i - WHERE count > %s)) as o - GROUP BY ipaddress -""", (BULKLONG_LIMIT, BULKSHORT_LIMIT)) - -bulkips = {} -emergencyblocks = [] -useragentblocks = [] - -for c in cur: - if c[0] not in WHITELIST and c[0] not in BLACKLIST: - # check for user agents that receive an immediate block - missing_agent = not c[2] - if not missing_agent: - for ua in UA_BLOCKLIST: - if c[2].startswith(ua): - missing_agent = True - break - if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks: - newblocks.add(c[0]) - if missing_agent: - useragentblocks.append(c[0]) - else: - emergencyblocks.append(c[0]) - else: - bulkips[c[0]] = c[1] - -# IPs from the block list that are no longer in the bulk list -deblockcandidates = set() -# IPs from the bulk list that are no longer in the bulk list -debulkcandidates = set() -# new IPs to go into the block list -newlyblocked = [] - - -for ip in prevblocks: - if ip in bulkips: - newblocks.add(ip) - del bulkips[ip] - else: - deblockcandidates.add(ip) - -for ip in prevbulks: - if ip not in newblocks: - if ip in bulkips: - if bulkips[ip] > BLOCK_LIMIT: - newblocks.add(ip) - newlyblocked.append(ip) - else: - newbulks.add(ip) - del bulkips[ip] - else: - debulkcandidates.add(ip) - -# cross-check deblock candidates -if deblockcandidates: - cur.execute(""" - SELECT DISTINCT ipaddress FROM new_query_log - WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' - """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD)) - - for c in cur: - newblocks.add(c[0]) - deblockcandidates.remove(c[0]) -# deblocked IPs go back to the bulk pool to catch the ones that simply -# ignored the HTTP error and just continue to hammer the API. -# Those that behave and stopped will be debulked a minute later. -for ip in deblockcandidates: - newbulks.add(ip) - -# cross-check debulk candidates -if debulkcandidates: - cur.execute(""" - SELECT DISTINCT ipaddress FROM new_query_log - WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' - AND starttime > date_trunc('day', now()) - """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD)) - - for c in cur: - newbulks.add(c[0]) - debulkcandidates.remove(c[0]) - -for ip in bulkips.iterkeys(): - newbulks.add(ip) - -# write out the new list -fd = open(BLOCKEDFILE, 'w') -for ip in newblocks: - fd.write(ip + " block\n") -for ip in newbulks: - fd.write(ip + " bulk\n") -for ip in BLACKLIST: - fd.write(ip + " ban\n") -fd.close() - -# write out the log -logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' -fd = open(LOGFILE, 'a') -if deblockcandidates: - fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates))) -if debulkcandidates: - fd.write(logstr % (' debulked:', ', '.join(debulkcandidates))) -if bulkips: - fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) -if emergencyblocks: - fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) -if useragentblocks: - fd.write(logstr % (' ua block:', ', '.join(useragentblocks))) -if newlyblocked: - fd.write(logstr % ('new block:', ', '.join(newlyblocked))) -fd.close() diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh deleted file mode 100755 index b9291d95..00000000 --- a/utils/cron_logrotate.sh +++ /dev/null @@ -1,20 +0,0 @@ -#!/bin/bash -e -# -# Rotate query logs. - -dbname=nominatim - -basedir=`dirname $0` -logfile=`date "+$basedir/../log/query-%F.log.gz"` - -# dump the old logfile -pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile - -# remove the old logs -psql -q -d $dbname -c 'DROP TABLE backup_query_log' - -# rotate -psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' -psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' -psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)' - diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh deleted file mode 100755 index 3ea43dee..00000000 --- a/utils/cron_vacuum.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -# -# Vaccum all tables with indices on integer arrays. -# Agressive vacuuming seems to help against index bloat. -# - -psql -q -d nominatim -c 'VACUUM ANALYSE search_name' -psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' -#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' - -for i in `seq 0 250`; do - psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" -done - -- 2.39.5