From: Sarah Hoffmann Date: Tue, 9 Apr 2013 21:25:08 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~624 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/9f880c367c4a9fd8a2950011eeba22c77ab0c914?hp=23d303124e903e58e5c95890868de8c62304946f Merge remote-tracking branch 'upstream/master' Conflicts: sql/indices.src.sql sql/tables.sql --- diff --git a/lib/init-website.php b/lib/init-website.php index bcf3ebf7..eb275af2 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -12,53 +12,5 @@ } if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit; - if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false) - { - echo "Closed for re-indexing..."; - exit; - } - - $aBucketKeys = array(); - - if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST))); - if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"]; - if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"]; - - $fBucketVal = doBucket($aBucketKeys, - (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(), - CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - - if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit) - { - $m = getBucketMemcache(); - $iCurrentSleeping = $m->increment('sleepCounter'); - if (false === $iCurrentSleeping) - { - $m->add('sleepCounter', 0); - $iCurrentSleeping = $m->increment('sleepCounter'); - } - if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys)) - { - // Too many threads sleeping already. This becomes a hard block. - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - } - else - { - setBucketSleeping($aBucketKeys, true); - sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate); - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - setBucketSleeping($aBucketKeys, false); - } - $m->decrement('sleepCounter'); - } - - if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit) - { - echo "Your IP has been blocked. \n"; - echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n"; - echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n"; - exit; - } - - header('Content-type: text/html; charset=utf-8'); + header('Content-type: text/html; charset=utf-8'); diff --git a/lib/log.php b/lib/log.php index 5b847a41..8022376d 100644 --- a/lib/log.php +++ b/lib/log.php @@ -17,15 +17,13 @@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + if (isset($_GET['email'])) + $sUserAgent = $_GET['email']; + else + $sUserAgent = $_SERVER['HTTP_USER_AGENT']; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } @@ -59,12 +57,6 @@ if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips new file mode 100755 index 00000000..2fa1d80e --- /dev/null +++ b/munin/nominatim_throttled_ips @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Plugin to monitor the number of IPs in special pools +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +if [ "$1" = "config" ]; then + + echo 'graph_title Restricted IPs' + echo 'graph_args -l 0' + echo 'graph_vlabel number of IPs' + echo 'graph_category nominatim' + echo 'bulk.label bulk' + echo 'bulk.draw AREA' + echo 'bulk.type GAUGE' + echo 'block.label blocked' + echo 'block.draw STACK' + echo 'block.type GAUGE' + exit 0 +fi + +BASEDIR="$(dirname "$(readlink -f "$0")")" + +cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:' diff --git a/settings/settings.php b/settings/settings.php index 771cf80f..159c24ac 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -48,12 +48,14 @@ // Website settings @define('CONST_NoAccessControl', true); - @define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); @define('CONST_BulkUserIPs', ''); - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); @define('CONST_Default_Language', 'xx'); diff --git a/sql/indices.src.sql b/sql/indices.src.sql index ea57e74b..4dc23bd2 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,29 +1,29 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. -CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; -- start -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 29e3d282..e0b1fae0 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- ( country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 3bb41e28..244f2036 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -23,19 +23,6 @@ CREATE TABLE import_npi_log ( event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT SELECT ON query_log TO "www-data" ; -GRANT INSERT ON query_log TO "www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@ -43,6 +30,7 @@ CREATE TABLE new_query_log ( useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ; GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@ -76,8 +61,8 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); + ) TABLESPACE ssd; +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; CREATE SEQUENCE seq_word start 1; @@ -132,7 +117,7 @@ CREATE TABLE search_name ( nameaddress_vector integer[] ); SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -142,8 +127,8 @@ CREATE TABLE place_addressline ( isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@ -196,14 +181,14 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@ -244,7 +229,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); drop index idx_placex_sector; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@ -262,7 +247,7 @@ CREATE TABLE import_polygon_error ( ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_error TO "www-data"; drop table import_polygon_delete; @@ -272,7 +257,7 @@ CREATE TABLE import_polygon_delete ( class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_delete TO "www-data"; drop sequence file; diff --git a/utils/cron_banip.py b/utils/cron_banip.py new file mode 100755 index 00000000..4fe30640 --- /dev/null +++ b/utils/cron_banip.py @@ -0,0 +1,221 @@ +#!/usr/bin/python +# +# Search logs for high-bandwith users and create a list of suspicious IPs. +# There are three states: bulk, block, ban. The first are bulk requesters +# that need throtteling, the second bulk requesters that have overdone it +# and the last manually banned IPs. +# +# The list can then be used in apache using rewrite rules to +# direct bulk users to smaller thread pools or block them. A +# typical apache config that uses php-fpm pools would look +# like this: +# +# Alias /nominatim-www/ "/var/www/nominatim/" +# Alias /nominatim-bulk/ "/var/www/nominatim/" +# +# Options MultiViews FollowSymLinks +# AddType text/html .php +# +# +# +# AddHandler fcgi:/var/run/php5-fpm-www.sock .php +# +# +# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php +# +# +# Redirect 509 /nominatim-block/ +# ErrorDocument 509 "Bandwidth limit exceeded." +# Redirect 403 /nominatim-ban/ +# ErrorDocument 403 "Access blocked." +# +# RewriteEngine On +# RewriteMap bulklist txt:/home/wherever/ip-block.map +# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT] +# + +import os +import psycopg2 +import datetime + +BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..')) + +# +# DEFAULT SETTINGS +# +# Copy into settings/ip_blcoks.conf and adapt as required. +# +BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map' +LOGFILE= BASEDIR + '/log/restricted_ip.log' + +# space-separated list of IPs that are never banned +WHITELIST = '' +# space-separated list of IPs manually blocked +BLACKLIST = '' + +# time before a automatically blocked IP is allowed back +BLOCKCOOLOFF_PERIOD='1 hour' +# quiet time before an IP is released from the bulk pool +BULKCOOLOFF_PERIOD='15 min' + +BULKLONG_LIMIT=8000 +BULKSHORT_LIMIT=2000 +BLOCK_UPPER=19000 +BLOCK_LOWER=4000 +BLOCK_LOADFAC=380 +BULK_LOADFAC=160 +BULK_LOWER=1500 + +# +# END OF DEFAULT SETTINGS +# + +try: + execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf")) +except IOError: + pass + +# read the previous blocklist +WHITELIST = set(WHITELIST.split()) if WHITELIST else set() +prevblocks = [] +prevbulks = [] +BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set() +newblocks = set() +newbulks = set() + +try: + fd = open(BLOCKEDFILE) + for line in fd: + ip, typ = line.strip().split(' ') + if ip not in BLACKLIST: + if typ == 'block': + prevblocks.append(ip) + elif typ == 'bulk': + prevbulks.append(ip) + fd.close() +except IOError: + pass #ignore non-existing file + +# determine current load +fd = open("/proc/loadavg") +avgload = int(float(fd.readline().split()[2])) +fd.close() +# DB load +conn = psycopg2.connect('dbname=nominatim') +cur = conn.cursor() +cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") +dbload = int(cur.fetchone()[0]) + +BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) +BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) +if len(prevbulks) > 85: + BLOCK_LIMIT = BLOCK_LOWER + +# get the new block candidates +cur.execute(""" + SELECT ipaddress, max(count) FROM + ((SELECT * FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log + WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i + WHERE count > %s) + UNION + (SELECT ipaddress, count * 3 FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log + WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i + WHERE count > %s)) as o + GROUP BY ipaddress +""", (BULKLONG_LIMIT, BULKSHORT_LIMIT)) + +bulkips = {} +emergencyblocks = [] + +for c in cur: + if c[0] not in WHITELIST and c[0] not in BLACKLIST: + if c[1] > BLOCK_UPPER and c[0] not in prevbulks: + newblocks.add(c[0]) + if c[0] not in prevblocks: + emergencyblocks.append(c[0]) + else: + bulkips[c[0]] = c[1] + +# IPs from the block list that are no longer in the bulk list +deblockcandidates = set() +# IPs from the bulk list that are no longer in the bulk list +debulkcandidates = set() +# new IPs to go into the block list +newlyblocked = [] + + +for ip in prevblocks: + if ip in bulkips: + newblocks.add(ip) + del bulkips[ip] + else: + deblockcandidates.add(ip) + +for ip in prevbulks: + if ip in bulkips: + if bulkips[ip] > BLOCK_LIMIT: + newblocks.add(ip) + newlyblocked.append(ip) + else: + newbulks.add(ip) + del bulkips[ip] + else: + debulkcandidates.add(ip) + +# cross-check deblock candidates +if deblockcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD)) + + for c in cur: + newblocks.add(c[0]) + deblockcandidates.remove(c[0]) +# deblocked IPs go back to the bulk pool to catch the ones that simply +# ignored the HTTP error and just continue to hammer the API. +# Those that behave and stopped will be debulked a minute later. +for ip in deblockcandidates: + newbulks.add(ip) + +# cross-check debulk candidates +if debulkcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + AND starttime > date_trunc('day', now()) + """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD)) + + for c in cur: + newbulks.add(c[0]) + debulkcandidates.remove(c[0]) + +for ip in bulkips.iterkeys(): + newbulks.add(ip) + +# write out the new list +fd = open(BLOCKEDFILE, 'w') +for ip in newblocks: + fd.write(ip + " block\n") +for ip in newbulks: + fd.write(ip + " bulk\n") +for ip in BLACKLIST: + fd.write(ip + " ban\n") +fd.close() + +# write out the log +logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' +fd = open(LOGFILE, 'a') +if deblockcandidates: + fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates))) +if debulkcandidates: + fd.write(logstr % (' debulked:', ', '.join(debulkcandidates))) +if bulkips: + fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) +if emergencyblocks: + fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) +if newlyblocked: + fd.write(logstr % ('new block:', ', '.join(newlyblocked))) +fd.close() diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh new file mode 100755 index 00000000..b9291d95 --- /dev/null +++ b/utils/cron_logrotate.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Rotate query logs. + +dbname=nominatim + +basedir=`dirname $0` +logfile=`date "+$basedir/../log/query-%F.log.gz"` + +# dump the old logfile +pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile + +# remove the old logs +psql -q -d $dbname -c 'DROP TABLE backup_query_log' + +# rotate +psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' +psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' +psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)' + diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh new file mode 100755 index 00000000..4c16fc65 --- /dev/null +++ b/utils/cron_vacuum.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Vaccum all tables with indices on integer arrays. +# Agressive vacuuming seems to help against index bloat. +# + +psql -q -d nominatim -c 'VACUUM ANALYSE search_name' +psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' +#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' + +for i in `seq 0 246`; do + psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" +done + diff --git a/utils/setup.php b/utils/setup.php index 5b01798e..d2d4381e 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -155,8 +155,9 @@ echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n"; fail("osm2pgsql not found in '$osm2pgsql'"); } + $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data'; $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; + $osm2pgsql .= ' -C 16000'; $osm2pgsql .= ' -P '.$aDSNInfo['port']; $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; passthruCheckReturn($osm2pgsql); @@ -481,7 +482,7 @@ $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"; $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"; $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x"; + $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) "; diff --git a/utils/specialphrases.php b/utils/specialphrases.php index f1a0d0d0..02aaaa0f 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -91,12 +91,12 @@ echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --git a/utils/update.php b/utils/update.php index 76d6f858..e8972f1d 100755 --- a/utils/update.php +++ b/utils/update.php @@ -46,7 +46,6 @@ showUsage($aCMDOptions, true, 'Select either import of hourly or daily'); } - if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; /* // Lock to prevent multiple copies running @@ -344,6 +343,7 @@ if ($aResult['index']) { + if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']); } @@ -355,7 +355,7 @@ $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile; $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory; $sCMDImport = CONST_Osm2pgsql_Binary.' -klas -C 2000 -O gazetteer -d '.$aDSNInfo['database'].' '.$sImportFile; - $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances']; + $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database']; if (!$aResult['no-npi']) { $sCMDIndex .= '-F '; } @@ -438,7 +438,16 @@ $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); // Index file - $sThisIndexCmd = $sCMDIndex; + if (!isset($aResult['index-instances'])) + { + if (getLoadAverage() < 15) + $iIndexInstances = 2; + else + $iIndexInstances = 1; + } else + $iIndexInstances = $aResult['index-instances']; + + $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances; if (!$aResult['no-npi']) { diff --git a/website/403.html b/website/403.html new file mode 100644 index 00000000..c5fd71f1 --- /dev/null +++ b/website/403.html @@ -0,0 +1,14 @@ + + +Access blocked + + +

Access blocked

+ +

You have been blocked because you have been overusing OSM's geocoding service. +Please be aware that OSM's resources are limited and shared between many users. +To have this block lifted, contact the Nominatim system administrator.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/509.html b/website/509.html new file mode 100644 index 00000000..047d9025 --- /dev/null +++ b/website/509.html @@ -0,0 +1,13 @@ + + +Bandwidth limit exceeded + + +

Bandwidth limit exceeded

+ +

You have been temporarily blocked because you have been overusing OSM's geocoding service. +Please adapt your scripts to reduce the number of requests and try again later.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/favicon.ico b/website/favicon.ico new file mode 100644 index 00000000..0157ea00 Binary files /dev/null and b/website/favicon.ico differ diff --git a/website/nominatim.xml b/website/nominatim.xml new file mode 100644 index 00000000..28684b16 --- /dev/null +++ b/website/nominatim.xml @@ -0,0 +1,15 @@ + + + Nominatim + Nominatim OSM Search + Search for a place in OpenStreetMap Nominatim + UTF-8 + UTF-8 + + + Brian Quinion + false + Data &copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright. + + diff --git a/website/reverse.php b/website/reverse.php index 33e6cd5e..c348659e 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -4,6 +4,22 @@ require_once(dirname(dirname(__FILE__)).'/lib/init-website.php'); require_once(CONST_BasePath.'/lib/log.php'); + if (preg_match(CONST_BlockedUserAgents, $_SERVER["HTTP_USER_AGENT"]) > 0) + { + $fLoadAvg = getLoadAverage(); + if ($fLoadAvg >= CONST_BlockReverseMaxLoad) { + header('HTTP/1.0 403 Forbidden'); + header('Content-type: text/html; charset=utf-8'); + echo "

App temporarily blocked

"; + echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim "; + echo "geolocation service due to high server load."; + echo "\n\n"; + exit; + } + + } + + if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) { $fLoadAvg = getLoadAverage(); diff --git a/website/robots.txt b/website/robots.txt new file mode 100644 index 00000000..e4d3d3fb --- /dev/null +++ b/website/robots.txt @@ -0,0 +1,10 @@ +User-agent: ia_archiver +Allow: / + +User-agent: * +Disallow: /search.php +Disallow: /search +Disallow: /details.php +Disallow: /details +Disallow: /reverse.php +Disallow: /reverse diff --git a/website/search.php b/website/search.php index 139ed0f6..9940d815 100755 --- a/website/search.php +++ b/website/search.php @@ -71,6 +71,7 @@ if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true; + if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; @@ -977,12 +978,17 @@ else $sSQL .= " limit ".$iLimit; - if (CONST_Debug) { var_dump($sSQL); } + if (CONST_Debug) var_dump($sSQL); + $iStartTime = time(); $aViewBoxPlaceIDs = $oDB->getAll($sSQL); if (PEAR::IsError($aViewBoxPlaceIDs)) { failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs); } + if (time() - $iStartTime > 60) { + file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND); + } + //var_dump($aViewBoxPlaceIDs); // Did we have an viewbox matches? $aPlaceIDs = array();