From: Sarah Hoffmann Date: Tue, 18 Jun 2013 19:06:30 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~604 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/00a3e843e3fb4a6fc735a019cdf7a3e38558cad6?hp=1fb5964b0e08b1d719d919a8c2c0545d8827a502 Merge remote-tracking branch 'upstream/master' --- diff --git a/lib/init-website.php b/lib/init-website.php index bcf3ebf7..4d1c2c70 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -5,6 +5,7 @@ { header("Access-Control-Allow-Origin: *"); header("Access-Control-Allow-Methods: OPTIONS,GET"); + header("Access-Control-Max-Age: 8640000"); if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS'])) { header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']); @@ -12,53 +13,5 @@ } if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit; - if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false) - { - echo "Closed for re-indexing..."; - exit; - } - - $aBucketKeys = array(); - - if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST))); - if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"]; - if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"]; - - $fBucketVal = doBucket($aBucketKeys, - (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(), - CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - - if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit) - { - $m = getBucketMemcache(); - $iCurrentSleeping = $m->increment('sleepCounter'); - if (false === $iCurrentSleeping) - { - $m->add('sleepCounter', 0); - $iCurrentSleeping = $m->increment('sleepCounter'); - } - if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys)) - { - // Too many threads sleeping already. This becomes a hard block. - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - } - else - { - setBucketSleeping($aBucketKeys, true); - sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate); - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - setBucketSleeping($aBucketKeys, false); - } - $m->decrement('sleepCounter'); - } - - if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit) - { - echo "Your IP has been blocked. \n"; - echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n"; - echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n"; - exit; - } - - header('Content-type: text/html; charset=utf-8'); + header('Content-type: text/html; charset=utf-8'); diff --git a/lib/lib.php b/lib/lib.php index 3b16b399..a4e4aa6c 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -169,7 +169,7 @@ { $aResult = array(array(join(' ',$aWords))); $sFirstToken = ''; - if ($iDepth < 8) { + if ($iDepth < 7) { while(sizeof($aWords) > 1) { $sWord = array_shift($aWords); diff --git a/lib/log.php b/lib/log.php index b08f5383..361c0fb1 100644 --- a/lib/log.php +++ b/lib/log.php @@ -17,15 +17,13 @@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + if (isset($_GET['email'])) + $sUserAgent = $_GET['email']; + else + $sUserAgent = $_SERVER['HTTP_USER_AGENT']; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } @@ -59,12 +57,6 @@ if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips new file mode 100755 index 00000000..2fa1d80e --- /dev/null +++ b/munin/nominatim_throttled_ips @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Plugin to monitor the number of IPs in special pools +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +if [ "$1" = "config" ]; then + + echo 'graph_title Restricted IPs' + echo 'graph_args -l 0' + echo 'graph_vlabel number of IPs' + echo 'graph_category nominatim' + echo 'bulk.label bulk' + echo 'bulk.draw AREA' + echo 'bulk.type GAUGE' + echo 'block.label blocked' + echo 'block.draw STACK' + echo 'block.type GAUGE' + exit 0 +fi + +BASEDIR="$(dirname "$(readlink -f "$0")")" + +cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:' diff --git a/settings/settings.php b/settings/settings.php index fc6ee8de..4e9d71f0 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -51,12 +51,14 @@ // Website settings @define('CONST_NoAccessControl', true); - @define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); @define('CONST_BulkUserIPs', ''); - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); @define('CONST_Default_Language', false); diff --git a/sql/functions.sql b/sql/functions.sql index ab965893..929e5376 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -1357,13 +1357,26 @@ BEGIN NEW.centroid := null; -- reclaculate country and partition - IF NEW.rank_search >= 4 THEN - --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code)); - NEW.calculated_country_code := lower(get_country_code(place_centroid)); + IF NEW.rank_search = 4 THEN + -- for countries, believe the mapped country code, + -- so that we remain in the right partition if the boundaries + -- suddenly expand. + NEW.partition := get_partition(place_centroid, lower(NEW.country_code)); + IF NEW.partition = 0 THEN + NEW.calculated_country_code := lower(get_country_code(place_centroid)); + NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); + ELSE + NEW.calculated_country_code := lower(NEW.country_code); + END IF; ELSE - NEW.calculated_country_code := NULL; + IF NEW.rank_search > 4 THEN + --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code)); + NEW.calculated_country_code := lower(get_country_code(place_centroid)); + ELSE + NEW.calculated_country_code := NULL; + END IF; + NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); END IF; - NEW.partition := get_partition(place_centroid, NEW.calculated_country_code); NEW.geometry_sector := geometry_sector(NEW.partition, place_centroid); -- Adding ourselves to the list simplifies address calculations later diff --git a/sql/indices.src.sql b/sql/indices.src.sql index ea57e74b..4dc23bd2 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,29 +1,29 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. -CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; -- start -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 29e3d282..e0b1fae0 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- ( country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 3bb41e28..244f2036 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -23,19 +23,6 @@ CREATE TABLE import_npi_log ( event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT SELECT ON query_log TO "www-data" ; -GRANT INSERT ON query_log TO "www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@ -43,6 +30,7 @@ CREATE TABLE new_query_log ( useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ; GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@ -76,8 +61,8 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); + ) TABLESPACE ssd; +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; CREATE SEQUENCE seq_word start 1; @@ -132,7 +117,7 @@ CREATE TABLE search_name ( nameaddress_vector integer[] ); SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -142,8 +127,8 @@ CREATE TABLE place_addressline ( isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@ -196,14 +181,14 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@ -244,7 +229,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE place_insert(); drop index idx_placex_sector; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@ -262,7 +247,7 @@ CREATE TABLE import_polygon_error ( ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_error TO "www-data"; drop table import_polygon_delete; @@ -272,7 +257,7 @@ CREATE TABLE import_polygon_delete ( class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_delete TO "www-data"; drop sequence file; diff --git a/utils/cron_banip.py b/utils/cron_banip.py new file mode 100755 index 00000000..ad32b852 --- /dev/null +++ b/utils/cron_banip.py @@ -0,0 +1,222 @@ +#!/usr/bin/python +# +# Search logs for high-bandwith users and create a list of suspicious IPs. +# There are three states: bulk, block, ban. The first are bulk requesters +# that need throtteling, the second bulk requesters that have overdone it +# and the last manually banned IPs. +# +# The list can then be used in apache using rewrite rules to +# direct bulk users to smaller thread pools or block them. A +# typical apache config that uses php-fpm pools would look +# like this: +# +# Alias /nominatim-www/ "/var/www/nominatim/" +# Alias /nominatim-bulk/ "/var/www/nominatim/" +# +# Options MultiViews FollowSymLinks +# AddType text/html .php +# +# +# +# AddHandler fcgi:/var/run/php5-fpm-www.sock .php +# +# +# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php +# +# +# Redirect 509 /nominatim-block/ +# ErrorDocument 509 "Bandwidth limit exceeded." +# Redirect 403 /nominatim-ban/ +# ErrorDocument 403 "Access blocked." +# +# RewriteEngine On +# RewriteMap bulklist txt:/home/wherever/ip-block.map +# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT] +# + +import os +import psycopg2 +import datetime + +BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..')) + +# +# DEFAULT SETTINGS +# +# Copy into settings/ip_blcoks.conf and adapt as required. +# +BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map' +LOGFILE= BASEDIR + '/log/restricted_ip.log' + +# space-separated list of IPs that are never banned +WHITELIST = '' +# space-separated list of IPs manually blocked +BLACKLIST = '' + +# time before a automatically blocked IP is allowed back +BLOCKCOOLOFF_PERIOD='1 hour' +# quiet time before an IP is released from the bulk pool +BULKCOOLOFF_PERIOD='15 min' + +BULKLONG_LIMIT=8000 +BULKSHORT_LIMIT=2000 +BLOCK_UPPER=19000 +BLOCK_LOWER=4000 +BLOCK_LOADFAC=380 +BULK_LOADFAC=160 +BULK_LOWER=1500 +MAX_BULK_IPS=85 + +# +# END OF DEFAULT SETTINGS +# + +try: + execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf")) +except IOError: + pass + +# read the previous blocklist +WHITELIST = set(WHITELIST.split()) if WHITELIST else set() +prevblocks = [] +prevbulks = [] +BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set() +newblocks = set() +newbulks = set() + +try: + fd = open(BLOCKEDFILE) + for line in fd: + ip, typ = line.strip().split(' ') + if ip not in BLACKLIST: + if typ == 'block': + prevblocks.append(ip) + elif typ == 'bulk': + prevbulks.append(ip) + fd.close() +except IOError: + pass #ignore non-existing file + +# determine current load +fd = open("/proc/loadavg") +avgload = int(float(fd.readline().split()[2])) +fd.close() +# DB load +conn = psycopg2.connect('dbname=nominatim') +cur = conn.cursor() +cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") +dbload = int(cur.fetchone()[0]) + +BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) +BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) +if len(prevbulks) > MAX_BULK_IPS: + BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10) + +# get the new block candidates +cur.execute(""" + SELECT ipaddress, max(count) FROM + ((SELECT * FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count FROM new_query_log + WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i + WHERE count > %s) + UNION + (SELECT ipaddress, count * 3 FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count FROM new_query_log + WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i + WHERE count > %s)) as o + GROUP BY ipaddress +""", (BULKLONG_LIMIT, BULKSHORT_LIMIT)) + +bulkips = {} +emergencyblocks = [] + +for c in cur: + if c[0] not in WHITELIST and c[0] not in BLACKLIST: + if c[1] > BLOCK_UPPER and c[0] not in prevbulks: + newblocks.add(c[0]) + if c[0] not in prevblocks: + emergencyblocks.append(c[0]) + else: + bulkips[c[0]] = c[1] + +# IPs from the block list that are no longer in the bulk list +deblockcandidates = set() +# IPs from the bulk list that are no longer in the bulk list +debulkcandidates = set() +# new IPs to go into the block list +newlyblocked = [] + + +for ip in prevblocks: + if ip in bulkips: + newblocks.add(ip) + del bulkips[ip] + else: + deblockcandidates.add(ip) + +for ip in prevbulks: + if ip in bulkips: + if bulkips[ip] > BLOCK_LIMIT: + newblocks.add(ip) + newlyblocked.append(ip) + else: + newbulks.add(ip) + del bulkips[ip] + else: + debulkcandidates.add(ip) + +# cross-check deblock candidates +if deblockcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD)) + + for c in cur: + newblocks.add(c[0]) + deblockcandidates.remove(c[0]) +# deblocked IPs go back to the bulk pool to catch the ones that simply +# ignored the HTTP error and just continue to hammer the API. +# Those that behave and stopped will be debulked a minute later. +for ip in deblockcandidates: + newbulks.add(ip) + +# cross-check debulk candidates +if debulkcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + AND starttime > date_trunc('day', now()) + """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD)) + + for c in cur: + newbulks.add(c[0]) + debulkcandidates.remove(c[0]) + +for ip in bulkips.iterkeys(): + newbulks.add(ip) + +# write out the new list +fd = open(BLOCKEDFILE, 'w') +for ip in newblocks: + fd.write(ip + " block\n") +for ip in newbulks: + fd.write(ip + " bulk\n") +for ip in BLACKLIST: + fd.write(ip + " ban\n") +fd.close() + +# write out the log +logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' +fd = open(LOGFILE, 'a') +if deblockcandidates: + fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates))) +if debulkcandidates: + fd.write(logstr % (' debulked:', ', '.join(debulkcandidates))) +if bulkips: + fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) +if emergencyblocks: + fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) +if newlyblocked: + fd.write(logstr % ('new block:', ', '.join(newlyblocked))) +fd.close() diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh new file mode 100755 index 00000000..b9291d95 --- /dev/null +++ b/utils/cron_logrotate.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Rotate query logs. + +dbname=nominatim + +basedir=`dirname $0` +logfile=`date "+$basedir/../log/query-%F.log.gz"` + +# dump the old logfile +pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile + +# remove the old logs +psql -q -d $dbname -c 'DROP TABLE backup_query_log' + +# rotate +psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' +psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' +psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)' + diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh new file mode 100755 index 00000000..4c16fc65 --- /dev/null +++ b/utils/cron_vacuum.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Vaccum all tables with indices on integer arrays. +# Agressive vacuuming seems to help against index bloat. +# + +psql -q -d nominatim -c 'VACUUM ANALYSE search_name' +psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' +#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' + +for i in `seq 0 246`; do + psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" +done + diff --git a/utils/setup.php b/utils/setup.php index f2feeadb..55e0038c 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -159,8 +159,9 @@ { $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } + $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data'; $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; + $osm2pgsql .= ' -C 16000'; $osm2pgsql .= ' -P '.$aDSNInfo['port']; $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; passthruCheckReturn($osm2pgsql); @@ -485,7 +486,7 @@ $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"; $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"; $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x"; + $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) "; diff --git a/utils/specialphrases.php b/utils/specialphrases.php index f1a0d0d0..02aaaa0f 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -91,12 +91,12 @@ echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --git a/utils/update.php b/utils/update.php index d38de598..f82696c2 100755 --- a/utils/update.php +++ b/utils/update.php @@ -46,7 +46,6 @@ showUsage($aCMDOptions, true, 'Select either import of hourly or daily'); } - if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; /* // Lock to prevent multiple copies running @@ -352,6 +351,7 @@ if ($aResult['index']) { + if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']); } @@ -367,7 +367,7 @@ { $sCMDImport .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } - $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances']; + $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database']; if (!$aResult['no-npi']) { $sCMDIndex .= '-F '; } @@ -450,7 +450,16 @@ $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); // Index file - $sThisIndexCmd = $sCMDIndex; + if (!isset($aResult['index-instances'])) + { + if (getLoadAverage() < 15) + $iIndexInstances = 2; + else + $iIndexInstances = 1; + } else + $iIndexInstances = $aResult['index-instances']; + + $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances; if (!$aResult['no-npi']) { diff --git a/website/403.html b/website/403.html new file mode 100644 index 00000000..c5fd71f1 --- /dev/null +++ b/website/403.html @@ -0,0 +1,14 @@ + + +Access blocked + + +

Access blocked

+ +

You have been blocked because you have been overusing OSM's geocoding service. +Please be aware that OSM's resources are limited and shared between many users. +To have this block lifted, contact the Nominatim system administrator.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/509.html b/website/509.html new file mode 100644 index 00000000..047d9025 --- /dev/null +++ b/website/509.html @@ -0,0 +1,13 @@ + + +Bandwidth limit exceeded + + +

Bandwidth limit exceeded

+ +

You have been temporarily blocked because you have been overusing OSM's geocoding service. +Please adapt your scripts to reduce the number of requests and try again later.

+ +

For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/favicon.ico b/website/favicon.ico new file mode 100644 index 00000000..0157ea00 Binary files /dev/null and b/website/favicon.ico differ diff --git a/website/nominatim.xml b/website/nominatim.xml new file mode 100644 index 00000000..28684b16 --- /dev/null +++ b/website/nominatim.xml @@ -0,0 +1,15 @@ + + + Nominatim + Nominatim OSM Search + Search for a place in OpenStreetMap Nominatim + UTF-8 + UTF-8 + + + Brian Quinion + false + Data &copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright. + + diff --git a/website/reverse.php b/website/reverse.php index f6925846..708aaf78 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -4,18 +4,6 @@ require_once(dirname(dirname(__FILE__)).'/lib/init-website.php'); require_once(CONST_BasePath.'/lib/log.php'); - if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) - { - $fLoadAvg = getLoadAverage(); - if ($fLoadAvg > 2) sleep(60); - if ($fLoadAvg > 4) sleep(120); - if ($fLoadAvg > 6) - { - echo "Bulk User: Temporary block due to high server load\n"; - exit; - } - } - $oDB =& getDB(); ini_set('memory_limit', '200M'); diff --git a/website/robots.txt b/website/robots.txt new file mode 100644 index 00000000..e4d3d3fb --- /dev/null +++ b/website/robots.txt @@ -0,0 +1,10 @@ +User-agent: ia_archiver +Allow: / + +User-agent: * +Disallow: /search.php +Disallow: /search +Disallow: /details.php +Disallow: /details +Disallow: /reverse.php +Disallow: /reverse diff --git a/website/search.php b/website/search.php index 92cad8a0..d486ed66 100755 --- a/website/search.php +++ b/website/search.php @@ -78,6 +78,7 @@ if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true; if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true; + if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true; $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]"; @@ -1012,6 +1013,7 @@ $aPlaceIDs = $oDB->getCol($sSQL); // If not try the aux fallback table + /* if (!sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; @@ -1023,6 +1025,7 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $oDB->getCol($sSQL); } + */ if (!sizeof($aPlaceIDs)) { @@ -1203,6 +1206,10 @@ //var_Dump($aResultPlaceIDs);exit; // Get the details for display (is this a redundant extra step?) $sPlaceIDs = join(',',$aResultPlaceIDs); + $sImportanceSQL = ''; + if ($sViewboxSmallSQL) $sImportanceSQL .= " case when ST_Contains($sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + if ($sViewboxLargeSQL) $sImportanceSQL .= " case when ST_Contains($sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * "; + $sOrderSQL = 'CASE '; foreach(array_keys($aResultPlaceIDs) as $iOrder => $iPlaceID) { @@ -1215,7 +1222,7 @@ $sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; //$sSQL .= $sOrderSQL." as porder, "; - $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance, "; + $sSQL .= $sImportanceSQL."coalesce(importance,0.75-(rank_search::float/40)) as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(placex.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "(extratags->'place') as extra_place "; $sSQL .= "from placex where place_id in ($sPlaceIDs) "; @@ -1238,13 +1245,14 @@ $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; //$sSQL .= $sOrderSQL." as porder, "; - $sSQL .= "-0.15 as importance, "; + $sSQL .= $sImportanceSQL."0.015 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$bDeDupe) $sSQL .= ",place_id"; + /* $sSQL .= " union "; $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; @@ -1252,7 +1260,7 @@ $sSQL .= "null as ref,"; $sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, "; //$sSQL .= $sOrderSQL." as porder, "; - $sSQL .= "-0.10 as importance, "; + $sSQL .= $sImportanceSQL."0.01 as importance, "; $sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, "; $sSQL .= "null as extra_place "; $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; @@ -1262,6 +1270,7 @@ $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; $sSQL .= "order by importance desc"; //$sSQL .= "order by rank_search,rank_address,porder asc"; + */ if (CONST_Debug) { echo "


"; var_dump($sSQL); } $aSearchResults = $oDB->getAll($sSQL); //var_dump($sSQL,$aSearchResults);exit; @@ -1323,6 +1332,7 @@ $sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$bDeDupe) $sSQL .= ",place_id"; + /* $sSQL .= " union "; $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; @@ -1335,6 +1345,7 @@ $sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) "; $sSQL .= "and 30 between $iMinAddressRank and $iMaxAddressRank "; $sSQL .= "group by place_id"; + */ if (!$bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; $sSQL .= "order by importance desc"; @@ -1412,10 +1423,10 @@ { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } - elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) + /*elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); - } + }*/ elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) { $fRadius = 0.01;