]> git.openstreetmap.org Git - nominatim.git/commitdiff
Merge remote-tracking branch 'upstream/master'
authorSarah Hoffmann <lonvia@denofr.de>
Tue, 9 Apr 2013 21:25:08 +0000 (23:25 +0200)
committerSarah Hoffmann <lonvia@denofr.de>
Tue, 9 Apr 2013 21:25:08 +0000 (23:25 +0200)
Conflicts:
sql/indices.src.sql
sql/tables.sql

20 files changed:
lib/init-website.php
lib/log.php
munin/nominatim_throttled_ips [new file with mode: 0755]
settings/settings.php
sql/indices.src.sql
sql/partition-tables.src.sql
sql/tables.sql
utils/cron_banip.py [new file with mode: 0755]
utils/cron_logrotate.sh [new file with mode: 0755]
utils/cron_vacuum.sh [new file with mode: 0755]
utils/setup.php
utils/specialphrases.php
utils/update.php
website/403.html [new file with mode: 0644]
website/509.html [new file with mode: 0644]
website/favicon.ico [new file with mode: 0644]
website/nominatim.xml [new file with mode: 0644]
website/reverse.php
website/robots.txt [new file with mode: 0644]
website/search.php

index bcf3ebf7a0aeb6674e874ebbf3ed7413572373dc..eb275af290e8b15b8aa791eddbd3bc72eb859613 100644 (file)
        }
        if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
 
-       if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
-       {
-               echo "Closed for re-indexing...";
-               exit;
-       }
-
-       $aBucketKeys = array();
-
-       if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST)));
-       if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"];
-       if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"];
-
-       $fBucketVal = doBucket($aBucketKeys, 
-                       (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(),
-                       CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-
-       if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit)
-       {
-               $m = getBucketMemcache();
-               $iCurrentSleeping = $m->increment('sleepCounter');
-               if (false === $iCurrentSleeping)
-               {
-                       $m->add('sleepCounter', 0);
-                       $iCurrentSleeping = $m->increment('sleepCounter');
-               }
-               if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys))
-               {
-                       // Too many threads sleeping already.  This becomes a hard block.
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-               }
-               else
-               {
-                       setBucketSleeping($aBucketKeys, true);
-                       sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate);
-                       $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-                       setBucketSleeping($aBucketKeys, false);
-               }
-               $m->decrement('sleepCounter');
-       }
-
-       if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit)
-       {
-               echo "Your IP has been blocked. \n";
-               echo "Please create a nominatim trac ticket (http://trac.openstreetmap.org/newticket?component=nominatim) to request this to be removed. \n";
-               echo "Information on the Nominatim usage policy can be found here: http://wiki.openstreetmap.org/wiki/Nominatim#Usage_Policy \n";
-               exit;
-       }
-
-       header('Content-type: text/html; charset=utf-8');
 
+    header('Content-type: text/html; charset=utf-8');
index 5b847a41172d8c84626fcb3dfea9d0df1cbaa080..8022376d93a246272c27a69bb1c7caa54673214b 100644 (file)
 
                if (CONST_Log_DB)
                {
-                       // Log
-                       if ($sType == 'search')
-                       {
-                               $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')');
-                       }
-
-                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)';
+                       if (isset($_GET['email']))
+                               $sUserAgent = $_GET['email'];
+                       else
+                               $sUserAgent = $_SERVER['HTTP_USER_AGENT'];
+                       $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
                        $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
-                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')';
+                       $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')';
                        $oDB->query($sSQL);
                }
 
 
                if (CONST_Log_DB)
                {
-                       $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
-                       $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
-                       $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
-                       $sSQL .= ' and query = '.getDBQuoted($hLog[3]);
-                       $oDB->query($sSQL);
-
                        $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
                        $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
                        $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips
new file mode 100755 (executable)
index 0000000..2fa1d80
--- /dev/null
@@ -0,0 +1,28 @@
+#!/bin/sh
+#
+# Plugin to monitor the number of IPs in special pools
+#
+# Parameters: 
+#
+#       config   (required)
+#       autoconf (optional - used by munin-config)
+#
+if [ "$1" = "config" ]; then
+        echo 'graph_title Restricted IPs' 
+        echo 'graph_args -l 0'
+        echo 'graph_vlabel number of IPs'
+        echo 'graph_category nominatim'
+        echo 'bulk.label bulk'
+        echo 'bulk.draw AREA'
+        echo 'bulk.type GAUGE'
+        echo 'block.label blocked'
+        echo 'block.draw STACK'
+        echo 'block.type GAUGE'
+        exit 0
+fi
+BASEDIR="$(dirname "$(readlink -f "$0")")"
+
+cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:'
index 771cf80fa6e5f723f340a29f0c3ce2b1df335845..159c24ac27544f2c86e289f3b07801c53384b8e3 100644 (file)
 
        // Website settings
        @define('CONST_NoAccessControl', true);
-       @define('CONST_ClosedForIndexing', false);
-       @define('CONST_ClosedForIndexingExceptionIPs', '');
        @define('CONST_BlockedIPs', '');
+       @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
+       @define('CONST_WhitelistedIPs', '');
+       @define('CONST_BlockedUserAgents', '');
+       @define('CONST_BlockReverseMaxLoad', 15);
        @define('CONST_BulkUserIPs', '');
 
-       @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
+       @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
        @define('CONST_Tile_Default', 'Mapnik');
 
        @define('CONST_Default_Language', 'xx');
index ea57e74be1f4174f5e0ac41c881ea60bab866209..4dc23bd250474b76909103926ec338966d9a17af 100644 (file)
@@ -1,29 +1,29 @@
 -- Indices used only during search and update.
 -- These indices are created only after the indexing process is done.
 
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
+CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid);
+CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd;
 
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id);
+CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id);
-CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline);
+CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd;
 
 DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search);
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address);
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses';
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd;
+CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0;
+CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL;
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses';
+CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd;
 
-CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid);
+CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd;
 
 -- start
-CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid);
+CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd;
 -- end
 
 CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type);
index 29e3d28235d1be5b42af21db4d3b1585e3907458..e0b1fae0eb47cfff13c0041c9e7b8070cb32d610 100644 (file)
@@ -35,21 +35,21 @@ SELECT AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2);
 
 
 CREATE TABLE location_area_country () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
+CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_country () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id);
-CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 -- start
 CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
-CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
+CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd;
 
 CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
-CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
-CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
 
 CREATE TABLE location_property_-partition- () INHERITS (location_property);
 CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id);
@@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- (
   country_code VARCHAR(2)
   );
 SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry);
-CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id);
+CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd;
 
 -- end
index 3bb41e28f7cf1415be242f223e4d475a60f0a799..244f2036ea3f355894bf07967351a462624031aa 100644 (file)
@@ -23,19 +23,6 @@ CREATE TABLE import_npi_log (
   event text
   );
 
---drop table IF EXISTS query_log;
-CREATE TABLE query_log (
-  starttime timestamp,
-  query text,
-  ipaddress text,
-  endtime timestamp,
-  results integer
-  );
-CREATE INDEX idx_query_log ON query_log USING BTREE (starttime);
-GRANT SELECT ON query_log TO "www-data" ;
-GRANT INSERT ON query_log TO "www-data" ;
-GRANT UPDATE ON query_log TO "www-data" ;
-
 CREATE TABLE new_query_log (
   type text,
   starttime timestamp,
@@ -43,6 +30,7 @@ CREATE TABLE new_query_log (
   useragent text,
   language text,
   query text,
+  searchterm text,
   endtime timestamp,
   results integer,
   format text,
@@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ;
 GRANT UPDATE ON new_query_log TO "www-data" ;
 GRANT SELECT ON new_query_log TO "www-data" ;
 
-create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as 
-useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC;
-
 --drop table IF EXISTS report_log;
 CREATE TABLE report_log (
   starttime timestamp,
@@ -76,8 +61,8 @@ CREATE TABLE word (
   country_code varchar(2),
   search_name_count INTEGER,
   operator TEXT
-  );
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
+  ) TABLESPACE ssd;
+CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd;
 GRANT SELECT ON word TO "www-data" ;
 DROP SEQUENCE seq_word;
 CREATE SEQUENCE seq_word start 1;
@@ -132,7 +117,7 @@ CREATE TABLE search_name (
   nameaddress_vector integer[]
   );
 SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id);
+CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_addressline;
 CREATE TABLE place_addressline (
@@ -142,8 +127,8 @@ CREATE TABLE place_addressline (
   isaddress boolean,
   distance float,
   cached_rank_address integer
-  );
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id);
+  ) TABLESPACE data;
+CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd;
 
 drop table IF EXISTS place_boundingbox CASCADE;
 CREATE TABLE place_boundingbox (
@@ -196,14 +181,14 @@ CREATE TABLE placex (
   wikipedia TEXT, -- calculated wikipedia article name (language:title)
   geometry_sector INTEGER,
   calculated_country_code varchar(2)
-  );
+  ) TABLESPACE ssd;
 SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id);
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry);
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26;
+CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd;
+CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26;
 
 --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed);
 
@@ -244,7 +229,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place
     FOR EACH ROW EXECUTE PROCEDURE place_insert();
 
 drop index idx_placex_sector;
-CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id);
+CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd;
 
 DROP SEQUENCE seq_postcodes;
 CREATE SEQUENCE seq_postcodes start 1;
@@ -262,7 +247,7 @@ CREATE TABLE import_polygon_error (
   );
 SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
 SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_error TO "www-data";
 
 drop table import_polygon_delete;
@@ -272,7 +257,7 @@ CREATE TABLE import_polygon_delete (
   class TEXT NOT NULL,
   type TEXT NOT NULL
   );
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd;
 GRANT SELECT ON import_polygon_delete TO "www-data";
 
 drop sequence file;
diff --git a/utils/cron_banip.py b/utils/cron_banip.py
new file mode 100755 (executable)
index 0000000..4fe3064
--- /dev/null
@@ -0,0 +1,221 @@
+#!/usr/bin/python
+#
+# Search logs for high-bandwith users and create a list of suspicious IPs.
+# There are three states: bulk, block, ban. The first are bulk requesters
+# that need throtteling, the second bulk requesters that have overdone it
+# and the last manually banned IPs.
+#
+# The list can then be used in apache using rewrite rules to
+# direct bulk users to smaller thread pools or block them. A
+# typical apache config that uses php-fpm pools would look
+# like this:
+#
+#    Alias /nominatim-www/ "/var/www/nominatim/"
+#    Alias /nominatim-bulk/ "/var/www/nominatim/"
+#    <Directory "/var/www/nominatim/">
+#        Options MultiViews FollowSymLinks
+#        AddType text/html   .php
+#    </Directory>
+#
+#    <Location /nominatim-www>
+#        AddHandler fcgi:/var/run/php5-fpm-www.sock .php
+#    </Location>
+#    <Location /nominatim-bulk>
+#        AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php
+#    </Location>
+#
+#    Redirect 509 /nominatim-block/
+#    ErrorDocument 509 "Bandwidth limit exceeded."
+#    Redirect 403 /nominatim-ban/
+#    ErrorDocument 403 "Access blocked."
+#
+#    RewriteEngine On
+#    RewriteMap bulklist txt:/home/wherever/ip-block.map
+#    RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT]
+#
+
+import os
+import psycopg2
+import datetime
+
+BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..'))
+
+#
+# DEFAULT SETTINGS
+#
+# Copy into settings/ip_blcoks.conf and adapt as required.
+#
+BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map'
+LOGFILE= BASEDIR + '/log/restricted_ip.log'
+
+# space-separated list of IPs that are never banned
+WHITELIST = ''
+# space-separated list of IPs manually blocked
+BLACKLIST = ''
+
+# time before a automatically blocked IP is allowed back
+BLOCKCOOLOFF_PERIOD='1 hour'
+# quiet time before an IP is released from the bulk pool
+BULKCOOLOFF_PERIOD='15 min'
+
+BULKLONG_LIMIT=8000
+BULKSHORT_LIMIT=2000
+BLOCK_UPPER=19000
+BLOCK_LOWER=4000
+BLOCK_LOADFAC=380
+BULK_LOADFAC=160
+BULK_LOWER=1500
+
+#
+# END OF DEFAULT SETTINGS
+#
+
+try:
+    execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf"))
+except IOError:
+    pass
+
+# read the previous blocklist
+WHITELIST = set(WHITELIST.split()) if WHITELIST else set()
+prevblocks = []
+prevbulks = []
+BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set()
+newblocks = set()
+newbulks = set()
+
+try:
+    fd = open(BLOCKEDFILE)
+    for line in fd:
+        ip, typ = line.strip().split(' ')
+        if ip not in BLACKLIST:
+            if typ == 'block':
+                prevblocks.append(ip)
+            elif typ == 'bulk':
+                prevbulks.append(ip)
+    fd.close()
+except IOError:
+    pass #ignore non-existing file
+
+# determine current load
+fd = open("/proc/loadavg")
+avgload = int(float(fd.readline().split()[2]))
+fd.close()
+# DB load
+conn = psycopg2.connect('dbname=nominatim')
+cur = conn.cursor()
+cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'")
+dbload = int(cur.fetchone()[0])
+
+BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
+BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
+if len(prevbulks) > 85:
+    BLOCK_LIMIT = BLOCK_LOWER
+
+# get the new block candidates
+cur.execute("""
+  SELECT ipaddress, max(count) FROM
+   ((SELECT * FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log
+      WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i
+   WHERE count > %s)
+   UNION
+   (SELECT ipaddress, count * 3 FROM
+     (SELECT ipaddress, sum(case when endtime is null then 1 else 1+date_part('epoch',endtime-starttime) end) as count FROM new_query_log 
+      WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i
+   WHERE count > %s)) as o
+  GROUP BY ipaddress
+""", (BULKLONG_LIMIT, BULKSHORT_LIMIT))
+
+bulkips = {}
+emergencyblocks = []
+
+for c in cur:
+    if c[0] not in WHITELIST and c[0] not in BLACKLIST:
+        if c[1] > BLOCK_UPPER and c[0] not in prevbulks:
+            newblocks.add(c[0])
+            if c[0] not in prevblocks:
+                emergencyblocks.append(c[0])
+        else:
+            bulkips[c[0]] = c[1]
+
+# IPs from the block list that are no longer in the bulk list
+deblockcandidates = set()
+# IPs from the bulk list that are no longer in the bulk list
+debulkcandidates = set()
+# new IPs to go into the block list
+newlyblocked = []
+
+
+for ip in prevblocks:
+    if ip in bulkips:
+        newblocks.add(ip)
+        del bulkips[ip]
+    else:
+        deblockcandidates.add(ip)    
+        
+for ip in prevbulks:
+    if ip in bulkips:
+        if bulkips[ip] > BLOCK_LIMIT:
+            newblocks.add(ip)
+            newlyblocked.append(ip)
+        else:
+            newbulks.add(ip)
+        del bulkips[ip]
+    else:
+        debulkcandidates.add(ip)
+
+# cross-check deblock candidates
+if deblockcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newblocks.add(c[0])
+        deblockcandidates.remove(c[0])
+# deblocked IPs go back to the bulk pool to catch the ones that simply
+# ignored the HTTP error and just continue to hammer the API.
+# Those that behave and stopped will be debulked a minute later.
+for ip in deblockcandidates:
+    newbulks.add(ip)
+
+# cross-check debulk candidates
+if debulkcandidates:
+    cur.execute("""
+        SELECT DISTINCT ipaddress FROM new_query_log
+        WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+        AND starttime > date_trunc('day', now())
+        """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD))
+
+    for c in cur:
+        newbulks.add(c[0])
+        debulkcandidates.remove(c[0])
+
+for ip in bulkips.iterkeys():
+    newbulks.add(ip)
+
+# write out the new list
+fd = open(BLOCKEDFILE, 'w')
+for ip in newblocks:
+    fd.write(ip + " block\n")
+for ip in newbulks:
+    fd.write(ip + " bulk\n")
+for ip in BLACKLIST:
+    fd.write(ip + " ban\n")
+fd.close()
+
+# write out the log
+logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n'
+fd = open(LOGFILE, 'a')
+if deblockcandidates:
+    fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates)))
+if debulkcandidates:
+    fd.write(logstr % (' debulked:', ', '.join(debulkcandidates)))
+if bulkips:
+    fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys())))
+if emergencyblocks:
+    fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks)))
+if newlyblocked:
+    fd.write(logstr % ('new block:', ', '.join(newlyblocked)))
+fd.close()
diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh
new file mode 100755 (executable)
index 0000000..b9291d9
--- /dev/null
@@ -0,0 +1,20 @@
+#!/bin/bash -e
+#
+# Rotate query logs.
+
+dbname=nominatim
+
+basedir=`dirname $0`
+logfile=`date "+$basedir/../log/query-%F.log.gz"`
+
+# dump the old logfile
+pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile
+
+# remove the old logs
+psql -q -d $dbname -c 'DROP TABLE backup_query_log'
+
+# rotate
+psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"'
+psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime'
+psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)'
+
diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh
new file mode 100755 (executable)
index 0000000..4c16fc6
--- /dev/null
@@ -0,0 +1,14 @@
+#!/bin/bash
+#
+# Vaccum all tables with indices on integer arrays.
+# Agressive vacuuming seems to help against index bloat.
+#
+
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name'
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country'
+#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways'
+
+for i in `seq 0 246`; do
+  psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}"
+done
+
index 5b01798e40161200f6ea476e1293100d2241c7ba..d2d4381ef82b6ed1d3893ea340018be6ecb2647e 100755 (executable)
                        echo "Please download and build osm2pgsql.\nIf it is already installed, check the path in your local settings (settings/local.php) file.\n";
                        fail("osm2pgsql not found in '$osm2pgsql'");
                }
+               $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
                $osm2pgsql .= ' -lsc -O gazetteer --hstore';
-               $osm2pgsql .= ' -C '.$iCacheMemory;
+               $osm2pgsql .= ' -C 16000';
                $osm2pgsql .= ' -P '.$aDSNInfo['port'];
                $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
                passthruCheckReturn($osm2pgsql);
                $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
                $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
                $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
-               $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
+               $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
                if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
 
                $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
index f1a0d0d078035c75d4168922f2e9612cc7516592..02aaaa0fb140804a411bd840abf0c6f7aff84488 100755 (executable)
                        echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n";
 
                        echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id ";
-                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n";
+                       echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n";
 
-            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";";
+            echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n";
 
                }
 
index 76d6f8582c2e1e9646845890b1d451b5ff8c6527..e8972f1d26f4d3cefa962667459966d38f548050 100755 (executable)
@@ -46,7 +46,6 @@
                showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
        }
 
-       if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
        if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
 /*
        // Lock to prevent multiple copies running
 
        if ($aResult['index'])
        {
+               if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
                passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
        }
 
                $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
                $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
                $sCMDImport = CONST_Osm2pgsql_Binary.' -klas -C 2000 -O gazetteer -d '.$aDSNInfo['database'].' '.$sImportFile;
-               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -t '.$aResult['index-instances'];
+               $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'];
                if (!$aResult['no-npi']) {
                        $sCMDIndex .= '-F ';
                }
                        $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
 
                        // Index file
-                       $sThisIndexCmd = $sCMDIndex;
+                       if (!isset($aResult['index-instances']))
+                       {
+                               if (getLoadAverage() < 15)
+                                       $iIndexInstances = 2;
+                               else
+                                       $iIndexInstances = 1;
+                       } else
+                               $iIndexInstances = $aResult['index-instances'];
+
+                       $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances;
 
                        if (!$aResult['no-npi'])
                        {
diff --git a/website/403.html b/website/403.html
new file mode 100644 (file)
index 0000000..c5fd71f
--- /dev/null
@@ -0,0 +1,14 @@
+<html>
+<head>
+<title>Access blocked</title>
+</head>
+<body>
+<h1>Access blocked</h1>
+
+<p>You have been blocked because you have been overusing OSM's geocoding service.
+Please be aware that OSM's resources are limited and shared between many users.
+To have this block lifted, contact the <a href="http://wiki.openstreetmap.org/wiki/System_Administrators">Nominatim system administrator</a>.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
diff --git a/website/509.html b/website/509.html
new file mode 100644 (file)
index 0000000..047d902
--- /dev/null
@@ -0,0 +1,13 @@
+<html>
+<head>
+<title>Bandwidth limit exceeded</title>
+</head>
+<body>
+<h1>Bandwidth limit exceeded</h1>
+
+<p>You have been temporarily blocked because you have been overusing OSM's geocoding service.
+Please adapt your scripts to reduce the number of requests and try again later.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
diff --git a/website/favicon.ico b/website/favicon.ico
new file mode 100644 (file)
index 0000000..0157ea0
Binary files /dev/null and b/website/favicon.ico differ
diff --git a/website/nominatim.xml b/website/nominatim.xml
new file mode 100644 (file)
index 0000000..28684b1
--- /dev/null
@@ -0,0 +1,15 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
+                       xmlns:moz="http://www.mozilla.org/2006/browser/search/">
+       <ShortName>Nominatim</ShortName>
+       <LongName>Nominatim OSM Search</LongName>
+       <Description>Search for a place in OpenStreetMap Nominatim</Description>
+       <InputEncoding>UTF-8</InputEncoding>
+       <OutputEncoding>UTF-8</OutputEncoding>
+       <Url type="text/html" method="get" template="http://nominatim.openstreetmap.org/search/?q={searchTerms}" />
+       <Query role="example" searchTerms="Reigate" />
+       <Developer>Brian Quinion</Developer>
+       <AdultContent>false</AdultContent>
+       <Attribution>Data &amp;copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright.</Attribution>
+</OpenSearchDescription>
+
index 33e6cd5e638b4986469a3bc1bf4168472c348177..c348659e25d02fd7cdc136d223ad9b877bd20cd8 100755 (executable)
@@ -4,6 +4,22 @@
        require_once(dirname(dirname(__FILE__)).'/lib/init-website.php');
        require_once(CONST_BasePath.'/lib/log.php');
 
+    if (preg_match(CONST_BlockedUserAgents, $_SERVER["HTTP_USER_AGENT"]) > 0)
+    {
+        $fLoadAvg = getLoadAverage();
+        if ($fLoadAvg >= CONST_BlockReverseMaxLoad) {
+            header('HTTP/1.0 403 Forbidden');
+            header('Content-type: text/html; charset=utf-8');
+               echo "<html><body><h1>App temporarily blocked</h1>";
+            echo "Your application has been temporarily blocked from the OpenStreetMap Nominatim ";
+            echo "geolocation service due to high server load.";
+            echo "\n</body></html>\n";
+            exit;
+        }
+
+    }
+
+
         if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
         {
                 $fLoadAvg = getLoadAverage();
diff --git a/website/robots.txt b/website/robots.txt
new file mode 100644 (file)
index 0000000..e4d3d3f
--- /dev/null
@@ -0,0 +1,10 @@
+User-agent: ia_archiver
+Allow: /
+
+User-agent: *
+Disallow: /search.php
+Disallow: /search
+Disallow: /details.php
+Disallow: /details
+Disallow: /reverse.php
+Disallow: /reverse
index 139ed0f65fb29e9695f2a21da584a8bef242f381..9940d815ea85a05517577285c324a3f05b1ce727 100755 (executable)
@@ -71,6 +71,7 @@
        if (isset($aLangPrefOrder['name:de'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ru'])) $bReverseInPlan = true;
        if (isset($aLangPrefOrder['name:ja'])) $bReverseInPlan = true;
+       if (isset($aLangPrefOrder['name:pl'])) $bReverseInPlan = true;
 
        $sLanguagePrefArraySQL = "ARRAY[".join(',',array_map("getDBQuoted",$aLangPrefOrder))."]";
 
                                                                else
                                                                        $sSQL .= " limit ".$iLimit;
 
-                                                               if (CONST_Debug) { var_dump($sSQL); }
+                                                               if (CONST_Debug) var_dump($sSQL);
+                                                               $iStartTime = time();
                                                                $aViewBoxPlaceIDs = $oDB->getAll($sSQL);
                                                                if (PEAR::IsError($aViewBoxPlaceIDs))
                                                                {
                                                                        failInternalError("Could not get places for search terms.", $sSQL, $aViewBoxPlaceIDs);
                                                                }
+                                                               if (time() - $iStartTime > 60) {
+                                                                       file_put_contents(CONST_BasePath.'/log/long_queries.log', date('Y-m-d H:i:s', $iStartTime).' '.$sSQL."\n", FILE_APPEND);
+                                                               }
+
 //var_dump($aViewBoxPlaceIDs);
                                                                // Did we have an viewbox matches?
                                                                $aPlaceIDs = array();