From: Sarah Hoffmann Date: Mon, 21 Jul 2014 20:53:45 +0000 (+0200) Subject: Merge remote-tracking branch 'upstream/master' X-Git-Tag: deploy~526 X-Git-Url: https://git.openstreetmap.org./nominatim.git/commitdiff_plain/30aceda42b01ec8696cd8b81c937ea2a987ae0f0?hp=8a4bd7fa4c6c4b9a74e4b0c1e41518110404981f Merge remote-tracking branch 'upstream/master' --- diff --git a/lib/Geocode.php b/lib/Geocode.php index 7563a26d..5451c382 100644 --- a/lib/Geocode.php +++ b/lib/Geocode.php @@ -15,7 +15,7 @@ protected $aExcludePlaceIDs = array(); protected $bDeDupe = true; - protected $bReverseInPlan = false; + protected $bReverseInPlan = true; protected $iLimit = 20; protected $iFinalLimit = 10; @@ -413,6 +413,7 @@ $sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank "; $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id "; + /* $sSQL .= " union "; $sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,"; $sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,"; @@ -427,6 +428,7 @@ $sSQL .= "group by place_id"; if (!$this->bDeDupe) $sSQL .= ",place_id"; $sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) "; + */ } $sSQL .= " order by importance desc"; @@ -958,7 +960,7 @@ { if (isset($aSearchTerm['word_id']) && $aSearchTerm['word_id']) { - if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strlen($sToken) >= 4) + if ((!$bStructuredPhrases || $iPhrase > 0) && sizeof($aCurrentSearch['aName']) && strpos($sToken, ' ') === false) { $aSearch = $aCurrentSearch; $aSearch['iSearchRank'] += 1; @@ -967,8 +969,11 @@ $aSearch['aAddress'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } - elseif (isset($aValidTokens[' '.$sToken])) // revert to the token version? + elseif (isset($aValidTokens[' '.$sToken]) && strlen($sToken) >= 4) // revert to the token version? 
{ + $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + $aSearch['iSearchRank'] += 1; + if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; foreach($aValidTokens[' '.$sToken] as $aSearchTermToken) { if (empty($aSearchTermToken['country_code']) @@ -985,6 +990,7 @@ else { $aSearch['aAddressNonSearch'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; + if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; if ($aSearch['iSearchRank'] < $this->iMaxRank) $aNewWordsetSearches[] = $aSearch; } } @@ -992,7 +998,8 @@ if (!sizeof($aCurrentSearch['aName']) || $aCurrentSearch['iNamePhrase'] == $iPhrase) { $aSearch = $aCurrentSearch; - $aSearch['iSearchRank'] += 2; + $aSearch['iSearchRank'] += 1; + if (!sizeof($aCurrentSearch['aName'])) $aSearch['iSearchRank'] += 1; if (preg_match('#^[0-9]+$#', $sToken)) $aSearch['iSearchRank'] += 2; if ($aWordFrequencyScores[$aSearchTerm['word_id']] < CONST_Max_Word_Frequency) $aSearch['aName'][$aSearchTerm['word_id']] = $aSearchTerm['word_id']; @@ -1245,7 +1252,7 @@ // TODO: filter out the pointless search terms (2 letter name tokens and less) // they might be right - but they are just too darned expensive to run if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]"; - if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; + //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]"; if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress']) { // For infrequent name terms disable index usage for address @@ -1253,12 +1260,13 @@ sizeof($aSearch['aName']) == 1 && $aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold) { - $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]"; + $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]"; } else { $aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]"; - if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; + //if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]"; } } if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'"; @@ -1354,6 +1362,7 @@ $aPlaceIDs = $this->oDB->getCol($sSQL); // If not try the aux fallback table + /* if (!sizeof($aPlaceIDs)) { $sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'"; @@ -1365,6 +1374,7 @@ if (CONST_Debug) var_dump($sSQL); $aPlaceIDs = $this->oDB->getCol($sSQL); } + */ if (!sizeof($aPlaceIDs)) { @@ -1616,10 +1626,12 @@ { preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + /* elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch)) { 
preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER); } + */ elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch)) { $fRadius = 0.01; diff --git a/lib/init-website.php b/lib/init-website.php index 6db83988..013aee4b 100644 --- a/lib/init-website.php +++ b/lib/init-website.php @@ -5,6 +5,7 @@ { header("Access-Control-Allow-Origin: *"); header("Access-Control-Allow-Methods: OPTIONS,GET"); + header("Access-Control-Max-Age: 8640000"); if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS'])) { header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']); @@ -12,53 +13,4 @@ } if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit; - if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false) - { - echo "Closed for re-indexing..."; - exit; - } - - $aBucketKeys = array(); - - if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST))); - if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"]; - if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"]; - - $fBucketVal = doBucket($aBucketKeys, - (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(), - CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - - if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit) - { - $m = getBucketMemcache(); - $iCurrentSleeping = $m->increment('sleepCounter'); - if (false === $iCurrentSleeping) - { - $m->add('sleepCounter', 0); - $iCurrentSleeping = $m->increment('sleepCounter'); - } - if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys)) - { - // Too many threads sleeping already. This becomes a hard block. - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - } - else - { - setBucketSleeping($aBucketKeys, true); - sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate); - $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit); - setBucketSleeping($aBucketKeys, false); - } - $m->decrement('sleepCounter'); - } - - if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit) - { - header("HTTP/1.0 429 Too Many Requests"); - echo "Your IP has been blocked. 
\n"; - echo CONST_BlockMessage; - exit; - } - - header('Content-type: text/html; charset=utf-8'); - + header('Content-type: text/html; charset=utf-8'); diff --git a/lib/lib.php b/lib/lib.php index 1e05def3..b54023a4 100644 --- a/lib/lib.php +++ b/lib/lib.php @@ -94,7 +94,8 @@ function bySearchRank($a, $b) { - if ($a['iSearchRank'] == $b['iSearchRank']) return 0; + if ($a['iSearchRank'] == $b['iSearchRank']) + return strlen($a['sOperator']) + strlen($a['sHouseNumber']) - strlen($b['sOperator']) - strlen($b['sHouseNumber']); return ($a['iSearchRank'] < $b['iSearchRank']?-1:1); } @@ -177,7 +178,7 @@ { $aResult = array(array(join(' ',$aWords))); $sFirstToken = ''; - if ($iDepth < 8) { + if ($iDepth < 7) { while(sizeof($aWords) > 1) { $sWord = array_shift($aWords); diff --git a/lib/log.php b/lib/log.php index 37d83c47..59e04d17 100644 --- a/lib/log.php +++ b/lib/log.php @@ -17,15 +17,15 @@ if (CONST_Log_DB) { - // Log - if ($sType == 'search') - { - $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')'); - } - - $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)'; + if (isset($_GET['email'])) + $sUserAgent = $_GET['email']; + elseif (isset($_SERVER['HTTP_REFERER'])) + $sUserAgent = $_SERVER['HTTP_REFERER']; + else + $sUserAgent = $_SERVER['HTTP_USER_AGENT']; + $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)'; $sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]); - $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')'; + $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')'; $oDB->query($sSQL); } @@ -59,12 +59,6 @@ if (CONST_Log_DB) { - $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; - $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); - $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); - $sSQL .= ' and query = '.getDBQuoted($hLog[3]); - $oDB->query($sSQL); - $sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults; $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]); $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]); diff --git a/munin/nominatim_throttled_ips b/munin/nominatim_throttled_ips new file mode 100755 index 00000000..2fa1d80e --- /dev/null +++ b/munin/nominatim_throttled_ips @@ -0,0 +1,28 @@ +#!/bin/sh +# +# Plugin to monitor the number of IPs in special pools +# +# Parameters: +# +# config (required) +# autoconf (optional - used by munin-config) +# + +if [ "$1" = "config" ]; then + + echo 'graph_title Restricted IPs' + echo 'graph_args -l 0' + echo 'graph_vlabel number of IPs' + echo 'graph_category nominatim' + echo 'bulk.label bulk' + echo 'bulk.draw AREA' + echo 'bulk.type GAUGE' + echo 'block.label blocked' + echo 'block.draw STACK' + echo 'block.type GAUGE' + exit 0 +fi + +BASEDIR="$(dirname "$(readlink -f "$0")")" + +cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:' diff --git a/settings/settings.php b/settings/settings.php index fd50a6f0..11e450e9 100644 --- a/settings/settings.php +++ b/settings/settings.php @@ -52,13 +52,15 @@ // Website settings @define('CONST_NoAccessControl', true); - 
@define('CONST_ClosedForIndexing', false); - @define('CONST_ClosedForIndexingExceptionIPs', ''); @define('CONST_BlockedIPs', ''); + @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks'); + @define('CONST_WhitelistedIPs', ''); + @define('CONST_BlockedUserAgents', ''); + @define('CONST_BlockReverseMaxLoad', 15); @define('CONST_BulkUserIPs', ''); @define('CONST_BlockMessage', ''); // additional info to show for blocked IPs - @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/'); + @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/'); @define('CONST_Tile_Default', 'Mapnik'); @define('CONST_Default_Language', false); diff --git a/sql/functions.sql b/sql/functions.sql index c2359a3c..4224977c 100644 --- a/sql/functions.sql +++ b/sql/functions.sql @@ -936,6 +936,11 @@ DECLARE BEGIN --DEBUG: RAISE WARNING '% %',NEW.osm_type,NEW.osm_id; + -- remove operator tag for most places, messes too much with search_name indexes + IF NEW.class not in ('amenity', 'shop') THEN + NEW.name := delete(NEW.name, 'operator'); + END IF; + -- just block these IF NEW.class in ('landuse','natural') and NEW.name is null THEN -- RAISE WARNING 'empty landuse %',NEW.osm_id; @@ -2062,6 +2067,11 @@ BEGIN --DEBUG: RAISE WARNING '%', existingplacex; END IF; + -- remove operator tag for most places, messes too much with search_name indexes + IF NEW.class not in ('amenity', 'shop') THEN + NEW.name := delete(NEW.name, 'operator'); + END IF; + -- Just block these - lots and pointless IF NEW.class in ('landuse','natural') and NEW.name is null THEN -- if the name tag was removed, older versions might still be lurking in the place table @@ -2252,6 +2262,12 @@ BEGIN END IF; + -- refuse to update multiplpoygons with too many objects, too much of a performance hit + IF ST_NumGeometries(NEW.geometry) > 2000 THEN + RAISE WARNING 'Dropping update of % % because of geometry complexity.', NEW.osm_type, NEW.osm_id; + RETURN NULL; + END IF; + IF coalesce(existing.name::text, '') != coalesce(NEW.name::text, '') OR coalesce(existing.extratags::text, '') != coalesce(NEW.extratags::text, '') OR coalesce(existing.housenumber, '') != coalesce(NEW.housenumber, '') diff --git a/sql/indices.src.sql b/sql/indices.src.sql index ea57e74b..4dc23bd2 100644 --- a/sql/indices.src.sql +++ b/sql/indices.src.sql @@ -1,29 +1,29 @@ -- Indices used only during search and update. -- These indices are created only after the indexing process is done. 
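The index hunks that follow — and the matching changes in partition-tables.src.sql, tables.sql and specialphrases.php further down — pin every search-time index to a tablespace named `ssd`. Below is a minimal sketch of driving the same pattern from Python with psycopg2; it assumes a local `nominatim` database and an already-created `ssd` tablespace (both names taken from this diff, neither guaranteed to exist on another install), and it simply mirrors one of the CREATE INDEX statements that follow.

# Sketch only: create one of the search-time indexes on the fast tablespace
# once indexing has finished. Assumes the "ssd" tablespace and a local
# "nominatim" database already exist; the statement mirrors the diff below.
import psycopg2

conn = psycopg2.connect("dbname=nominatim")
with conn:                      # commit on success, roll back on error
    with conn.cursor() as cur:
        cur.execute("""
            CREATE INDEX idx_search_name_name_vector
                ON search_name USING GIN (name_vector)
                WITH (fastupdate = off)
                TABLESPACE ssd
        """)
conn.close()
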
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id); +CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off); -CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid); +CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; +CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd; -CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id); +CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd; -CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id); -CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline); +CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd; DROP INDEX IF EXISTS idx_placex_rank_search; -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search); -CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address); -CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0; -CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL; -CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses'; -CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id); +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd; +CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0; +CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL; +CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses'; +CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd; -CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid); +CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd; -- start -CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid); +CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd; -- end CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type); diff --git a/sql/partition-tables.src.sql b/sql/partition-tables.src.sql index 29e3d282..e0b1fae0 100644 --- a/sql/partition-tables.src.sql +++ b/sql/partition-tables.src.sql @@ -35,21 +35,21 @@ SELECT 
AddGeometryColumn('search_name_blank', 'centroid', 4326, 'GEOMETRY', 2); CREATE TABLE location_area_country () INHERITS (location_area_large); -CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry); +CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_country () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id); -CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; -- start CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large); -CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id); -CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry); +CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd; CREATE TABLE search_name_-partition- () INHERITS (search_name_blank); -CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id); -CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid); -CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off); +CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd; +CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd; CREATE TABLE location_property_-partition- () INHERITS (location_property); CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id); @@ -62,7 +62,7 @@ CREATE TABLE location_road_-partition- ( country_code VARCHAR(2) ); SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry); -CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id); +CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd; -- end diff --git a/sql/tables.sql b/sql/tables.sql index 038a373e..32ff5e87 100644 --- a/sql/tables.sql +++ b/sql/tables.sql @@ -23,19 +23,6 @@ CREATE TABLE import_npi_log ( event text ); ---drop table IF EXISTS query_log; -CREATE TABLE query_log ( - starttime timestamp, - query text, - ipaddress text, - endtime timestamp, - results integer - ); -CREATE INDEX idx_query_log ON query_log USING BTREE (starttime); -GRANT SELECT ON query_log TO "www-data" ; -GRANT INSERT ON query_log TO 
"www-data" ; -GRANT UPDATE ON query_log TO "www-data" ; - CREATE TABLE new_query_log ( type text, starttime timestamp, @@ -43,6 +30,7 @@ CREATE TABLE new_query_log ( useragent text, language text, query text, + searchterm text, endtime timestamp, results integer, format text, @@ -53,9 +41,6 @@ GRANT INSERT ON new_query_log TO "www-data" ; GRANT UPDATE ON new_query_log TO "www-data" ; GRANT SELECT ON new_query_log TO "www-data" ; -create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as -useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC; - --drop table IF EXISTS report_log; CREATE TABLE report_log ( starttime timestamp, @@ -76,8 +61,8 @@ CREATE TABLE word ( country_code varchar(2), search_name_count INTEGER, operator TEXT - ); -CREATE INDEX idx_word_word_token on word USING BTREE (word_token); + ) TABLESPACE ssd; +CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd; GRANT SELECT ON word TO "www-data" ; DROP SEQUENCE seq_word; CREATE SEQUENCE seq_word start 1; @@ -132,7 +117,7 @@ CREATE TABLE search_name ( nameaddress_vector integer[] ); SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id); +CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_addressline; CREATE TABLE place_addressline ( @@ -142,8 +127,8 @@ CREATE TABLE place_addressline ( isaddress boolean, distance float, cached_rank_address integer - ); -CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id); + ) TABLESPACE data; +CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd; drop table IF EXISTS place_boundingbox CASCADE; CREATE TABLE place_boundingbox ( @@ -196,14 +181,14 @@ CREATE TABLE placex ( wikipedia TEXT, -- calculated wikipedia article name (language:title) geometry_sector INTEGER, calculated_country_code varchar(2) - ); + ) TABLESPACE ssd; SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2); -CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id); -CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id); -CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id); -CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector); -CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry); -CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26; +CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd; +CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd; +CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd; +CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd; +CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26; --CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed); @@ -237,7 +222,7 @@ CREATE TRIGGER place_before_insert BEFORE INSERT ON place FOR EACH ROW EXECUTE PROCEDURE 
place_insert(); drop index idx_placex_sector; -CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id); +CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd; DROP SEQUENCE seq_postcodes; CREATE SEQUENCE seq_postcodes start 1; @@ -255,7 +240,7 @@ CREATE TABLE import_polygon_error ( ); SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2); SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2); -CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_error TO "www-data"; drop table import_polygon_delete; @@ -265,7 +250,7 @@ CREATE TABLE import_polygon_delete ( class TEXT NOT NULL, type TEXT NOT NULL ); -CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id); +CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd; GRANT SELECT ON import_polygon_delete TO "www-data"; drop sequence file; diff --git a/sql/tiger_import_finish.sql b/sql/tiger_import_finish.sql index a0f4efc7..d6ec1833 100644 --- a/sql/tiger_import_finish.sql +++ b/sql/tiger_import_finish.sql @@ -3,10 +3,10 @@ CREATE UNIQUE INDEX idx_location_property_tiger_place_id_imp ON location_propert GRANT SELECT ON location_property_tiger_import TO "www-data"; -DROP TABLE location_property_tiger; -ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger; +--DROP TABLE location_property_tiger; +--ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger; -ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id; -ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id; +--ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id; +--ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id; DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text); diff --git a/utils/cron_banip.py b/utils/cron_banip.py new file mode 100755 index 00000000..53f5e5f1 --- /dev/null +++ b/utils/cron_banip.py @@ -0,0 +1,243 @@ +#!/usr/bin/python +# +# Search logs for high-bandwith users and create a list of suspicious IPs. +# There are three states: bulk, block, ban. The first are bulk requesters +# that need throtteling, the second bulk requesters that have overdone it +# and the last manually banned IPs. +# +# The list can then be used in apache using rewrite rules to +# direct bulk users to smaller thread pools or block them. A +# typical apache config that uses php-fpm pools would look +# like this: +# +# Alias /nominatim-www/ "/var/www/nominatim/" +# Alias /nominatim-bulk/ "/var/www/nominatim/" +# +# Options MultiViews FollowSymLinks +# AddType text/html .php +# +# +# +# AddHandler fcgi:/var/run/php5-fpm-www.sock .php +# +# +# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php +# +# +# Redirect 509 /nominatim-block/ +# ErrorDocument 509 "Bandwidth limit exceeded." 
+# Redirect 403 /nominatim-ban/ +# ErrorDocument 403 "Access blocked." +# +# RewriteEngine On +# RewriteMap bulklist txt:/home/wherever/ip-block.map +# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT] +# + +import os +import psycopg2 +import datetime + +BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..')) + +# +# DEFAULT SETTINGS +# +# Copy into settings/ip_blcoks.conf and adapt as required. +# +BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map' +LOGFILE= BASEDIR + '/log/restricted_ip.log' + +# space-separated list of IPs that are never banned +WHITELIST = '' +# space-separated list of IPs manually blocked +BLACKLIST = '' +# user-agents that should be blocked from bulk mode +# (matched with startswith) +UA_BLOCKLIST = () + +# time before a automatically blocked IP is allowed back +BLOCKCOOLOFF_PERIOD='1 hour' +# quiet time before an IP is released from the bulk pool +BULKCOOLOFF_PERIOD='15 min' + +BULKLONG_LIMIT=8000 +BULKSHORT_LIMIT=2000 +BLOCK_UPPER=19000 +BLOCK_LOWER=4000 +BLOCK_LOADFAC=380 +BULK_LOADFAC=160 +BULK_LOWER=1500 +MAX_BULK_IPS=85 + +# +# END OF DEFAULT SETTINGS +# + +try: + execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf")) +except IOError: + pass + +# read the previous blocklist +WHITELIST = set(WHITELIST.split()) if WHITELIST else set() +prevblocks = [] +prevbulks = [] +BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set() +newblocks = set() +newbulks = set() + +try: + fd = open(BLOCKEDFILE) + for line in fd: + ip, typ = line.strip().split(' ') + if ip not in BLACKLIST: + if typ == 'block': + prevblocks.append(ip) + elif typ == 'bulk': + prevbulks.append(ip) + fd.close() +except IOError: + pass #ignore non-existing file + +# determine current load +fd = open("/proc/loadavg") +avgload = int(float(fd.readline().split()[2])) +fd.close() +# DB load +conn = psycopg2.connect('dbname=nominatim') +cur = conn.cursor() +cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'") +dbload = int(cur.fetchone()[0]) + +BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75)) +BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14)) +if len(prevbulks) > MAX_BULK_IPS: + BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10) +# if the bulk pool is still empty, clients will be faster, avoid having +# them blocked in this case +if len(prevbulks) < 10: + BLOCK_LIMIT = 2*BLOCK_UPPER + + +# get the new block candidates +cur.execute(""" + SELECT ipaddress, max(count), max(ua) FROM + ((SELECT * FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log + WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i + WHERE count > %s) + UNION + (SELECT ipaddress, count * 3, ua FROM + (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log + WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i + WHERE count > %s)) as o + GROUP BY ipaddress +""", (BULKLONG_LIMIT, BULKSHORT_LIMIT)) + +bulkips = {} +emergencyblocks = [] +useragentblocks = [] + +for c in cur: + if c[0] not in WHITELIST and c[0] not in BLACKLIST: + # check for user agents that receive an immediate block + missing_agent = not c[2] + if not missing_agent: + for ua in UA_BLOCKLIST: + if 
c[2].startswith(ua): + missing_agent = True + break + if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks: + newblocks.add(c[0]) + if missing_agent: + useragentblocks.append(c[0]) + else: + emergencyblocks.append(c[0]) + else: + bulkips[c[0]] = c[1] + +# IPs from the block list that are no longer in the bulk list +deblockcandidates = set() +# IPs from the bulk list that are no longer in the bulk list +debulkcandidates = set() +# new IPs to go into the block list +newlyblocked = [] + + +for ip in prevblocks: + if ip in bulkips: + newblocks.add(ip) + del bulkips[ip] + else: + deblockcandidates.add(ip) + +for ip in prevbulks: + if ip not in newblocks: + if ip in bulkips: + if bulkips[ip] > BLOCK_LIMIT: + newblocks.add(ip) + newlyblocked.append(ip) + else: + newbulks.add(ip) + del bulkips[ip] + else: + debulkcandidates.add(ip) + +# cross-check deblock candidates +if deblockcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD)) + + for c in cur: + newblocks.add(c[0]) + deblockcandidates.remove(c[0]) +# deblocked IPs go back to the bulk pool to catch the ones that simply +# ignored the HTTP error and just continue to hammer the API. +# Those that behave and stopped will be debulked a minute later. +for ip in deblockcandidates: + newbulks.add(ip) + +# cross-check debulk candidates +if debulkcandidates: + cur.execute(""" + SELECT DISTINCT ipaddress FROM new_query_log + WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s' + AND starttime > date_trunc('day', now()) + """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD)) + + for c in cur: + newbulks.add(c[0]) + debulkcandidates.remove(c[0]) + +for ip in bulkips.iterkeys(): + newbulks.add(ip) + +# write out the new list +fd = open(BLOCKEDFILE, 'w') +for ip in newblocks: + fd.write(ip + " block\n") +for ip in newbulks: + fd.write(ip + " bulk\n") +for ip in BLACKLIST: + fd.write(ip + " ban\n") +fd.close() + +# write out the log +logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n' +fd = open(LOGFILE, 'a') +if deblockcandidates: + fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates))) +if debulkcandidates: + fd.write(logstr % (' debulked:', ', '.join(debulkcandidates))) +if bulkips: + fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys()))) +if emergencyblocks: + fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks))) +if useragentblocks: + fd.write(logstr % (' ua block:', ', '.join(useragentblocks))) +if newlyblocked: + fd.write(logstr % ('new block:', ', '.join(newlyblocked))) +fd.close() diff --git a/utils/cron_logrotate.sh b/utils/cron_logrotate.sh new file mode 100755 index 00000000..7d3ca4a3 --- /dev/null +++ b/utils/cron_logrotate.sh @@ -0,0 +1,20 @@ +#!/bin/bash -e +# +# Rotate query logs. 
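Before the log-rotation script resumes below, here is a condensed sketch of the decision that cron_banip.py above makes for each heavy user: the block threshold tightens as database load rises and as the bulk pool fills up, and every listed IP ends up in exactly one of the `bulk`, `block` or `ban` pools written to settings/ip_blocks.map. The helper below is illustrative only — it compresses the script's logic and is not part of the codebase; the constant names mirror the script's defaults.

# Illustrative compression of cron_banip.py's thresholds; not part of the codebase.
BLOCK_UPPER, BLOCK_LOWER, BLOCK_LOADFAC = 19000, 4000, 380
MAX_BULK_IPS = 85

def block_limit(dbload, bulk_pool_size):
    """Dynamic cost above which a bulk requester is promoted to 'block'."""
    limit = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
    if bulk_pool_size > MAX_BULK_IPS:
        # bulk pool overflowing: push the heaviest users out more aggressively
        limit = max(3600, BLOCK_LOWER - (bulk_pool_size - MAX_BULK_IPS) * 10)
    elif bulk_pool_size < 10:
        # bulk pool nearly empty: clients are served fast anyway, avoid blocks
        limit = 2 * BLOCK_UPPER
    return limit

def next_pool(ip, hourly_cost, dbload, bulk_pool, whitelist, blacklist):
    """Pool the IP is written to in ip_blocks.map on the next run (simplified)."""
    if ip in whitelist:
        return None          # never throttled
    if ip in blacklist:
        return 'ban'         # manually banned, Apache serves 403.html
    if hourly_cost > block_limit(dbload, len(bulk_pool)):
        return 'block'       # Apache redirects to the 509 page
    return 'bulk'            # stays in the throttled php-fpm pool
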
+ +dbname=nominatim + +basedir=`dirname $0` +logfile=`date "+$basedir/../log/query-%F.log.gz"` + +# dump the old logfile +pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile + +# remove the old logs +psql -q -d $dbname -c 'DROP TABLE backup_query_log' + +# rotate +psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log TABLESPACE ssd2 as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"' +psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime' +psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime) TABLESPACE ssd2' + diff --git a/utils/cron_vacuum.sh b/utils/cron_vacuum.sh new file mode 100755 index 00000000..4c16fc65 --- /dev/null +++ b/utils/cron_vacuum.sh @@ -0,0 +1,14 @@ +#!/bin/bash +# +# Vaccum all tables with indices on integer arrays. +# Agressive vacuuming seems to help against index bloat. +# + +psql -q -d nominatim -c 'VACUUM ANALYSE search_name' +psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country' +#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways' + +for i in `seq 0 246`; do + psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}" +done + diff --git a/utils/setup.php b/utils/setup.php index 71e7dab5..870d37ec 100755 --- a/utils/setup.php +++ b/utils/setup.php @@ -190,8 +190,9 @@ { $osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File; } + $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data'; $osm2pgsql .= ' -lsc -O gazetteer --hstore'; - $osm2pgsql .= ' -C '.$iCacheMemory; + $osm2pgsql .= ' -C 18000'; $osm2pgsql .= ' -P '.$aDSNInfo['port']; $osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file']; passthruCheckReturn($osm2pgsql); @@ -517,7 +518,7 @@ $sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,"; $sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,"; $sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y "; - $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x"; + $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x"; if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection)); $sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) "; diff --git a/utils/specialphrases.php b/utils/specialphrases.php index f1a0d0d0..02aaaa0f 100755 --- a/utils/specialphrases.php +++ b/utils/specialphrases.php @@ -91,12 +91,12 @@ echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n"; echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id "; - echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." 
USING btree(place_id);\n"; + echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n"; - echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";"; + echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n"; } diff --git a/utils/update.php b/utils/update.php index c5fc14a6..a561aaa3 100755 --- a/utils/update.php +++ b/utils/update.php @@ -47,7 +47,6 @@ showUsage($aCMDOptions, true, 'Select either import of hourly or daily'); } - if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0; /* @@ -357,6 +356,7 @@ if ($aResult['index']) { + if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1; passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']); } @@ -374,7 +374,7 @@ $sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile; $sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory; $sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile; - $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances']; + $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port']; if (!$aResult['no-npi']) { $sCMDIndex .= '-F '; } @@ -459,7 +459,16 @@ $sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory); // Index file - $sThisIndexCmd = $sCMDIndex; + if (!isset($aResult['index-instances'])) + { + if (getLoadAverage() < 24) + $iIndexInstances = 2; + else + $iIndexInstances = 1; + } else + $iIndexInstances = $aResult['index-instances']; + + $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances; $fCMDStartTime = time(); if (!$aResult['no-npi']) diff --git a/website/403.html b/website/403.html new file mode 100644 index 00000000..44038a49 --- /dev/null +++ b/website/403.html @@ -0,0 +1,15 @@ + + +Access blocked + + +

+Access blocked
+
+You have been blocked because you have been overusing OSM's geocoding service.
+Please be aware that OSM's resources are limited and shared between many users.
+To have this block lifted, contact the Nominatim system administrator at
+nominatim@openstreetmap.org.
+
+For more information, consult the usage policy for the OSM Nominatim server.

diff --git a/website/509.html b/website/509.html
new file mode 100644
index 00000000..1e67a5a4
--- /dev/null
+++ b/website/509.html
@@ -0,0 +1,12 @@
+Bandwidth limit exceeded
+
+You have been temporarily blocked because you have been overusing OSM's geocoding service or because you have not provided sufficient identification of your application. This block will be automatically lifted after a while. Please take the time and adapt your scripts to reduce the number of requests and make sure that you send a valid UserAgent or Referer.
+
For more information, consult the usage policy for the OSM Nominatim server. + + diff --git a/website/crossdomain.xml b/website/crossdomain.xml new file mode 100644 index 00000000..963a682b --- /dev/null +++ b/website/crossdomain.xml @@ -0,0 +1,5 @@ + + + + + diff --git a/website/favicon.ico b/website/favicon.ico new file mode 100644 index 00000000..0157ea00 Binary files /dev/null and b/website/favicon.ico differ diff --git a/website/nominatim.xml b/website/nominatim.xml new file mode 100644 index 00000000..28684b16 --- /dev/null +++ b/website/nominatim.xml @@ -0,0 +1,15 @@ + + + Nominatim + Nominatim OSM Search + Search for a place in OpenStreetMap Nominatim + UTF-8 + UTF-8 + + + Brian Quinion + false + Data &copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright. + + diff --git a/website/reverse.php b/website/reverse.php index ee6a6100..ed877899 100755 --- a/website/reverse.php +++ b/website/reverse.php @@ -6,18 +6,6 @@ require_once(CONST_BasePath.'/lib/PlaceLookup.php'); require_once(CONST_BasePath.'/lib/ReverseGeocode.php'); - if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false) - { - $fLoadAvg = getLoadAverage(); - if ($fLoadAvg > 2) sleep(60); - if ($fLoadAvg > 4) sleep(120); - if ($fLoadAvg > 6) - { - echo "Bulk User: Temporary block due to high server load\n"; - exit; - } - } - $oDB =& getDB(); ini_set('memory_limit', '200M'); diff --git a/website/robots.txt b/website/robots.txt new file mode 100644 index 00000000..e4d3d3fb --- /dev/null +++ b/website/robots.txt @@ -0,0 +1,10 @@ +User-agent: ia_archiver +Allow: / + +User-agent: * +Disallow: /search.php +Disallow: /search +Disallow: /details.php +Disallow: /details +Disallow: /reverse.php +Disallow: /reverse diff --git a/website/search.php b/website/search.php index 872d9801..a3a926fd 100755 --- a/website/search.php +++ b/website/search.php @@ -20,10 +20,12 @@ $aLangPrefOrder = getPreferredLanguages(); $oGeocode->setLanguagePreference($aLangPrefOrder); + /* if (isset($aLangPrefOrder['name:de'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:ru'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:ja'])) $oGeocode->setReverseInPlan(true); if (isset($aLangPrefOrder['name:pl'])) $oGeocode->setReverseInPlan(true); + */ // Format for output $sOutputFormat = 'html';
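
The 509 page above asks heavy users to cut their request rate and to send an identifying User-Agent or Referer. Below is a minimal sketch of a client that follows that advice against the public endpoint configured in settings.php (CONST_Website_BaseURL); the header value, the one-second pause and the sample queries are illustrative choices, not values mandated by this patch.

# Sketch of a well-behaved bulk client: identifies itself and throttles itself
# so it stays well clear of the limits enforced by cron_banip.py.
import json
import time
import urllib.parse
import urllib.request

BASE_URL = "http://nominatim.openstreetmap.org"   # CONST_Website_BaseURL

def geocode(query):
    params = urllib.parse.urlencode({"q": query, "format": "json", "limit": 1})
    req = urllib.request.Request(
        BASE_URL + "/search?" + params,
        headers={"User-Agent": "example-batch-geocoder/0.1 (ops@example.com)"},
    )
    with urllib.request.urlopen(req, timeout=30) as resp:
        return json.loads(resp.read().decode("utf-8"))

for place in ("Berlin", "Dublin", "Cape Town"):
    print(geocode(place))
    time.sleep(1)   # roughly one request per second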