protected $aExcludePlaceIDs = array();
protected $bDeDupe = true;
- protected $bReverseInPlan = false;
+ protected $bReverseInPlan = true;
protected $iLimit = 20;
protected $iFinalLimit = 10;
protected $bBoundedSearch = false;
protected $aViewBox = false;
+ protected $sViewboxSmallSQL = false;
+ protected $sViewboxLargeSQL = false;
protected $aRoutePoints = false;
protected $iMaxRank = 20;
// Get the details for display (is this a redundant extra step?)
$sPlaceIDs = join(',',$aPlaceIDs);
+ $sImportanceSQL = '';
+ if ($this->sViewboxSmallSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxSmallSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
+ if ($this->sViewboxLargeSQL) $sImportanceSQL .= " case when ST_Contains($this->sViewboxLargeSQL, ST_Collect(centroid)) THEN 1 ELSE 0.75 END * ";
+
$sSQL = "select osm_type,osm_id,class,type,admin_level,rank_search,rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id, calculated_country_code as country_code,";
$sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
$sSQL .= "get_name_by_language(name, $sLanguagePrefArraySQL) as placename,";
$sSQL .= "get_name_by_language(name, ARRAY['ref']) as ref,";
$sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
- $sSQL .= "coalesce(importance,0.75-(rank_search::float/40)) as importance, ";
+ $sSQL .= $sImportanceSQL."coalesce(importance,0.75-(rank_search::float/40)) as importance, ";
$sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(CASE WHEN placex.rank_search < 28 THEN placex.place_id ELSE placex.parent_place_id END) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, ";
$sSQL .= "(extratags->'place') as extra_place ";
$sSQL .= "from placex where place_id in ($sPlaceIDs) ";
$sSQL .= "null as placename,";
$sSQL .= "null as ref,";
$sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
- $sSQL .= "-0.15 as importance, ";
+ $sSQL .= $sImportanceSQL."-1.15 as importance, ";
$sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_tiger.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, ";
$sSQL .= "null as extra_place ";
$sSQL .= "from location_property_tiger where place_id in ($sPlaceIDs) ";
$sSQL .= "and 30 between $this->iMinAddressRank and $this->iMaxAddressRank ";
$sSQL .= "group by place_id";
- if (!$this->bDeDupe) $sSQL .= ",place_id";
+ if (!$this->bDeDupe) $sSQL .= ",place_id ";
+ /*
$sSQL .= " union ";
$sSQL .= "select 'L' as osm_type,place_id as osm_id,'place' as class,'house' as type,null as admin_level,30 as rank_search,30 as rank_address,min(place_id) as place_id, min(parent_place_id) as parent_place_id,'us' as country_code,";
$sSQL .= "get_address_by_language(place_id, $sLanguagePrefArraySQL) as langaddress,";
$sSQL .= "null as placename,";
$sSQL .= "null as ref,";
$sSQL .= "avg(ST_X(centroid)) as lon,avg(ST_Y(centroid)) as lat, ";
- $sSQL .= "-0.10 as importance, ";
+ $sSQL .= $sImportanceSQL."-1.10 as importance, ";
$sSQL .= "(select max(p.importance*(p.rank_address+2)) from place_addressline s, placex p where s.place_id = min(location_property_aux.parent_place_id) and p.place_id = s.address_place_id and s.isaddress and p.importance is not null) as addressimportance, ";
$sSQL .= "null as extra_place ";
$sSQL .= "from location_property_aux where place_id in ($sPlaceIDs) ";
$sSQL .= "group by place_id";
if (!$this->bDeDupe) $sSQL .= ",place_id";
$sSQL .= ",get_address_by_language(place_id, $sLanguagePrefArraySQL) ";
+ */
}
- $sSQL .= "order by importance desc";
+ $sSQL .= " order by importance desc";
if (CONST_Debug) { echo "<hr>"; var_dump($sSQL); }
$aSearchResults = $this->oDB->getAll($sSQL);
}
// Hack to make it handle "new york, ny" (and variants) correctly
- $sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $this->sQuery);
+ //$sQuery = str_ireplace(array('New York, ny','new york, new york', 'New York ny','new york new york'), 'new york city, ny', $this->sQuery);
+ $sQuery = $this->sQuery;
// Conflicts between US state abreviations and various words for 'the' in different languages
if (isset($this->aLangPrefOrder['name:en']))
}
// View Box SQL
- $sViewboxCentreSQL = $sViewboxSmallSQL = $sViewboxLargeSQL = false;
+ $sViewboxCentreSQL;
$bBoundingBoxSearch = false;
if ($this->aViewBox)
{
$aBigViewBox[1] = $this->aViewBox[1] + $fWidth;
$aBigViewBox[3] = $this->aViewBox[3] - $fWidth;
- $sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)";
- $sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)";
+ $this->sViewboxSmallSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$this->aViewBox[0].",".(float)$this->aViewBox[1]."),ST_Point(".(float)$this->aViewBox[2].",".(float)$this->aViewBox[3].")),4326)";
+ $this->sViewboxLargeSQL = "ST_SetSRID(ST_MakeBox2D(ST_Point(".(float)$aBigViewBox[0].",".(float)$aBigViewBox[1]."),ST_Point(".(float)$aBigViewBox[2].",".(float)$aBigViewBox[3].")),4326)";
$bBoundingBoxSearch = $this->bBoundedSearch;
}
$sViewboxCentreSQL .= ")'::geometry,4326)";
$sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/69).")";
- $sViewboxSmallSQL = $this->oDB->getOne($sSQL);
- if (PEAR::isError($sViewboxSmallSQL))
+ $this->sViewboxSmallSQL = $this->oDB->getOne($sSQL);
+ if (PEAR::isError($this->sViewboxSmallSQL))
{
- failInternalError("Could not get small viewbox.", $sSQL, $sViewboxSmallSQL);
+ failInternalError("Could not get small viewbox.", $sSQL, $this->sViewboxSmallSQL);
}
- $sViewboxSmallSQL = "'".$sViewboxSmallSQL."'::geometry";
+ $this->sViewboxSmallSQL = "'".$this->sViewboxSmallSQL."'::geometry";
$sSQL = "select st_buffer(".$sViewboxCentreSQL.",".(float)($_GET['routewidth']/30).")";
- $sViewboxLargeSQL = $this->oDB->getOne($sSQL);
- if (PEAR::isError($sViewboxLargeSQL))
+ $this->sViewboxLargeSQL = $this->oDB->getOne($sSQL);
+ if (PEAR::isError($this->sViewboxLargeSQL))
{
- failInternalError("Could not get large viewbox.", $sSQL, $sViewboxLargeSQL);
+ failInternalError("Could not get large viewbox.", $sSQL, $this->sViewboxLargeSQL);
}
- $sViewboxLargeSQL = "'".$sViewboxLargeSQL."'::geometry";
+ $this->sViewboxLargeSQL = "'".$this->sViewboxLargeSQL."'::geometry";
$bBoundingBoxSearch = $this->bBoundedSearch;
}
$aSearch['sType'] = $aSearchTerm['type'];
if (sizeof($aSearch['aName'])) $aSearch['sOperator'] = 'name';
else $aSearch['sOperator'] = 'near'; // near = in for the moment
+ if (strlen($aSearchTerm['operator']) == 0) $aSearch['iSearchRank'] += 1;
// Do we have a shortcut id?
if ($aSearch['sOperator'] == 'name')
{
$sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
- $sSQL .= " where st_contains($sViewboxSmallSQL, ct.centroid)";
+ $sSQL .= " where st_contains($this->sViewboxSmallSQL, ct.centroid)";
if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)";
if (sizeof($this->aExcludePlaceIDs))
{
{
$sSQL = "select place_id from place_classtype_".$aSearch['sClass']."_".$aSearch['sType']." ct";
if ($sCountryCodesSQL) $sSQL .= " join placex using (place_id)";
- $sSQL .= " where st_contains($sViewboxLargeSQL, ct.centroid)";
+ $sSQL .= " where st_contains($this->sViewboxLargeSQL, ct.centroid)";
if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)";
if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, ct.centroid) asc";
$sSQL .= " limit $this->iLimit";
else
{
$sSQL = "select place_id from placex where class='".$aSearch['sClass']."' and type='".$aSearch['sType']."'";
- $sSQL .= " and st_contains($sViewboxSmallSQL, geometry) and linked_place_id is null";
+ $sSQL .= " and st_contains($this->sViewboxSmallSQL, geometry) and linked_place_id is null";
if ($sCountryCodesSQL) $sSQL .= " and calculated_country_code in ($sCountryCodesSQL)";
if ($sViewboxCentreSQL) $sSQL .= " order by st_distance($sViewboxCentreSQL, centroid) asc";
$sSQL .= " limit $this->iLimit";
// TODO: filter out the pointless search terms (2 letter name tokens and less)
// they might be right - but they are just too darned expensive to run
if (sizeof($aSearch['aName'])) $aTerms[] = "name_vector @> ARRAY[".join($aSearch['aName'],",")."]";
- if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
+ //if (sizeof($aSearch['aNameNonSearch'])) $aTerms[] = "array_cat(name_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aNameNonSearch'],",")."]";
if (sizeof($aSearch['aAddress']) && $aSearch['aName'] != $aSearch['aAddress'])
{
// For infrequent name terms disable index usage for address
sizeof($aSearch['aName']) == 1 &&
$aWordFrequencyScores[$aSearch['aName'][reset($aSearch['aName'])]] < CONST_Search_NameOnlySearchFrequencyThreshold)
{
- $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
+ //$aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join(array_merge($aSearch['aAddress'],$aSearch['aAddressNonSearch']),",")."]";
+ $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddress'],",")."]";
}
else
{
$aTerms[] = "nameaddress_vector @> ARRAY[".join($aSearch['aAddress'],",")."]";
- if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
+ //if (sizeof($aSearch['aAddressNonSearch'])) $aTerms[] = "array_cat(nameaddress_vector,ARRAY[]::integer[]) @> ARRAY[".join($aSearch['aAddressNonSearch'],",")."]";
}
}
if ($aSearch['sCountryCode']) $aTerms[] = "country_code = '".pg_escape_string($aSearch['sCountryCode'])."'";
$aTerms[] = "country_code in ($sCountryCodesSQL)";
}
- if ($bBoundingBoxSearch) $aTerms[] = "centroid && $sViewboxSmallSQL";
+ if ($bBoundingBoxSearch) $aTerms[] = "centroid && $this->sViewboxSmallSQL";
if ($sNearPointSQL) $aOrder[] = "ST_Distance($sNearPointSQL, centroid) asc";
$sImportanceSQL = '(case when importance = 0 OR importance IS NULL then 0.75-(search_rank::float/40) else importance end)';
- if ($sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
- if ($sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
+ if ($this->sViewboxSmallSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxSmallSQL, centroid) THEN 1 ELSE 0.5 END";
+ if ($this->sViewboxLargeSQL) $sImportanceSQL .= " * case when ST_Contains($this->sViewboxLargeSQL, centroid) THEN 1 ELSE 0.5 END";
$aOrder[] = "$sImportanceSQL DESC";
if (sizeof($aSearch['aFullNameAddress']))
{
$aPlaceIDs = $this->oDB->getCol($sSQL);
// If not try the aux fallback table
+ /*
if (!sizeof($aPlaceIDs))
{
$sSQL = "select place_id from location_property_aux where parent_place_id in (".$sPlaceIDs.") and housenumber = '".pg_escape_string($aSearch['sHouseNumber'])."'";
if (CONST_Debug) var_dump($sSQL);
$aPlaceIDs = $this->oDB->getCol($sSQL);
}
+ */
if (!sizeof($aPlaceIDs))
{
{
preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
}
+ /*
elseif (preg_match('#MULTIPOLYGON\\(\\(\\(([- 0-9.,]+)#',$aPointPolygon['astext'],$aMatch))
{
preg_match_all('/(-?[0-9.]+) (-?[0-9.]+)/',$aMatch[1],$aPolyPoints,PREG_SET_ORDER);
}
+ */
elseif (preg_match('#POINT\\((-?[0-9.]+) (-?[0-9.]+)\\)#',$aPointPolygon['astext'],$aMatch))
{
$fRadius = 0.01;
{
header("Access-Control-Allow-Origin: *");
header("Access-Control-Allow-Methods: OPTIONS,GET");
+ header("Access-Control-Max-Age: 8640000");
if (!empty($_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']))
{
header("Access-Control-Allow-Headers: ".$_SERVER['HTTP_ACCESS_CONTROL_REQUEST_HEADERS']);
}
if ($_SERVER['REQUEST_METHOD'] == 'OPTIONS') exit;
- if (CONST_ClosedForIndexing && strpos(CONST_ClosedForIndexingExceptionIPs, ','.$_SERVER["REMOTE_ADDR"].',') === false)
- {
- echo "Closed for re-indexing...";
- exit;
- }
-
- $aBucketKeys = array();
-
- if (isset($_SERVER["HTTP_REFERER"])) $aBucketKeys[] = str_replace('www.','',strtolower(parse_url($_SERVER["HTTP_REFERER"], PHP_URL_HOST)));
- if (isset($_SERVER["REMOTE_ADDR"])) $aBucketKeys[] = $_SERVER["REMOTE_ADDR"];
- if (isset($_GET["email"])) $aBucketKeys[] = $_GET["email"];
-
- $fBucketVal = doBucket($aBucketKeys,
- (defined('CONST_ConnectionBucket_PageType')?constant('CONST_ConnectionBucket_Cost_'.CONST_ConnectionBucket_PageType):1) + user_busy_cost(),
- CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
-
- if ($fBucketVal > CONST_ConnectionBucket_WaitLimit && $fBucketVal < CONST_ConnectionBucket_BlockLimit)
- {
- $m = getBucketMemcache();
- $iCurrentSleeping = $m->increment('sleepCounter');
- if (false === $iCurrentSleeping)
- {
- $m->add('sleepCounter', 0);
- $iCurrentSleeping = $m->increment('sleepCounter');
- }
- if ($iCurrentSleeping >= CONST_ConnectionBucket_MaxSleeping || isBucketSleeping($aBucketKeys))
- {
- // Too many threads sleeping already. This becomes a hard block.
- $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_BlockLimit, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
- }
- else
- {
- setBucketSleeping($aBucketKeys, true);
- sleep(($fBucketVal - CONST_ConnectionBucket_WaitLimit)/CONST_ConnectionBucket_LeakRate);
- $fBucketVal = doBucket($aBucketKeys, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_LeakRate, CONST_ConnectionBucket_BlockLimit);
- setBucketSleeping($aBucketKeys, false);
- }
- $m->decrement('sleepCounter');
- }
-
- if (strpos(CONST_BlockedIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false || $fBucketVal >= CONST_ConnectionBucket_BlockLimit)
- {
- header("HTTP/1.0 429 Too Many Requests");
- echo "Your IP has been blocked. \n";
- echo CONST_BlockMessage;
- exit;
- }
-
- header('Content-type: text/html; charset=utf-8');
-
+ header('Content-type: text/html; charset=utf-8');
{
$aResult = array(array(join(' ',$aWords)));
$sFirstToken = '';
- if ($iDepth < 8) {
+ if ($iDepth < 7) {
while(sizeof($aWords) > 1)
{
$sWord = array_shift($aWords);
if (sizeof($aNearPostcodes))
{
- return array(array('lat' => $aNearPostcodes[0]['lat'], 'lon' => $aNearPostcodes[0]['lon'], 'radius' => 0.005));
+ $aPostcodes = array();
+ foreach($aNearPostcodes as $aPostcode)
+ {
+ $aPostcodes[] = array('lat' => $aPostcode['lat'], 'lon' => $aPostcode['lon'], 'radius' => 0.005);
+ }
+
+ return $aPostcodes;
}
return false;
if (CONST_Log_DB)
{
- // Log
- if ($sType == 'search')
- {
- $oDB->query('insert into query_log values ('.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[3]).','.getDBQuoted($hLog[1]).')');
- }
-
- $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format)';
+ if (isset($_GET['email']))
+ $sUserAgent = $_GET['email'];
+ elseif (isset($_SERVER['HTTP_REFERER']))
+ $sUserAgent = $_SERVER['HTTP_REFERER'];
+ else
+ $sUserAgent = $_SERVER['HTTP_USER_AGENT'];
+ $sSQL = 'insert into new_query_log (type,starttime,query,ipaddress,useragent,language,format,searchterm)';
$sSQL .= ' values ('.getDBQuoted($sType).','.getDBQuoted($hLog[0]).','.getDBQuoted($hLog[2]);
- $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($_SERVER['HTTP_USER_AGENT']).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).')';
+ $sSQL .= ','.getDBQuoted($hLog[1]).','.getDBQuoted($sUserAgent).','.getDBQuoted(join(',',$aLanguageList)).','.getDBQuoted($sOutputFormat).','.getDBQuoted($hLog[3]).')';
$oDB->query($sSQL);
}
if (CONST_Log_DB)
{
- $sSQL = 'update query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
- $sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
- $sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
- $sSQL .= ' and query = '.getDBQuoted($hLog[3]);
- $oDB->query($sSQL);
-
$sSQL = 'update new_query_log set endtime = '.getDBQuoted($sEndTime).', results = '.$iNumResults;
$sSQL .= ' where starttime = '.getDBQuoted($hLog[0]);
$sSQL .= ' and ipaddress = '.getDBQuoted($hLog[1]);
--- /dev/null
+#!/bin/sh
+#
+# Plugin to monitor the number of IPs in special pools
+#
+# Parameters:
+#
+# config (required)
+# autoconf (optional - used by munin-config)
+#
+
+if [ "$1" = "config" ]; then
+
+ echo 'graph_title Restricted IPs'
+ echo 'graph_args -l 0'
+ echo 'graph_vlabel number of IPs'
+ echo 'graph_category nominatim'
+ echo 'bulk.label bulk'
+ echo 'bulk.draw AREA'
+ echo 'bulk.type GAUGE'
+ echo 'block.label blocked'
+ echo 'block.draw STACK'
+ echo 'block.type GAUGE'
+ exit 0
+fi
+
+BASEDIR="$(dirname "$(readlink -f "$0")")"
+
+cut -f 2 -d ' ' $BASEDIR/../settings/ip_blocks.map | uniq -c | sed 's:[[:space:]]*\([0-9]\+\) \(.*\):\2.value \1:'
// Website settings
@define('CONST_NoAccessControl', true);
- @define('CONST_ClosedForIndexing', false);
- @define('CONST_ClosedForIndexingExceptionIPs', '');
@define('CONST_BlockedIPs', '');
+ @define('CONST_IPBanFile', CONST_BasePath.'/settings/ip_blocks');
+ @define('CONST_WhitelistedIPs', '');
+ @define('CONST_BlockedUserAgents', '');
+ @define('CONST_BlockReverseMaxLoad', 15);
@define('CONST_BulkUserIPs', '');
@define('CONST_BlockMessage', ''); // additional info to show for blocked IPs
- @define('CONST_Website_BaseURL', 'http://'.php_uname('n').'/');
+ @define('CONST_Website_BaseURL', 'http://nominatim.openstreetmap.org/');
@define('CONST_Tile_Default', 'Mapnik');
@define('CONST_Default_Language', false);
BEGIN
--DEBUG: RAISE WARNING '% %',NEW.osm_type,NEW.osm_id;
+ -- remove operator tag for most places, messes too much with search_name indexes
+ IF NEW.class not in ('amenity', 'shop') THEN
+ NEW.name := delete(NEW.name, 'operator');
+ END IF;
+
-- just block these
IF NEW.class in ('landuse','natural') and NEW.name is null THEN
-- RAISE WARNING 'empty landuse %',NEW.osm_id;
NEW.centroid := null;
-- reclaculate country and partition
- IF NEW.rank_search >= 4 THEN
- --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
- NEW.calculated_country_code := lower(get_country_code(place_centroid));
+ IF NEW.rank_search = 4 THEN
+ -- for countries, believe the mapped country code,
+ -- so that we remain in the right partition if the boundaries
+ -- suddenly expand.
+ NEW.partition := get_partition(place_centroid, lower(NEW.country_code));
+ IF NEW.partition = 0 THEN
+ NEW.calculated_country_code := lower(get_country_code(place_centroid));
+ NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
+ ELSE
+ NEW.calculated_country_code := lower(NEW.country_code);
+ END IF;
ELSE
- NEW.calculated_country_code := NULL;
+ IF NEW.rank_search > 4 THEN
+ --NEW.calculated_country_code := lower(get_country_code(NEW.geometry, NEW.country_code));
+ NEW.calculated_country_code := lower(get_country_code(place_centroid));
+ ELSE
+ NEW.calculated_country_code := NULL;
+ END IF;
+ NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
END IF;
- NEW.partition := get_partition(place_centroid, NEW.calculated_country_code);
NEW.geometry_sector := geometry_sector(NEW.partition, place_centroid);
-- Adding ourselves to the list simplifies address calculations later
--DEBUG: RAISE WARNING '%', existingplacex;
END IF;
+ -- remove operator tag for most places, messes too much with search_name indexes
+ IF NEW.class not in ('amenity', 'shop') THEN
+ NEW.name := delete(NEW.name, 'operator');
+ END IF;
+
-- Just block these - lots and pointless
IF NEW.class in ('landuse','natural') and NEW.name is null THEN
-- if the name tag was removed, older versions might still be lurking in the place table
END IF;
+ -- refuse to update multiplpoygons with too many objects, too much of a performance hit
+ IF ST_NumGeometries(NEW.geometry) > 2000 THEN
+ RAISE WARNING 'Dropping update of % % because of geometry complexity.', NEW.osm_type, NEW.osm_id;
+ RETURN NULL;
+ END IF;
+
IF coalesce(existing.name::text, '') != coalesce(NEW.name::text, '')
OR coalesce(existing.extratags::text, '') != coalesce(NEW.extratags::text, '')
OR coalesce(existing.housenumber, '') != coalesce(NEW.housenumber, '')
-- Indices used only during search and update.
-- These indices are created only after the indexing process is done.
-CREATE INDEX idx_word_word_id on word USING BTREE (word_id);
+CREATE INDEX idx_word_word_id on word USING BTREE (word_id) TABLESPACE ssd;
-CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off);
-CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid);
+CREATE INDEX idx_search_name_nameaddress_vector ON search_name USING GIN (nameaddress_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_name_vector ON search_name USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
+CREATE INDEX idx_search_name_centroid ON search_name USING GIST (centroid) TABLESPACE ssd;
-CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id);
+CREATE INDEX idx_place_addressline_address_place_id on place_addressline USING BTREE (address_place_id) TABLESPACE ssd;
-CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id);
-CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline);
+CREATE INDEX idx_place_boundingbox_place_id on place_boundingbox USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_place_boundingbox_outline ON place_boundingbox USING GIST (outline) TABLESPACE ssd;
DROP INDEX IF EXISTS idx_placex_rank_search;
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search);
-CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address);
-CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) where indexed_status > 0;
-CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) where parent_place_id IS NOT NULL;
-CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) where indexed_status > 0 and class='place' and type='houses';
-CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id);
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_address ON placex USING BTREE (rank_address) TABLESPACE ssd;
+CREATE INDEX idx_placex_pendingsector ON placex USING BTREE (rank_search,geometry_sector) TABLESPACE ssd where indexed_status > 0;
+CREATE INDEX idx_placex_parent_place_id ON placex USING BTREE (parent_place_id) TABLESPACE ssd where parent_place_id IS NOT NULL;
+CREATE INDEX idx_placex_interpolation ON placex USING BTREE (geometry_sector) TABLESPACE ssd where indexed_status > 0 and class='place' and type='houses';
+CREATE INDEX idx_location_area_country_place_id ON location_area_country USING BTREE (place_id) TABLESPACE ssd;
-CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid);
+CREATE INDEX idx_search_name_country_centroid ON search_name_country USING GIST (centroid) TABLESPACE ssd;
-- start
-CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid);
+CREATE INDEX idx_location_property_-partition-_centroid ON location_property_-partition- USING GIST (centroid) TABLESPACE ssd;
-- end
CREATE UNIQUE INDEX idx_place_osm_unique on place using btree(osm_id,osm_type,class,type);
CREATE TABLE location_area_country () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry);
+CREATE INDEX idx_location_area_country_geometry ON location_area_country USING GIST (geometry) TABLESPACE ssd;
CREATE TABLE search_name_country () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id);
-CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_country_place_id ON search_name_country USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_country_name_vector ON search_name_country USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
-- start
CREATE TABLE location_area_large_-partition- () INHERITS (location_area_large);
-CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id);
-CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry);
+CREATE INDEX idx_location_area_large_-partition-_place_id ON location_area_large_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_location_area_large_-partition-_geometry ON location_area_large_-partition- USING GIST (geometry) TABLESPACE ssd;
CREATE TABLE search_name_-partition- () INHERITS (search_name_blank);
-CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id);
-CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid);
-CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off);
+CREATE INDEX idx_search_name_-partition-_place_id ON search_name_-partition- USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_centroid ON search_name_-partition- USING GIST (centroid) TABLESPACE ssd;
+CREATE INDEX idx_search_name_-partition-_name_vector ON search_name_-partition- USING GIN (name_vector) WITH (fastupdate = off) TABLESPACE ssd;
CREATE TABLE location_property_-partition- () INHERITS (location_property);
CREATE INDEX idx_location_property_-partition-_place_id ON location_property_-partition- USING BTREE (place_id);
country_code VARCHAR(2)
);
SELECT AddGeometryColumn('location_road_-partition-', 'geometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry);
-CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id);
+CREATE INDEX idx_location_road_-partition-_geometry ON location_road_-partition- USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_location_road_-partition-_place_id ON location_road_-partition- USING BTREE (place_id) TABLESPACE ssd;
-- end
event text
);
---drop table IF EXISTS query_log;
-CREATE TABLE query_log (
- starttime timestamp,
- query text,
- ipaddress text,
- endtime timestamp,
- results integer
- );
-CREATE INDEX idx_query_log ON query_log USING BTREE (starttime);
-GRANT SELECT ON query_log TO "www-data" ;
-GRANT INSERT ON query_log TO "www-data" ;
-GRANT UPDATE ON query_log TO "www-data" ;
-
CREATE TABLE new_query_log (
type text,
starttime timestamp,
useragent text,
language text,
query text,
+ searchterm text,
endtime timestamp,
results integer,
format text,
GRANT UPDATE ON new_query_log TO "www-data" ;
GRANT SELECT ON new_query_log TO "www-data" ;
-create view vw_search_query_log as SELECT substr(query, 1, 50) AS query, starttime, endtime - starttime AS duration, substr(useragent, 1, 20) as
-useragent, language, results, ipaddress FROM new_query_log WHERE type = 'search' ORDER BY starttime DESC;
-
--drop table IF EXISTS report_log;
CREATE TABLE report_log (
starttime timestamp,
country_code varchar(2),
search_name_count INTEGER,
operator TEXT
- );
-CREATE INDEX idx_word_word_token on word USING BTREE (word_token);
+ ) TABLESPACE ssd;
+CREATE INDEX idx_word_word_token on word USING BTREE (word_token) TABLESPACE ssd;
GRANT SELECT ON word TO "www-data" ;
DROP SEQUENCE seq_word;
CREATE SEQUENCE seq_word start 1;
nameaddress_vector integer[]
);
SELECT AddGeometryColumn('search_name', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id);
+CREATE INDEX idx_search_name_place_id ON search_name USING BTREE (place_id) TABLESPACE ssd;
drop table IF EXISTS place_addressline;
CREATE TABLE place_addressline (
isaddress boolean,
distance float,
cached_rank_address integer
- );
-CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id);
+ ) TABLESPACE data;
+CREATE INDEX idx_place_addressline_place_id on place_addressline USING BTREE (place_id) TABLESPACE ssd;
drop table IF EXISTS place_boundingbox CASCADE;
CREATE TABLE place_boundingbox (
wikipedia TEXT, -- calculated wikipedia article name (language:title)
geometry_sector INTEGER,
calculated_country_code varchar(2)
- );
+ ) TABLESPACE ssd;
SELECT AddGeometryColumn('placex', 'centroid', 4326, 'GEOMETRY', 2);
-CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id);
-CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id);
-CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id);
-CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector);
-CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry);
-CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) WHERE osm_type='N' and rank_search < 26;
+CREATE UNIQUE INDEX idx_place_id ON placex USING BTREE (place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_osmid ON placex USING BTREE (osm_type, osm_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_linked_place_id ON placex USING BTREE (linked_place_id) TABLESPACE ssd;
+CREATE INDEX idx_placex_rank_search ON placex USING BTREE (rank_search, geometry_sector) TABLESPACE ssd;
+CREATE INDEX idx_placex_geometry ON placex USING GIST (geometry) TABLESPACE ssd;
+CREATE INDEX idx_placex_adminname on placex USING BTREE (make_standard_name(name->'name'),rank_search) TABLESPACE ssd WHERE osm_type='N' and rank_search < 26;
--CREATE INDEX idx_placex_indexed ON placex USING BTREE (indexed);
FOR EACH ROW EXECUTE PROCEDURE place_insert();
drop index idx_placex_sector;
-CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id);
+CREATE INDEX idx_placex_sector ON placex USING BTREE (geometry_sector,rank_address,osm_type,osm_id) TABLESPACE ssd;
DROP SEQUENCE seq_postcodes;
CREATE SEQUENCE seq_postcodes start 1;
);
SELECT AddGeometryColumn('import_polygon_error', 'prevgeometry', 4326, 'GEOMETRY', 2);
SELECT AddGeometryColumn('import_polygon_error', 'newgeometry', 4326, 'GEOMETRY', 2);
-CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_error_osmid ON import_polygon_error USING BTREE (osm_type, osm_id) TABLESPACE ssd;
GRANT SELECT ON import_polygon_error TO "www-data";
drop table import_polygon_delete;
class TEXT NOT NULL,
type TEXT NOT NULL
);
-CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id);
+CREATE INDEX idx_import_polygon_delete_osmid ON import_polygon_delete USING BTREE (osm_type, osm_id) TABLESPACE ssd;
GRANT SELECT ON import_polygon_delete TO "www-data";
drop sequence file;
GRANT SELECT ON location_property_tiger_import TO "www-data";
-DROP TABLE location_property_tiger;
-ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
+--DROP TABLE location_property_tiger;
+--ALTER TABLE location_property_tiger_import RENAME TO location_property_tiger;
-ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
-ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
+--ALTER INDEX idx_location_property_tiger_housenumber_parent_place_id_imp RENAME TO idx_location_property_tiger_housenumber_parent_place_id;
+--ALTER INDEX idx_location_property_tiger_place_id_imp RENAME TO idx_location_property_tiger_place_id;
DROP FUNCTION tigger_create_interpolation (linegeo geometry, in_startnumber integer, in_endnumber integer, interpolationtype text, in_street text, in_isin text, in_postcode text);
--- /dev/null
+#!/usr/bin/python
+#
+# Search logs for high-bandwith users and create a list of suspicious IPs.
+# There are three states: bulk, block, ban. The first are bulk requesters
+# that need throtteling, the second bulk requesters that have overdone it
+# and the last manually banned IPs.
+#
+# The list can then be used in apache using rewrite rules to
+# direct bulk users to smaller thread pools or block them. A
+# typical apache config that uses php-fpm pools would look
+# like this:
+#
+# Alias /nominatim-www/ "/var/www/nominatim/"
+# Alias /nominatim-bulk/ "/var/www/nominatim/"
+# <Directory "/var/www/nominatim/">
+# Options MultiViews FollowSymLinks
+# AddType text/html .php
+# </Directory>
+#
+# <Location /nominatim-www>
+# AddHandler fcgi:/var/run/php5-fpm-www.sock .php
+# </Location>
+# <Location /nominatim-bulk>
+# AddHandler fcgi:/var/run/php5-fpm-bulk.sock .php
+# </Location>
+#
+# Redirect 509 /nominatim-block/
+# ErrorDocument 509 "Bandwidth limit exceeded."
+# Redirect 403 /nominatim-ban/
+# ErrorDocument 403 "Access blocked."
+#
+# RewriteEngine On
+# RewriteMap bulklist txt:/home/wherever/ip-block.map
+# RewriteRule ^/(.*) /nominatim-${bulklist:%{REMOTE_ADDR}|www}/$1 [PT]
+#
+
+import os
+import psycopg2
+import datetime
+
+BASEDIR = os.path.normpath(os.path.join(os.path.realpath(__file__), '../..'))
+
+#
+# DEFAULT SETTINGS
+#
+# Copy into settings/ip_blcoks.conf and adapt as required.
+#
+BLOCKEDFILE= BASEDIR + '/settings/ip_blocks.map'
+LOGFILE= BASEDIR + '/log/restricted_ip.log'
+
+# space-separated list of IPs that are never banned
+WHITELIST = ''
+# space-separated list of IPs manually blocked
+BLACKLIST = ''
+# user-agents that should be blocked from bulk mode
+# (matched with startswith)
+UA_BLOCKLIST = ()
+
+# time before a automatically blocked IP is allowed back
+BLOCKCOOLOFF_PERIOD='1 hour'
+# quiet time before an IP is released from the bulk pool
+BULKCOOLOFF_PERIOD='15 min'
+
+BULKLONG_LIMIT=8000
+BULKSHORT_LIMIT=2000
+BLOCK_UPPER=19000
+BLOCK_LOWER=4000
+BLOCK_LOADFAC=380
+BULK_LOADFAC=160
+BULK_LOWER=1500
+MAX_BULK_IPS=85
+
+#
+# END OF DEFAULT SETTINGS
+#
+
+try:
+ execfile(os.path.expanduser(BASEDIR + "/settings/ip_blocks.conf"))
+except IOError:
+ pass
+
+# read the previous blocklist
+WHITELIST = set(WHITELIST.split()) if WHITELIST else set()
+prevblocks = []
+prevbulks = []
+BLACKLIST = set(BLACKLIST.split()) if BLACKLIST else set()
+newblocks = set()
+newbulks = set()
+
+try:
+ fd = open(BLOCKEDFILE)
+ for line in fd:
+ ip, typ = line.strip().split(' ')
+ if ip not in BLACKLIST:
+ if typ == 'block':
+ prevblocks.append(ip)
+ elif typ == 'bulk':
+ prevbulks.append(ip)
+ fd.close()
+except IOError:
+ pass #ignore non-existing file
+
+# determine current load
+fd = open("/proc/loadavg")
+avgload = int(float(fd.readline().split()[2]))
+fd.close()
+# DB load
+conn = psycopg2.connect('dbname=nominatim')
+cur = conn.cursor()
+cur.execute("select count(*)/60 from new_query_log where starttime > now() - interval '1min'")
+dbload = int(cur.fetchone()[0])
+
+BLOCK_LIMIT = max(BLOCK_LOWER, BLOCK_UPPER - BLOCK_LOADFAC * (dbload - 75))
+BULKLONG_LIMIT = max(BULK_LOWER, BULKLONG_LIMIT - BULK_LOADFAC * (avgload - 14))
+if len(prevbulks) > MAX_BULK_IPS:
+ BLOCK_LIMIT = max(3600, BLOCK_LOWER - (len(prevbulks) - MAX_BULK_IPS)*10)
+# if the bulk pool is still empty, clients will be faster, avoid having
+# them blocked in this case
+if len(prevbulks) < 10:
+ BLOCK_LIMIT = 2*BLOCK_UPPER
+
+
+# get the new block candidates
+cur.execute("""
+ SELECT ipaddress, max(count), max(ua) FROM
+ ((SELECT * FROM
+ (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
+ WHERE starttime > now() - interval '1 hour' GROUP BY ipaddress) as i
+ WHERE count > %s)
+ UNION
+ (SELECT ipaddress, count * 3, ua FROM
+ (SELECT ipaddress, sum(case when endtime is null then 1 else 1+1.5*date_part('epoch',endtime-starttime) end) as count, substring(max(useragent) from 1 for 30) as ua FROM new_query_log
+ WHERE starttime > now() - interval '10 min' GROUP BY ipaddress) as i
+ WHERE count > %s)) as o
+ GROUP BY ipaddress
+""", (BULKLONG_LIMIT, BULKSHORT_LIMIT))
+
+bulkips = {}
+emergencyblocks = []
+useragentblocks = []
+
+for c in cur:
+ if c[0] not in WHITELIST and c[0] not in BLACKLIST:
+ # check for user agents that receive an immediate block
+ missing_agent = not c[2]
+ if not missing_agent:
+ for ua in UA_BLOCKLIST:
+ if c[2].startswith(ua):
+ missing_agent = True
+ break
+ if (missing_agent or c[1] > BLOCK_UPPER) and c[0] not in prevblocks:
+ newblocks.add(c[0])
+ if missing_agent:
+ useragentblocks.append(c[0])
+ else:
+ emergencyblocks.append(c[0])
+ else:
+ bulkips[c[0]] = c[1]
+
+# IPs from the block list that are no longer in the bulk list
+deblockcandidates = set()
+# IPs from the bulk list that are no longer in the bulk list
+debulkcandidates = set()
+# new IPs to go into the block list
+newlyblocked = []
+
+
+for ip in prevblocks:
+ if ip in bulkips:
+ newblocks.add(ip)
+ del bulkips[ip]
+ else:
+ deblockcandidates.add(ip)
+
+for ip in prevbulks:
+ if ip not in newblocks:
+ if ip in bulkips:
+ if bulkips[ip] > BLOCK_LIMIT:
+ newblocks.add(ip)
+ newlyblocked.append(ip)
+ else:
+ newbulks.add(ip)
+ del bulkips[ip]
+ else:
+ debulkcandidates.add(ip)
+
+# cross-check deblock candidates
+if deblockcandidates:
+ cur.execute("""
+ SELECT DISTINCT ipaddress FROM new_query_log
+ WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+ """ % ("','".join(deblockcandidates), BLOCKCOOLOFF_PERIOD))
+
+ for c in cur:
+ newblocks.add(c[0])
+ deblockcandidates.remove(c[0])
+# deblocked IPs go back to the bulk pool to catch the ones that simply
+# ignored the HTTP error and just continue to hammer the API.
+# Those that behave and stopped will be debulked a minute later.
+for ip in deblockcandidates:
+ newbulks.add(ip)
+
+# cross-check debulk candidates
+if debulkcandidates:
+ cur.execute("""
+ SELECT DISTINCT ipaddress FROM new_query_log
+ WHERE ipaddress IN ('%s') AND starttime > now() - interval '%s'
+ AND starttime > date_trunc('day', now())
+ """ % ("','".join(debulkcandidates), BULKCOOLOFF_PERIOD))
+
+ for c in cur:
+ newbulks.add(c[0])
+ debulkcandidates.remove(c[0])
+
+for ip in bulkips.iterkeys():
+ newbulks.add(ip)
+
+# write out the new list
+fd = open(BLOCKEDFILE, 'w')
+for ip in newblocks:
+ fd.write(ip + " block\n")
+for ip in newbulks:
+ fd.write(ip + " bulk\n")
+for ip in BLACKLIST:
+ fd.write(ip + " ban\n")
+fd.close()
+
+# write out the log
+logstr = datetime.datetime.now().strftime('%Y-%m-%d %H:%M') + ' %s %s\n'
+fd = open(LOGFILE, 'a')
+if deblockcandidates:
+ fd.write(logstr % ('unblocked:', ', '.join(deblockcandidates)))
+if debulkcandidates:
+ fd.write(logstr % (' debulked:', ', '.join(debulkcandidates)))
+if bulkips:
+ fd.write(logstr % ('new bulks:', ', '.join(bulkips.keys())))
+if emergencyblocks:
+ fd.write(logstr % ('dir.block:', ', '.join(emergencyblocks)))
+if useragentblocks:
+ fd.write(logstr % (' ua block:', ', '.join(useragentblocks)))
+if newlyblocked:
+ fd.write(logstr % ('new block:', ', '.join(newlyblocked)))
+fd.close()
--- /dev/null
+#!/bin/bash -e
+#
+# Rotate query logs.
+
+dbname=nominatim
+
+basedir=`dirname $0`
+logfile=`date "+$basedir/../log/query-%F.log.gz"`
+
+# dump the old logfile
+pg_dump -a -F p -t backup_query_log $dbname | gzip -9 > $logfile
+
+# remove the old logs
+psql -q -d $dbname -c 'DROP TABLE backup_query_log'
+
+# rotate
+psql -q -1 -d $dbname -c 'ALTER TABLE new_query_log RENAME TO backup_query_log;CREATE TABLE new_query_log as (select * from backup_query_log limit 0);GRANT SELECT, INSERT, UPDATE ON new_query_log TO "www-data"'
+psql -q -d $dbname -c 'ALTER INDEX idx_new_query_log_starttime RENAME TO idx_backup_query_log_starttime'
+psql -q -d $dbname -c 'CREATE INDEX idx_new_query_log_starttime ON new_query_log USING BTREE (starttime)'
+
--- /dev/null
+#!/bin/bash
+#
+# Vaccum all tables with indices on integer arrays.
+# Agressive vacuuming seems to help against index bloat.
+#
+
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name'
+psql -q -d nominatim -c 'VACUUM ANALYSE search_name_country'
+#psql -q -d nominatim -c 'VACUUM ANALYSE planet_osm_ways'
+
+for i in `seq 0 246`; do
+ psql -q -d nominatim -c "VACUUM ANALYSE search_name_${i}"
+done
+
{
$osm2pgsql .= ' --flat-nodes '.CONST_Osm2pgsql_Flatnode_File;
}
+ $osm2pgsql .= ' --tablespace-slim-index ssd --tablespace-main-index ssd --tablespace-main-data ssd --tablespace-slim-data data';
$osm2pgsql .= ' -lsc -O gazetteer --hstore';
- $osm2pgsql .= ' -C '.$iCacheMemory;
+ $osm2pgsql .= ' -C 18000';
$osm2pgsql .= ' -P '.$aDSNInfo['port'];
$osm2pgsql .= ' -d '.$aDSNInfo['database'].' '.$aCMDResult['osm-file'];
passthruCheckReturn($osm2pgsql);
$sSQL .= "select 'P',nextval('seq_postcodes'),'place','postcode',postcode,calculated_country_code,";
$sSQL .= "ST_SetSRID(ST_Point(x,y),4326) as geometry from (select calculated_country_code,postcode,";
$sSQL .= "avg(st_x(st_centroid(geometry))) as x,avg(st_y(st_centroid(geometry))) as y ";
- $sSQL .= "from placex where postcode is not null group by calculated_country_code,postcode) as x";
+ $sSQL .= "from placex where postcode is not null and calculated_country_code not in ('ie') group by calculated_country_code,postcode) as x";
if (!pg_query($oDB->connection, $sSQL)) fail(pg_last_error($oDB->connection));
$sSQL = "insert into placex (osm_type,osm_id,class,type,postcode,calculated_country_code,geometry) ";
echo "class = '".pg_escape_string($aPair[0])."' and type = '".pg_escape_string($aPair[1])."';\n";
echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_centroid ";
- echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid);\n";
+ echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING GIST (centroid) tablespace ssd;\n";
echo "CREATE INDEX idx_place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])."_place_id ";
- echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id);\n";
+ echo "ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." USING btree(place_id) tablespace ssd;\n";
- echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";";
+ echo "GRANT SELECT ON place_classtype_".pg_escape_string($aPair[0])."_".pg_escape_string($aPair[1])." TO \"www-data\";\n";
}
showUsage($aCMDOptions, true, 'Select either import of hourly or daily');
}
- if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
if (!isset($aResult['index-rank'])) $aResult['index-rank'] = 0;
/*
if ($aResult['index'])
{
+ if (!isset($aResult['index-instances'])) $aResult['index-instances'] = 1;
passthru(CONST_BasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'].' -r '.$aResult['index-rank']);
}
$sCMDDownload = $sOsmosisCMD.' --read-replication-interval workingDirectory='.$sOsmosisConfigDirectory.' --simplify-change --write-xml-change '.$sImportFile;
$sCMDCheckReplicationLag = $sOsmosisCMD.' -q --read-replication-lag workingDirectory='.$sOsmosisConfigDirectory;
$sCMDImport = $sOsm2pgsqlCmd.' '.$sImportFile;
- $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'].' -t '.$aResult['index-instances'];
+ $sCMDIndex = $sBasePath.'/nominatim/nominatim -i -d '.$aDSNInfo['database'].' -P '.$aDSNInfo['port'];
if (!$aResult['no-npi']) {
$sCMDIndex .= '-F ';
}
$sBatchEnd = getosmosistimestamp($sOsmosisConfigDirectory);
// Index file
- $sThisIndexCmd = $sCMDIndex;
+ if (!isset($aResult['index-instances']))
+ {
+ if (getLoadAverage() < 24)
+ $iIndexInstances = 2;
+ else
+ $iIndexInstances = 1;
+ } else
+ $iIndexInstances = $aResult['index-instances'];
+
+ $sThisIndexCmd = $sCMDIndex.' -t '.$iIndexInstances;
$fCMDStartTime = time();
if (!$aResult['no-npi'])
--- /dev/null
+<html>
+<head>
+<title>Access blocked</title>
+</head>
+<body>
+<h1>Access blocked</h1>
+
+<p>You have been blocked because you have been overusing OSM's geocoding service.
+Please be aware that OSM's resources are limited and shared between many users.
+To have this block lifted, contact the Nominatim system administrator at
+nominatim@openstreetmap.org.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
--- /dev/null
+<html>
+<head>
+<title>Bandwidth limit exceeded</title>
+</head>
+<body>
+<h1>Bandwidth limit exceeded</h1>
+
+<p>You have been temporarily blocked because you have been overusing OSM's geocoding service or because you have not provided sufficient identification of your application. This block will be automatically lifted after a while. Please take the time and adapt your scripts to reduce the number of requests and make sure that you send a valid UserAgent or Referer.</p>
+
+<p>For more information, consult the <a href="http://wiki.openstreetmap.org/wiki/Nominatim_usage_policy">usage policy</a> for the OSM Nominatim server.
+</body>
+</head>
--- /dev/null
+<?xml version="1.0"?>
+ <!DOCTYPE cross-domain-policy SYSTEM "http://www.macromedia.com/xml/dtds/cross-domain-policy.dtd">
+ <cross-domain-policy>
+ <allow-access-from domain="*" />
+ </cross-domain-policy>
--- /dev/null
+<?xml version="1.0" encoding="UTF-8"?>
+<OpenSearchDescription xmlns="http://a9.com/-/spec/opensearch/1.1/"
+ xmlns:moz="http://www.mozilla.org/2006/browser/search/">
+ <ShortName>Nominatim</ShortName>
+ <LongName>Nominatim OSM Search</LongName>
+ <Description>Search for a place in OpenStreetMap Nominatim</Description>
+ <InputEncoding>UTF-8</InputEncoding>
+ <OutputEncoding>UTF-8</OutputEncoding>
+ <Url type="text/html" method="get" template="http://nominatim.openstreetmap.org/search/?q={searchTerms}" />
+ <Query role="example" searchTerms="Reigate" />
+ <Developer>Brian Quinion</Developer>
+ <AdultContent>false</AdultContent>
+ <Attribution>Data &copy; OpenStreetMap contributors, Some Rights Reserved. ODbL, http://www.osm.org/copyright.</Attribution>
+</OpenSearchDescription>
+
require_once(CONST_BasePath.'/lib/PlaceLookup.php');
require_once(CONST_BasePath.'/lib/ReverseGeocode.php');
- if (strpos(CONST_BulkUserIPs, ','.$_SERVER["REMOTE_ADDR"].',') !== false)
- {
- $fLoadAvg = getLoadAverage();
- if ($fLoadAvg > 2) sleep(60);
- if ($fLoadAvg > 4) sleep(120);
- if ($fLoadAvg > 6)
- {
- echo "Bulk User: Temporary block due to high server load\n";
- exit;
- }
- }
-
$oDB =& getDB();
ini_set('memory_limit', '200M');
--- /dev/null
+User-agent: ia_archiver
+Allow: /
+
+User-agent: *
+Disallow: /search.php
+Disallow: /search
+Disallow: /details.php
+Disallow: /details
+Disallow: /reverse.php
+Disallow: /reverse
$aLangPrefOrder = getPreferredLanguages();
$oGeocode->setLanguagePreference($aLangPrefOrder);
+ /*
if (isset($aLangPrefOrder['name:de'])) $oGeocode->setReverseInPlan(true);
if (isset($aLangPrefOrder['name:ru'])) $oGeocode->setReverseInPlan(true);
if (isset($aLangPrefOrder['name:ja'])) $oGeocode->setReverseInPlan(true);
if (isset($aLangPrefOrder['name:pl'])) $oGeocode->setReverseInPlan(true);
+ */
function loadParamsToGeocode($oGeocode, $aParams, $bBatch = false)
{