From 240313208ea75d2b57e0f1e231d244bf4e5bd955 Mon Sep 17 00:00:00 2001
From: Grant Slater
Date: Mon, 25 Sep 2023 18:16:28 +0100
Subject: [PATCH] Add redirects to planet S3

---
 cookbooks/planet/recipes/default.rb                | 11 ++-
 .../templates/default/apache-s3-ip2region.erb      | 92 +++++++++++++++++++
 cookbooks/planet/templates/default/apache.erb      | 27 ++++--
 3 files changed, 123 insertions(+), 7 deletions(-)
 create mode 100644 cookbooks/planet/templates/default/apache-s3-ip2region.erb

diff --git a/cookbooks/planet/recipes/default.rb b/cookbooks/planet/recipes/default.rb
index fdeb9b8f5..db55c246d 100644
--- a/cookbooks/planet/recipes/default.rb
+++ b/cookbooks/planet/recipes/default.rb
@@ -24,8 +24,9 @@ include_recipe "planet::aws"
 include_recipe "munin"
 
 package %w[
-  perl
   php-cli
+  python3
+  python3-geoip2
 ]
 
 remote_directory "/store/planet#html" do
@@ -109,6 +110,14 @@ template "/usr/local/bin/apache-latest-planet-filename" do
   notifies :restart, "service[apache2]"
 end
 
+template "/usr/local/bin/apache-s3-ip2region" do
+  source "apache-s3-ip2region.erb"
+  owner "root"
+  group "root"
+  mode "755"
+  notifies :restart, "service[apache2]"
+end
+
 apache_module "cgid"
 apache_module "rewrite"
 apache_module "proxy_http"
diff --git a/cookbooks/planet/templates/default/apache-s3-ip2region.erb b/cookbooks/planet/templates/default/apache-s3-ip2region.erb
new file mode 100644
index 000000000..92f438b16
--- /dev/null
+++ b/cookbooks/planet/templates/default/apache-s3-ip2region.erb
@@ -0,0 +1,92 @@
+#!/usr/bin/env python3
+
+import sys
+import os
+import geoip2.database
+import ipaddress
+
+# Constants
+DB_PATH = "<%= node[:geoipupdate][:directory] %>/GeoLite2-Country.mmdb"
+
+# Default region when continent doesn't match any in the dictionary
+DEFAULT_REGION = "eu-central-1"
+
+# Mapping of continents to AWS regions
+CONTINENT_TO_AWS_REGION = {
+    "NA": "us-west-2",  # North America
+    "OC": "us-west-2",  # Oceania
+    "SA": "us-west-2",  # South America
+}
+
+# Global to store last known modification time and database reader
+last_mod_time = None
+reader = None
+
+def is_valid_ip(ip_str):
+    """Check if a string is a valid IPv4 or IPv6 address."""
+    try:
+        ipaddress.ip_address(ip_str)
+        return True
+    except ValueError:
+        return False
+
+def get_reader():
+    """Get the geoip2 database reader. Reload if the DB file has changed.
+
+    Returns None when the database is missing or cannot be opened. If a
+    changed database cannot be opened, the previous reader stays in use.
+    """
+    global last_mod_time
+    global reader
+
+    try:
+        current_mod_time = os.path.getmtime(DB_PATH)
+    except OSError:
+        return None  # Database file missing or unreadable
+
+    # If file has changed or reader isn't initialized, reload it
+    if reader is None or current_mod_time != last_mod_time:
+        try:
+            new_reader = geoip2.database.Reader(DB_PATH)
+        except Exception:
+            # A half-written or corrupt file must not kill this long-running
+            # helper; keep answering from the previous reader (or None)
+            return reader
+        if reader is not None:
+            reader.close()  # Close the old reader only once the new one is open
+        reader = new_reader
+        last_mod_time = current_mod_time
+
+    return reader
+
+def get_continent_from_ip(ip_address):
+    """Return the continent code for a given IP address, or None if unknown."""
+    if not is_valid_ip(ip_address):
+        return None
+    db_reader = get_reader()
+    if db_reader is None:
+        return None  # No continent as DB is missing
+    try:
+        return db_reader.country(ip_address).continent.code
+    except Exception:
+        # Address not found in the database or the lookup failed
+        return None
+
+def determine_aws_region(continent_code):
+    """Determine AWS region based on the continent code using a dictionary."""
+    return CONTINENT_TO_AWS_REGION.get(continent_code, DEFAULT_REGION)
+
+def main():
+    """Main function to process IP addresses from stdin and return AWS regions."""
+    for line in sys.stdin:
+        ip_address = line.strip()
+
+        continent_code = get_continent_from_ip(ip_address)
+        aws_region = determine_aws_region(continent_code)
+
+        # Apache expects exactly one unbuffered answer line per lookup
+        sys.stdout.write(f"{aws_region}\n")
+        sys.stdout.flush()
+
+if __name__ == "__main__":
+    main()
diff --git a/cookbooks/planet/templates/default/apache.erb b/cookbooks/planet/templates/default/apache.erb
index faddc0589..6d76783af 100644
--- a/cookbooks/planet/templates/default/apache.erb
+++ b/cookbooks/planet/templates/default/apache.erb
@@ -34,17 +34,15 @@
   RewriteEngine on
 
   RewriteMap latestplanet prg:/usr/local/bin/apache-latest-planet-filename
+  RewriteMap ip2region prg:/usr/local/bin/apache-s3-ip2region
 
-  #Direct, no redirect for the following
+  # Direct, no redirect for the following
   RewriteCond %{REMOTE_ADDR} ^127\. [OR]
   RewriteCond %{REMOTE_ADDR} ^10\. [OR]
-  RewriteCond %{REMOTE_ADDR} ^193\.60\. [OR]
-  RewriteCond %{REMOTE_ADDR} ^193\.61\. [OR]
-  RewriteCond %{REMOTE_ADDR} ^193\.62\. [OR]
-  RewriteCond %{REMOTE_ADDR} ^193\.63\. [OR]
   RewriteCond %{QUERY_STRING} nomirror
-  RewriteRule .* - [L]
+  RewriteRule .* - [L]
 
+  # Use RewriteMap latestplanet to redirect -latest symlink to resolved file eg: planet-latest.osm.bz2 -> 2023/planet-230918.osm.bz2
   RewriteRule ^(/planet/planet\-latest\.osm\.bz2(\.torrent)?)$ ${latestplanet:$1} [R,L]
   RewriteRule ^(/planet/full\-history/history\-latest\.osm\.bz2(\.torrent)?)$ ${latestplanet:$1} [R,L]
   RewriteRule ^(/planet/changesets\-latest\.osm\.bz2(\.torrent)?)$ ${latestplanet:$1} [R,L]
@@ -58,10 +56,27 @@
   RewriteRule ^(/cc\-by\-sa/changesets\-latest\.osm\.bz2)$ ${latestplanet:$1} [R,L]
   RewriteRule ^(/cc\-by\-sa/relations\-latest\.osm\.bz2)$ ${latestplanet:$1} [R,L]
 
+  RewriteRule ^(/notes/planet\-notes\-latest\.osn\.bz2)$ ${latestplanet:$1} [R,L]
+
   # Block an abusive fake user agent
   RewriteCond %{HTTP_USER_AGENT} "=Mozilla/5.0 (Linux; Android) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.131 Safari/537.36"
   RewriteRule /.*$ - [F,L]
 
+  # Redirect dated dumps to S3. Files from the current year always go to
+  # eu-central-1; older files go to the region picked by the ip2region map.
+  <% start_year = 2008 %>
+  <% current_year = Time.now.year %>
+  <% (start_year..current_year).each do |year| %>
+  <% year_two = sprintf('%02d', year % 100) %>
+  <% region = year == current_year ? 'eu-central-1' : '${ip2region:%{REMOTE_ADDR}|eu-central-1}' %>
+  RewriteRule ^/pbf/(planet\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.pbf(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet/pbf/<%= year %>/$1 [R,L]
+  RewriteRule ^/pbf/full\-history/(history\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.pbf(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet-full-history/pbf/<%= year %>/$1 [R,L]
+  RewriteRule ^/planet/<%= year %>/(planet\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet/osm/<%= year %>/$1 [R,L]
+  RewriteRule ^/planet/<%= year %>/(changesets\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/changesets/osm/<%= year %>/$1 [R,L]
+  RewriteRule ^/planet/<%= year %>/(discussions\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/discussions/osm/<%= year %>/$1 [R,L]
+  RewriteRule ^/planet/<%= year %>/full\-history/(history\-<%= year_two %>[0-1][0-9][0-3][0-9]\.osm\.bz2(\.torrent|\.md5)?)$ https://osm-planet-<%= region %>.s3.amazonaws.com/planet-full-history/osm/<%= year %>/$1 [R,L]
+  <% end %>
+
   # Temporary download rate limit due to upstream fibre connection issues. - 13 Jan 2020
   SetOutputFilter RATE_LIMIT
 
-- 
2.39.5