# Always keep the last 4 weeks: files younger than this many days are never
# considered for deletion.
ALWAYS_KEEP_DAYS = 4 * 7
# Files outside the always-keep window are bucketed by calendar month, and only
# the earliest file in each bucket is kept.
#
# date - a Date (anything responding to #year and #month).
#
# Returns the Date of the first day of +date+'s month, used as the bucket key.
def bucket(date)
  Date.new(date.year, date.month, 1)
end
# A dated file found on disk: its path and the Date parsed from its name.
Candidate = Struct.new(:filename, :date)
# Finds all candidates for deletion matching +glob+.
#
# Only regular files are considered; symlinks are skipped even when they point
# at a regular file.
#
# glob         - pattern handed to Dir.glob.
# date_pattern - Regexp whose first capture group is the date in "%y%m%d" form.
#
# Returns an Array of Candidate(filename, date).
# Raises RuntimeError when a matched file name does not match +date_pattern+;
# Date.strptime raises when the captured text is not a valid date.
def list_files(glob, date_pattern)
  real_files = Dir.glob(glob).select do |file|
    File.file?(file) && !File.symlink?(file)
  end

  real_files.map do |file|
    # extract the date from the file name
    m = date_pattern.match(file)
    raise "Unable to extract date string from #{file.inspect}" if m.nil?

    Candidate.new(file, Date.strptime(m[1], "%y%m%d"))
  end
end
# Selects which candidates may be deleted.
#
# Candidates less than ALWAYS_KEEP_DAYS days older than +today+ are always
# kept. The remaining ones are bucketed by calendar month, and all but the
# earliest candidate in each bucket are returned.
#
# today      - Date the ages are measured against.
# candidates - Enumerable of objects responding to #date (a Date).
#
# Returns an Array of the candidates to delete (empty when there are none).
def deletion_candidates(today, candidates)
  expired = candidates.reject { |c| today - c.date < ALWAYS_KEEP_DAYS }

  # bucket key: the first day of the candidate's month
  by_month = expired.group_by { |c| Date.new(c.date.year, c.date.month, 1) }

  # delete all but the earliest in each bucket
  by_month.flat_map { |_month, contents| contents.sort_by(&:date).drop(1) }
end
# Computes the full list of existing files to delete for one family of dumps.
#
# glob         - pattern locating the primary dated files (see list_files).
# date_pattern - Regexp capturing the "%y%m%d" date from a file name.
# today        - Date the ages are measured against.
# expansions   - Array of strftime templates; each is formatted with the date
#                of every file chosen for deletion to name the files that go
#                with it in the same directory.
#
# Returns an Array of paths, restricted to those that currently exist.
def deletions(glob, date_pattern, today, expansions)
  candidates = list_files(glob, date_pattern)
  to_delete = deletion_candidates(today, candidates)

  expanded = to_delete.flat_map do |candidate|
    dir = File.dirname(candidate.filename)
    expansions.map { |e| File.join(dir, candidate.date.strftime(e)) }
  end

  # not every expansion necessarily has a file on disk
  expanded.select { |path| File.exist?(path) }
end
# Command-line flags. Both default to off and must exist before the parser
# blocks run, otherwise the assignments inside them would create block-local
# variables.
#   --dry-run  report what would be deleted without deleting anything
#   --debug    print one line per file
dry_run = false
debug = false

OptionParser.new do |opt|
  opt.on('--dry-run') { dry_run = true }
  opt.on('--debug') { debug = true }
end.parse!
# Directories holding the dumps. The values look like ERB placeholders that are
# substituted when this template is rendered -- confirm against the deployment.
xml_directory = "<%= node[:planet][:dump][:xml_directory] %>"
xml_history_directory = "<%= node[:planet][:dump][:xml_history_directory] %>"
pbf_directory = "<%= node[:planet][:dump][:pbf_directory] %>"
pbf_history_directory = "<%= node[:planet][:dump][:pbf_history_directory] %>"
# Gather the files to delete from each dump directory. For every family the
# glob and regex identify the primary dated file, and the strftime templates
# name all the files removed together with it.
#
# The reference date is taken once so all four calls agree on it. The dots in
# the regexes are escaped so they match only a literal ".".
today = Date.today
to_delete = []

to_delete += deletions(
  "#{xml_directory}/20??/planet-??????.osm.bz2",
  /planet-([0-9]{6})\.osm\.bz2/,
  today,
  ["changesets-%y%m%d.osm.bz2",
   "changesets-%y%m%d.osm.bz2.md5",
   "discussions-%y%m%d.osm.bz2",
   "discussions-%y%m%d.osm.bz2.md5",
   "planet-%y%m%d.osm.bz2",
   "planet-%y%m%d.osm.bz2.md5"])

to_delete += deletions(
  "#{xml_history_directory}/20??/history-??????.osm.bz2",
  /history-([0-9]{6})\.osm\.bz2/,
  today,
  ["history-%y%m%d.osm.bz2",
   "history-%y%m%d.osm.bz2.md5"])

to_delete += deletions(
  "#{pbf_directory}/planet-??????.osm.pbf",
  /planet-([0-9]{6})\.osm\.pbf/,
  today,
  ["planet-%y%m%d.osm.pbf",
   "planet-%y%m%d.osm.pbf.torrent",
   "planet-%y%m%d.osm.pbf.md5"])

to_delete += deletions(
  "#{pbf_history_directory}/history-??????.osm.pbf",
  /history-([0-9]{6})\.osm\.pbf/,
  today,
  ["history-%y%m%d.osm.pbf",
   "history-%y%m%d.osm.pbf.torrent",
   "history-%y%m%d.osm.pbf.md5"])
# Delete every selected file (or only report it when dry_run is set), then
# print the total size and number of files handled.
cmd = dry_run ? "Would delete" : "Deleted"
total_size = 0
num_deleted = 0

to_delete.each do |file|
  # stat first: the size cannot be read once the file has been removed
  s = File.stat(file)
  File.delete(file) unless dry_run
  puts "#{cmd} #{file.inspect}, #{s.size / 1_000_000} MB" if debug
  total_size += s.size
  num_deleted += 1
end

puts "#{cmd} files of total size #{total_size / 1_000_000_000.0} GB"
puts "#{cmd} #{num_deleted} files"