class S3Enumerator

Constants

HUMAN_READABLE_SHORT_SUFFIX

Attributes

directories[R]
files[R]
largest_file[R]
limit[R]
max_keys[R]
objects[R]
objects_by_ext[R]
objects_by_storage_class[R]
preview_only[R]
s3[R]
s3_bucket_name[R]
s3_bucket_region[R]
s3_object_key_prefix[R]
should_show_summary[R]
total_bytes[R]

Public Class Methods

new(args = { }) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 17
# Builds the enumerator: configures AWS credentials/region, resolves the
# bucket's actual region, and primes every collection accumulator.
#
# @param args [Hash] supported keys:
#   :aws_access_key_id / :aws_secret_access_key / :aws_region / :aws_profile,
#   :preview_only, :show_summary (defaults to preview_only), :limit,
#   :s3_bucket_name (or :bucket_name),
#   :s3_object_key_prefix (or :object_key_prefix)
def initialize(args = { })
  aws_access_key_id = args[:aws_access_key_id]
  aws_secret_access_key = args[:aws_secret_access_key]
  aws_region = args[:aws_region]

  aws_config = {}
  # Explicit static credentials win; otherwise fall back to a shared
  # credentials profile.
  aws_config[:credentials] = (aws_access_key_id || aws_secret_access_key) ?
                                 Aws::Credentials.new(aws_access_key_id, aws_secret_access_key) :
                                 Aws::SharedCredentials.new(profile_name: args[:aws_profile])
  aws_config[:region] = aws_region if aws_region
  Aws.config.update(aws_config) unless aws_config.empty?

  @preview_only = args[:preview_only]
  # Summary output defaults to on when previewing.
  @should_show_summary = args.fetch(:show_summary, preview_only)

  @limit = args[:limit]
  @limit = @limit.to_i if @limit

  # Never request more keys per page than we intend to keep.
  @max_keys = 1000
  @max_keys = @limit if @limit && @limit < @max_keys

  @s3_bucket_name = args[:s3_bucket_name] || args[:bucket_name]
  @s3_object_key_prefix = args[:s3_object_key_prefix] || args[:object_key_prefix] || ''
  @s3_object_key_prefix = @s3_object_key_prefix[1..-1] if @s3_object_key_prefix.start_with?('/')

  @s3 = Aws::S3::Client.new
  # get_bucket_location returns an empty location_constraint for the
  # legacy us-east-1 region.
  @s3_bucket_region = s3.get_bucket_location(bucket: @s3_bucket_name).location_constraint
  @s3_bucket_region = 'us-east-1' if @s3_bucket_region.empty?

  # Prime every accumulator exposed via attr_reader so all readers are safe
  # before #retrieve_objects runs (previously @objects_by_storage_class,
  # @total_bytes and @largest_file were left nil).
  @objects = []
  @files = []
  @ignored = []
  @directories = []
  @total_bytes = 0
  @largest_file = nil
  @objects_by_ext = Hash.new { |h, k| h[k] = [] }
  @objects_by_storage_class = Hash.new { |h, k| h[k] = [] }
end

Public Instance Methods

concat_objects(objects, resp) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 53
# Appends the page contents of a +list_objects_v2+ response onto +objects+,
# mutating the array in place and returning it.
def concat_objects(objects, resp)
  objects.push(*resp.contents)
end
grouped_data_to_table(collection, key_name) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 143
# Renders a grouping (key => [objects]) as table rows: a header row followed
# by one row per group with humanized count, short byte size, and full byte
# count.
#
# @param collection [Hash{Object=>Array}] groups of objects responding to #size
# @param key_name [String] header label for the grouping column
# @return [Array<Array>] rows suitable for #print_table
def grouped_data_to_table(collection, key_name)
  table_data = [ [ key_name, 'Count', 'Short Bytes', 'Total Bytes' ] ]
  collection.each do |key, objects|
    group_total_size = objects.sum(&:size)
    human_readable_group_total_size = humanize_number(group_total_size)
    human_readable_group_total_size_short = human_readable_bytes_short(human_readable_group_total_size)
    table_data << [ key, humanize_number(objects.length), human_readable_group_total_size_short, human_readable_group_total_size ]
  end
  table_data
end
human_readable_bytes_short(human_readable_number) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 135
# Abbreviates an already comma-grouped number string by keeping only the
# leading group and attaching the magnitude suffix selected by the number
# of commas (looked up in HUMAN_READABLE_SHORT_SUFFIX).
def human_readable_bytes_short(human_readable_number)
  leading, _sep, _rest = human_readable_number.partition(',')
  suffix = HUMAN_READABLE_SHORT_SUFFIX[human_readable_number.count(',')]
  "#{leading} #{suffix}"
end
humanize_number(number) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 139
# Inserts comma thousands-separators into a number's decimal representation,
# e.g. 1234567 -> "1,234,567".
def humanize_number(number)
  number.to_s.reverse.scan(/.{1,3}/).join(',').reverse
end
print_table(data, options = { }) click to toggle source
process_files(args = { }) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 180
# Runs #process_objects over just the non-ignored files, passing the
# precomputed byte total so progress output can count down bytes remaining.
# The caller's block is now captured and forwarded — previously it was
# silently dropped because the method never declared &block.
def process_files(args = { }, &block)
  process_objects(files, { total_bytes: total_bytes }, &block)
end
process_objects(objects, args = {}) { |object, self| ... } click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 184
# Iterates +objects+, yielding each (object, self) pair.
#
# With no block: returns +objects+ untouched.
# With args[:quiet]: maps the block over the objects silently, returning the
#   block results.
# Otherwise: yields each object while printing a progress line that counts
#   down the bytes remaining.
#
# @param objects [Array] items responding to #size and #key
# @param args [Hash] :quiet to suppress progress output; :total_bytes to
#   skip recomputing the byte total
def process_objects(objects, args = {})
  return objects unless block_given?
  return objects.map { |object| yield object, self } if args[:quiet]

  bytes_remaining = args[:total_bytes] || objects.sum(&:size)
  total_count = objects.length
  counter = 0
  objects.each do |object|
    counter += 1
    human_readable_bytes_remaining = humanize_number(bytes_remaining)
    human_readable_bytes_remaining_short = human_readable_bytes_short(human_readable_bytes_remaining)
    human_readable_object_bytes = humanize_number(object.size)
    human_readable_object_bytes_short = human_readable_bytes_short(human_readable_object_bytes)
    puts "Processing #{humanize_number(counter)} of #{humanize_number(total_count)} #{human_readable_object_bytes} (#{human_readable_object_bytes_short}) of #{human_readable_bytes_remaining} (#{human_readable_bytes_remaining_short})  #{object.key}"
    # A block is guaranteed here (guard clause above), so yield unconditionally.
    yield object, self
    bytes_remaining -= object.size
  end
end
retrieve_objects() { |object, self| ... } click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 62
# Pages through the bucket (honoring +limit+), partitions the results into
# directories, ignored dot-files, and files, and rebuilds the per-extension
# and per-storage-class groupings plus byte totals.
#
# Yields (object, self) for every object when a block is given.
# @return [Array] the collected objects (also available via #objects)
def retrieve_objects(&block)
  @objects = []

  resp = s3.list_objects_v2(bucket: s3_bucket_name, prefix: s3_object_key_prefix, max_keys: max_keys)
  loop do
    concat_objects(objects, resp)
    break if !resp.next_page? || (limit && total_object_count >= limit)
    resp = resp.next_page
  end
  # Paging can overshoot the limit by up to one page; trim the excess.
  @objects = objects.first(limit) if limit

  # Reset every accumulator — including @ignored, which previously was not
  # reset here and leaked entries across repeated calls.
  @files = []
  @ignored = []
  @directories = []
  @objects_by_ext = Hash.new { |h, k| h[k] = [] }
  @objects_by_storage_class = Hash.new { |h, k| h[k] = [] }
  @total_bytes = 0
  @largest_file = nil
  objects.each do |object|
    if object.key.end_with?('/')
      # Keys ending in '/' are treated as folder placeholders.
      @directories << object
    else
      filename = File.basename(object.key)
      # Dot-files (e.g. .DS_Store) are tracked separately and excluded
      # from totals and groupings.
      if filename.start_with?('.')
        @ignored << object
      else
        @files << object
        @total_bytes += object.size
        @largest_file = object unless @largest_file && @largest_file.size > object.size
        filename_ext = File.extname(filename).downcase
        objects_by_ext[filename_ext] << object
        objects_by_storage_class[object.storage_class] << object
      end
    end

    yield object, self if block_given?
  end
end
run(&block) click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 130
# Fetches everything (forwarding any block to #retrieve_objects), then
# prints the summary when configured to do so.
def run(&block)
  retrieve_objects(&block)
  show_summary if should_show_summary
end
show_summary() click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 155
# Prints a human-readable roll-up of the last #retrieve_objects pass:
# bucket identity, object/directory/file counts, total size, the largest
# file, and per-extension / per-storage-class breakdown tables.
def show_summary
  objects_by_ext_table_data = grouped_data_to_table(objects_by_ext, 'File Ext')
  objects_by_storage_class_table_data = grouped_data_to_table(objects_by_storage_class, 'Storage Class')

  human_readable_total_bytes = humanize_number(total_bytes)
  human_readable_total_bytes_short = human_readable_bytes_short(human_readable_total_bytes)
  # NOTE(review): the same row array instance is appended to both tables —
  # fine while rows are only read.
  row_data = [ 'TOTAL', humanize_number(objects.length), human_readable_total_bytes_short,  human_readable_total_bytes]
  objects_by_ext_table_data << row_data
  objects_by_storage_class_table_data << row_data

  puts "\n\n--- Summary ---"
  puts "Bucket Name: #{s3_bucket_name}"
  puts "Bucket Region: #{s3_bucket_region}"
  puts "Object Key Prefix: #{s3_object_key_prefix}"
  puts "Total Objects: #{humanize_number(total_object_count)}"
  puts "Total Directories: #{humanize_number(directories.length)}"
  puts "Total Files: #{humanize_number(files.length)}"
  puts "Total Size (in bytes): #{human_readable_total_bytes} (#{human_readable_total_bytes_short})"
  # NOTE(review): interpolates the object itself (default #to_s);
  # largest_file&.key may have been intended — confirm with maintainer.
  puts "Largest File: #{largest_file}"
  puts "\n"
  print_table(objects_by_ext_table_data, { has_totals: true })
  puts "\n"
  print_table(objects_by_storage_class_table_data, { has_totals: true })
end
total_object_count() click to toggle source
# File lib/envoi/utils/s3_enumerator.rb, line 58
# Total number of objects collected so far (files, directories, and
# ignored entries alike).
def total_object_count
  objects.size
end