class BrowseEverything::Retriever

Class for retrieving a file or resource from a storage provider

Constants

CHUNK_SIZE

Attributes

chunk_size[RW]

Public Class Methods

can_retrieve?(uri, headers = {}) click to toggle source

Determines whether or not a remote resource can be retrieved @param uri [String] URI for the remote resource (usually a URL) @param headers [Hash] any custom headers required to transmit the request

# File lib/browse_everything/retriever.rb, line 37
# Probe a remote resource by requesting only its first byte.
# @param uri [String] location handed to Typhoeus.get
# @param headers [Hash] extra request headers; merged (not mutated) with a
#   one-byte Range header
# @return whatever the response's #success? reports
def can_retrieve?(uri, headers = {})
  probe_headers = headers.merge(Range: 'bytes=0-0')
  Typhoeus.get(uri, headers: probe_headers).success?
end
new() click to toggle source

Constructor

# File lib/browse_everything/retriever.rb, line 45
# Constructor: starts the instance with its chunk size set to the
# CHUNK_SIZE constant.
def initialize
  @chunk_size = CHUNK_SIZE
end

Public Instance Methods

download(spec, target = nil) { |target, retrieved, total| ... } click to toggle source

Download a file or resource @param spec [Hash] @param target [String, nil] system path to the downloaded file (defaults to a temporary file)

# File lib/browse_everything/retriever.rb, line 52
# Write a retrieved resource to disk, chunk by chunk.
# @param spec [Hash] passed through to #retrieve; its 'file_name' entry is
#   used to name the temporary file when no target is given
# @param target [String, nil] path to write to; when nil a temporary path is
#   generated from the base name and extension of spec['file_name']
# @yield [target, retrieved, total] after each chunk is written (optional)
# @return [String] the path that was written
def download(spec, target = nil)
  if target.nil?
    name = spec['file_name']
    suffix = File.extname(name)
    stem = File.basename(name, suffix)
    # The empty block makes Tmpname.create hand back a path without creating anything itself.
    target = Dir::Tmpname.create([stem, suffix]) {}
  end

  notify = block_given?
  File.open(target, 'wb') do |sink|
    retrieve(spec) do |data, received, size|
      sink.write(data)
      yield(target, received, size) if notify
    end
  end

  target
end
retrieve(options, &block) click to toggle source

Retrieve the resource from the storage service @param options [Hash]

# File lib/browse_everything/retriever.rb, line 70
# Retrieve the resource described by +options+, handing it to the block.
# @param options [Hash] string-keyed download spec; 'expires' (optional) is
#   parsed with Time.parse, the rest is interpreted by
#   #extract_download_options
# @raise [ArgumentError] if the 'expires' time is earlier than now
# @raise [URI::BadURIError] if the URL scheme is not file, http or https
def retrieve(options, &block)
  expiry_time_value = options.fetch('expires', nil)
  if expiry_time_value
    expiry_time = Time.parse(expiry_time_value)
    raise ArgumentError, "Download expired at #{expiry_time}" if expiry_time < Time.now
  end

  download_options = extract_download_options(options)
  url = download_options[:url]

  # Compare schemes exactly: an unanchored /https?/ pattern would also accept
  # any scheme that merely contains "http" (e.g. "xhttpx").
  case url.scheme
  when 'file'
    retrieve_file(download_options, &block)
  when 'http', 'https'
    retrieve_http(download_options, &block)
  else
    raise URI::BadURIError, "Unknown URI scheme: #{url.scheme}"
  end
end

Private Instance Methods

extract_download_options(options) click to toggle source

Extract and parse options used to download a file or resource from an HTTP API @param options [Hash] @return [Hash]

# File lib/browse_everything/retriever.rb, line 95
# Normalise a string-keyed download spec into the symbol-keyed options used
# by the retrieve_* helpers.
# @param options [Hash] must contain 'url'; 'headers' and 'file_size' are optional
# @return [Hash] with :url (parsed by Addressable), :headers and :file_size
# @raise [KeyError] if 'url' is missing
def extract_download_options(options)
  raw_url = options.fetch('url')
  # A present-but-nil 'headers' entry is treated the same as a missing one.
  request_headers = options.fetch('headers', {}) || {}
  declared_size = options.fetch('file_size', 0).to_i

  {
    url: ::Addressable::URI.parse(raw_url),
    headers: request_headers,
    file_size: declared_size
  }.tap do |download_options|
    # No usable size was supplied, so look it up.
    download_options[:file_size] = get_file_size(download_options) if declared_size < 1
  end
end
get_file_size(options) click to toggle source

Retrieve the file size @param options [Hash] @return [Integer] the size of the requested file

# File lib/browse_everything/retriever.rb, line 152
# Determine the size of the resource to be downloaded.
# For file URLs the size is read from the file system; for http/https URLs a
# HEAD request is issued and the Content-Length header is used, falling back
# to the supplied :file_size when that header is absent.
# @param options [Hash] requires :url, :headers and :file_size
# @return [Integer] the size of the requested file
# @raise [URI::BadURIError] if the URL scheme is not file, http or https
def get_file_size(options)
  url = options.fetch(:url)
  headers = options.fetch(:headers)
  file_size = options.fetch(:file_size)

  # Compare schemes exactly: an unanchored /https?/ pattern would also accept
  # any scheme that merely contains "http" (e.g. "xhttpx").
  case url.scheme
  when 'file'
    File.size(url.path)
  when 'http', 'https'
    response = Typhoeus.head(url.to_s, headers: headers)
    length_value = response.headers['Content-Length'] || file_size
    length_value.to_i
  else
    raise URI::BadURIError, "Unknown URI scheme: #{url.scheme}"
  end
end
retrieve_file(options) { |chunk, retrieved, file_size| ... } click to toggle source

Retrieve the file from the file system @param options [Hash]

# File lib/browse_everything/retriever.rb, line 116
# Stream a local file to the caller in pieces of at most chunk_size.
# @param options [Hash] requires :url (responds to #path) and :file_size
# @yield [chunk, retrieved, file_size] each piece read, the running total of
#   what has been read so far, and the :file_size value supplied in options
def retrieve_file(options)
  location = options.fetch(:url)
  total = options.fetch(:file_size)
  bytes_so_far = 0

  File.open(location.path, 'rb') do |source|
    until source.eof?
      piece = source.read(chunk_size)
      bytes_so_far += piece.length
      yield(piece, bytes_so_far, total)
    end
  end
end
retrieve_http(options) { |chunk, retrieved, file_size| ... } click to toggle source

Retrieve a resource over HTTP @param options [Hash]

# File lib/browse_everything/retriever.rb, line 132
# Stream a remote resource to the caller using a Typhoeus GET request.
# @param options [Hash] requires :file_size, :headers and :url
# @yield [chunk, retrieved, file_size] each body chunk, the running byte
#   count, and the :file_size value supplied in options
# @raise [DownloadError] when the response status code is not 200
def retrieve_http(options)
  total = options.fetch(:file_size)
  request_headers = options.fetch(:headers)
  location = options.fetch(:url)
  bytes_so_far = 0

  http_request = Typhoeus::Request.new(location.to_s, method: :get, headers: request_headers)

  # Reject the download as soon as the status line is known, before any body arrives.
  http_request.on_headers do |response|
    next if response.code == 200

    raise DownloadError.new("#{self.class}: Failed to download #{location}: Status Code: #{response.code}", response)
  end

  http_request.on_body do |chunk|
    bytes_so_far += chunk.bytesize
    yield(chunk, bytes_so_far, total)
  end

  http_request.run
end