class Down::Wget

Provides streaming downloads implemented with the wget command-line tool. The design is very similar to Down::Http.

Public Class Methods

new(*arguments) click to toggle source

Initializes the backend with common defaults.

# File lib/down/wget.rb, line 20
# Stores the default wget options followed by any caller-supplied arguments.
#
# The defaults are kept as a single Hash at the head of @arguments; whatever
# is passed in +arguments+ is appended after it, in the order given.
def initialize(*arguments)
  defaults = {
    user_agent:      "Down/#{Down::VERSION}",
    max_redirect:    2,
    dns_timeout:     30,
    connect_timeout: 30,
    read_timeout:    30,
  }

  @arguments = [defaults, *arguments]
end

Public Instance Methods

download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options) click to toggle source

Downloads the remote file to disk. Accepts wget command-line options and some additional options as well.

# File lib/down/wget.rb, line 32
# Streams the remote file at +url+ into a Tempfile and hands it to
# download_result together with +destination+.
#
# url                 - location of the remote file
# args / options      - forwarded to #open (with rewindable: false)
# max_size            - when set, Down::TooLarge is raised if the reported
#                       size or the number of bytes written exceeds it
# content_length_proc - called with the reported size, when one is known
# progress_proc       - called with the tempfile size after each chunk
# destination         - passed through to download_result
#
# On any StandardError the tempfile is closed and unlinked before the error
# is re-raised; the opened IO is always closed.
def download(url, *args, max_size: nil, content_length_proc: nil, progress_proc: nil, destination: nil, **options)
  # Built lazily: the message divides max_size, which may be nil.
  too_large = -> { raise Down::TooLarge, "file is too large (max is #{max_size/1024/1024}MB)" }

  io = open(url, *args, **options, rewindable: false)

  content_length_proc.call(io.size) if content_length_proc && io.size
  too_large.call if max_size && io.size && io.size > max_size

  # Keep the URL's file extension on the temporary file.
  tempfile = Tempfile.new(["down-wget", File.extname(URI(url).path)], binmode: true)
  buffer   = String.new # reused for every read

  until io.eof?
    tempfile.write(io.readpartial(nil, buffer))

    progress_proc.call(tempfile.size) if progress_proc
    too_large.call if max_size && tempfile.size > max_size
  end

  tempfile.open # flush written content

  tempfile.extend Down::Wget::DownloadedFile
  tempfile.url     = url
  tempfile.headers = io.data[:headers]

  download_result(tempfile, destination)
rescue
  tempfile.close! if tempfile
  raise
ensure
  io.close if io
end
open(url, *args, rewindable: true, **options) click to toggle source

Starts retrieving the remote file and returns an IO-like object which downloads the response body on-demand. Accepts wget command-line options.

# File lib/down/wget.rb, line 72
# Runs wget for +url+ and returns a Down::ChunkedIO over the response body.
#
# The command output begins with the response headers (wget is invoked with
# --save-headers by generate_command), so the header section is read and
# parsed first and the rest of the output is exposed lazily as the body.
#
# url            - location of the remote file
# args / options - forwarded to generate_command
# rewindable     - passed to the returned Down::ChunkedIO
#
# Raises Down::Error when the response headers cannot be parsed.
def open(url, *args, rewindable: true, **options)
  process = Down::Wget::Command.execute(generate_command(url, *args, **options))

  # Expose the raw wget output (headers followed by body) as an IO-like object.
  raw = Down::ChunkedIO.new(
    chunks:     process.enum_for(:output),
    on_close:   process.method(:terminate),
    rewindable: false,
  )

  # Read until the blank line that ends the header section has arrived, then
  # split off whatever part of the body came along with it.
  # https://github.com/tmm1/http_parser.rb/issues/29#issuecomment-309976363
  head = raw.readpartial
  head << raw.readpartial until head.include?("\r\n\r\n")
  head, leftover = head.split("\r\n\r\n", 2)

  # Let the HTTP parser extract the status code and response headers.
  parser = HTTP::Parser.new
  parser << head

  headers = parser.headers

  if headers.nil?
    raw.close
    raise Down::Error, "failed to parse response headers"
  end

  status = parser.status_code

  length_header  = headers["Content-Length"]
  content_length = length_header.to_i if length_header

  type_header = headers["Content-Type"]
  charset     = type_header[/;\s*charset=([^;]+)/i, 1] if type_header

  # Body chunks are produced on demand: first the bytes already read along
  # with the headers, then the remaining command output.
  body = Enumerator.new do |yielder|
    yielder << leftover if leftover

    until raw.eof?
      yielder << raw.readpartial
    end
  end

  Down::ChunkedIO.new(
    chunks:     body,
    size:       content_length,
    encoding:   charset,
    rewindable: rewindable,
    on_close:   raw.method(:close),
    data:       { status: status, headers: headers },
  )
end

Private Instance Methods

generate_command(url, *args, **options) click to toggle source

Generates the wget command.

# File lib/down/wget.rb, line 122
# Builds the wget argument list for +url+.
#
# Hash entries stored in @arguments are merged (later ones win) and then
# overridden by +options+; non-Hash entries of @arguments come first, followed
# by +args+, then the merged options. Each entry becomes a switch:
#
#   * a String is used verbatim
#   * a one-character name becomes "-x"
#   * any other name becomes "--long-name" (underscores turned into dashes)
#
# A non-nil value is appended after its switch, converted with #to_s.
# Returns the command as an Array of Strings ending with +url+.
def generate_command(url, *args, **options)
  defaults, flags = @arguments.partition { |argument| argument.is_a?(Hash) }

  merged  = defaults.reduce({}) { |memo, hash| memo.merge(hash) }.merge(options)
  entries = flags + args + merged.to_a

  switches = entries.flat_map do |(name, value)|
    switch =
      case name
      when String then name
      else name.length == 1 ? "-#{name}" : "--#{name.to_s.tr("_", "-")}"
      end

    value.nil? ? [switch] : [switch, value.to_s]
  end

  # --save-headers and "-O -" make wget write headers and body to stdout.
  [*%w[wget --no-verbose --save-headers -O -], *switches, url]
end