class Datasets::Downloader

Public Class Methods

new(url) click to toggle source
# File lib/datasets/downloader.rb, line 13
# Builds a downloader for +url+.
#
# +url+ may be a URI object (a copy is stored, so the caller's object
# is never shared) or any value URI.parse accepts.
#
# Raises ArgumentError when the resulting URI is not a URI::HTTP
# (URI::HTTPS is a subclass of URI::HTTP and is therefore accepted).
def initialize(url)
  @url = url.is_a?(URI::Generic) ? url.dup : URI.parse(url)
  return if @url.is_a?(URI::HTTP)

  raise ArgumentError, "download URL must be HTTP or HTTPS: <#{@url}>"
end

Public Instance Methods

download(output_path) click to toggle source
# File lib/datasets/downloader.rb, line 25
# Downloads @url into +output_path+.
#
# The body is first written to "<output_path>.partial" and moved to
# +output_path+ only after the whole response has been read. If a
# ".partial" file already exists, a Range request is sent so that the
# download resumes from the existing size; when the server answers
# with anything other than 206 Partial Content the file is rewritten
# from the beginning.
#
# output_path:: object responding to #parent and #to_s (the code calls
#               output_path.parent.mkpath, so a Pathname is expected).
#
# Raises TooManyRedirects (with both the original and the last URL in
# the message) when start_http gives up following redirects. Other
# errors raised by start_http propagate unchanged.
def download(output_path)
  output_path.parent.mkpath

  headers = {"User-Agent" => "Red Datasets/#{VERSION}"}
  start = nil
  partial_output_path = Pathname.new("#{output_path}.partial")
  if partial_output_path.exist?
    start = partial_output_path.size
    headers["Range"] = "bytes=#{start}-"
  end

  start_http(@url, headers) do |response|
    if response.is_a?(Net::HTTPPartialContent)
      mode = "ab"
    else
      # The server ignored the Range header: start over.
      start = nil
      mode = "wb"
    end

    base_name = @url.path.split("/").last
    size_current = 0
    size_max = response.content_length
    if start
      size_current += start
      # content_length is nil when the response has no Content-Length
      # header (e.g. chunked transfer); keep the total unknown then
      # instead of failing on nil + Integer.
      size_max += start if size_max
    end
    progress_reporter = ProgressReporter.new(base_name, size_max)
    partial_output_path.open(mode) do |output|
      response.read_body do |chunk|
        size_current += chunk.bytesize
        progress_reporter.report(size_current)
        output.write(chunk)
      end
    end
  end
  FileUtils.mv(partial_output_path, output_path)
rescue TooManyRedirects => error
  # start_http only knows the last URL it tried; re-raise with the
  # URL the caller originally asked for as well.
  last_url = error.message[/\Atoo many redirections: (.+)\z/, 1]
  raise TooManyRedirects, "too many redirections: #{@url} .. #{last_url}"
end

Private Instance Methods

start_http(url, headers, limit = 10, &block) click to toggle source
# File lib/datasets/downloader.rb, line 66
        # Sends a GET request for +url+ with +headers+ and follows redirects.
        #
        # url::     URI to fetch (must respond to #hostname, #port, #scheme,
        #           #path and #query).
        # headers:: Hash of request headers, reused for every redirect hop.
        # limit::   remaining number of requests allowed; each redirect
        #           consumes one.
        # block::   called with the successful response; its result is
        #           returned from this method.
        #
        # Raises TooManyRedirects when +limit+ reaches 0, and the response's
        # own error type (response.error_type) for any other non-success
        # response, including a redirect that carries no Location header.
        def start_http(url, headers, limit = 10, &block)
  if limit == 0
    raise TooManyRedirects, "too many redirections: #{url}"
  end
  http = Net::HTTP.new(url.hostname, url.port)
  # http.set_debug_output($stderr)
  http.use_ssl = (url.scheme == "https")
  http.start do
    # Net::HTTP::Get rejects an empty path, so a URL without a path
    # component (e.g. "http://example.com") is requested as "/".
    path = url.path
    path = "/" if path.nil? || path.empty?
    path += "?#{url.query}" if url.query
    request = Net::HTTP::Get.new(path, headers)
    http.request(request) do |response|
      # Net::HTTPPartialContent is a Net::HTTPSuccess, so this covers both.
      return block.call(response) if response.is_a?(Net::HTTPSuccess)

      location = response["location"]
      if response.is_a?(Net::HTTPRedirection) && location
        # Location may be relative; resolve it against the current URL so
        # that the next request keeps the right scheme, host and port.
        redirected_url = url + location
        $stderr.puts "Redirect to #{redirected_url}"
        return start_http(redirected_url, headers, limit - 1, &block)
      end

      message = response.code
      if response.message && !response.message.empty?
        message += ": #{response.message}"
      end
      message += ": #{url}"
      raise response.error_type.new(message, response)
    end
  end
end