class Datasets::Downloader
Public Class Methods
new(url)
click to toggle source
# File lib/datasets/downloader.rb, line 13
# Creates a downloader bound to a single HTTP or HTTPS URL.
#
# @param url [String, URI::Generic] the location to download from. A String
#   is parsed with URI.parse; a URI object is duplicated, so the instance
#   holds its own copy rather than the caller's object.
# @raise [URI::InvalidURIError] if a String URL cannot be parsed.
# @raise [ArgumentError] if the URL is not HTTP/HTTPS or has no host.
def initialize(url)
  if url.is_a?(URI::Generic)
    url = url.dup
  else
    url = URI.parse(url)
  end
  @url = url
  # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes.
  unless @url.is_a?(URI::HTTP)
    raise ArgumentError, "download URL must be HTTP or HTTPS: <#{@url}>"
  end
  # A string such as "http:foo" parses to a URI::HTTP without a host. Reject
  # it here instead of failing later when the connection is opened.
  if @url.host.nil? || @url.host.empty?
    raise ArgumentError, "download URL must have a host: <#{@url}>"
  end
end
Public Instance Methods
download(output_path)
click to toggle source
# File lib/datasets/downloader.rb, line 25
# Downloads @url into +output_path+.
#
# Data is first written to "<output_path>.partial" and moved to
# +output_path+ once the body has been read completely. When a partial file
# already exists, a Range request is sent for the remaining bytes; if the
# server does not answer with 206 Partial Content, the partial file is
# overwritten from the beginning.
#
# @param output_path [Pathname] destination file; its parent directory is
#   created if needed.
# @raise [TooManyRedirects] if the redirect limit of start_http is exceeded.
#   The message is rewritten to show the first and the last URL.
def download(output_path)
  output_path.parent.mkpath

  headers = {"User-Agent" => "Red Datasets/#{VERSION}"}
  start = nil
  partial_output_path = Pathname.new("#{output_path}.partial")
  if partial_output_path.exist?
    start = partial_output_path.size
    headers["Range"] = "bytes=#{start}-"
  end

  start_http(@url, headers) do |response|
    if response.is_a?(Net::HTTPPartialContent)
      mode = "ab"
    else
      # The server ignored the Range header: start over.
      start = nil
      mode = "wb"
    end

    base_name = @url.path.split("/").last
    size_current = 0
    size_max = response.content_length
    if start
      size_current += start
      # content_length is nil when the response carries no Content-Length
      # header (e.g. chunked transfer). Keep the total unknown in that case
      # instead of failing on nil + Integer.
      size_max += start if size_max
    end
    progress_reporter = ProgressReporter.new(base_name, size_max)
    partial_output_path.open(mode) do |output|
      response.read_body do |chunk|
        size_current += chunk.bytesize
        progress_reporter.report(size_current)
        output.write(chunk)
      end
    end
  end
  FileUtils.mv(partial_output_path, output_path)
rescue TooManyRedirects => error
  # start_http only knows the last URL it tried; report the original one too.
  last_url = error.message[/\Atoo many redirections: (.+)\z/, 1]
  raise TooManyRedirects, "too many redirections: #{@url} .. #{last_url}"
end
Private Instance Methods
start_http(url, headers, limit = 10, &block)
click to toggle source
# File lib/datasets/downloader.rb, line 66
# Sends a GET request to +url+ and yields the successful response to
# +block+, following redirects.
#
# @param url [URI::Generic] the URL to request.
# @param headers [Hash] request headers, reused unchanged for redirects.
# @param limit [Integer] number of requests that may still be made; each
#   followed redirect decrements it.
# @yieldparam response [Net::HTTPResponse] a 2xx response whose body has
#   not been read yet.
# @return the value returned by +block+.
# @raise [TooManyRedirects] when +limit+ reaches 0.
# @raise the exception class given by +response.error_type+ for any other
#   response, including a redirection without a Location header.
def start_http(url, headers, limit = 10, &block)
  if limit == 0
    raise TooManyRedirects, "too many redirections: #{url}"
  end
  http = Net::HTTP.new(url.hostname, url.port)
  # http.set_debug_output($stderr)
  http.use_ssl = (url.scheme == "https")
  http.start do
    # Net::HTTP rejects an empty request path, so "http://host" has to be
    # requested as "/".
    path = url.path
    path = "/" if path.nil? || path.empty?
    path += "?#{url.query}" if url.query
    request = Net::HTTP::Get.new(path, headers)
    http.request(request) do |response|
      case response
      when Net::HTTPSuccess, Net::HTTPPartialContent
        return block.call(response)
      when Net::HTTPRedirection
        location = response["location"]
        if location && !location.empty?
          # Location may be a relative reference; resolve it against the
          # URL that was just requested. Absolute locations are used as is.
          redirected_url = url + location
          $stderr.puts "Redirect to #{redirected_url}"
          return start_http(redirected_url, headers, limit - 1, &block)
        end
        # A redirection-class response without Location (e.g. 304) cannot
        # be followed; report it like any other unexpected response below.
      end
      message = response.code
      if response.message && !response.message.empty?
        message += ": #{response.message}"
      end
      message += ": #{url}"
      raise response.error_type.new(message, response)
    end
  end
end