class ArchiveToday::Archiver
Constants
- BASE_URL
- DEFAULT_USER_AGENT
Attributes
debug[R]
response[RW]
target_url[R]
user_agent[R]
Public Class Methods
new(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
click to toggle source
# File lib/archive_today/archiver.rb, line 11
# Stores the settings for archiving a single page.
#
# url        - the page to archive; kept as +target_url+.
# user_agent - kept as +user_agent+; falls back to DEFAULT_USER_AGENT.
# debug      - kept as +debug+; other methods print progress when it is truthy.
def initialize(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
  @debug, @target_url, @user_agent = debug, url, user_agent
end
Public Instance Methods
capture()
click to toggle source
# File lib/archive_today/archiver.rb, line 17
# Posts the submission form to '/submit/' and reports where the capture lives.
#
# Returns a Hash with :url (from finalized_url) and :screenshot_url (from
# screenshot_url), or nil when Faraday raises a client or server error (the
# error is printed).
#
# Raises RuntimeError when the response is not successful yet no Faraday
# error was raised for it.
def capture
  puts 'Submitting URL ...' if debug

  result = connection.post('/submit/') { |req| req.body = submission_body }

  # A bare `raise` here would surface as a message-less "unhandled exception";
  # keep the RuntimeError but say what went wrong.
  raise "[archive_today] Unexpected response status: #{result.status}" unless result.success?

  # Drop any URL cached from a previous capture so the result always
  # reflects the response stored below.
  @archived_url = nil
  self.response = result

  { url: finalized_url, screenshot_url: screenshot_url }
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors without an HTTP response (e.g. timeouts) carry a nil response;
  # fall back to the exception message instead of indexing into nil.
  details = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error capturing URL: #{details}"
  nil
end
Private Instance Methods
archived_url()
click to toggle source
# File lib/archive_today/archiver.rb, line 42
# Works out the URL of the capture from the stored +response+.
#
# Lookup order:
#   1. the 'location' response header, when present;
#   2. the part after ';url=' in the 'refresh' response header, when present;
#   3. otherwise the URL of the request that produced the response.
#
# The value is derived from +response+ on every call rather than cached:
# +response+ is writable, so a cached value would go stale once it is
# replaced, and the header lookups are cheap.
#
# Returns the URL; the fallback is converted with #to_s because callers
# apply String methods to the result.
def archived_url
  headers = response.headers

  if headers.key?('location')
    headers[:location]
  elsif headers.key?('refresh')
    headers[:refresh].split(';url=').last
  else
    # TODO: handle the history case mentioned here?
    # https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
    response.env.url.to_s
  end
end
connection()
click to toggle source
# File lib/archive_today/archiver.rb, line 102
# Returns the Faraday connection for BASE_URL, building it on first use and
# reusing the same object afterwards.
#
# The connection sends +user_agent+ as its User-Agent header and registers
# the FollowRedirects and RaiseError middleware, in that order. Response
# logging is added only when +debug+ is truthy.
def connection
  return @connection if @connection

  @connection = Faraday.new(BASE_URL) do |builder|
    builder.headers = { 'User-Agent' => user_agent }
    builder.use FaradayMiddleware::FollowRedirects
    builder.use Faraday::Response::RaiseError
    builder.response :logger if debug
  end
end
finalized_url()
click to toggle source
# File lib/archive_today/archiver.rb, line 38
# Returns +archived_url+ with its leading '/wip' path segment removed.
#
# Only a '/wip' segment directly after the origin (or at the very start of
# a relative URL) is stripped. Removing every '/wip' substring would also
# damage ids that merely begin with "wip" and any target URL embedded later
# in the path.
#
# Returns nil when +archived_url+ is nil, matching the guard in
# screenshot_url.
def finalized_url
  url = archived_url
  return nil unless url

  # Group 1 is the optional "scheme://host" prefix; the lookahead makes sure
  # 'wip' is a whole path segment.
  url.to_s.sub(%r{\A((?:[A-Za-z][A-Za-z0-9+.-]*://[^/]+)?)/wip(?=/|\z)}, '\1')
end
screenshot_url()
click to toggle source
# File lib/archive_today/archiver.rb, line 55
# Looks up the screenshot image address for the capture.
#
# Fetches "<archived_url>/image" and reads the +src+ attribute of the first
# <img itemprop="contentUrl"> element in the returned HTML.
#
# Returns the +src+ value, or nil when:
#   * +archived_url+ is nil or contains '/wip/';
#   * the page has no matching <img> element or it has no +src+;
#   * Faraday raises a client or server error (the error is printed).
def screenshot_url
  page_url = archived_url
  return nil unless page_url

  page_url = page_url.to_s
  return nil if page_url.include? '/wip/'

  page = connection.get("#{page_url}/image")
  node = Nokogiri::HTML(page.body).at_css('img[itemprop="contentUrl"]')
  # at_css yields nil when nothing matches; don't call #attr on nil.
  url = node&.attr('src')

  puts "Got screenshot URL: #{url}" if debug && url
  url
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors without an HTTP response (e.g. timeouts) carry a nil response;
  # fall back to the exception message instead of indexing into nil.
  details = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error retrieving screenshot URL: #{details}"
  nil
end
submission_body()
click to toggle source
# File lib/archive_today/archiver.rb, line 75
# Builds the form-encoded body for the submission request.
#
# Fields, in order: +url+ (the target URL), +anyway+ (always 1) and
# +submitid+ (the value returned by unique_submission_id).
#
# Returns the encoded String.
def submission_body
  form_fields = {
    url: target_url,
    anyway: 1,
    submitid: unique_submission_id
  }

  URI.encode_www_form(form_fields)
end
unique_submission_id()
click to toggle source
# File lib/archive_today/archiver.rb, line 85
# Fetches '/' and reads the +value+ of its <input name="submitid"> element.
#
# Returns the +value+ attribute, or nil when:
#   * the page has no such input or it has no +value+;
#   * Faraday raises a client or server error (the error is printed).
def unique_submission_id
  puts 'Getting unique submission ID ...' if debug

  page = connection.get('/')
  node = Nokogiri::HTML(page.body).at_css('input[name="submitid"]')
  # at_css yields nil when nothing matches; don't call #attr on nil.
  id = node&.attr('value')

  puts "Got ID: #{id}" if debug && id
  id
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors without an HTTP response (e.g. timeouts) carry a nil response;
  # fall back to the exception message instead of indexing into nil.
  details = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error retrieving submission ID: #{details}"
  nil
end