class ArchiveToday::Archiver

Constants

BASE_URL
DEFAULT_USER_AGENT

Attributes

debug[R]
response[RW]
target_url[R]
user_agent[R]

Public Class Methods

new(url:, user_agent: DEFAULT_USER_AGENT, debug: false) click to toggle source
# File lib/archive_today/archiver.rb, line 11
# Stores the settings for archiving a single URL.
#
# @param url the URL to archive; kept as the target URL
# @param user_agent the user agent value to keep (defaults to
#   DEFAULT_USER_AGENT)
# @param debug flag kept as-is; defaults to false
def initialize(url:, user_agent: DEFAULT_USER_AGENT, debug: false)
  @target_url, @user_agent, @debug = url, user_agent, debug
end

Public Instance Methods

capture() click to toggle source
# File lib/archive_today/archiver.rb, line 17
# Submits the target URL to the archive service and reports where the
# capture can be found.
#
# @return [Hash, nil] a hash with +:url+ and +:screenshot_url+ keys, or nil
#   when a Faraday client/server error is raised along the way
# @raise [RuntimeError] when the submission response is not successful
def capture
  puts 'Submitting URL ...' if debug
  submission = connection.post('/submit/') do |req|
    req.body = submission_body
  end
  # Still a RuntimeError (as the bare `raise` was), but now it says what failed.
  raise "[archive_today] Submission failed with status #{submission.status}" unless submission.success?

  self.response = submission

  {
    url: finalized_url,
    screenshot_url: screenshot_url
  }
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors such as timeouts carry no response; fall back to the exception
  # message instead of indexing into nil.
  detail = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error capturing URL: #{detail}"

  nil
end

Private Instance Methods

archived_url() click to toggle source
# File lib/archive_today/archiver.rb, line 42
# Works out the URL of the archived page from the stored response and
# memoizes it.
#
# Looks at the +location+ header first, then the part after ";url=" in the
# +refresh+ header, and finally falls back to the URL of the response's
# request environment.
#
# @return [String] the archived URL
def archived_url
  # No `return` inside the block: an early return would skip the `||=`
  # assignment and defeat the memoization.
  @archived_url ||= begin
    headers = response.headers

    if headers.key?('location')
      headers['location']
    elsif headers.key?('refresh')
      headers['refresh'].split(';url=').last
    else
      # TODO: handle the history case mentioned here?
      # https://github.com/pastpages/archiveis/blob/master/archiveis/api.py#L81
      # to_s because callers use String methods (gsub, include?) on the result.
      response.env.url.to_s
    end
  end
end
connection() click to toggle source
# File lib/archive_today/archiver.rb, line 102
# Returns the Faraday connection to BASE_URL, building it on first use.
#
# The connection sends the configured user agent, follows redirects,
# raises on error responses, and logs responses when debug is set.
def connection
  return @connection if @connection

  @connection = Faraday.new(BASE_URL) do |builder|
    builder.headers = { 'User-Agent' => user_agent }
    builder.use FaradayMiddleware::FollowRedirects
    builder.use Faraday::Response::RaiseError
    builder.response :logger if debug
  end
end
finalized_url() click to toggle source
# File lib/archive_today/archiver.rb, line 38
# Returns the archived URL with every "/wip" occurrence removed.
def finalized_url
  wip_marker = '/wip'
  archived_url.gsub(wip_marker, '')
end
screenshot_url() click to toggle source
# File lib/archive_today/archiver.rb, line 55
# Looks up the screenshot image URL for the archived page.
#
# Fetches "<archived_url>/image" and reads the +src+ of the
# <tt>img[itemprop="contentUrl"]</tt> element.
#
# @return [String, nil] the screenshot URL; nil when there is no archived
#   URL, the archived URL contains "/wip/", the image element or its +src+
#   is missing, or a Faraday client/server error is raised
def screenshot_url
  return nil unless archived_url
  return nil if archived_url.include? '/wip/'

  page = connection.get do |req|
    req.url "#{archived_url}/image"
  end
  node = Nokogiri::HTML(page.body).at_css('img[itemprop="contentUrl"]')
  # at_css yields nil when nothing matches; treat that as "no screenshot"
  # rather than calling attr on nil.
  url = node&.attr('src')
  puts "Got screenshot URL: #{url}" if debug && url

  url
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors such as timeouts carry no response; fall back to the exception
  # message instead of indexing into nil.
  detail = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error retrieving screenshot URL: #{detail}"

  nil
end
submission_body() click to toggle source
# File lib/archive_today/archiver.rb, line 75
# Builds the form-encoded body for the submission request from the target
# URL, the fixed +anyway+ value of 1, and a freshly fetched submission ID.
def submission_body
  form_fields = [
    [:url, target_url],
    [:anyway, 1],
    [:submitid, unique_submission_id]
  ]

  URI.encode_www_form(form_fields)
end
unique_submission_id() click to toggle source
# File lib/archive_today/archiver.rb, line 85
# Fetches "/" from the archive service and reads the submission ID from the
# +value+ of the <tt>input[name="submitid"]</tt> element.
#
# @return [String, nil] the submission ID; nil when the input element or its
#   +value+ is missing, or a Faraday client/server error is raised
def unique_submission_id
  puts 'Getting unique submission ID ...' if debug
  page = connection.get('/')

  node = Nokogiri::HTML(page.body).at_css('input[name="submitid"]')
  # at_css yields nil when nothing matches; treat that as "no ID" rather
  # than calling attr on nil.
  id = node&.attr('value')
  puts "Got ID: #{id}" if debug && id

  id
rescue Faraday::ClientError, Faraday::ServerError => e
  # Errors such as timeouts carry no response; fall back to the exception
  # message instead of indexing into nil.
  detail = e.response ? "#{e.response[:status]} #{e.response[:body]}" : e.message
  puts "[archive_today] Error retrieving submission ID: #{detail}"

  nil
end