class ScrapedPageArchive
Constants
- VERSION
Attributes
storage[R]
Public Class Methods
new(storage)
click to toggle source
# File lib/scraped_page_archive.rb, line 23
# Creates an archive bound to the given storage backend.
#
# @param storage [Object] backend stored in @storage. The other methods shown
#   for this class call #chdir, #path, #save and #github_repo_url on it.
def initialize(storage)
  @storage = storage
end
record(*args, &block)
click to toggle source
# File lib/scraped_page_archive.rb, line 17
# Class-level shortcut: builds an archive around a fresh GitStorage and
# forwards every argument and the block to the instance-level #record.
def self.record(*args, &block)
  new(GitStorage.new).record(*args, &block)
end
Public Instance Methods
filename_from_url(url)
click to toggle source
# File lib/scraped_page_archive.rb, line 53
# Derives the extension-less relative path for a URL.
#
# @param url [String] URL to map
# @return [String] the URL's host joined with the SHA1 hex digest of the
#   whole URL string, e.g. "example.com/<40 hex chars>"
def filename_from_url(url)
  File.join(URI.parse(url).host, Digest::SHA1.hexdigest(url))
end
open_from_archive(url)
click to toggle source
# File lib/scraped_page_archive.rb, line 41
# Rebuilds a response for +url+ from files previously written to the archive.
#
# Runs inside storage.chdir, so the relative path from #filename_from_url is
# resolved against whatever directory the storage switches to.
#
# @param url [#to_s] URL whose archived copy is wanted
# @return [StringIO] the object produced by #response_from
# @raise [Error] when either the '.yml' metadata file or the '.html' body
#   file for the URL is missing
def open_from_archive(url)
  storage.chdir do
    filename = filename_from_url(url.to_s)
    # Each file is optional at this point; a missing one leaves its variable nil.
    meta = YAML.load_file(filename + '.yml') if File.exist?(filename + '.yml')
    response_body = File.read(filename + '.html') if File.exist?(filename + '.html')
    unless meta && response_body
      fail Error, "No archived copy of #{url} found."
    end
    response_from(meta, response_body)
  end
end
record() { ... }
click to toggle source
# File lib/scraped_page_archive.rb, line 27
# Runs the block while recording through VCR, then saves the storage.
#
# When storage.github_repo_url is nil, a warning is printed and the block is
# simply yielded to; nothing is recorded or saved in that case.
#
# @yield the scraping code to run
# @return [Object] the block's value when no repo is configured; otherwise
#   whatever VCR.use_cassette returns (presumably the block's value — confirm
#   against the VCR documentation)
def record(&block)
  if storage.github_repo_url.nil?
    # The first literal ends with a space so the concatenated message reads
    # "git repo, but" rather than "git repo,but".
    warn "The 'scraped_page_archive' gem wants to store the scraped pages in a git repo, " \
         'but it cannot determine which git repo it should use. See ' \
         'https://github.com/everypolitician/scraped_page_archive#usage for details of how ' \
         "to specify the repo.\n\n"
    return yield
  end
  VCR::Archive::Persister.storage_location = storage.path
  ret = VCR.use_cassette('', &block)
  storage.save
  ret
end
response_from(meta, response_body)
click to toggle source
# File lib/scraped_page_archive.rb, line 57
# Wraps an archived body in a StringIO extended with OpenURI::Meta.
#
# @param meta [Hash] metadata read as meta['response']['headers'] (name =>
#   array of values), meta['response']['status'] (a hash whose values become
#   the status) and meta['request']['uri']
# @param response_body [String] the archived body
# @return [StringIO] readable body carrying the archived headers, status and
#   base URI
def response_from(meta, response_body)
  StringIO.new(response_body).tap do |response|
    OpenURI::Meta.init(response)
    # Multiple values for one header are folded into one comma-separated field.
    meta['response']['headers'].each { |k, v| response.meta_add_field(k, v.join(', ')) }
    response.status = meta['response']['status'].values.map(&:to_s)
    response.base_uri = URI.parse(meta['request']['uri'])
  end
end