module Scrapers::Xkcd

Constants

PUBDATE_FORMAT
XKCD_URL

Public Class Methods

get_pubdate(url) click to toggle source

Fetch the HTTP headers of the image file and read its Last-Modified date, which we use as the comic's publication date.

# File lib/scrapers/xkcd.rb, line 42
# Determine a comic's publication date from its image file.
#
# Sends an HTTP HEAD request for +url+ and formats the response's
# Last-Modified header using PUBDATE_FORMAT. Falls back to the current
# time when the header is missing or cannot be parsed.
#
# +url+ = (string) absolute http or https URL of the comic image
#
# returns the publication date as a string formatted with PUBDATE_FORMAT
def self.get_pubdate(url)
  uri = URI.parse(url)
  head_req = Net::HTTP::Head.new uri

  # TLS has to be switched on explicitly; without it an https URL would be
  # spoken to in plain HTTP on port 443 and the request would fail.
  head = Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == "https") do |http|
    http.request head_req
  end

  stamp = head["Last-Modified"]
  published =
    begin
      stamp ? Time.parse(stamp) : Time.now
    rescue ArgumentError
      # Time.parse signals an unparsable value with ArgumentError; use "now".
      Time.now
    end
  published.strftime(PUBDATE_FORMAT)
end
scrape(comic=nil) click to toggle source
Get the current or numbered xkcd comic

+comic+ = (string) the number of the xkcd comic to retrieve. Gets the current comic if nil.

returns hash containing comic info:

     {:title => "comic's title",
      :url => "url to comic",
      :img_src => "source url to comic image",
      :hover_text => "the hover (mouse-over) text",
      :pubdate => "publication date",
     }
# File lib/scrapers/xkcd.rb, line 23
# Scrape the current xkcd comic, or a specific one by number.
#
# +comic+ = (string) the number of the xkcd comic to retrieve.
# Gets the current comic if nil.
#
# returns hash containing comic info:
#
#      {:url        => "url to comic",
#       :img_src    => "source url to comic image, as found in the page",
#       :img_title  => "the image's title attribute",
#       :hover_text => "same value as :img_title",
#       :title      => "the image's alt attribute",
#       :img_alt    => "same value as :title",
#       :pubdate    => "publication date, from get_pubdate",
#      }
def self.scrape(comic=nil)
  results = Hash.new

  url = URI.parse XKCD_URL
  url.path = "/#{comic}/" unless comic.nil?
  results[:url] = url.to_s

  # URI#open (mixed in by open-uri) keeps working on Rubies where Kernel#open
  # no longer handles URLs; the block form closes the stream once parsed.
  doc = url.open { |page| Nokogiri::HTML(page) }

  img = doc.at_css("#comic img")
  results[:img_src] = img.attr("src")
  # :hover_text is the documented key; :img_title is kept for existing callers.
  results[:img_title] = results[:hover_text] = img.attr("title")
  results[:title] = results[:img_alt] = img.attr("alt")

  # The page may give a relative or protocol-relative image src; resolve it
  # against the page URL so get_pubdate always receives an absolute URL.
  results[:pubdate] = get_pubdate(URI.join(results[:url], results[:img_src]).to_s)

  results
end