class Scraper

Public Class Methods

get_page() click to toggle source
# File lib/apod_cli/scraper.rb, line 17
def self.get_page
  Nokogiri::HTML(open("http://apod.nasa.gov/apod/archivepix.html"))
end
index_data() click to toggle source
# File lib/apod_cli/scraper.rb, line 32
def self.index_data
  array = []
  months = ["January", "February", "March", "April", "May", "June", "July", "August", "September", "October", "November", "December"]
  content = self.get_page.css("body b")
  links = content.css("a")
  date_titles = content.text.split("\n").reject!{|item| item == ""}
  date_titles.pop

  links_hash = {}
  links.each do |link|
    links_hash[link.text.strip] = link.attribute("href").value
  end

  toggle_2007 = false
  date_titles.each_with_index do |dt, idx|
    hash = {}
    #There's one bloody link with a \n typo in its name that requires me to write this code.
    if idx == date_titles.length - 1 || months.index(date_titles[idx + 1].match(/[a-zA-Z]{1,}/).to_s).nil?
      if idx != date_titles.length - 1
        toggle_2007 = true
        next
      end
    end
    if toggle_2007
      toggle_2007 = false
      next
    end

    month_str = ""
    month_num = months.index(dt.match(/[a-zA-Z]{1,}/).to_s) + 1
    if month_num.to_s.length == 1
      month_str = "0#{month_num}"
    else
      month_str = month_num.to_s
    end

    name_i = dt.match(/:.+/).to_s
    name_i[0] = " "

    hash[:date] = "#{dt.match(/[0-9]{4}/)}-#{month_str}-#{dt.match(/[^0-9][0-9]{2}[^0-9]/).to_s.gsub(/[: ]/, "")}"
    hash[:name] = name_i.strip
    hash[:link] = "http://apod.nasa.gov/apod/" + links_hash[hash[:name]]
    array << hash
  end

  array.insert(-4411, {date: "2007-07-16", name: "The Lagoon Nebula in Gas, Dust, and Stars", link: "http://apod.nasa.gov/apod/ap070716.html"}) #This is the dumb typo'd link that I decided to hardcode.
  array
end
new() click to toggle source
# File lib/apod_cli/scraper.rb, line 9
def initialize
  @@data = self.class.index_data
end

Public Instance Methods

data() click to toggle source
# File lib/apod_cli/scraper.rb, line 13
def data
  @@data
end
pic_data(url) click to toggle source
# File lib/apod_cli/scraper.rb, line 21
def pic_data(url)
  explanation = Nokogiri::HTML(open(url)).css("body").text.match(/Explanation:[\s\S]+?(\n(\s*)){3}/).to_s.gsub(/\n/, " ").gsub(/\s{2,}/, " ").strip
  name = self.data.select{|hash| url.include?(hash[:link])}[0][:name]
  if Nokogiri::HTML(open(url)).css("p a img").to_a != []
    link = "http://apod.nasa.gov/apod/#{Nokogiri::HTML(open(url)).css("p a img").attribute("src").to_s}"
  else
    link = self.data.select{|hash| url.include?(hash[:link])}[0][:link]
  end
  hash = {name: name, expl: explanation, link:link}
end