class SeaLife::Scraper

Constants

BASE_URL

Public Class Methods

scrape_animal_info(animal)
# File lib/sea_life/scraper.rb, line 31
# Scrapes the detail page of +animal+ and passes the collected attributes to
# +animal.add_info+ as a single Hash.
#
# The Hash contains:
# * +:scientific_name+ and +:short_desc+ - text of the matching header and
#   description paragraphs.
# * +:longer_desc+ - every secondary paragraph, each prefixed with "\n\n ",
#   up to (but not including) a paragraph whose text is exactly
#   "Additional Resources:".
# * one entry per sidebar heading, keyed by +:habitat+, +:habits+ or
#   +:status+ for the three known headings, otherwise by the stripped,
#   downcased heading text converted to a Symbol.
#
# animal - object responding to +url+ (a path appended to BASE_URL) and
#          +add_info+.
#
# Returns whatever +animal.add_info+ returns.
def self.scrape_animal_info(animal)
  # Use URI#open (open-uri) rather than Kernel#open: Kernel#open no longer
  # accepts URLs as of Ruby 3.0. The block form closes the handle once the
  # document has been parsed.
  doc = URI.parse(BASE_URL + animal.url).open { |page| Nokogiri::HTML(page) }

  animal_info = {}
  animal_info[:scientific_name] = doc.css("section.subpage-header div p").text
  animal_info[:short_desc] = doc.css("div.animal-description-contain p").text

  body_texts = doc.css("section.animal-secondary div.flex-item-2 p").map(&:text)
  kept_texts = body_texts.take_while { |text| text != "Additional Resources:" }
  animal_info[:longer_desc] = kept_texts.map { |text| "\n\n #{text}" }.join

  # Sidebar headings that are stored under a shorter key.
  renamed_keys = {
    "ecosystem/habitat" => :habitat,
    "feeding habits" => :habits,
    "conservation status" => :status
  }

  # Query the sidebar once instead of on every iteration.
  headings = doc.css("div.animal-details-side h2")
  details = doc.css("div.animal-details-side p")

  # NOTE(review): the final heading is skipped, preserving the existing
  # loop bound (size - 1); presumably it has no matching paragraph - confirm
  # against the live page.
  (0...(headings.size - 1)).each do |index|
    detail = details[index]
    # Headings and paragraphs are paired by position; stop when a heading has
    # no paragraph at the same index rather than failing on nil.
    break if detail.nil?

    label = headings[index].text.strip.downcase
    key = renamed_keys.fetch(label) { label.to_sym }
    animal_info[key] = detail.text.strip
  end

  animal.add_info(animal_info)
end
scrape_animals(category)
# File lib/sea_life/scraper.rb, line 19
# Scrapes the listing page of +category+ and instantiates a SeaLife::Animal
# for every <article> element found on it.
#
# Each animal is built from a Hash with +:category+ (the object passed in),
# +:name+ (text of "div.copy h1") and +:url+ (href of the first
# "div.overlay a" link).
#
# category - object responding to +url+ (a path appended to BASE_URL).
#
# Returns the value of +each+ over the matched article nodes.
def self.scrape_animals(category)
  # Use URI#open (open-uri) rather than Kernel#open: Kernel#open no longer
  # accepts URLs as of Ruby 3.0. The block form closes the handle once the
  # document has been parsed.
  doc = URI.parse(BASE_URL + category.url).open { |page| Nokogiri::HTML(page) }

  doc.css("article").each do |article|
    animal_info = {
      category: category,
      name: article.css("div.copy h1").text,
      url: article.css("div.overlay a").attribute("href").value
    }
    SeaLife::Animal.new(animal_info)
  end
end
scrape_categories()
# File lib/sea_life/scraper.rb, line 5
# Scrapes the "/marine-life" index page (Oceana) and collects its categories.
#
# Every "article.animal-tile" element yields a Hash with +:url+ (href of the
# first "div.overlay a" link) and +:name+ (text of "div.copy h1"). The tile
# named "Marine Science and Ecosystems" is left out.
#
# Returns an Array of Hashes with +:url+ and +:name+ keys.
def self.scrape_categories
  # Use URI#open (open-uri) rather than Kernel#open: Kernel#open no longer
  # accepts URLs as of Ruby 3.0. The block form closes the handle once the
  # document has been parsed.
  doc = URI.parse(BASE_URL + "/marine-life").open { |page| Nokogiri::HTML(page) }

  doc.css("article.animal-tile").each_with_object([]) do |tile, categories|
    category = {
      url: tile.css("div.overlay a").attribute("href").value,
      name: tile.css("div.copy h1").text
    }
    categories << category unless category[:name] == "Marine Science and Ecosystems"
  end
end