module EndangeredSpecies::Scraper
Scrapes www.worldwildlife.org and returns an array of animals.
Public Class Methods
create_animal(name, url)
click to toggle source
# File lib/endangered_species/scraper.rb, line 13
# Wraps a species name and its page URL in an EndangeredSpecies::Animal.
#
# @param name [Object] passed to the animal as the +name:+ keyword
# @param url [Object] passed to the animal as the +url:+ keyword
# @return [EndangeredSpecies::Animal]
def self.create_animal(name, url)
  animal_attributes = { name: name, url: url }
  EndangeredSpecies::Animal.new(**animal_attributes)
end
endangered_species()
click to toggle source
# File lib/endangered_species/scraper.rb, line 7
# Builds one animal per entry returned by +urls+.
#
# Each href is appended verbatim to the site root, so the hrefs are
# presumably site-relative paths (e.g. "/species/...") -- confirm against +urls+.
#
# @return [Array] one +create_animal+ result per entry of +urls+, in the same order
def self.endangered_species
  urls.map { |name, url| create_animal(name, "https://www.worldwildlife.org#{url}") }
end
scrape_attributes(doc)
click to toggle source
# File lib/endangered_species/scraper.rb, line 22
# Builds the attribute hash for one parsed species page.
#
# The +:description+ entry holds the text of every node matched by
# '.section-pop-inner .lead p'. Every '.list-stats li' node adds one more
# entry: its '.hdr' text (lower-cased, trimmed, whitespace runs turned into
# underscores) is the key and its trimmed '.container' text is the value.
#
# @param doc [#css] parsed page (a Nokogiri document at the visible call site)
# @return [Hash{Symbol => Object}] +:description+ plus one key per stat
def self.scrape_attributes(doc)
  attributes = { description: doc.css('.section-pop-inner .lead p').map(&:text) }

  doc.css('.list-stats li').each do |stat|
    # Collapse any whitespace run (spaces, tabs, newlines) so the key is a clean identifier.
    key = stat.css('.hdr').text.downcase.strip.gsub(/\s+/, '_')
    # A stat without a header would otherwise be stored under the empty symbol.
    next if key.empty?

    attributes[key.to_sym] = stat.css('.container').text.strip
  end

  attributes
end
select_animal_attributes(animal)
click to toggle source
# File lib/endangered_species/scraper.rb, line 17
# Fetches the animal's page, scrapes it with +scrape_attributes+ and hands the
# resulting hash to the animal.
#
# @param animal [#url, #assign_attributes] animal whose page is fetched
# @return [Object] whatever +animal.assign_attributes+ returns
def self.select_animal_attributes(animal)
  # Block form: open-uri closes the stream as soon as the page has been parsed.
  doc = URI.open(animal.url) { |page| Nokogiri::HTML(page) }
  animal.assign_attributes(scrape_attributes(doc))
end
urls()
click to toggle source
# File lib/endangered_species/scraper.rb, line 30
# Collects the species links found on the WWF species index page.
#
# @return [Hash{String => String}] species name => value of the link's +href+
def self.urls
  # Block form: open-uri closes the stream as soon as the page has been parsed.
  page = URI.open('https://www.worldwildlife.org/species') { |io| Nokogiri::HTML(io) }

  page.css('ul.masonry li.item a').each_with_object({}) do |item, links|
    # The '.name' text can span several lines; only the first one is the name.
    name = item.css('.name').text.strip.lines.first.to_s.strip
    links[name] = item['href']
  end
end