class DealScraper

This class grabs the daily deals from Reddit's /r/ebookdeals. The source file loads its dependencies with `require_relative "EbookDealInfo"`.

Public Instance Methods

scrape() click to toggle source
# File lib/ebookdealinfo/deal_scraper.rb, line 6
# Scrapes the newest posts on reddit's /r/ebookdeals and registers one Book
# per post that has a non-empty title.
#
# Posts whose title can be parsed are passed to Book.create(author, title, price)
# as positional arguments. Posts whose title cannot be parsed are reported on
# stdout and registered as a blank placeholder via Book.create("", "", "", 0)
# (the meaning of that fourth argument is defined by Book, not here).
# Posts with an empty title are skipped entirely.
#
# Fetching uses URI.open, so open-uri must already be loaded; Kernel#open no
# longer accepts URLs on Ruby 3.0+.
#
# @return [Object] the collection of "div.link" nodes that was iterated
def scrape
  deals = Nokogiri::HTML(URI.open("https://www.reddit.com/r/ebookdeals/new/", 'User-Agent' => 'Chrome'))
  posts = deals.search("div.link")
  total = posts.size # computed once instead of re-querying the document per post

  posts.each_with_index do |post, index|
    heading = post.search("p.title").text
    next if heading.empty?

    author, title, price = parse_deal_title(heading)
    if author
      Book.create(author, title, price)
      puts "Loaded book ##{index + 1} of #{total}"
    else
      puts "Unable to load book ##{index + 1} of #{total}. Probably a bad post name."
      Book.create("", "", "", 0)
    end
  end
end

# Splits a post title into its author, title and price parts.
#
# Two layouts are recognised:
# * "Author; Title; Price" (the price part is optional)
# * "Title by Author (Price)"
#
# @param text [String] the raw post title
# @return [Array<String>, nil] [author, title, price] with price "" when absent,
#   or nil when the title cannot be parsed
private def parse_deal_title(text)
  if text.include?(";")
    author, title, price = text.split(";")
    # A title such as "Author;" has no second field; treat it as unparseable
    # instead of calling strip on nil.
    return nil unless author && title

    return [author.strip, title.strip, price ? price.strip : ""]
  end

  # Ignore titles that have no separator character followed by "by"/"By".
  return nil if text.slice(/[,.-[ ]]([Bb]y)/).nil?

  # More than one whitespace-delimited "by" means the title itself contains
  # the word, so the author boundary is ambiguous.
  spaced_by_count = text.scan(/(\s[Bb][Yy]\s)/).size
  return nil if spaced_by_count > 1

  normalized = text.gsub(/(\s[Bb][Yy]\s)/, " by ")
  # Split on the whole " by " separator when there is one, so that words that
  # merely contain "by" ("Ruby", "Libby") are left intact. Titles written like
  # "Title,by Author" have no spaced separator and fall back to the bare "by".
  separator = spaced_by_count == 1 ? " by " : "by"
  raw_title, raw_author = normalized.split(separator, 2)

  # Keep only the text before the first "(", ",", "$" or "/".
  author_part = raw_author.to_s.strip.slice(/\A[^(,$\/]+/)
  return nil unless raw_title && author_part

  # Drop trailing ". Kindle" / "-- Kindle" edition notes from the author.
  author = author_part.split(". Kindle").first.to_s.split("-- Kindle").first.to_s.strip
  # Remove parenthesised notes and trailing punctuation from the title.
  title = raw_title.gsub(/[(].+[)]/, "").gsub(/\W+\z/, "").strip
  price = text.slice(/[$]\d+[.]\d+/) || ""

  [author, title, price]
end