class Scraper

Public Class Methods

new() click to toggle source
# File lib/podcast_book_club/scraper.rb, line 5
# Sets up a scraper: builds the episode listing URL, fetches the episodes
# found there, and then stores the Google Books configuration in @config.
def initialize
  fetch_episodes(build_path)
  @config = googlebooks_config
end

Public Instance Methods

build_books(episode) click to toggle source
# File lib/podcast_book_club/scraper.rb, line 33
# Looks up the books recommended in an episode and records each one.
#
# The episode description is loaded through describe_episode (which sets
# @description). Nothing else happens unless that description mentions
# "Book" or "book". Every query returned by send_to_parser is sent to
# GoogleBooks.search, and the non-empty fields of the first hit are handed to
# Book.find_or_create_by_title together with the episode itself.
#
# A query that yields no hit, or whose book cannot be recorded, is reported on
# stdout and skipped; the remaining queries are still processed.
#
# @param episode [Object] episode forwarded to describe_episode and
#   send_to_parser, and stored under :episode in the book attributes
# @return [Object, nil] the collection of queries that was iterated, or nil
#   when the description does not mention a book
def build_books(episode)
  describe_episode(episode)
  return unless @description.match?(/\b[Bb]ook/)

  # send_to_parser returns nil from its fallback branch (it only prints a
  # notice there), so treat a missing result as "no queries".
  queries = send_to_parser(episode) || []

  report_trouble = lambda do |query|
    puts "I'm having trouble adding the book #{Rainbow("#{query}.").bg(:black).yellow}"
  end

  queries.each do |query|
    result = GoogleBooks.search(query, @config).first

    # No hit for this query: report it instead of relying on a rescued
    # NoMethodError from calling accessors on nil.
    if result.nil?
      report_trouble.call(query)
      next
    end

    begin
      authors = result.authors_array
      genre = result.categories

      attributes = {}
      attributes[:url] = result.info_link unless result.info_link.nil?
      attributes[:title] = result.title unless result.title.nil?
      attributes[:author] = authors unless authors.nil? || authors == [nil]
      attributes[:genre] = genre unless genre.nil? || genre == ""
      attributes[:synopsis] = result.description unless result.description.nil?
      attributes[:episode] = episode

      Book.find_or_create_by_title(attributes)
    rescue StandardError
      report_trouble.call(query)
    end
  end
end
build_path() click to toggle source
# File lib/podcast_book_club/scraper.rb, line 68
# Builds the player.fm listing URL for The Ezra Klein Show, sized to include
# every episode expected to exist on the given day.
#
# The limit is 183 plus the number of Mondays and Thursdays from the snapshot
# date (2019-06-25) through +today+, inclusive.
# NOTE(review): 183 is presumably the episode count at the snapshot date and
# Monday/Thursday the release days -- confirm against the feed.
#
# @param today [Date] last day counted towards the limit (defaults to the
#   current date)
# @return [String] the listing URL
def build_path(today = Date.today)
  snapshot_date = Date.new(2019, 6, 25)
  episodes_since_snapshot = snapshot_date.step(today).count { |day| day.monday? || day.thursday? }
  limit = episodes_since_snapshot + 183

  "https://player.fm/series/the-ezra-klein-show/episodes?active=true&limit=#{limit}&order=newest&query=&style=list&container=false&offset=0"
end
describe_episode(episode) click to toggle source
# File lib/podcast_book_club/scraper.rb, line 75
# Downloads an episode's page and extracts its description.
#
# Stores the parsed page in @episode_doc and the text of the
# ".story .description" nodes in @description.
#
# @param episode [#link] object whose +link+ is the location of the episode page
# @return [String] the description text (also stored in @description)
def describe_episode(episode)
  # URI.open (from open-uri, which fetching a URL already depended on) is used
  # instead of Kernel#open: Kernel#open no longer accepts URLs as of Ruby 3.0
  # and runs strings beginning with "|" as shell commands. The block form
  # closes the handle once the page has been parsed.
  @episode_doc = URI.open(episode.link) { |page| Nokogiri::HTML(page) }
  @description = @episode_doc.css(".story .description").text
end
fetch_episodes(path) click to toggle source
# File lib/podcast_book_club/scraper.rb, line 11
# Downloads the episode listing and creates an Episode for every entry.
#
# Each ".info" node on the page contributes one Episode.create call with the
# stripped text of its ".info-top a" link as the title, that link's href
# (prefixed with https://player.fm) as the link, and the "datetime" attribute
# of its ".timeago" node parsed as a Date.
#
# @param path [String] location of the listing page
# @return [Object] the collection of ".info" nodes that was iterated
def fetch_episodes(path)
  # URI.open (from open-uri, which fetching a URL already depended on) is used
  # instead of Kernel#open: Kernel#open no longer accepts URLs as of Ruby 3.0
  # and runs strings beginning with "|" as shell commands. The block form
  # closes the handle once the page has been parsed.
  doc = URI.open(path) { |page| Nokogiri::HTML(page, nil, Encoding::UTF_8.to_s) }

  doc.css(".info").each do |episode|
    # Look the title link up once; it provides both the title and the href.
    title_link = episode.css(".info-top a")

    attributes = {
      title: title_link.text.strip,
      link: "https://player.fm#{title_link.attribute("href").value}",
      date: Date.strptime(episode.css(".timeago").attribute("datetime").value)
    }

    Episode.create(attributes)
  end
end
send_to_parser(episode) click to toggle source
# File lib/podcast_book_club/scraper.rb, line 83
# Picks the parser that matches an episode's publication date.
#
# Episodes dated from 2019-01-14 through today go to parse_with_links;
# episodes dated from 2017-03-28 up to (but not including) 2019-01-14 go to
# parse_without_links. Any other date prints a notice and yields no queries.
#
# @param episode [#date] episode whose +date+ selects the parser
# @return [Object] whatever the selected parser returns, or an empty Array
#   when the episode falls outside both date ranges
def send_to_parser(episode)
  today = Date.today
  with_links = Date.new(2019, 1, 14)
  without_links = Date.new(2017, 3, 28)

  case episode.date
  when (with_links..today)
    parse_with_links(episode)
  when (without_links...with_links)
    parse_without_links(episode)
  else
    puts "This episode has no recommendations."
    # Return an empty list rather than the nil that puts returns, so callers
    # that iterate the result do not fail.
    []
  end
end