module SportHeadlines::Scraper
Public Class Methods
scrape_article(article)
click to toggle source
# File lib/sport_headlines/scraper.rb, line 33
#
# Downloads the page at +article.article_url+, collects the text of the
# paragraphs in the site-specific article container, and stores the result in
# +article.content+.
#
# The CSS selector and the paragraph separator are chosen from
# +article.site.site_name+:
#
#   "ESPN"              -> ".article-body p", paragraphs followed by "\n\n"
#   "Bleacher Report"   -> ".article_body p", paragraphs followed by "\n"
#   "Pro Football Talk" -> ".post-body p",    paragraphs followed by "\n\n"
#
# Any other site name still triggers the download but yields empty content.
#
# @param article [#article_url, #site, #content=] article to populate
# @return [String] the text that was assigned to +article.content+
def self.scrape_article(article)
  # URI.open instead of bare `open`: Kernel#open stopped fetching URLs in
  # Ruby 3.0. The block form closes the downloaded stream once it is parsed.
  doc = URI.open(article.article_url) { |io| Nokogiri::HTML(io) }

  selector, separator =
    case article.site.site_name
    when "ESPN"              then [".article-body p", "\n\n"]
    when "Bleacher Report"   then [".article_body p", "\n"]
    when "Pro Football Talk" then [".post-body p", "\n\n"]
    end

  article.content =
    if selector
      # Each paragraph is prefixed with a space and followed by the separator.
      doc.search(selector).map { |paragraph| " #{paragraph.text}#{separator}" }.join
    else
      ""
    end
end
scrape_site_headlines(site)
click to toggle source
# File lib/sport_headlines/scraper.rb, line 3
#
# Downloads +site.site_url+, clears the site's current articles, and adds one
# SportHeadlines::Article per headline list item found on the page.
#
# The selectors depend on +site.site_name+:
#
#   "ESPN"            -> items ".headlines li"; title from "a"; the link's
#                        href is appended to +site.site_url+
#   "Bleacher Report" -> items ".headlineArticles li"; title from
#                        "span.title"; href of "a.title" used as-is
#   anything else     -> items "#top-headlines li"; title is the whole item
#                        text; href of "a" used as-is
#
# List items that contain no link (or a link without an href) are skipped
# instead of aborting the scrape after the articles were already cleared.
#
# @param site [#site_url, #site_name, #clear_articles, #add_article]
# @return the result of iterating the matched headline items
def self.scrape_site_headlines(site)
  # URI.open instead of bare `open`: Kernel#open stopped fetching URLs in
  # Ruby 3.0. The block form closes the downloaded stream once it is parsed.
  doc = URI.open(site.site_url) { |io| Nokogiri::HTML(io) }
  # Cleared only after the download succeeded, as in the original ordering.
  site.clear_articles

  # title_css == nil means "use the text of the whole list item".
  list_css, title_css, link_css, url_prefix =
    case site.site_name
    when "ESPN"            then [".headlines li", "a", "a", site.site_url]
    when "Bleacher Report" then [".headlineArticles li", "span.title", "a.title", ""]
    else                        ["#top-headlines li", nil, "a", ""]
    end

  doc.search(list_css).each do |headline|
    link = headline.search(link_css).first
    href = link && link["href"]
    next unless href

    new_article = SportHeadlines::Article.new
    new_article.title = title_css ? headline.search(title_css).text : headline.text
    new_article.article_url = url_prefix + href
    site.add_article(new_article)
    new_article.site = site
  end
end