class Scraper
> Scraper
Scrapes the articles from the front page of www.louderwithcrowder.com¶ ↑
Scrapes the data from each article to be displayed to the user via CLI
Public Class Methods
initiate_scrape()
click to toggle source
> This method initiates the scrape from our website¶ ↑
# File lib/crowder_news/scraper.rb, line 12
# Runs the whole scrape: registers the featured articles, then the recent
# ones, and finally adds the per-article details to every known Article.
#
# Returns the result of iterating Article.all.
def self.initiate_scrape
  featured = scrape_featured
  Article.create_from_collection(featured)

  recent = scrape_recent
  Article.create_from_collection(recent)

  Article.all.each do |article|
    article.add_details(scrape_details(article.link))
  end
end
scrape_details(article_url)
click to toggle source
> Pulls the article's details from each article url so we can complete our Article
objects¶ ↑
# File lib/crowder_news/scraper.rb, line 60
# Downloads a single article page and extracts the fields used to complete
# an Article.
#
# @param article_url [String] URL of the article page to fetch
# @return [Hash] with the keys
#   :author        - text of "h2 span.lwc-author"
#   :date          - text of "h2 span.lwc-date"
#   :body          - concatenated text of every <p> element on the page
#   :youtube_links - Array of iframe src values, one per video wrapper
def self.scrape_details(article_url)
  # URI.open (open-uri) rather than Kernel#open: Kernel#open no longer opens
  # URLs as of Ruby 3.0 and treats a leading "|" as a command to execute.
  # The block form closes the downloaded stream once parsing is done.
  doc = URI.open(article_url) { |page| Nokogiri::HTML(page) }

  # A wrapper without an iframe, or an iframe without a src, yields nil here
  # instead of raising NoMethodError; the nils are dropped below.
  video_sources = doc.css("div.fluid-width-video-wrapper").map do |wrapper|
    iframe = wrapper.at_css("iframe")
    iframe["src"] if iframe
  end

  {
    author: doc.css("h2 span.lwc-author").text,
    date: doc.css("h2 span.lwc-date").text,
    body: doc.css("p").text,
    youtube_links: video_sources.compact
  }
end
scrape_featured()
click to toggle source
> Scrapes the featured articles from LwC¶ ↑
# File lib/crowder_news/scraper.rb, line 24
# Collects the featured articles listed on the page at @@url.
#
# @return [Array<Hash>] one hash per ".featured-box" found inside a
#   "div.lwc-featured", with the keys :title, :link, :excerpt and
#   :type (always "Featured")
def self.scrape_featured
  # URI.open (open-uri) rather than Kernel#open: Kernel#open no longer opens
  # URLs as of Ruby 3.0 and treats a leading "|" as a command to execute.
  # The block form closes the downloaded stream once parsing is done.
  doc = URI.open(@@url) { |page| Nokogiri::HTML(page) }

  doc.css("div.lwc-featured").flat_map do |section|
    boxes = section.css(".featured-box").map do |box|
      anchors = box.css("h3.featured-title a")
      first_anchor = anchors.first
      # A box without a title link has no URL to follow for details, so it
      # is skipped instead of raising NoMethodError on nil.
      next unless first_anchor && first_anchor["href"]

      {
        title: anchors.text,
        link: first_anchor["href"],
        excerpt: box.css("p.lwc-excerpt").text,
        type: "Featured"
      }
    end
    boxes.compact
  end
end
scrape_recent()
click to toggle source
> Scrapes the recent articles from LwC¶ ↑
# File lib/crowder_news/scraper.rb, line 42
# Collects the recent articles listed on the page at @@url.
#
# @return [Array<Hash>] one hash per ".recent-box" found inside a
#   "div.lwc-recent", with the keys :title, :link, :excerpt (always an
#   empty string here) and :type (always "Recent")
def self.scrape_recent
  # URI.open (open-uri) rather than Kernel#open: Kernel#open no longer opens
  # URLs as of Ruby 3.0 and treats a leading "|" as a command to execute.
  # The block form closes the downloaded stream once parsing is done.
  doc = URI.open(@@url) { |page| Nokogiri::HTML(page) }

  doc.css("div.lwc-recent").flat_map do |section|
    boxes = section.css(".recent-box").map do |box|
      anchors = box.css("h3.recent-title a")
      first_anchor = anchors.first
      # A box without a title link has no URL to follow for details, so it
      # is skipped instead of raising NoMethodError on nil.
      next unless first_anchor && first_anchor["href"]

      {
        title: anchors.text,
        link: first_anchor["href"],
        excerpt: "",
        type: "Recent"
      }
    end
    boxes.compact
  end
end