class NprCliNewsReader::Scraper
Public Class Methods
scrape_articles_for_category(selected_category)
click to toggle source
# File lib/npr_cli_news_reader/scraper.rb, line 5
# Scrapes the section page for +selected_category+ and creates one
# NprCliNewsReader::Article for every <article class="item"> element on it.
#
# The scrape is skipped when an article of that category already exists.
# Articles are stored with a lower-cased category, so the lookup is done on
# the lower-cased name as well; otherwise a mixed-case argument would never
# match and the section would be scraped (and duplicated) on every call.
#
# @param selected_category [String] section name; "race & culture" is served
#   from the "codeswitch" section, any other value is used as the URL slug
#   exactly as given
# @return [nil, Object] nil when the category was already scraped, otherwise
#   the collection of matched article nodes
def self.scrape_articles_for_category(selected_category)
  category = selected_category.downcase
  return if NprCliNewsReader::Article.all.any? { |article| article.category == category }

  section = category == "race & culture" ? "codeswitch" : selected_category
  # URI.open replaces the bare Kernel#open form, which open-uri no longer
  # supports for URLs on Ruby 3+; the block form closes the handle once read.
  html = URI.open("#{@@base_url}/sections/#{section}", &:read)
  doc = Nokogiri::HTML(html)

  doc.css('article.item').each do |article|
    title_link = article.css('h2.title a')
    article_attributes = {
      category: category,
      title: title_link.text.strip,
      teaser: article.css('p.teaser a').text.strip,
      article_url: title_link.attr('href').to_s.strip
    }
    NprCliNewsReader::Article.new(article_attributes)
  end
end
scrape_full_article(article)
click to toggle source
# File lib/npr_cli_news_reader/scraper.rb, line 30
# Downloads the page at +article.article_url+ and stores the paragraph nodes
# that are direct children of the #storytext element on the article.
#
# @param article [#article_url, #full_article=] article to fill in
# @return [Object] the matched paragraph nodes, as assigned to
#   +article.full_article+
def self.scrape_full_article(article)
  # URI.open replaces the bare Kernel#open form, which open-uri no longer
  # supports for URLs on Ruby 3+; the block form closes the handle once read.
  html = URI.open(article.article_url, &:read)
  doc = Nokogiri::HTML(html)
  # Every direct <p> child of #storytext is kept; nothing is discarded here.
  paragraphs = doc.css('#storytext > p')
  article.full_article = paragraphs
end