class NytJourneys::Scraper

Public Class Methods

scrape_quotes(quotes_url="http://www.quotationspage.com/search.php3?homesearch=journey&page=1") click to toggle source

quote: quotations_page.css("dt.quote"), .css("a").text author: quotations_page.css("dd.author"), .css("a")[4].text

# File lib/nyt_journeys/scraper.rb, line 34
# Fetches a quotations search-results page and pairs every quote on it with
# its author.
#
# Expects open-uri (for URI.open) and Nokogiri to be loaded already.
#
# quotes_url:: URL of the page to fetch and parse.
#
# Returns an Array of Strings of the form "quote ~ author", one per
# "dt.quote" element, in page order.
def self.scrape_quotes(quotes_url="http://www.quotationspage.com/search.php3?homesearch=journey&page=1")
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open
  # in Ruby 3.0, and Kernel#open is not safe to call on arbitrary strings.
  quotations_page = Nokogiri::HTML(URI.open(quotes_url))

  quotes = quotations_page.css("dt.quote").map { |quote| quote.css("a").text }

  # The author name is read from the fifth link inside each dd.author.
  # NOTE(review): presumably the first four links are per-quote icons — confirm
  # against the live markup. An entry with fewer links yields a blank author
  # instead of raising NoMethodError and aborting the whole scrape.
  authors = quotations_page.css("dd.author").map { |author| author.css("a")[4]&.text }

  # zip pads with nil when there are fewer authors than quotes, which
  # interpolates as an empty string.
  quotes.zip(authors).map { |quote, author| "#{quote} ~ #{author}" }
end
scrape_type_detail_page(types_hash) click to toggle source

trip_name: .css("h2.item-title").text trip_url: .attribute("href").value

# File lib/nyt_journeys/scraper.rb, line 19
# Fetches the detail page for one trip type and lists the trips on it.
#
# Expects open-uri (for URI.open) and Nokogiri to be loaded already.
#
# types_hash:: Hash read positionally: its first value is used as the type
#              label and its second value as the URL to fetch.
#
# Returns an Array of Hashes, one per "li.journey-list-item > a" link, each
# with :name, :url and :type keys.
def self.scrape_type_detail_page(types_hash)
  # Positional access is kept so any two-entry hash in (name, url) insertion
  # order continues to work.
  type_name, type_url = types_hash.values

  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open
  # in Ruby 3.0, and Kernel#open is not safe to call on arbitrary strings.
  type_details = Nokogiri::HTML(URI.open(type_url))

  type_details.css("li.journey-list-item > a").map do |trip|
    {
      name: trip.css("h2.item-title").text,
      url: trip.attribute("href").value,
      type: type_name
    }
  end
end
scrape_type_summary_page(summary_url) click to toggle source

type_name: .css("h3.trip-type-list-title").text type_url: .css("a.view-all-link").attribute("href").value

# File lib/nyt_journeys/scraper.rb, line 5
# Fetches the trip-type summary page and lists the trip types on it.
#
# Expects open-uri (for URI.open) and Nokogiri to be loaded already.
#
# summary_url:: URL of the summary page to fetch and parse.
#
# Returns an Array of Hashes, one per "div.trip-type-listings" element, each
# with :type_name and :type_url keys (inserted in that order).
def self.scrape_type_summary_page(summary_url)
  # URI.open rather than Kernel#open: open-uri stopped patching Kernel#open
  # in Ruby 3.0, and Kernel#open is not safe to call on arbitrary strings.
  type_summary = Nokogiri::HTML(URI.open(summary_url))

  type_summary.css("div.trip-type-listings").map do |type|
    {
      type_name: type.css("h3.trip-type-list-title").text,
      type_url: type.css("a.view-all-link").attribute("href").value
    }
  end
end