class BookDeals::Scraper
Builds Category, Book, and Deal models by scraping Goodreads.
Constants
- BOOK_DEALS_URL
- CATEGORY_PAGE_HTML_ELEMENTS
- DEALS_HTML_ELEMENT_AUTHOR
- DEALS_HTML_ELEMENT_CATEGORIES
- DEALS_HTML_ELEMENT_DATETIME
- DEALS_HTML_ELEMENT_DEAL_PRICE
- DEALS_HTML_ELEMENT_DESCRIPTION
- DEALS_HTML_ELEMENT_ORIGINAL_PRICE
- DEALS_HTML_ELEMENT_TITLE
- DEALS_PURCHASE_URL_HTML_ELEMENTS
- HOME_PAGE_HTML_ELEMENTS
- HOME_URL
Public Instance Methods
get_book_deals_page()
click to toggle source
# File lib/book_deals/scraper.rb, line 18
# Downloads the page at BOOK_DEALS_URL and parses it with Nokogiri.
#
# URI.open is used instead of a bare Kernel#open: open-uri no longer
# patches Kernel#open to accept URLs on Ruby 3.0+, so the bare call would
# try to open a local file named after the URL and fail.
#
# @return the document object produced by Nokogiri::HTML for the deals page
def get_book_deals_page
  Nokogiri::HTML(URI.open(BOOK_DEALS_URL))
end
scrape_categories_from_home_page()
click to toggle source
# File lib/book_deals/scraper.rb, line 22
# Builds one Category per public category link found on the deals page.
#
# The links are located with the HOME_PAGE_HTML_ELEMENTS selector, filtered
# through select_public_category_html_elements, and each remaining link is
# turned into a Category whose name is the link text and whose URL is
# HOME_URL followed by the link's href.
#
# @return the collection of Category objects, in page order
def scrape_categories_from_home_page
  category_links = get_book_deals_page.css(HOME_PAGE_HTML_ELEMENTS)

  select_public_category_html_elements(category_links).map do |link|
    Category.new(link.text, HOME_URL + link.attr("href"))
  end
end
scrape_deals_from_category_page(category)
click to toggle source
# File lib/book_deals/scraper.rb, line 33
# Fetches a category's page and adds a Book (with its Deal) to the category
# for every deal element found there.
#
# URI.open is used instead of a bare Kernel#open: open-uri no longer
# patches Kernel#open to accept URLs on Ruby 3.0+.
#
# @param category an object responding to #url and #add_book
# @return the same category, after all scraped books have been added
def scrape_deals_from_category_page(category)
  deals_html_doc = Nokogiri::HTML(URI.open(category.url))
  deals_html_elements = deals_html_doc.css(CATEGORY_PAGE_HTML_ELEMENTS)

  deals_html_elements.each do |html_element|
    price = html_element.css(DEALS_HTML_ELEMENT_DEAL_PRICE).text
    original_price = html_element.css(DEALS_HTML_ELEMENT_ORIGINAL_PRICE).text
    # Discard the first two whitespace-separated words of the date text and
    # keep the remainder as the expiry description.
    expires_in = html_element.css(DEALS_HTML_ELEMENT_DATETIME).text.split(" ").drop(2).join(" ")
    deal = Deal.new(price, original_price, expires_in)

    title = html_element.css(DEALS_HTML_ELEMENT_TITLE).text
    # NOTE(review): if the purchase-link selector matches nothing, attr
    # yields nil and this concatenation raises TypeError — confirm every
    # deal element carries a purchase link.
    purchase_url = HOME_URL + html_element.css(DEALS_PURCHASE_URL_HTML_ELEMENTS).attr("href")
    author = html_element.css(DEALS_HTML_ELEMENT_AUTHOR).text
    description = html_element.css(DEALS_HTML_ELEMENT_DESCRIPTION).text
    book = Book.new(deal, title, author, purchase_url, description)

    category.add_book(book)
  end

  category
end
Private Instance Methods
select_public_category_html_elements(all_category_html_elements)
click to toggle source
# File lib/book_deals/scraper.rb, line 56
# Filters out the personalised "Recommended for You" entry so that only
# public categories remain.
#
# The element text is stripped before comparison because text extracted
# from HTML can carry surrounding whitespace or newlines; an exact match
# would let such an entry slip through.
#
# @param all_category_html_elements an enumerable of objects responding to #text
# @return the elements whose trimmed text is not "Recommended for You"
def select_public_category_html_elements(all_category_html_elements)
  all_category_html_elements.reject do |html_element|
    html_element.text.strip == "Recommended for You"
  end
end