class Scraper
Public Class Methods
scrape_category_page(category_url)
click to toggle source
# File lib/best_boutique_hotels/scraper.rb, line 17
#
# Fetches a category listing page and returns one entry per hotel link
# matched by the 'h3.title-hotel-row a' selector, in document order.
#
# @param category_url [String] URL of the category page to fetch
# @return [Array<Hash>] hashes shaped like
#   { hotel_name: String, hotel_url: String or nil }; empty when the page
#   contains no matching links
def self.scrape_category_page(category_url)
  # URI.open (provided by open-uri) is used instead of Kernel#open:
  # Kernel#open stopped handling URLs in Ruby 3.0 and would spawn a
  # subprocess for any string that starts with "|".
  doc = Nokogiri::HTML(URI.open(category_url))

  doc.css('h3.title-hotel-row a').map do |link|
    # &. keeps an anchor without an href attribute from raising.
    { hotel_name: link.text.strip, hotel_url: link['href']&.strip }
  end
end
scrape_hotel_page(hotel_url)
click to toggle source
# File lib/best_boutique_hotels/scraper.rb, line 28
#
# Fetches a single hotel page and extracts whatever details are present.
# Keys whose value would be nil, an empty string or an empty array are
# left out of the result.
#
# @param hotel_url [String] URL of the hotel page to fetch
# @return [Hash] any subset of the keys :location, :hotel_website,
#   :number_of_rooms, :price, :notes (Array<String>) and :headline
def self.scrape_hotel_page(hotel_url)
  # URI.open (provided by open-uri) is used instead of Kernel#open:
  # Kernel#open stopped handling URLs in Ruby 3.0 and would spawn a
  # subprocess for any string that starts with "|".
  doc = Nokogiri::HTML(URI.open(hotel_url))

  location = doc.css('div.address-section div li').first&.text

  # Only the second action link is considered, and it is discarded when it
  # points back at boutiquehotelawards. A missing link or a link without an
  # href simply yields no website instead of raising.
  website_link = doc.css('div.action-link-hotel a')[1]
  hotel_website = website_link && website_link['href']
  hotel_website = nil if hotel_website&.include?('boutiquehotelawards')

  headline = doc.css('div.tag-line').text

  info_list = doc.css('div.hotel-info ul').first
  notes = info_list ? info_list.css('li').map(&:text) : []

  # The first list item is taken as the room count; the first remaining
  # item that mentions a currency marker is taken as the price. Every note
  # equal to the price text is removed from the remaining notes.
  number_of_rooms = notes.shift
  currency_markers = ['$', 'USD', 'EUR']
  price = notes.find { |note| currency_markers.any? { |marker| note.include?(marker) } }
  notes.delete(price) if price

  details = {
    location: location,
    hotel_website: hotel_website,
    number_of_rooms: number_of_rooms,
    price: price,
    notes: notes,
    headline: headline
  }
  details.reject { |_key, value| value.nil? || value.empty? }
end
scrape_index_page(index_url)
click to toggle source
# File lib/best_boutique_hotels/scraper.rb, line 3
#
# Fetches the index page and returns one entry per category link matched
# by the 'div.overlay-content-cat a' selector, in document order.
#
# @param index_url [String] URL of the index page to fetch
# @return [Array<Hash>] hashes shaped like
#   { category_name: String, category_url: String or nil }; empty when the
#   page contains no matching links
def self.scrape_index_page(index_url)
  # URI.open (provided by open-uri) is used instead of Kernel#open:
  # Kernel#open stopped handling URLs in Ruby 3.0 and would spawn a
  # subprocess for any string that starts with "|".
  doc = Nokogiri::HTML(URI.open(index_url))

  doc.css('div.overlay-content-cat a').map do |link|
    # The href is stripped (nil-safe) so category URLs are cleaned the same
    # way as the hotel URLs produced by the category scraper.
    { category_name: link.text.strip, category_url: link['href']&.strip }
  end
end