class Couchsurfing::Scraper
Scrapes 'couchsurfing.com'
Public Instance Methods
get_host_pages(continent, country, city)
click to toggle source
# File lib/couchsurfing/scraper.rb, line 33 def get_host_pages(continent, country, city) url = 'https://www.couchsurfing.com/places/' \ + continent + '/' + country + '/' + city doc = Nokogiri::HTML(open(url)) host_info = doc.css('div.multicolumn.mod-flex.mod-wrap') hosts = [] host_info.css('a.text.mod-truncated.mod-black.mod-w-90').each do |info| host = {} host['name'] = info.text.strip.split("\n").join host['url'] = info['href'] hosts << host end hosts end
scrape_host_page(host_url)
click to toggle source
# File lib/couchsurfing/scraper.rb, line 50 def scrape_host_page(host_url) url = 'https://www.couchsurfing.com' + host_url doc = Nokogiri::HTML(open(url)) host = [] profile = {} host_name = doc.css('div.profile-sidebar__user-info span a span').text profile['name'] = host_name z = doc.css('section:nth-child(3) div div div:nth-child(1) ul ' \ 'li:nth-child(3)').text profile['age_sex'] = z about_me_section = doc.css('div:nth-child(5) section:nth-child(4) div') person = {} profile['about_me'] = person if about_me_section.css('p:nth-child(2)').text == '' person['About Me'] = about_me_section.css('p:nth-child(1)').text else person['About Me'] = about_me_section.css('p:nth-child(1)').text + ': ' + \ about_me_section.css('p:nth-child(2)').text unless \ about_me_section.css('p:nth-child(1)').text == '' end about_me_section.search('h2').each do |node| if node.next_element.text.include?('COUCHSURFING') person[node.text] = node.next_element.text + ': ' + \ node.next_element.next_element.text else person[node.text] = node.next_element.text \ unless node.next_element.text == '' end end host << profile unless profile['name'] == '' host end
scrape_locations()
click to toggle source
# File lib/couchsurfing/scraper.rb, line 3 def scrape_locations doc = Nokogiri::HTML(open('https://www.couchsurfing.com/places')) structure = doc.css('#section_places div:nth-child(4) div div' \ ' div.pure-u-1.pure-u-md-15-24 div') regions = structure.css('.cs-sitemap-region') locations = [] c = structure.css('h2 a').count (0...c).each do |i| n = 0 area = {} continent = (structure.css('h2 a')[i]).text area['continent'] = continent countries = (regions[i]).css('h3 a').map(&:text) area['provinces'] = [] countries.map do |country| p = {} area['provinces'] << p c1 = (regions[i]).css("ul:nth-child(#{(n += 2)})").text cities = c1.split("\n").collect(&:strip).reject!(&:empty?) p['country'] = country p['cities'] = cities end locations << area end locations end