class Scraper
Constants
- URL
Public Class Methods
new(email, password)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 6
# Builds a scraper for one account.
#
# email    - value later written into the login form's email field.
# password - value later written into the login form's password field.
#
# A fresh Mechanize agent is created and kept in @agent.
def initialize(email, password)
  @email, @password = email, password
  @agent = Mechanize.new
end
Public Instance Methods
get_user_profiles()
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 13
# Loads the cohort page, gathers the user links found on it and turns each
# link into a profile via create_user_profiles.
#
# Raises RuntimeError ("Invalid username or password") when the cohort page
# yields no user links.
# Returns the result of create_user_profiles for the collected links.
def get_user_profiles
  links = get_user_links(get_cohort_page)
  raise "Invalid username or password" if links.empty?

  puts "Getting user profiles..."
  create_user_profiles(links)
end
Private Instance Methods
convert_html_to_person_hash(html)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 69
# Builds the person hash for one profile document.
#
# html - parsed profile document handed to get_name, get_image and
#        get_facebook_id.
#
# Returns a Hash with the keys :name, :image and :facebook_id.
def convert_html_to_person_hash(html)
  {
    name: get_name(html),
    image: get_image(html),
    facebook_id: get_facebook_id(html)
  }
end
create_user_profiles(user_links)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 48
# Visits every user link and converts the profile found there into a
# person hash.
#
# user_links - enumerable of links accepted by extract_profile_html.
#
# Returns an Array with one entry per link.
def create_user_profiles(user_links)
  user_links.map do |profile_link|
    # Each visit runs inside a transaction so the agent is reset to the
    # cohort page after every user.
    @agent.transact { convert_html_to_person_hash(extract_profile_html(profile_link)) }
  end
end
extract_profile_html(link)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 58
# Follows a user link and returns the profile section of the resulting page
# as a Nokogiri HTML document, with the user's Socrates id appended.
#
# link - link object understood by @agent.click; its href is expected to
#        end in "/<digits>".
def extract_profile_html(link)
  profile_page = @agent.click(link)

  # The digits after the last slash of the href are the Socrates id; they
  # are appended as an easily identifiable <p class='soc_id'> element.
  soc_id = link.href[/.*\/(\d*)/, 1]
  profile_markup = profile_page.search('div.profile').to_s
  profile_markup << "<p class='soc_id'>#{soc_id}</p>"

  Nokogiri::HTML(profile_markup)
end
get_cohort_page()
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 25
# Opens URL, submits the login form and then requests the cohort page.
#
# Returns whatever @agent.get returns for the cohort address.
def get_cohort_page
  cohort_address = "https://socrates.devbootcamp.com/cohorts/78"

  @agent.get(URL) # login reads the form from the agent's current page
  login
  @agent.get(cohort_address)
end
get_facebook_id(html)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 85
# Extracts the Facebook id from a profile document.
#
# html - object responding to css(selector) with an indexable list of nodes
#        that respond to text.
#
# The id is whatever follows the last "/" in the text of the fifth <dd>
# element (presumably the Facebook profile URL; confirm against the page).
#
# Returns the id String, or nil when the page has fewer than five <dd>
# elements or the text contains no "/".
def get_facebook_id(html)
  facebook_node = html.css('dd')[4]
  # Profiles missing the field used to crash with NoMethodError on nil.
  return nil unless facebook_node

  facebook_node.text[/.*\/(.*)/, 1]
end
get_image(html)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 81
# Reads the src attribute of the first image matched by '.user > img'.
#
# html - object responding to css(selector) with a list of nodes that
#        support ['src'] lookup.
#
# Returns the src value, or nil when no such image exists (previously this
# raised NoMethodError on nil).
def get_image(html)
  image_node = html.css('.user > img').first
  image_node && image_node['src']
end
get_name(html)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 77
# Reads the user's name from the first text node directly under an <h1>.
#
# html - object responding to css(selector) with a list of nodes that
#        respond to text.
#
# Returns the text with surrounding whitespace removed, or nil when the
# document has no matching node (previously this raised NoMethodError on nil).
def get_name(html)
  heading_text = html.css('h1 > text()').first
  heading_text && heading_text.text.strip
end
get_user_links(page)
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 38
# Collects the distinct user links on a page.
#
# page - object responding to links_with(href: Regexp) with a list of links
#        that respond to href.
#
# The first match is dropped (the top 'My Profile' link) and the rest are
# de-duplicated by href, since each user appears as a photo link and a text
# link. The list returned by the page is not modified.
#
# Returns an Array of links, empty when nothing matched.
def get_user_links(page)
  page.links_with(href: /users/)
      .drop(1)
      .uniq(&:href)
end
login()
click to toggle source
# File lib/socrates_scraper/scraper.rb, line 31
# Fills in and submits the first form on the agent's current page using the
# stored credentials.
#
# Raises RuntimeError ("Login form not found") when the current page has no
# form, instead of failing with NoMethodError on nil.
# Returns the result of submitting the form.
def login
  form = @agent.page.forms.first
  raise "Login form not found" unless form

  form.email = @email
  form.password = @password
  form.submit
end