class DoctorFinder::Scraper
The Scraper class
Constants
- BASE_URL
Public Class Methods
scrape_by_zipcode(zipcode)
click to toggle source
# File lib/doctor_finder/scraper.rb, line 9
# Fetches the search-results page for +zipcode+ and creates one
# DoctorFinder::Doctor per result row, filling in name, speciality, url and
# the address parts parsed out of the row's address text.
#
# @param zipcode [#to_s] zip code to search near; it is form-encoded before
#   being placed in the query string
# @return the value of DoctorFinder::Doctor.all (presumably every Doctor
#   created so far -- confirm against DoctorFinder::Doctor)
def self.scrape_by_zipcode(zipcode)
  query = "search?address=#{URI.encode_www_form_component(zipcode)}&insurance_carrier=-1&day_filter=AnyDay&gender=-1&language=-1&offset=0&insurance_plan=-1&reason_visit=75&after_5pm=false&before_10am=false&sees_children=false&sort_type=Default&dr_specialty=153&"

  # URI.open (from open-uri) rather than bare `open`: Kernel#open no longer
  # fetches URLs on Ruby 3.0+. The block form closes the handle once the
  # document has been parsed.
  html = URI.open("#{BASE_URL}#{query}") { |page| Nokogiri::HTML(page) }

  html.css('.js-prof-row-container').each do |doctor|
    links = doctor.css('.js-profile-link')
    link = links.first
    # A row without a profile link/href cannot produce a URL; skip it instead
    # of aborting the whole scrape with a NoMethodError/TypeError.
    next unless link && link['href']

    # will go through the HTML and create new doctor instances
    doc = DoctorFinder::Doctor.new
    doc.name = links.text.strip.gsub("\n", ' ').squeeze(' ')
    doc.speciality = doctor.css('.ch-prof-row-speciality').text.strip
    doc.url = BASE_URL + link['href']

    address = doctor.css('.js-search-prof-row-address').text.strip
    # To format the text correctly, had to use some regex
    doc.street = address.slice(/^\d+[ ][\w+[ ]]+/)
    # The city match ends with a comma, which `chop` removes; `&.` leaves
    # city nil when the address text has no such match.
    doc.city = address[/[ ][ ]+[\w+[.]*[ ]]*[,]/]&.strip&.chop
    doc.state = address[/[A-Z][A-Z]/]
    doc.zip = address[/\d{5}/]
  end

  DoctorFinder::Doctor.all
end
scrape_for_details(doctor)
click to toggle source
# File lib/doctor_finder/scraper.rb, line 26
# Fetches the page at +doctor.url+ and fills in the doctor's +details+ and
# +areas+ from it.
#
# @param doctor an object responding to +url+, +details+/+details=+ and
#   +areas=+ (presumably a DoctorFinder::Doctor -- confirm against callers)
# @return the same +doctor+ object, after its attributes have been updated
def self.scrape_for_details(doctor)
  # URI.open (from open-uri) rather than bare `open`: Kernel#open no longer
  # fetches URLs on Ruby 3.0+ and would treat a value starting with "|" as a
  # command to run. The block form closes the handle after parsing.
  html = URI.open(doctor.url) { |page| Nokogiri::HTML(page) }

  doctor.details = html.css('.profile-professional-statement').text.squeeze(' ')
  # Substitute a placeholder when the statement is blank or whitespace-only.
  doctor.details = "No further details were available." if doctor.details.strip.empty?

  # Collapse repeated spaces, then fold the doubled "\r\n " separators between
  # specialty items into a single line break.
  doctor.areas = html.css('li.specialty').text.squeeze(" ").gsub("\r\n \r\n ", "\r\n").lstrip
  doctor
end