class CareerProfiles::Scraper
Public Class Methods
get_occupation_page(link)
click to toggle source
# File lib/career_profiles/scraper.rb, line 45 def self.get_occupation_page(link) Nokogiri::HTML(open(link)) end
get_page()
click to toggle source
# File lib/career_profiles/scraper.rb, line 3 def self.get_page Nokogiri::HTML(open("https://www.bls.gov/k12/content/students/careers/career-exploration.htm")) end
scrape_career_interests()
click to toggle source
# File lib/career_profiles/scraper.rb, line 7 def self.scrape_career_interests list = [] names_index = get_page.css("div.careerCol p.careerList") names_index.each do |c| list << c.text end names = [] list.each do |c| names << c.gsub("[+] Show", "") end career_interests = [] names.each {|name| career_interests << {:name => name}} career_interests end
scrape_occupation_attributes(link)
click to toggle source
# File lib/career_profiles/scraper.rb, line 49 def self.scrape_occupation_attributes(link) index = get_occupation_page(link).css("table#quickfacts tbody tr") pay_index = index[0] med_pay = pay_index.css("td").text.strip med_pay = med_pay.gsub("\n", ' ').squeeze(' ') education_index = index[1] education = education_index.css("td").text.strip outlook_index = index[5] outlook_2016_26 = outlook_index.css("td").text.strip role_index = get_occupation_page(link).css("article") role = role_index.css("p")[1].text occupation_attributes = { :median_pay_2017 => med_pay, :education => education, :outlook_2016_26 => outlook_2016_26, :key_responsibilities => role, } occupation_attributes end
scrape_occupations(i)
click to toggle source
# File lib/career_profiles/scraper.rb, line 24 def self.scrape_occupations(i) names = [] career_interest_index = get_page.css("div.careerNames ul")[i].css("a") career_interest_index.each do |o| names << o.text end names.delete_if {|x| x == "Designer"} urls = [] career_interest_index.each do |o| urls << o.attribute("href").text end urls.delete_if {|x| x == "https://www.bls.gov/ooh/arts-and-design/home.htm"} occupations = [] names.each {|name| occupations << {:name => name}} u = 0 urls.each {|url| occupations[u][:url] = url; u += 1} occupations end