class WebScrapingHw3::Scrape
Class that scrapes the SET website (the site rooted at BASE_URL).
Public Class Methods
find_companies_page(url)
click to toggle source
# File lib/web_scraping_hw3.rb, line 36
# Loads the page at +url+ and collects the links held in its
# "div.col-xs-12.padding-top-10.text-center.capital-letter" container(s).
#
# @param url [String] address of the page to load
# @return [Nokogiri::XML::NodeSet] the <a> elements found inside the container(s)
def self.find_companies_page(url)
  link_container = make_parsed(url).css("div.col-xs-12.padding-top-10.text-center.capital-letter")
  link_container.css("a")
end
find_companies_table(companies_page)
click to toggle source
# File lib/web_scraping_hw3.rb, line 41
# Follows the link in +companies_page+ and collects every link inside the
# "table.table-profile.table-hover.table-set-border-yellow" table(s) of the
# page it points to.
#
# @param companies_page [Nokogiri::XML::Node] link element whose "href"
#   attribute is a path appended to BASE_URL
# @return [Nokogiri::XML::NodeSet] the <a> elements found inside the table(s)
def self.find_companies_table(companies_page)
  href = companies_page.attributes["href"].value
  listing_page = make_parsed("#{BASE_URL}#{href}")
  company_table = listing_page.css("table.table-profile.table-hover.table-set-border-yellow")
  company_table.css("a")
end
find_company_highlight(company_tag_a)
click to toggle source
# File lib/web_scraping_hw3.rb, line 56
# Follows the link in +company_tag_a+ to a company's profile page and returns
# the "href" of the second link in that page's tab bar
# ("ul.nav.nav-tabs.set-nav-tabs").
#
# @param company_tag_a [Nokogiri::XML::Node] link element whose "href"
#   attribute is a path appended to BASE_URL
# @return [String] the "href" value of the second tab link
def self.find_company_highlight(company_tag_a)
  profile_path = company_tag_a.attributes["href"].value
  profile_page = make_parsed("#{BASE_URL}#{profile_path}")
  tab_links = profile_page.css("ul.nav.nav-tabs.set-nav-tabs").css("a")
  # Index 1 is the second tab; presumably the "highlight" tab, going by the
  # method name -- TODO confirm against the live page layout.
  tab_links[1].attributes["href"].value
end
main_scrape()
click to toggle source
Main function that scrapes the website and prints each corporation's name and asset cost. @return [nil]
# File lib/web_scraping_hw3.rb, line 21
# Entry point of the scraper: walks from the lookup page
# ("/set/commonslookup.do") through every companies page and every company
# link, and prints one "name : cost" line per company via print_asset.
#
# Pages are processed one at a time, so output appears as the crawl proceeds.
#
# @return [nil] always; the method is called for its printed output only
def self.main_scrape
  lookup_url = "#{BASE_URL}/set/commonslookup.do"
  find_companies_page(lookup_url).each do |companies_page|
    find_companies_table(companies_page).each do |company_tag_a|
      highlight_path = find_company_highlight(company_tag_a)
      print_asset("#{BASE_URL}#{highlight_path}")
    end
  end
  # Return nil explicitly: without this the method would return the receiver
  # of the outer `each`, contradicting the documented @return [nil].
  nil
end
make_parsed(url)
click to toggle source
Fetches the page at the given URL and parses its body into a Nokogiri HTML document, so that it can be used with any Nokogiri tools. @param [String] url @return [Nokogiri::HTML::Document]
# File lib/web_scraping_hw3.rb, line 51
# Downloads +url+ with HTTParty and parses the response body with Nokogiri.
#
# @param url [String] address of the page to download
# @return [Nokogiri::HTML::Document] the parsed document
def self.make_parsed(url)
  Nokogiri::HTML(HTTParty.get(url).body)
end
print_asset(stock_url)
click to toggle source
# File lib/web_scraping_hw3.rb, line 63
# Loads a company's page and prints "<name> : <cost>" to standard output.
#
# The name is the text of the <h3> inside "div.col-xs-12.col-md-12.col-lg-8";
# the cost is the text of the second-to-last <td> in the third <tr> of
# "table.table.table-hover.table-info".
#
# If that row or cell does not exist, a message is written to standard error
# and nothing is printed, so one incomplete page does not abort a whole crawl
# with a NoMethodError on nil.
#
# @param stock_url [String] address of the company's page
# @return [nil]
def self.print_asset(stock_url)
  page = make_parsed(stock_url)
  name = page.css("div.col-xs-12.col-md-12.col-lg-8").css("h3").text
  rows = page.css("table.table.table-hover.table-info").css("tr")

  # Both lookups can come back nil when the table is missing or shorter than
  # expected, so each step is guarded.
  cells = rows[2]&.css("td")
  cost_cell = cells && cells[-2]
  if cost_cell.nil?
    warn("Skipping #{stock_url}: expected table cell not found")
    return
  end

  puts("#{name} : #{cost_cell.text}")
end