class WebScrapingHw3::Scrape

Class to Scrape the set website

Public Class Methods

find_companies_page(url) click to toggle source
# File lib/web_scraping_hw3.rb, line 36
# Locates the links to the company listing pages on the page at +url+.
#
# The page is fetched and parsed through make_parsed; the result is every
# +a+ element nested inside the
# +div.col-xs-12.padding-top-10.text-center.capital-letter+ container(s).
#
# @param url [String] address passed straight to make_parsed
# @return the node collection produced by the final +css("a")+ query
def self.find_companies_page(url)
  index_containers = make_parsed(url).css("div.col-xs-12.padding-top-10.text-center.capital-letter")
  index_containers.css("a")
end
find_companies_table(companies_page) click to toggle source
# File lib/web_scraping_hw3.rb, line 41
# Follows one listing-page link and returns the company links in its table.
#
# The link's +href+ is appended to BASE_URL, the resulting page is parsed
# with make_parsed, and every +a+ element inside
# +table.table-profile.table-hover.table-set-border-yellow+ is returned.
#
# @param companies_page an element exposing +attributes["href"].value+
#   (one of the links produced by find_companies_page in main_scrape)
# @return the node collection produced by the final +css("a")+ query
def self.find_companies_table(companies_page)
  listing_path = companies_page.attributes["href"].value
  listing_doc = make_parsed("#{BASE_URL}#{listing_path}")
  profile_tables = listing_doc.css("table.table-profile.table-hover.table-set-border-yellow")
  profile_tables.css("a")
end
find_company_highlight(company_tag_a) click to toggle source
# File lib/web_scraping_hw3.rb, line 56
# Opens a company's profile page and returns the +href+ of its second tab.
#
# The profile URL is BASE_URL plus the +href+ of +company_tag_a+. On that
# page the +a+ elements inside +ul.nav.nav-tabs.set-nav-tabs+ are collected
# and the one at index 1 is read.
#
# NOTE(review): assumes the tab list always contains at least two links;
# otherwise index 1 is nil and the attribute lookup raises NoMethodError.
#
# @param company_tag_a an element exposing +attributes["href"].value+
# @return the +href+ value of the second tab link
def self.find_company_highlight(company_tag_a)
  profile_path = company_tag_a.attributes["href"].value
  profile_doc = make_parsed("#{BASE_URL}#{profile_path}")
  tab_links = profile_doc.css("ul.nav.nav-tabs.set-nav-tabs").css("a")
  second_tab = tab_links[1]
  second_tab.attributes["href"].value
end
main_scrape() click to toggle source

Main function to scrape the website and print the corporation name and asset cost. @return [nil]

# File lib/web_scraping_hw3.rb, line 21
# Entry point: walks the whole site and hands each company's page to
# print_asset.
#
# Starting from "#{BASE_URL}/set/commonslookup.do", it visits every listing
# link (find_companies_page), every company link on each listing
# (find_companies_table), resolves the company's highlight path
# (find_company_highlight) and calls print_asset with the absolute URL.
#
# @return the result of +each+ on the collection from find_companies_page
def self.main_scrape
  listing_links = find_companies_page("#{BASE_URL}/set/commonslookup.do")

  listing_links.each do |listing_link|
    find_companies_table(listing_link).each do |company_link|
      highlight_path = find_company_highlight(company_link)
      print_asset("#{BASE_URL}#{highlight_path}")
    end
  end
end
make_parsed(url) click to toggle source

Fetch the page at the given URL and parse it into a Nokogiri HTML document, for use with any Nokogiri tools. @param [String] url @return [HTML]

# File lib/web_scraping_hw3.rb, line 51
# Downloads +url+ with HTTParty and parses the response body with Nokogiri.
#
# No status check is made on the response; whatever body comes back is
# handed to the HTML parser.
#
# @param url [String] address to request
# @return the value of +Nokogiri::HTML+ applied to the response body
def self.make_parsed(url)
  Nokogiri::HTML(HTTParty.get(url).body)
end
print_asset(stock_url) click to toggle source