module Maseti::WebPageParser
Public Instance Methods
fetch_page(page)
click to toggle source
# File lib/maseti/web_page_parser.rb, line 9
#
# Downloads a single page located under Maseti::Constants::BASE_URL and
# returns whatever extract_excel_urls produces for the HTTP response.
#
# A timestamp is taken immediately before and immediately after the request
# via get_micro_second_time (presumably microseconds, judging by the helper's
# name - confirm against its definition) and both are forwarded to
# extract_excel_urls.
#
# @param page [#to_s] path segment appended to the base URL after a "/"
# @return the result of extract_excel_urls for the fetched response
def fetch_page(page)
  started_at = get_micro_second_time
  page_url = "#{Maseti::Constants::BASE_URL}/#{page}"
  response = HTTParty.get(page_url)
  # TODO: Use the time
  finished_at = get_micro_second_time

  extract_excel_urls(response, started_at, finished_at)
end
fetch_xls_paths_from_pages()
click to toggle source
# File lib/maseti/web_page_parser.rb, line 3
#
# Calls fetch_page for every entry of Maseti::Constants::PAGES and
# concatenates the per-page results into one flat collection.
#
# @return [Array] the flattened results of fetch_page across all pages
def fetch_xls_paths_from_pages
  pages = Maseti::Constants::PAGES
  pages.flat_map { |page| fetch_page(page) }
end
Private Instance Methods
extract_excel_urls(response, start_time, end_time)
click to toggle source
# File lib/maseti/web_page_parser.rb, line 21
#
# Parses the response as HTML, reads the href attribute of every <a>
# element, drops anchors that have no href, and keeps only the hrefs that
# contain Maseti::Constants::FILE_TYPE.
#
# @param response the fetched page; handed to parse_html unchanged
# @param start_time accepted but not used by this method
# @param end_time accepted but not used by this method
# @return [Array] href values that include Maseti::Constants::FILE_TYPE
def extract_excel_urls(response, start_time, end_time)
  anchors = parse_html(response).css('a').to_a
  hrefs = anchors.map { |anchor| anchor['href'] }.compact

  hrefs.select { |href| href.include?(Maseti::Constants::FILE_TYPE) }
end
parse_html(raw_html)
click to toggle source
# File lib/maseti/web_page_parser.rb, line 30
#
# Builds a Nokogiri HTML document from the given markup.
#
# @param raw_html the HTML input passed straight through to Nokogiri
# @return the document object produced by Nokogiri's HTML parser
def parse_html(raw_html)
  Nokogiri::HTML.parse(raw_html)
end