module Maseti::WebPageParser

Public Instance Methods

fetch_page(page)
# File lib/maseti/web_page_parser.rb, line 9
def fetch_page(page)
  start_time = get_micro_second_time

  response = HTTParty.get("#{Maseti::Constants::BASE_URL}/#{page}")

  # TODO: Use the time
  end_time = get_micro_second_time
  extract_excel_urls(response, start_time, end_time)
end
fetch_xls_paths_from_pages()
# File lib/maseti/web_page_parser.rb, line 3
def fetch_xls_paths_from_pages
  Maseti::Constants::PAGES.flat_map do |page|
    fetch_page(page)
  end
end

Private Instance Methods

extract_excel_urls(response, start_time, end_time)
# File lib/maseti/web_page_parser.rb, line 21
def extract_excel_urls(response, start_time, end_time)
  parse_html(response)
    .css('a')
    .to_a
    .map { |link| link['href'] }
    .compact
    .select { |link| link.include? Maseti::Constants::FILE_TYPE }
end
parse_html(raw_html)
# File lib/maseti/web_page_parser.rb, line 30
def parse_html(raw_html)
  Nokogiri::HTML(raw_html)
end