module Hongkong::News::Scrapers::PhantomScraper

Public Instance Methods

cleanup() click to toggle source

Call this when shutting down PhantomJS.

# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 62
# Lets outstanding AJAX activity settle (via wait_for_ajax) and then resets
# the page's driver. Returns whatever the driver's reset! returns.
def cleanup
  wait_for_ajax
  driver = page.driver
  driver.reset!
end
doc() click to toggle source

Gets a Nokogiri document for the current page.

# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 54
# Returns the result of Nokogiri::HTML applied to the current html.
# The parsed document is memoized in @doc, so the markup is parsed only on
# the first call; later calls return the cached object.
def doc
  @doc ||= Nokogiri::HTML(html)
end
html() click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 49
# Returns the HTML reported by the current page object.
def html
  current_page = page
  current_page.html
end
screenshot_data(filename='screenshot.gif') click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 36
# Saves a full-page screenshot to a temporary file and returns the file's
# contents as a String.
#
# filename - name used to build the temporary file. Its extension is kept
#            as the temp file's suffix (e.g. "screenshot.gif" produces a
#            path ending in ".gif"), so a driver that derives the image
#            format from the path can see it.
#
# The temporary file is closed and deleted before returning, including when
# taking the screenshot raises.
def screenshot_data(filename='screenshot.gif')
  extension = File.extname(filename)
  # The [prefix, suffix] form places Tempfile's random part before the
  # extension; passing a plain string would append it after ".gif".
  file = Tempfile.new([File.basename(filename, extension), extension])
  begin
    page.save_screenshot(file.path, full: true)
    file.read
  ensure
    file.close
    file.unlink
  end
end

Private Instance Methods

finished_all_ajax_requests?() click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 76
# Reports whether the page has no active jQuery AJAX requests.
#
# Evaluates jQuery.active in the page (treated as 0 when jQuery is not
# defined) and returns true when that count is zero, false otherwise.
#
# Errors raised while evaluating the script are logged and treated as
# "not finished" (false). Only StandardError is rescued: rescuing Exception
# would also swallow interrupts, SystemExit and the internal exception that
# Timeout may use to abort a block, which could stop an enclosing
# Timeout.timeout from ever firing.
def finished_all_ajax_requests?
  page.evaluate_script('(typeof jQuery !== "undefined") ? jQuery.active : 0').zero?
rescue StandardError => e
  puts "ignored excpetion wiating ajax: #{e}"
  false
end
wait_for_ajax() click to toggle source

Workaround for PhantomJS hanging.

# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 70
# Polls finished_all_ajax_requests? until it returns a truthy value.
#
# The wait is bounded by Capybara's configured wait time; Timeout.timeout
# raises Timeout::Error if the limit elapses first. Returns nil.
def wait_for_ajax
  # default_wait_time is the legacy name of this setting; prefer
  # default_max_wait_time when this Capybara provides it, and fall back to
  # the old name otherwise.
  limit = if Capybara.respond_to?(:default_max_wait_time)
            Capybara.default_max_wait_time
          else
            Capybara.default_wait_time
          end

  Timeout.timeout(limit) do
    nil until finished_all_ajax_requests?
  end
end