module Hongkong::News::Scrapers::PhantomScraper
Public Instance Methods
cleanup()
click to toggle source
Call this when shutting down PhantomJS.
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 62
# Waits for outstanding AJAX requests (see wait_for_ajax) and then resets the
# page's driver. Meant to be called while shutting PhantomJS down.
#
# If wait_for_ajax raises, the driver reset is not reached.
def cleanup
  wait_for_ajax
  driver = page.driver
  driver.reset!
end
doc()
click to toggle source
Get a Nokogiri document
for the current page
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 54
# Returns a Nokogiri document built from #html.
#
# The markup is parsed on the first call only; the result is cached in @doc
# and handed back unchanged on every later call, so this method never
# re-parses the page on its own.
def doc
  @doc ||= Nokogiri::HTML(html)
end
html()
click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 49
# Returns the HTML of the current page, exactly as reported by page.html.
def html
  page.html
end
screenshot_data(filename='screenshot.gif')
click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 36
# Takes a full-page screenshot and returns the image bytes.
#
# filename - Used only to name the temporary file the screenshot is written
#            to. Its extension (".gif" by default) is kept at the end of the
#            temporary path.
#
# Returns a binary (ASCII-8BIT) String holding whatever page.save_screenshot
# wrote. The temporary file is closed and removed before returning, also when
# taking the screenshot raises.
def screenshot_data(filename='screenshot.gif')
  extension = File.extname(filename)
  # The [prefix, suffix] form keeps the extension at the very end of the
  # generated path. A plain String basename would get the random part appended
  # after ".gif", leaving a path with no usable extension.
  # NOTE(review): presumably the driver picks the image format from the
  # extension -- confirm against the driver in use.
  file = Tempfile.new([File.basename(filename, extension), extension])
  begin
    page.save_screenshot(file.path, full: true)
    # Read via the path in binary mode: the image is not text, and the handle
    # opened above predates the write done by the driver.
    File.binread(file.path)
  ensure
    file.close
    file.unlink
  end
end
Private Instance Methods
finished_all_ajax_requests?()
click to toggle source
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 76
# Reports whether the current page has no jQuery AJAX requests in flight.
#
# Evaluates jQuery.active in the page (treated as 0 when jQuery is not
# loaded) and returns true when that count is zero.
#
# Errors raised while evaluating the script are logged to stdout and reported
# as "not finished" (false), so a caller that polls simply tries again.
def finished_all_ajax_requests?
  page.evaluate_script('(typeof jQuery !== "undefined") ? jQuery.active : 0').zero?
rescue StandardError => e
  # Only StandardError is swallowed. Rescuing Exception here would also eat
  # Interrupt, SystemExit and the exception Timeout may use internally, which
  # could leave a polling caller impossible to stop.
  puts "ignored excpetion wiating ajax: #{e}"
  false
end
wait_for_ajax()
click to toggle source
Workaround for PhantomJS hanging: waits until all AJAX requests have finished.
# File lib/hongkong/news/scrapers/phantom_scraper.rb, line 70
# Blocks until finished_all_ajax_requests? reports true.
#
# The wait is capped by Capybara's configured wait time.
#
# Returns nil once the requests have finished.
# Raises Timeout::Error when they do not finish within the wait time.
def wait_for_ajax
  # default_wait_time was renamed to default_max_wait_time in later Capybara
  # releases and the old name was eventually removed; prefer the new name and
  # fall back to the old one so both generations keep working.
  limit = if Capybara.respond_to?(:default_max_wait_time)
            Capybara.default_max_wait_time
          else
            Capybara.default_wait_time
          end

  Timeout.timeout(limit) do
    # Pause briefly between polls instead of spinning flat out against the
    # browser.
    sleep 0.01 until finished_all_ajax_requests?
  end
end