class ContentCrawler::Crawler
Public Class Methods
new(crawler, base_url, options={:timeout=>300, :user_agent=>nil})
click to toggle source
Calls superclass method
CrawlerProcess::new
# File lib/content_crawler.rb, line 13
# Builds a crawler by handing every argument straight to the superclass
# constructor; nothing is added at this level.
#
# crawler  - forwarded unchanged to the superclass.
# base_url - forwarded unchanged to the superclass.
# options  - Hash forwarded unchanged; defaults to a 300 timeout and no
#            user agent.
def initialize(crawler, base_url, options = { timeout: 300, user_agent: nil })
  super(crawler, base_url, options)
end
Public Instance Methods
close_browser()
click to toggle source
Calls superclass method
CrawlerProcess#close_browser
# File lib/content_crawler.rb, line 60
# Delegates to the superclass implementation of close_browser and returns
# whatever it returns; no extra work happens here.
def close_browser
  super()
end
get_audio_video_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 48
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to audio_video_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to audio_video_collection.
#
# Returns the result of audio_video_collection, or nil when +xpath+ is nil.
def get_audio_video_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  audio_video_collection(@page.xpath(xpath), options)
end
get_datalist_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 56
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to datalist_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to datalist_collection.
#
# Returns the result of datalist_collection, or nil when +xpath+ is nil.
def get_datalist_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  datalist_collection(@page.xpath(xpath), options)
end
get_iframe_embed_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 44
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to iframe_embed_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to iframe_embed_collection.
#
# Returns the result of iframe_embed_collection, or nil when +xpath+ is nil.
def get_iframe_embed_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  iframe_embed_collection(@page.xpath(xpath), options)
end
get_link_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 32
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to collection_links.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to collection_links.
#
# Returns the result of collection_links, or nil when +xpath+ is nil.
def get_link_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  collection_links(@page.xpath(xpath), options)
end
get_object_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 52
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to object_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to object_collection.
#
# Returns the result of object_collection, or nil when +xpath+ is nil.
def get_object_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  object_collection(@page.xpath(xpath), options)
end
get_parser_page(crawl_url=nil)
click to toggle source
# File lib/content_crawler.rb, line 17
# Loads +crawl_url+ through whichever backend has been set up and stores the
# parsed document in @page.
#
# When @browser is set it takes priority: the browser navigates to the URL and
# its HTML is parsed with Nokogiri::HTML. Otherwise, when @agent is set, the
# page is fetched with @agent.get and its +parser+ is used.
#
# crawl_url - URL to load; nil means there is nothing to crawl.
#
# Returns the document assigned to @page. When +crawl_url+ is nil, or neither
# @browser nor @agent is set, returns a help String instead (no exception is
# raised and @page is left untouched).
def get_parser_page(crawl_url = nil)
  # One guard covers every "cannot crawl" case; callers receive the message
  # as a plain return value, so it must stay a String rather than a raise.
  if crawl_url.nil? || (@browser.nil? && @agent.nil?)
    return "Please select any one of the parser(selenium_webdriver_with_headless, selenium_webdriver_without_headless, mechanize_parser) and pass the crawl_url to crawl content"
  end

  if @browser.nil?
    # Only @agent is available at this point.
    @page = @agent.get(crawl_url).parser
  else
    @browser.goto(crawl_url)
    @page = Nokogiri::HTML(@browser.html)
  end
end
get_remote_image(xpath=nil, image_store_dir=nil)
click to toggle source
# File lib/content_crawler.rb, line 36
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +image_store_dir+, to store_remote_image.
#
# xpath           - XPath expression to evaluate; when nil nothing is looked up.
# image_store_dir - handed through untouched to store_remote_image
#                   (presumably the target directory; confirm against that helper).
#
# Returns the result of store_remote_image, or nil when +xpath+ is nil.
def get_remote_image(xpath = nil, image_store_dir = nil)
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  store_remote_image(@page.xpath(xpath), image_store_dir)
end
get_select_elements(xpath=nil, options={})
click to toggle source
# File lib/content_crawler.rb, line 40
# Evaluates +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to select_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is looked up.
# options - Hash handed through untouched to select_collection.
#
# Returns the result of select_collection, or nil when +xpath+ is nil.
def get_select_elements(xpath = nil, options = {})
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  select_collection(@page.xpath(xpath), options)
end
get_simple_text(xpath=nil)
click to toggle source
# File lib/content_crawler.rb, line 28
# Evaluates +xpath+ against the currently loaded @page and returns the text of
# the matched nodes with leading and trailing whitespace stripped.
#
# xpath - XPath expression to evaluate; when nil nothing is looked up.
#
# Returns the stripped text, or nil when +xpath+ is nil.
def get_simple_text(xpath = nil)
  # Guard first so @page is never touched when there is nothing to look up.
  return if xpath.nil?

  @page.xpath(xpath).text.strip
end