class ContentCrawler::Crawler

Public Class Methods

new(crawler, base_url, options={:timeout=>300, :user_agent=>nil})
Calls superclass method CrawlerProcess::new
# File lib/content_crawler.rb, line 13
# Builds a crawler by handing all three arguments straight to the superclass
# constructor; this class adds no setup of its own.
#
# crawler  - forwarded unchanged to the superclass.
# base_url - forwarded unchanged to the superclass.
# options  - forwarded unchanged; defaults to a hash with :timeout => 300
#            and :user_agent => nil.
def initialize(crawler, base_url, options={:timeout=>300, :user_agent=>nil})
  super(crawler, base_url, options)
end

Public Instance Methods

close_browser()
Calls superclass method CrawlerProcess#close_browser
# File lib/content_crawler.rb, line 60
# Delegates to the superclass implementation of close_browser and returns
# whatever that implementation returns.
def close_browser
  super()
end
get_audio_video_elements(xpath=nil, options={})
# File lib/content_crawler.rb, line 48
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to audio_video_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is queried.
# options - passed through untouched to audio_video_collection.
#
# Returns the result of audio_video_collection, or nil when xpath is nil.
def get_audio_video_elements(xpath=nil, options={})
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  audio_video_collection(matched_nodes, options)
end
get_datalist_elements(xpath=nil, options={})
# File lib/content_crawler.rb, line 56
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to datalist_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is queried.
# options - passed through untouched to datalist_collection.
#
# Returns the result of datalist_collection, or nil when xpath is nil.
def get_datalist_elements(xpath=nil, options={})
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  datalist_collection(matched_nodes, options)
end
get_iframe_embed_elements(xpath=nil, options={})
# File lib/content_crawler.rb, line 44
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to iframe_embed_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is queried.
# options - passed through untouched to iframe_embed_collection.
#
# Returns the result of iframe_embed_collection, or nil when xpath is nil.
def get_iframe_embed_elements(xpath=nil, options={})
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  iframe_embed_collection(matched_nodes, options)
end
get_object_elements(xpath=nil, options={})
# File lib/content_crawler.rb, line 52
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to object_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is queried.
# options - passed through untouched to object_collection.
#
# Returns the result of object_collection, or nil when xpath is nil.
def get_object_elements(xpath=nil, options={})
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  object_collection(matched_nodes, options)
end
get_parser_page(crawl_url=nil)
# File lib/content_crawler.rb, line 17
# Loads +crawl_url+ and stores the parsed document in @page.
#
# When @browser is set it takes priority: the browser navigates to the URL
# and its HTML is parsed with Nokogiri::HTML. Otherwise, when @agent is set,
# the page is fetched through the agent and its parser is stored.
#
# crawl_url - URL to load; nil means nothing is fetched.
#
# Returns the new @page on success. When crawl_url is nil, or neither
# @browser nor @agent is set, @page is left untouched and a usage message
# String is returned instead (no exception is raised).
def get_parser_page(crawl_url=nil)
  usage_hint = "Please select any one of the parser(selenium_webdriver_with_headless, selenium_webdriver_without_headless, mechanize_parser) and pass the crawl_url to crawl content"
  return usage_hint if crawl_url.nil?

  unless @browser.nil?
    @browser.goto(crawl_url)
    return @page = Nokogiri::HTML(@browser.html)
  end

  return usage_hint if @agent.nil?

  @page = @agent.get(crawl_url).parser
end
get_remote_image(xpath=nil, image_store_dir=nil)
# File lib/content_crawler.rb, line 36
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +image_store_dir+, to store_remote_image.
#
# xpath           - XPath expression to evaluate; when nil nothing is queried.
# image_store_dir - passed through untouched to store_remote_image.
#
# Returns the result of store_remote_image, or nil when xpath is nil.
def get_remote_image(xpath=nil, image_store_dir=nil)
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  store_remote_image(matched_nodes, image_store_dir)
end
get_select_elements(xpath=nil, options={})
# File lib/content_crawler.rb, line 40
# Runs +xpath+ against the currently loaded @page and passes the matched
# nodes, together with +options+, to select_collection.
#
# xpath   - XPath expression to evaluate; when nil nothing is queried.
# options - passed through untouched to select_collection.
#
# Returns the result of select_collection, or nil when xpath is nil.
def get_select_elements(xpath=nil, options={})
  return if xpath.nil?

  matched_nodes = @page.xpath(xpath)
  select_collection(matched_nodes, options)
end
get_simple_text(xpath=nil)
# File lib/content_crawler.rb, line 28
# Runs +xpath+ against the currently loaded @page, takes the text of the
# match, and strips leading and trailing whitespace from it.
#
# xpath - XPath expression to evaluate; when nil nothing is queried.
#
# Returns the stripped text, or nil when xpath is nil.
def get_simple_text(xpath=nil)
  return if xpath.nil?

  raw_text = @page.xpath(xpath).text
  raw_text.strip
end