class Parser::Base
Html Base
Public Class Methods
new(url, options)
click to toggle source
# File lib/fly_parser/base.rb, line 5
# Builds a parser for the document at +url+.
#
# The result of Parser.connect(url) is kept in @source and the hash built by
# #copyright(options) in @copyright. @limit_pages and @delay are only
# defaulted (to 5 and 10) when they do not already hold a truthy value.
#
# @param url     passed straight to Parser.connect
# @param options passed straight to #copyright
def initialize(url, options)
  @source = Parser.connect(url)
  @copyright = copyright(options)

  # `||=` keeps any value that was assigned before this initializer ran.
  @limit_pages ||= 5 # page count at which #parse_all stops following links
  @delay ||= 10      # handed to Kernel#sleep between page fetches in #parse_all
end
Public Instance Methods
click(link)
click to toggle source
# File lib/fly_parser/base.rb, line 46
# Follows +link+ with a freshly created Mechanize agent and returns whatever
# Mechanize#click returns for it.
#
# @param link the link object handed to Mechanize#click
def click(link)
  browser = Mechanize.new.tap do |agent|
    agent.ignore_bad_chunking = true
    # Fallback parser for content types without a registered parser
    # (per the Mechanize pluggable-parser documentation).
    agent.pluggable_parser.default = Mechanize::Page
  end

  browser.click(link)
end
collect_between(first, last)
click to toggle source
# File lib/fly_parser/base.rb, line 34
# Collects +first+, every element reached by repeatedly calling #next, and
# +last+ (inclusive) into an array.
#
# Walks the chain iteratively, so long ranges neither exhaust the call stack
# nor rebuild the array on every step.
#
# @param first starting element; must respond to #next and #==
# @param last  element at which collection stops (compared with ==)
# @return [Array, nil] nil when +first+ is nil; otherwise the elements from
#   +first+ through +last+, or through the end of the chain (the point where
#   #next returns nil) when +last+ is never reached
def collect_between(first, last)
  return nil if first.nil?

  collected = []
  node = first
  until node.nil?
    collected << node
    break if node == last

    node = node.next
  end
  collected
end
copyright(options)
click to toggle source
# File lib/fly_parser/base.rb, line 38
# Builds the copyright hash from the :source entry of +options+.
#
# A missing (nil) :source entry is treated as an empty hash, so both fields
# come back nil instead of raising NoMethodError on nil.
#
# @param options [Hash] read via options[:source]; that entry is read with the
#   string keys 'copyright' and 'copyright_title'
# @return [Hash] { url: ..., title: ... }
def copyright(options)
  source = options[:source] || {}

  {
    url: source['copyright'],
    title: source['copyright_title']
  }
end
next_page(css_selector)
click to toggle source
# File lib/fly_parser/base.rb, line 12
# Looks up the link to the following page on @source and remembers it in
# @next_page.
#
# @param css_selector forwarded unchanged to @source.links_with
# @return the first element returned by links_with, or nil when there is none
def next_page(css_selector)
  candidates = @source.links_with(css_selector)
  @next_page = candidates.first
end
parse_all()
click to toggle source
# File lib/fly_parser/base.rb, line 16
# Parses the current page, then keeps following @next_page and appending each
# page's #parse_page result until there is no next link or @limit_pages pages
# have been parsed. Sleeps for @delay before every page after the first.
#
# NOTE(review): #next_page as listed for this class requires a css_selector
# argument, so the zero-argument calls below raise ArgumentError unless a
# subclass redefines #next_page without parameters -- confirm.
#
# @return the object returned by the first #parse_page call, after the later
#   pages' results have been appended to it with #concat
def parse_all
  collected = parse_page
  next_page

  page_number = 1
  ap "Parsing #{page_number} page" # `ap` is not defined in this listing; presumably awesome_print

  loop do
    break if @next_page.nil? || page_number == @limit_pages

    sleep @delay
    page_number += 1
    ap "Parsing #{page_number} page"

    # Move to the next page, then refresh @next_page before parsing it.
    @source = @next_page.click
    next_page
    collected.concat(parse_page)
  end

  collected
end
parse_page()
click to toggle source
# File lib/fly_parser/base.rb, line 32
# Placeholder for parsing a single page; does nothing here.
#
# NOTE(review): #parse_all calls #concat on this method's result, so
# subclasses are presumably expected to override it and return an Array --
# confirm.
#
# @return [nil]
def parse_page
  nil
end