class Pagedump::Driver
WARNING: This class is not thread-safe.
Public Class Methods
inherited(subclass)
click to toggle source
# File lib/pagedump/driver.rb, line 7
# Subclass hook: Ruby calls this whenever a class inherits from this one.
# The new subclass is handed straight to Pagedump.register_driver, and the
# result of that call is returned.
def self.inherited(subclass)
  Pagedump.register_driver(subclass)
end
new()
click to toggle source
# File lib/pagedump/driver.rb, line 11
# Starts the instance with two empty hashes: @wlinks (filled by #link) and
# @data (filled by #data).
def initialize
  @wlinks, @data = {}, {}
end
Public Instance Methods
check(page)
click to toggle source
# File lib/pagedump/driver.rb, line 44
# Hook invoked by #scrap with the fetched page. The base implementation
# ignores the page and returns nil.
def check(page)
  nil
end
data(key, value)
click to toggle source
# File lib/pagedump/driver.rb, line 16
# Appends +value+ to the list stored under +key+ in @data, creating the
# list on first use. Returns that list.
def data(key, value)
  bucket = (@data[key] ||= [])
  bucket << value
end
link(weight, href)
click to toggle source
# File lib/pagedump/driver.rb, line 21
# Resolves +href+ against the driver's #url and records the absolute link
# in @wlinks with the given +weight+ (a later call for the same absolute
# link overwrites the earlier weight). Returns +weight+ on success.
#
# When URI parsing raises URI::InvalidURIError the link is skipped and a
# warning is logged through Pagedump.logger instead.
def link(weight, href)
  # href is parsed before the base URL, so a bad href is reported without
  # ever touching #url.
  target = URI.parse(href)
  absolute = URI.parse(url).merge(target).to_s
  @wlinks[absolute] = weight
rescue URI::InvalidURIError
  Pagedump.logger.warn "[Driver #{name}] Error parsing href \"#{href}\". Ignoring link (weight was #{weight})"
end
name()
click to toggle source
# File lib/pagedump/driver.rb, line 51
# Returns the name of the receiver's class; used to tag log messages.
def name
  driver_class = self.class
  driver_class.name
end
scrap()
click to toggle source
# File lib/pagedump/driver.rb, line 31
# Fetches #url with a fresh Mechanize agent, passes the page to #links and
# then #check, and returns an OpenStruct exposing +links+ (@wlinks) and
# +data+ (@data).
#
# @wlinks is reset on every call; @data is not, so values recorded through
# #data accumulate across repeated calls on the same instance.
def scrap
  @wlinks = {}
  Pagedump.logger.info "Getting headlines for url #{url}"

  page = Mechanize.new.get(url)
  # #links is not defined in this class -- presumably supplied by concrete
  # driver subclasses; verify against them.
  self.links(page)
  self.check(page)

  OpenStruct.new(links: @wlinks, data: @data)
end
url()
click to toggle source
# File lib/pagedump/driver.rb, line 47
# Returns the URL constant looked up on the receiver's class, so each
# driver class supplies its own target address.
def url
  driver_class = self.class
  driver_class::URL
end