class Scrapix::VBulletin
Downloads images from a vBulletin thread.
Attributes
images[R]
max_pages[R]
options[R]
page_no[R]
title[R]
url[R]
Public Class Methods
new(url = nil, options = {})
click to toggle source
# File lib/scrapix/vbulletin.rb, line 7
#
# Sets up a scraper: an empty image store, a Mechanize agent that uses the
# 'Mac Safari' user-agent alias, and then the given options and url, both
# applied through their setters (options first, because #url= reads them).
#
# url     - thread URL, or nil to leave the scraper without a target.
# options - Hash of option overrides handed to #options=.
def initialize(url = nil, options = {})
  @images = {}
  @agent = Mechanize.new.tap { |agent| agent.user_agent_alias = 'Mac Safari' }

  self.options = options
  self.url = url
end
Public Instance Methods
filter_images(sources)
click to toggle source
# File lib/scrapix/vbulletin.rb, line 41
#
# Hook for sub-classes that want to filter the image sources found on a page.
# This base implementation hands the sources back untouched.
#
# sources - the image source strings collected from one page.
#
# Returns the same sources object it was given.
def filter_images(sources)
  sources
end
find()
click to toggle source
Finds the images in this thread, beginning at the configured starting page (page_no).
# File lib/scrapix/vbulletin.rb, line 16
#
# Collects image URLs from the thread, page by page, beginning at the
# configured "start" page. Paging stops once more than "total" images are
# held or #thread_has_ended? reports true.
#
# Each distinct source is stored once, keyed by the MD5 hex digest of its URL.
# With the "cli" option set, every newly found source is printed; with both
# "verbose" and "cli" set, each page URL is printed as it is searched.
#
# Returns an Array of {url: source} hashes (also stored in @images). When no
# url has been set, returns whatever #reset left in @images without fetching.
def find
  reset
  return @images unless @url

  @page_no = @options["start"]
  until @images.count > @options["total"] || thread_has_ended?
    page_url = "#{@url}&page=#{@page_no}"
    page = @agent.get page_url
    puts "[VERBOSE] Searching: #{page_url}" if @options["verbose"] && @options["cli"]

    sources = filter_images(page.image_urls.map(&:to_s)) # hook for sub-classes
    @page_no += 1
    # Ruby has no `continue`; `next` skips to the loop condition so a page
    # without images no longer aborts the whole search with a NameError.
    next if sources.empty?

    sources.each do |source|
      digest = Digest::MD5.hexdigest(source)
      next if @images.key?(digest)

      @images[digest] = { url: source }
      puts source if @options["cli"]
    end
  end

  # Drop the digest keys; callers only get the image records.
  @images = @images.values
end
options=(options = {})
click to toggle source
# File lib/scrapix/vbulletin.rb, line 65
#
# Replaces the scraper options with the defaults overlaid by the given
# overrides.
#
# options - Hash of overrides; keys may be Strings or Symbols (they are
#           stored as Strings). nil is treated like an empty Hash.
#
# Defaults: "start" => 1, "end" => 10000, "total" => 100000,
# "verbose" => false, "cli" => false. The "start", "end" and "total" values
# are coerced with #to_i after merging.
#
# Returns the resulting options Hash (also stored in @options).
def options=(options = {})
  @options = {
    "start" => 1,
    "end" => 10000,
    "total" => 100000,
    "verbose" => false,
    "cli" => false
  }
  # `|| {}` lets callers pass nil for "no overrides" instead of crashing.
  (options || {}).each { |key, value| @options[key.to_s] = value }
  %w[start end total].each { |key| @options[key] = @options[key].to_i }
  @options
end
reset()
click to toggle source
# File lib/scrapix/vbulletin.rb, line 60
#
# Clears the collected images and rewinds the page counter to the configured
# "start" page.
def reset
  @images = {}
  @page_no = @options["start"]
end
thread_has_ended?()
click to toggle source
# File lib/scrapix/vbulletin.rb, line 37
#
# Reports whether the current page number lies beyond the pages to search:
# either past the configured "end" page or past the thread's last page
# (@max_pages).
def thread_has_ended?
  return true if @page_no > @options["end"]

  @page_no > @max_pages
end
url=(url)
click to toggle source
# File lib/scrapix/vbulletin.rb, line 46
#
# Points the scraper at a thread. For a non-nil url the page is fetched
# right away so that the thread title and the page count can be recorded.
#
# url - thread URL, or nil (stored, but nothing is fetched).
#
# With the "cli" option set, the title is printed with an "=" underline.
# @max_pages comes from the "Page X of Y" text of the first
# ".pagenav .vbmenu_control" element; if that lookup fails for any reason the
# thread is taken to have a single page.
def url=(url)
  @url = url
  return unless @url

  page = @agent.get(@url)
  @title = page.title.strip
  if options["cli"]
    underline = "=" * @title.length
    puts "#{@title}\n#{underline}"
  end

  @max_pages =
    begin
      nav_text = page.search(".pagenav .vbmenu_control").first.inner_text
      nav_text.match(/Page \d* of (\d*)/)[1].to_i
    rescue
      1 # best effort: no usable page navigation found
    end
end