class Scrapix::VBulletin

Downloads images from a vBulletin thread.

Attributes

images[R]
max_pages[R]
options[R]
page_no[R]
title[R]
url[R]

Public Class Methods

new(url = nil, options = {}) click to toggle source
# File lib/scrapix/vbulletin.rb, line 7
# Builds a scraper with its own Mechanize agent.
#
# The options are assigned before the URL on purpose: the URL writer
# consults the "cli" option while it loads the first page.
#
# @param url [String, nil] thread URL; when nil no page is fetched
# @param options [Hash] overrides for the default scraper options
def initialize(url = nil, options = {})
  @images = {}
  @agent  = Mechanize.new.tap { |agent| agent.user_agent_alias = 'Mac Safari' }

  self.options = options
  self.url     = url
end

Public Instance Methods

filter_images(sources) click to toggle source
# File lib/scrapix/vbulletin.rb, line 41
# Hook that lets sub-classes filter the image URLs found on a page.
# This base implementation hands the list back untouched.
#
# @param sources [Array<String>] image URLs collected from one page
# @return [Array<String>] the very same object that was passed in
def filter_images(sources)
  sources
end
find() click to toggle source

Finds images for this thread, starting from the page given by the "start" option.

# File lib/scrapix/vbulletin.rb, line 16
# Walks the thread page by page and collects every distinct image URL.
#
# Scanning starts at the page that `reset` selects (the "start" option) and
# stops once more than "total" images have been collected or
# `thread_has_ended?` reports true. The "total" check runs once per page,
# so the final count may exceed "total" by the images of the last page.
#
# With the "cli" option set, each newly found image URL is printed; with
# both "verbose" and "cli" set, each visited page URL is printed as well.
#
# @return [Array<Hash>] one `{url: String}` entry per distinct image, in
#   discovery order; when no URL is set the empty Hash left by `reset` is
#   returned instead
def find
  reset
  return @images unless @url

  until @images.count > @options["total"] || thread_has_ended?
    page_url = "#{@url}&page=#{@page_no}"
    page     = @agent.get page_url
    puts "[VERBOSE] Searching: #{page_url}" if @options["verbose"] && @options["cli"]
    sources  = filter_images(page.image_urls.map(&:to_s)) # hook for sub-classes
    @page_no += 1
    # `next`, not `continue`: Ruby has no `continue` keyword, so the old
    # code raised NameError on the first page without images.
    next if sources.empty?

    sources.each do |source|
      # Key by MD5 of the URL so the same image is only recorded once.
      hash = Digest::MD5.hexdigest(source)
      next if @images.key?(hash)

      @images[hash] = { url: source }
      puts source if @options["cli"]
    end
  end
  # Callers get a plain list; the digest keys were only needed for de-duplication.
  @images = @images.values
end
options=(options = {}) click to toggle source
# File lib/scrapix/vbulletin.rb, line 65
# Replaces the scraper options with the defaults overlaid by +options+.
#
# Keys of +options+ are converted to strings, so symbol and string keys are
# interchangeable. The "start", "end" and "total" entries are coerced to
# integers with `to_i`.
#
# @param options [Hash] option overrides
# @return [Hash] the resulting options hash with string keys
def options=(options = {})
  defaults = { "start" => 1, "end" => 10000, "total" => 100000, "verbose" => false, "cli" => false }
  overrides = options.each_with_object({}) do |(key, value), collected|
    collected[key.to_s] = value
  end

  @options = defaults.merge(overrides)
  %w[start end total].each do |numeric_key|
    @options[numeric_key] = @options[numeric_key].to_i
  end
  @options
end
reset() click to toggle source
# File lib/scrapix/vbulletin.rb, line 60
# Discards the collected images and rewinds the page counter to the
# configured "start" option.
#
# @return [Object] the value stored under the "start" option
def reset
  @images = {}
  first_page = @options["start"]
  @page_no = first_page
end
thread_has_ended?() click to toggle source
# File lib/scrapix/vbulletin.rb, line 37
# Tells whether the current page number lies beyond either limit: the
# "end" option or the thread's page count (@max_pages). The "end" option
# is checked first and the page count only if that check fails.
#
# @return [Boolean] true once the page counter has passed a limit
def thread_has_ended?
  [@options["end"], @max_pages].any? { |last_page| @page_no > last_page }
end
url=(url) click to toggle source
# File lib/scrapix/vbulletin.rb, line 46
# Points the scraper at a thread and loads its first page.
#
# Stores the URL and, when one is given, fetches it through the agent to
# record the stripped page title and the page count. With the "cli" option
# set, the title is printed with an underline of "=" characters.
#
# The page count is read from the ".pagenav .vbmenu_control" element
# ("Page X of Y"); if anything in that lookup raises, the thread is treated
# as having a single page.
#
# @param url [String, nil] thread URL; a falsy value only stores it and
#   leaves the title and page count untouched
def url=(url)
  @url = url
  return unless @url

  page   = @agent.get(@url)
  @title = page.title.strip
  puts "#{@title}\n#{'=' * @title.length}" if options["cli"]

  @max_pages =
    begin
      nav_text = page.search(".pagenav .vbmenu_control").first.inner_text
      nav_text.match(/Page \d* of (\d*)/)[1].to_i
    rescue
      # Best effort: no pager (or an unexpected layout) means one page.
      1
    end
end