module Fourchan::Kit::Tools

Public Class Methods

download_image(link, options = {}) click to toggle source

Downloads the image from a URL.

@param link [URL] the URL where the image is.

# File lib/fourchan/kit/tools.rb, line 16
# Downloads a single image to disk, skipping files that are already present.
#
# Defaults are applied to a copy of +options+, so the caller's hash is left
# untouched. Links rejected by valid_link? are ignored without any output.
#
# @param link [String] URL of the image.
# @param options [Hash] optional settings:
#   :fsize [Integer] size in bytes; shown in kB in the progress line when non-zero (default 0)
#   :name  [String]  file name to save as (default: last path segment of +link+)
#   :out   [String]  directory to save into (default: "<current dir>/images")
#   :quiet [Boolean] suppress console output (default false)
def self.download_image(link, options = {})
  options = options.dup
  options[:fsize] ||= 0
  options[:name]  ||= link.split('/').last
  options[:out]   ||= "#{Dir.pwd}/images"
  options[:quiet] ||= false

  image = "#{create_dir(options[:out])}/#{options[:name]}"

  if File.exist?(image)
    puts "Already got image, skipping" unless options[:quiet]
    return
  end
  return unless valid_link?(link)

  # The progress line is only built when it is going to be printed, so quiet
  # mode neither prints nor touches a message that was never created.
  unless options[:quiet]
    size = options[:fsize].zero? ? "" : " @ " + "#{(options[:fsize] / 1024.0).round(2)}kB".rjust(9)
    puts "Downloading: #{link}#{size}"
  end

  # $agent is a global HTTP client configured outside this method.
  $agent.get(link).save(image)
end
download_thread(link, options = {}) click to toggle source

Downloads every image from a thread.

Makes use of parallel processing for faster downloading. Currently set to 8 threads.

@param link [URL] the URL for the thread to download.

# File lib/fourchan/kit/tools.rb, line 41
# Downloads every image posted in a thread, using 8 parallel worker threads.
#
# The caller's +options+ hash is not modified; each download receives its own
# copy with :fsize set to the size of that specific image.
#
# @param link [String] URL of the thread.
# @param options [Hash] forwarded to download_image, plus:
#   :checked [Boolean] when truthy, skip the valid_thread?/valid_link? checks
#                      (the caller has already validated +link+)
#   :quiet   [Boolean] suppress the "Not a 4chan thread" message
def self.download_thread(link, options = {})
  unless options[:checked] || (valid_thread?(link) && valid_link?(link))
    puts "Not a 4chan thread" unless options[:quiet]
    return
  end

  board, thread_no = get_info(link)
  # NOTE(review): Thread is built from a board and a thread number, so it is
  # presumably the project's own thread class rather than ::Thread - confirm.
  thread = Thread.new(board, thread_no)
  images = []
  sizes  = []

  # Record link and size together, and only for posts that carry an image,
  # so images[i] always lines up with sizes[i].
  thread.posts.each do |post|
    next unless post.tim

    images << post.image_link
    sizes  << post.fsize
  end

  Parallel.each_with_index(images, in_threads: 8) do |image, index|
    # A fresh hash per download: workers never write to a shared hash.
    download_image(image, options.merge(fsize: sizes[index]))
  end
end
download_threads(file, options = {}) click to toggle source

Download all images from each thread in a file.

Each thread must be on its own line and only be the URL, nothing else.

For example:

# threads.txt
http://boards.4chan.org/wg/thread/5777567
http://boards.4chan.org/wg/thread/5776602

It takes care of dead threads or wrong URLs.

@param file [File] the location of the file.

# File lib/fourchan/kit/tools.rb, line 77
# Downloads all images from every thread listed in a file (one URL per line).
#
# Each valid thread is saved under "images/<thread number>". Lines that are
# not valid thread links are reported and skipped. The caller's +options+
# hash is not modified.
#
# @param file [String] path of the file containing the thread URLs.
# @param options [Hash] forwarded to download_thread; :out and :checked are
#   set per thread. :quiet suppresses the per-thread progress output (the
#   missing-file message is always printed).
def self.download_threads(file, options = {})
  unless File.exist?(file)
    puts "Not able to find the input file"
    return
  end

  quiet = options[:quiet]

  # File.foreach closes the file once iteration is over.
  File.foreach(file) do |line|
    # Drop the line terminator ("\n" or "\r\n") so it does not end up in the
    # URL or break the end-of-string match below.
    link = line.chomp
    puts "Getting images from thread: #{link}" unless quiet

    if valid_thread?(link) && valid_link?(link)
      thread_no = link[/(\d+)$/, 1]
      download_thread(link, options.merge(out: "images/#{thread_no}", checked: true))
    else
      puts "Not a 4chan thread" unless quiet
    end
    puts unless quiet # blank separator between threads
  end
end
lurk(link, options = {}) click to toggle source

Check the thread for new images every x seconds.

  • The refresh rate is set by options[:refresh], an integer number of seconds to wait between checks.

  • The total time to lurk is set by options[:timeout], an integer number of seconds.

@param link [URL] the thread to lurk

# File lib/fourchan/kit/tools.rb, line 105
# Watches a thread and downloads new images until the timeout expires.
#
# The opening post's image is downloaded first. Then, in a loop, the replies
# are fetched, images of replies not seen before are downloaded, and the
# method sleeps for options[:refresh] seconds. When options[:timeout] seconds
# have passed, a message is printed and the process exits with status 0.
# The caller's +options+ hash is not modified.
#
# @param link [String] URL of the thread to lurk.
# @param options [Hash] forwarded to download_image, plus:
#   :refresh [Numeric] seconds to sleep between checks (must be set; it is
#                      passed straight to sleep)
#   :timeout [Numeric, nil] seconds after which lurking stops; passed
#                      straight to Timeout.timeout
#   :quiet   [Boolean] suppress the "Checking for images" message
def self.lurk(link, options = {})
  puts "Started lurking #{link}"

  downloaded = []
  board, thread_no = get_info(link)
  # NOTE(review): Thread is presumably the project's thread class (it takes a
  # board and a thread number), not ::Thread - confirm.
  thread = Thread.new(board, thread_no)

  download_image(thread.op.image_link, options.dup)

  begin
    # Timeout.timeout is called explicitly; the bare Kernel-level `timeout`
    # helper is not available on current Ruby versions.
    Timeout.timeout(options[:timeout]) do
      loop do
        puts "Checking for images" unless options[:quiet]
        replies = thread.fetch_replies

        # NOTE(review): the array difference relies on how post objects
        # compare (eql?/hash); verify that re-fetched posts compare equal.
        (replies - downloaded).each do |post|
          download_image(post.image_link, options.merge(fsize: post.fsize)) if post.image_link

          downloaded << post
        end

        sleep(options[:refresh])
      end
    end
  rescue Timeout::Error
    puts "Timeout after #{options[:timeout]} second(s)"
    exit 0
  end
end

Private Class Methods

create_dir(directory) click to toggle source
# File lib/fourchan/kit/tools.rb, line 137
# Makes sure +directory+ exists (creating missing parent directories as
# needed) and returns its real, absolute path.
#
# @param directory [String] path of the directory, relative or absolute.
# @return [String] the absolute path with symlinks resolved.
def self.create_dir(directory)
  # File.exist? is the supported spelling; File.exists? is gone in Ruby 3.2+.
  FileUtils.mkdir_p(directory) unless File.exist?(directory)
  File.realpath(directory)
end
get_info(link) click to toggle source
# File lib/fourchan/kit/tools.rb, line 142
# Pulls the board name and the thread number out of a thread URL.
#
# @param link [String] URL containing "<board>/thread/<number>".
# @return [Array] two elements: the board name (String) and the thread
#   number (Integer).
# @raise [NoMethodError] if +link+ does not contain that pattern.
def self.get_info(link)
  board_match  = link.match(%r{(\w+)/thread/})
  number_match = link.match(%r{/thread/([0-9]+)})
  [board_match[1], number_match[1].to_i]
end
valid_thread?(link) click to toggle source
# File lib/fourchan/kit/tools.rb, line 148
# Tells whether +link+ has the shape of a 4chan thread URL, i.e. it contains
# "boards.4chan.org/<board>/thread/<number>" and ends right after the number.
#
# The dots in the host name are matched literally. A single trailing newline
# is tolerated (so lines read straight from a file are accepted), but any
# further content after the thread number is not.
#
# @param link [String, nil] the URL to check.
# @return [Boolean] true when the link matches, false otherwise (including nil).
def self.valid_thread?(link)
  !(link =~ %r{boards\.4chan\.org/\w+/thread/\d+\Z}).nil?
end