module Fourchan::Kit::Tools
Public Class Methods
download_image(link, options = {})
click to toggle source
Downloads the image from a URL.
@param link [URL] the URL where the image is.
# File lib/fourchan/kit/tools.rb, line 16
#
# Downloads a single image, unless a file with the target name already
# exists in the output directory.
#
# The caller's +options+ hash is only read, never modified.
#
# @param link [String] the URL where the image is.
# @param options [Hash] optional settings:
#   * :fsize [Integer] size in bytes; shown in kB in the progress line
#     (defaults to 0, in which case no size is shown)
#   * :name  [String]  file name to save as (defaults to the last path
#     segment of +link+)
#   * :out   [String]  output directory (defaults to "<cwd>/images")
#   * :quiet [Boolean] suppress all progress output (defaults to false)
# @return [Object, nil] whatever the agent's +save+ returns, or nil when
#   nothing was downloaded.
def self.download_image(link, options = {})
  fsize = options[:fsize] || 0
  name  = options[:name]  || link.split('/').last
  out   = options[:out]   || "#{Dir.pwd}/images"
  quiet = options[:quiet] || false

  image = "#{create_dir(out)}/#{name}"

  if File.exist?(image)
    puts "Already got image, skipping" unless quiet
    return
  end

  return unless valid_link?(link)

  # Build the progress line only when it will be printed; the previous
  # version appended to an unassigned (nil) string in quiet mode and crashed.
  unless quiet
    size_note = fsize.zero? ? "" : " @ " + "#{(fsize / 1024.0).round(2)}kB".rjust(9)
    puts "Downloading: #{link}#{size_note}"
  end

  # NOTE(review): $agent is a global set up elsewhere (presumably a Mechanize
  # agent, given the error class rescued in valid_link?) -- confirm.
  $agent.get(link).save(image)
end
download_thread(link, options = {})
click to toggle source
Downloads every image from a thread.
Makes use of parallel processing for faster downloading. Currently set to 8 threads.
@param link [URL] the URL for the thread to download.
# File lib/fourchan/kit/tools.rb, line 41
#
# Downloads every image from a thread, using 8 parallel threads.
#
# The link is validated with valid_thread? and valid_link? unless
# options[:checked] is truthy (the caller has already validated it).
# Remaining options are forwarded to download_image for every image.
#
# @param link [String] the URL for the thread to download.
# @param options [Hash] :checked, :quiet, plus anything download_image reads.
def self.download_thread(link, options = {})
  unless options[:checked] || (valid_thread?(link) && valid_link?(link))
    puts "Not a 4chan thread" unless options[:quiet]
    return
  end

  board, thread_no = get_info(link)
  # NOTE(review): Thread is resolved in the enclosing namespace; presumably
  # the project's own thread class rather than ::Thread -- confirm.
  thread = Thread.new(board, thread_no)

  # Collect link and size together, only for posts that carry an image, so
  # the two arrays stay index-aligned. Previously a size was recorded for
  # every post, so sizes[index] pointed at the wrong post once any post
  # without an image appeared.
  images = []
  sizes  = []
  thread.posts.each do |post|
    next unless post.tim

    images << post.image_link
    sizes  << post.fsize
  end

  Parallel.each_with_index(images, in_threads: 8) do |image, index|
    # Give each download its own hash instead of writing :fsize into the
    # shared options from several threads at once.
    download_image(image, options.merge(fsize: sizes[index]))
  end
end
download_threads(file, options = {})
click to toggle source
Download all images from each thread in a file.
Each thread must be on its own line and only be the URL, nothing else.
For example:
# threads.txt
http://boards.4chan.org/wg/thread/5777567
http://boards.4chan.org/wg/thread/5776602
It takes care of dead threads or wrong URLs.
@param file [File] the location of the file.
# File lib/fourchan/kit/tools.rb, line 77
#
# Downloads all images from each thread listed in a file, one URL per line.
#
# Surrounding whitespace (including the line terminator) is stripped from
# each line and blank lines are skipped. Lines that fail valid_thread? or
# valid_link? are reported and skipped. Images of each thread are written to
# "images/<thread number>", which overrides any :out given by the caller.
#
# @param file [String] the location of the file.
# @param options [Hash] :quiet suppresses progress messages; the hash is
#   passed on to download_thread and is not modified.
def self.download_threads(file, options = {})
  quiet = options[:quiet] || false

  unless File.exist?(file)
    puts "Not able to find the input file"
    return
  end

  # File.foreach closes the file when iteration ends.
  File.foreach(file) do |line|
    link = line.strip
    next if link.empty?

    puts "Getting images from thread: #{link}" unless quiet
    if valid_thread?(link) && valid_link?(link)
      thread_no = link[/\d+\z/]
      # :checked tells download_thread not to validate the link again.
      download_thread(link, options.merge(out: "images/#{thread_no}", checked: true))
    else
      puts "Not a 4chan thread" unless quiet
    end
    puts
  end
end
lurk(link, options = {})
click to toggle source
Check the thread for new images every x seconds.
- The refresh rate is determined by options[:refresh] and is an integer (seconds).
- The time to lurk is determined by options[:timeout] and is an integer (seconds).
@param link [URL] the thread to lurk
# File lib/fourchan/kit/tools.rb, line 105
#
# Watches a thread and downloads new images as they appear.
#
# The opening post's image is downloaded first. Then, every
# options[:refresh] seconds, the replies are fetched and the image of every
# reply not handled before is downloaded. After options[:timeout] seconds a
# message is printed and the process exits with status 0.
#
# @param link [String] the thread to lurk.
# @param options [Hash] :refresh (seconds between checks), :timeout
#   (seconds to lurk), :quiet, plus anything download_image reads. The hash
#   is not modified.
def self.lurk(link, options = {})
  puts "Started lurking #{link}"

  board, thread_no = get_info(link)
  # NOTE(review): Thread is resolved in the enclosing namespace; presumably
  # the project's own thread class rather than ::Thread -- confirm.
  thread = Thread.new(board, thread_no)
  download_image(thread.op.image_link, options.dup)

  downloaded = []
  begin
    # Timeout.timeout replaces the bare Kernel-level `timeout`, which has
    # been removed from Ruby.
    Timeout.timeout(options[:timeout]) do
      loop do
        puts "Checking for images" unless options[:quiet]

        (thread.fetch_replies - downloaded).each do |post|
          # Per-post copy of the options instead of writing :fsize into the
          # caller's hash.
          download_image(post.image_link, options.merge(fsize: post.fsize)) if post.image_link
          downloaded << post
        end

        sleep(options[:refresh])
      end
    end
  rescue Timeout::Error
    puts "Timeout after #{options[:timeout]} second(s)"
    exit 0
  end
end
Private Class Methods
create_dir(directory)
click to toggle source
# File lib/fourchan/kit/tools.rb, line 137
#
# Ensures +directory+ exists (creating missing parents) and returns its
# resolved absolute path.
#
# @param directory [String] path of the directory to create.
# @return [String] the real (symlink-resolved, absolute) path.
def self.create_dir(directory)
  # File.exist? is the supported spelling; File.exists? was removed in Ruby 3.2.
  FileUtils.mkdir_p(directory) unless File.exist?(directory)
  Pathname.new(directory).realpath.to_s
end
get_info(link)
click to toggle source
# File lib/fourchan/kit/tools.rb, line 142
#
# Extracts the board name and thread number from a thread URL of the form
# ".../<board>/thread/<number>".
#
# @param link [String] the thread URL.
# @return [Array(String, Integer)] board name and thread number.
# @raise [ArgumentError] if +link+ contains no "<board>/thread/<number>" part.
def self.get_info(link)
  match = link.to_s.match(%r{(?<board>\w+)/thread/(?<thread>[0-9]+)})
  # Fail with a clear message instead of a NoMethodError on nil.
  raise ArgumentError, "Not a 4chan thread URL: #{link.inspect}" unless match

  [match[:board], match[:thread].to_i]
end
valid_link?(link)
click to toggle source
# File lib/fourchan/kit/tools.rb, line 152
#
# Checks that +link+ is an http(s) URL and that fetching it with $agent
# does not raise Mechanize::ResponseCodeError.
#
# The whole string must be a URL; a single trailing newline is tolerated.
# No request is made when the string does not look like an http(s) URL.
#
# @param link [String] the URL to check.
# @return [Boolean] true when the link is well-formed and fetchable.
def self.valid_link?(link)
  # \A...\Z anchors the whole string; ^...$ would accept a multi-line string
  # as soon as any one of its lines looked like a URL. make_regexp replaces
  # the obsolete URI.regexp.
  pattern = /\A#{URI::DEFAULT_PARSER.make_regexp(%w[http https])}\Z/
  return false unless pattern.match?(link.to_s)

  $agent.get(link)
  true
rescue Mechanize::ResponseCodeError
  false
end
valid_thread?(link)
click to toggle source
# File lib/fourchan/kit/tools.rb, line 148
#
# Tells whether +link+ ends with "boards.4chan.org/<board>/thread/<number>".
#
# A single trailing newline is tolerated, so lines read straight from a
# file still validate.
#
# @param link [String] the URL to check.
# @return [Boolean] true when the link has the shape of a thread URL.
def self.valid_thread?(link)
  # Dots are escaped so they only match a literal "."; \Z (rather than $)
  # rejects strings with further lines after the thread number.
  %r{boards\.4chan\.org/\w+/thread/\d+\Z}.match?(link.to_s)
end