class DirCrawl
Crawls a directory of files recursively and runs a block of code on each file
Public Class Methods
new(path_params, process_block, include_block, extras_block, cm_hash, *args)
click to toggle source
# File lib/dircrawl.rb, line 7
# Stores the crawl settings and blocks, builds the reporter, and then
# immediately crawls the configured directory.
#
# path_params   - object read with [] for the keys :path, :output_dir,
#                 :ignore_includes and :failure_mode.
# process_block - callable stored in @process_block.
# include_block - optional callable invoked once, with no arguments, before
#                 the crawl starts; skipped when nil.
# extras_block  - callable (or nil) stored in @extras_block.
# cm_hash       - passed unchanged to HarvesterReporter.new.
# args          - extra arguments forwarded to crawl_dir.
def initialize(path_params, process_block, include_block, extras_block, cm_hash, *args)
  # Set the params for the path
  @path = path_params[:path]
  @output_dir = path_params[:output_dir]
  @ignore_includes = path_params[:ignore_includes]
  @failure_mode = path_params[:failure_mode]

  # Setup the blocks to run. A missing include block simply means there is
  # nothing to set up, so it must not abort construction.
  include_block.call if include_block
  @process_block = process_block
  @extras_block = extras_block

  # Setup the Harvester reporter to report the results
  @reporter = HarvesterReporter.new(cm_hash)

  crawl_dir(@path, *args)
end
Public Instance Methods
crawl_dir(dir, *args)
click to toggle source
Crawls the specified directory
# File lib/dircrawl.rb, line 25
# Recursively walks +dir+ and processes every file whose name is not excluded
# by @ignore_includes.
#
# Each file is passed to process_file and the result to output_results. A
# StandardError raised by either is handed to handle_failure so the crawl
# continues with the next entry. Non-StandardError exceptions (Interrupt,
# SystemExit, ...) are deliberately left to propagate so the crawl can be
# stopped.
#
# dir  - path of the directory to walk.
# args - extra arguments forwarded unchanged to process_file and
#        handle_failure.
def crawl_dir(dir, *args)
  # nil means "ignore nothing"; a single substring or an array of substrings
  # are both accepted.
  ignored_fragments = Array(@ignore_includes)

  Dir.foreach(dir) do |file|
    # Skip . or .. entries
    next if file == '.' || file == '..'

    full_path = "#{dir}/#{file}"
    if File.directory?(full_path)
      # Recurse into directories
      crawl_dir(full_path, *args)
    elsif ignored_fragments.none? { |fragment| file.include?(fragment) }
      # Process file
      begin
        output_results(process_file(dir, file, *args), dir, file)
      rescue StandardError => e
        handle_failure(e, dir, file, *args)
      end
    end
  end
end
create_write_dirs(dir)
click to toggle source
Create the directories in the given path if they don't exist
# File lib/dircrawl.rb, line 66
# Creates +dir+ together with any missing parent directories.
#
# A path with a leading "/" is built from the filesystem root. A relative
# path is built relative to the current working directory rather than being
# re-rooted at "/".
#
# dir - slash-separated directory path; empty segments (repeated or trailing
#       slashes) are ignored.
#
# Returns the array of non-empty path segments of +dir+.
def create_write_dirs(dir)
  segments = dir.split("/").reject(&:empty?)
  root = dir.start_with?("/") ? "/" : ""

  # Walk from the shallowest ancestor down, creating whatever is missing.
  segments.each_with_index do |_segment, index|
    current = root + segments[0..index].join("/")
    Dir.mkdir(current) unless File.directory?(current)
  end
end
get_write_path(dir, file)
click to toggle source
Figure out where to write the file
# File lib/dircrawl.rb, line 79
# Builds the path of the JSON file that holds the processed output for +file+.
#
# Every occurrence of @path inside +dir+ is replaced with @output_dir (String
# #gsub), then "/<file>.json" is appended.
#
# dir  - directory containing the source file.
# file - name of the source file.
#
# Returns the output path as a String.
def get_write_path(dir, file)
  "#{dir.gsub(@path, @output_dir)}/#{file}.json"
end
handle_failure(error, dir, file, *args)
click to toggle source
Handle different failure modes
# File lib/dircrawl.rb, line 85
# Reacts to a processing failure according to @failure_mode.
#
# "debug" - calls binding.pry at this point.
# "log"   - appends "<dir>/<file>" followed by a newline to error_log.txt
#           inside @output_dir.
# Any other mode does nothing and returns nil.
#
# error - the exception that was raised (not written to the log).
# dir   - directory of the file that failed.
# file  - name of the file that failed.
# args  - accepted for signature parity with the caller; unused here.
def handle_failure(error, dir, file, *args)
  case @failure_mode
  when "debug"
    binding.pry
  when "log"
    log_path = @output_dir + "/error_log.txt"
    IO.write(log_path, "#{dir}/#{file}\n", mode: 'a')
  end
end
output_results(processed, dir, file)
click to toggle source
Output the results to Harvester and file dir
# File lib/dircrawl.rb, line 60
# Reports a processed result and saves it to disk.
#
# +processed+ is parsed as JSON and passed, wrapped in a one-element array, to
# @reporter.report_results together with "<dir>/<file>". The unparsed
# +processed+ string is then written to the path given by get_write_path.
#
# processed - JSON string produced for the file.
# dir       - directory containing the source file.
# file      - name of the source file.
#
# Returns the result of File.write.
def output_results(processed, dir, file)
  parsed = JSON.parse(processed)
  source_path = "#{dir}/#{file}"
  @reporter.report_results([parsed], source_path)

  destination = get_write_path(dir, file)
  File.write(destination, processed)
end
process_file(dir, file, *args)
click to toggle source
Process a file using the blocks given
# File lib/dircrawl.rb, line 46
# Produces the processed output for one file, reusing a previously written
# result when one exists.
#
# The output directory for +dir+ is created first. If the file's output path
# (from get_write_path) already exists, its contents are returned without
# running any block. Otherwise @extras_block, when one was supplied, is called
# with "<@output_dir>/", and the file is then handed to @process_block.
#
# dir  - directory containing the source file.
# file - name of the source file.
# args - extra arguments forwarded to @process_block after the file path.
#
# Returns the contents of the existing output file, or whatever
# @process_block returns.
def process_file(dir, file, *args)
  create_write_dirs(dir.gsub(@path, @output_dir))
  write_path = get_write_path(dir, file)

  # Use already existing file
  if File.exist?(write_path)
    puts "Processed file exists, skipping: #{dir}/#{file}"
    return File.read(write_path)
  end

  # Run blocks to process the file. The extras block is optional, so only
  # call it when one was actually given.
  @extras_block.call("#{@output_dir}/") if @extras_block
  @process_block.call("#{dir}/#{file}", *args)
end