class Excavate::Extractors::OleExtractor

Public Instance Methods

extract(target) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 6
# Extracts the archive into +target+ and then gives extracted cabinet files
# a ".cab" extension.
#
# Runs in two steps: do_extract writes the archive entries into +target+,
# then rename_archives walks +target+ and renames every file that cab?
# recognizes.
#
# @param target [String] directory that receives the extracted files
# @return whatever rename_archives returns (it is the last expression)
def extract(target)
  do_extract(target)
  rename_archives(target)
end

Private Instance Methods

cab?(file) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 61
# Tells whether FileMagic identifies +file+ as a cabinet archive.
#
# @param file [String] path handed to FileMagic.detect
# @return [Boolean] true when the detected type equals :cab
def cab?(file)
  detected_type = FileMagic.detect(file)
  detected_type == :cab
end
children(ole) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 26
# Lists the entries found at the root (".") of the opened OLE storage,
# leaving out the "." and ".." pseudo-entries.
#
# @param ole [#dir] opened storage whose +dir+ responds to +entries+
# @return [Array<String>] entry names in the order +entries+ reported them
def children(ole)
  ole.dir.entries(".").reject { |entry| %w[. ..].include?(entry) }
end
do_extract(target) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 13
# Writes every top-level entry of the OLE archive at @archive into +target+.
#
# The filename lookup is reset first so that duplicate numbering done by
# prepare_filename starts from scratch for this extraction.
#
# @param target [String] directory that receives the extracted files
def do_extract(target)
  reset_filename_lookup

  Ole::Storage.open(@archive) do |storage|
    children(storage).each do |entry|
      destination = File.join(target, prepare_filename(entry))
      # Read the entry before touching the destination, then write it in
      # binary mode so the bytes land on disk unchanged.
      payload = storage.file.read(entry)
      File.write(destination, payload, mode: "wb")
    end
  end
end
prepare_filename(file) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 34
# Produces the on-disk name for an archive entry.
#
# The entry name is sanitized, and @file_lookup counts how many times each
# sanitized name has been requested. The first request gets the sanitized
# name as is; each later request gets the running count appended
# ("name2", "name3", ...).
#
# @param file [String] entry name as stored in the archive
# @return [String] sanitized name, numbered when it was seen before
def prepare_filename(file)
  base = sanitize_filename(file)

  occurrence = (@file_lookup[base] || 0) + 1
  @file_lookup[base] = occurrence

  occurrence > 1 ? base + occurrence.to_s : base
end
rename_archives(target) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 55
# Appends ".cab" to the name of every path under +target+ (searched
# recursively) for which cab? returns true.
#
# @param target [String] root directory to scan
# @return [Array<String>] the paths that were scanned
def rename_archives(target)
  pattern = File.join(target, "**", "*")

  Dir.glob(pattern).each do |path|
    next unless cab?(path)

    FileUtils.mv(path, "#{path}.cab")
  end
end
reset_filename_lookup() click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 30
# Clears the per-extraction filename counter.
#
# @file_lookup maps a sanitized filename to the number of times
# prepare_filename has handed it out; replacing it with an empty hash makes
# the numbering of duplicates start over.
#
# @return [Hash] the new, empty lookup table
def reset_filename_lookup
  @file_lookup = {}
end
sanitize_filename(filename) click to toggle source
# File lib/excavate/extractors/ole_extractor.rb, line 44
# Turns an archive entry name into a plain, filesystem-friendly filename.
#
# Surrounding whitespace is stripped, any leading directory part is removed,
# and every character other than ASCII letters, digits, "." and "-" is
# replaced with "_". The argument itself is not modified.
#
# @param filename [String] entry name, possibly containing a path
# @return [String] the sanitized name
def sanitize_filename(filename)
  # NOTE: File.basename doesn't work right with Windows paths on Unix,
  # so everything up to the last "\" or "/" is dropped by hand.
  without_path = filename.strip.gsub(/^.*(\\|\/)/, "")

  # Anything outside the allowed set becomes an underscore.
  without_path.gsub(/[^0-9A-Za-z.\-]/, "_")
end