class Filecamo::TextMucker

Constants

LANG_MARKS
MAX_FILE_SIZE

Attributes

stats[R]

Public Class Methods

new(comment_prefix, logger: Logger.new($stdout)) click to toggle source
# File lib/filecamo/text_mucker.rb, line 20
def initialize(comment_prefix, logger: Logger.new($stdout))
  @marks = LANG_MARKS.clone
  @marks.each_value{|m| m << comment_prefix}
  @logger = logger
  @magic = FileMagic.new
  @mime = FileMagic.mime
  @stats = {files_selected: 0, lines_added: 0}
end

Public Instance Methods

muck(percent_select, percent_lines, paths) { |fn, lang, line_nums| ... } click to toggle source
# File lib/filecamo/text_mucker.rb, line 31
def muck(percent_select, percent_lines, paths)
  select_chance = percent_select.to_f / 100
  lines_chance = percent_lines.to_f / 100

  paths.each do |path|
    path[0] == '.' and next
    if File.directory?(path)
      paths.concat(Dir.entries(path).map{|e| e[0] == '.' ? nil : File.join(path,e)}.compact)
      next
    end

    fn = path
    fn_size = File.size(fn)

    # todo: support working with large files by reading next line
    if fn_size > MAX_FILE_SIZE
      @logger.debug "Skipping #{fn} by size: #{file.size}"
      break
    end

    lang = case File.extname(fn)
           when '.cs' then :csharp
           when '.py' then :python
           when '.js' then :js
           when '.json' then :json
           when '.yaml','meta' then :yaml
           when '.html' then :html
           when '.txt' then :plain
           else
             case m = @mime.file(fn)
             when /python/ then :python
             when /ruby/ then :ruby
             when /shell/ then :shell
             when /plain/
               case g = @magic.file(fn)
               when /python/ then :python
               when /ruby/ then :ruby
               when /node/ then :js
               else
                 :plain
               end
             else
               @logger.debug "Skipping #{fn} by mime type: #{m}"
               next
             end
           end

    if Random.rand > select_chance
      @logger.debug "Skipping #{fn} by chance"
      next
    end

    @stats[:files_selected] += 1

    new_lines = {}
    new_bytes_needed = (fn_size * lines_chance).floor
    while new_bytes_needed > 0
      offset = Random.rand(fn_size)
      new_line = get_line_for(lang)
      new_lines[offset] = new_line
      new_bytes_needed -= new_line.bytesize
    end
    new_lines = new_lines.sort
    @stats[:lines_added] += new_lines.size

    body = ''
    line_nums = []

    File.open(fn) do |file|
      line_num = 0
      while !file.eof? && line = file.readline
        body << line
        line_num += 1
        new_lines.empty? and next # read remainder of file
        offset = new_lines[0][0]
        if file.pos >= offset # add a line as soon as passed the offset
          offset, new_line = new_lines.shift
          line_nums << (line_num+=1)
          body << new_line
        end
      end

      # concat any remaining lines
      if !new_lines.empty?
        body[-1] == $/ or body << $/
        new_lines.each do |offset, new_line|
          line_num += 1
          line_nums << line_num
          body << new_line
        end
      end
    end

    # todo: use same charset as mime type indicates when writing!
    File.open(fn, 'wb') {|f| f.write(body)}

    block_given? and yield(fn, lang, line_nums)
  end
end

Private Instance Methods

get_line_for(lang) click to toggle source
# File lib/filecamo/text_mucker.rb, line 134
def get_line_for(lang)
  mark = @marks[lang] or return ''
  # todo: match line endings of file!
  return mark + LiterateRandomizer.sentence + $/
end