class Nebulous::DelimiterDetector

Constants

COLUMN_DELIMITERS
LINE_DELIMITERS

Attributes

path[R]

Public Class Methods

new(path, *args) click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 13
# Builds a detector for the file at +path+.
#
# @param path [String] location of the file to inspect
# @param args [Array] optional trailing options hash, pulled off the argument
#   list with Array#extract_options!; this class reads the keys
#   :column_delimiters, :line_delimiters and :encoding from it
# @raise [ArgumentError] if nothing exists at +path+
def initialize(path, *args)
  @path = path
  @options = args.extract_options!

  # File.exists? was deprecated and then removed in Ruby 3.2; File.exist? is
  # the supported spelling and performs the same check.
  raise ArgumentError, "no such file: #{@path}" unless File.exist?(@path)
end

Public Instance Methods

detect() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 20
# Runs both detections and bundles the results.
#
# @return [Hash] +:col_sep+ holds the result of #detect_column_delimiter and
#   +:row_sep+ the result of #detect_line_delimiter
def detect
  column_separator = detect_column_delimiter
  row_separator = detect_line_delimiter

  { col_sep: column_separator, row_sep: row_separator }
end
detect_column_delimiter() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 25
# Picks the candidate column delimiter that splits the sampled line into the
# most pieces.
#
# Each candidate's tally (pieces produced by String#split, minus one) is
# written into the memoized +counts+ array at the candidate's position.
#
# @return [Object] the winning entry of +column_delimiters+
def detect_column_delimiter
  sample = readline
  candidates = column_delimiters

  candidates.each_index do |position|
    counts[position] = sample.split(candidates[position]).length - 1
  end

  # Pairs are compared as [tally, position], so when tallies tie the
  # candidate with the higher position wins.
  _best_tally, best_position = counts.each_with_index.max
  candidates[best_position]
end
detect_line_delimiter() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 36
# Determines the row separator by running the +file+ command on the path
# (via Cocaine) and matching its chomped output against +line_delimiters+.
#
# Each entry of +line_delimiters+ is indexed as [pattern, separator]. The
# separator of the first matching entry with a non-nil separator is returned;
# otherwise the separator of the first entry is used as the fallback.
#
# @return [Object] the selected separator
def detect_line_delimiter
  description = Cocaine::CommandLine.new('file', ':path').run(path: path).chomp

  matched = line_delimiters.each_with_object([]) do |entry, found|
    found << entry[1] if description =~ entry[0]
  end

  matched.compact.first || line_delimiters[0][1]
end

Private Instance Methods

column_delimiters() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 52
# Candidate column delimiters to try.
#
# @return [Object] the :column_delimiters option when it was supplied,
#   otherwise COLUMN_DELIMITERS
def column_delimiters
  @options.fetch(:column_delimiters) { COLUMN_DELIMITERS }
end
counts() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 60
# Memoized per-candidate tallies, one slot per entry of +column_delimiters+,
# each starting at zero.
#
# @return [Array<Integer>] the same array instance on every call
def counts
  return @counts if @counts

  @counts = column_delimiters.map { |_delimiter| 0 }
end
encoding() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 56
# Name of the encoding that the sampled line is transcoded to.
#
# @return [Object] the :encoding option when it was supplied, otherwise the
#   string name of Encoding::UTF_8
def encoding
  @options.fetch(:encoding) { Encoding::UTF_8.to_s }
end
line_delimiters() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 48
# Candidate line delimiters to match against.
#
# @return [Object] the :line_delimiters option when it was supplied,
#   otherwise LINE_DELIMITERS
def line_delimiters
  @options.fetch(:line_delimiters) { LINE_DELIMITERS }
end
readline() click to toggle source
# File lib/nebulous/delimiter_detector.rb, line 64
# Reads the first non-blank line of the file at +path+ and transcodes it to
# +encoding+, replacing invalid byte sequences.
#
# A line counts as blank when it is empty after String#chomp. Each line is
# checked on its own: the previous version appended lines to one buffer and
# chomped the buffer, which strips only a single terminator, so two leading
# blank lines ("\n\n") looked non-empty and were returned instead of data.
#
# @return [String] the first non-blank line, including its line terminator
# @raise [EOFError] if the file ends before a non-blank line is found
def readline
  line = File.open(path, 'r') do |io|
    loop do
      candidate = io.readline # raises EOFError once the file is exhausted
      break candidate unless candidate.chomp.empty?
    end
  end

  line.encode(encoding, invalid: :replace)
end