module Cevennes

Constants

DOWNCASE
ENCODINGS

# Serializes a single row (an array of cells) as one line of CSV text.
#
# The row is written through ::CSV.generate with UTF-8 encoding; the
# resulting string is returned with leading/trailing whitespace (including
# the line terminator appended by the generator) stripped.
def deflate(row)

  line =
    ::CSV.generate(encoding: 'UTF-8') do |csv|
      csv << row
    end

  line.strip
end

IDENTITY
VERSION

Public Class Methods

diff(id, csv0, csv1, opts={})
# File lib/cevennes.rb, line 12
# Compares two CSV documents, pairing their rows on the value found in the
# column named +id+.
#
# Both documents are indexed via +hash+ ('old' for csv0, 'new' for csv1),
# which yields { keys: [ line, header ], id_value => [ line, row ], ... }.
# +opts+ is handed to +hash+ untouched.
#
# Returns an array laid out as:
#
#   [ 'keys', line0, header0, line1, header1 ]
#   [ 'stats', counts ]
#   [ marker, line0, row0, line1, row1 ]   # one per row
#
# where marker is one of
#
#   '-'  row only in the old document (line1 is -1, row1 is nil)
#   '='  row identical in both documents (row1 is nil)
#   '!'  row present in both documents but different
#   '+'  row only in the new document (line0 is -1, row0 is nil)
#
# and counts maps each marker seen to its number of occurrences, plus
# 'l0' / 'l1', the number of keyed rows in the old / new document.
#
# Errors raised by +hash+ (e.g. when the id column is not found) propagate.
def diff(id, csv0, csv1, opts={})

  h0 = hash('old', id, csv0, opts)
  h1 = hash('new', id, csv1, opts)

  ks0 = h0.delete(:keys)
  ks1 = h1.delete(:keys)

  d =
    h0.collect { |k, v|
      v1 = h1[k]
      if v1.nil?
        [ '-', *v, -1, nil ]
      elsif v1[1] == v[1]
        # 4th slot is the line number in the NEW document, as for '!' and
        # '+' entries; the insertion below relies on it to place added rows
        [ '=', *v, v1[0], nil ]
      else
        [ '!', *v, *v1 ]
      end }

  # Slot each added row in front of the first entry that sits further down
  # in the new document; going last-to-first keeps the added rows ordered.
  (h1.keys - h0.keys)
    .collect { |k| h1[k] }
    .reverse_each { |lnum, line|
      i = d.index { |_, _, _, l1, _| l1 > lnum } || d.length
      d.insert(i, [ '+', -1, nil, lnum, line ]) }

  s = d.each_with_object({}) { |(a, _, _), h| h[a] = (h[a] || 0) + 1 }
  s['l0'] = h0.length
  s['l1'] = h1.length

  [ [ 'keys', *ks0, *ks1 ], [ 'stats', s ] ] + d
end

Protected Class Methods

hash(version, id, csv, opts)
# File lib/cevennes.rb, line 56
# Parses +csv+ and indexes its rows by the value found in the +id+ column.
#
# +version+ is only used to label the error message. When
# opts[:ignore_key_case] is truthy, DOWNCASE is applied to the id and to the
# cells when locating the header row, and the header row itself is stored
# passed through DOWNCASE; otherwise IDENTITY is used.
#
# Processing steps: the text goes through +reencode+, is parsed, each row is
# numbered from 1 and passed through +strip+, rows holding only nils are
# discarded, and everything before the first row containing the id is
# dropped (that row becomes the header).
#
# Returns { keys: [ number, header ], id_value => [ number, row ], ... }.
# Rows with at most one non-nil cell, or without a value in the id column,
# are left out; a later row with the same id value replaces an earlier one.
#
# Raises ::IndexError when no row contains the id.
def hash(version, id, csv, opts)

  ignore_case = opts[:ignore_key_case]
  normalize = ignore_case ? DOWNCASE : IDENTITY
  wanted = normalize[id]

  numbered = []
  ::CSV.parse(reencode(csv)).each_with_index do |cells, index|
    numbered << [ index + 1, strip(cells) ]
  end

  rows =
    numbered
      .reject { |_, cells| cells.compact.empty? }
      .drop_while { |_, cells| ! cells.find { |cell| normalize[cell] == wanted } }

  if rows.empty?
    fail ::IndexError.new("id #{id.inspect} not found in #{version} CSV")
  end

  header = rows[0]
  header[1] = header[1].collect { |c| DOWNCASE[c] } if ignore_case

  id_index = header[1].index(wanted)

  rows[1..-1].each_with_object({ keys: header }) do |(number, cells), index|
    next unless cells.compact.length > 1
    key = cells[id_index]
    index[key] = [ number, cells ] if key
  end
end
reencode(s)
# File lib/cevennes.rb, line 91
# Returns a UTF-8 transcoding of +s+, or nil when none of ENCODINGS works.
#
# Each entry of ENCODINGS is tried in order: the bytes are tagged with that
# encoding and transcoded to UTF-8; the first attempt that does not raise
# wins. The work is done on a copy, so the caller's string (frozen or not)
# keeps its original encoding tag.
def reencode(s)

  #s = unzip(s) if s[0, 2] == 'PK'
    # no dependency on rubyzip

  #return s if s.encoding == Encoding::UTF_8
    # NO! have to force_encoding for UTF-8 as well!

  # force_encoding retags its receiver in place: never do that to the
  # argument itself, only to a private copy
  copy = s.dup

  ENCODINGS.each do |e|
    begin
      return copy.force_encoding(e).encode('UTF-8')
    rescue StandardError
      next # this encoding does not fit, try the next candidate
    end
  end

  nil
end
strip(row)
# File lib/cevennes.rb, line 48
# Returns a new array in which every String cell of +row+ has had its
# leading and trailing whitespace removed; cells of any other kind (nil
# included) are passed through unchanged.
def strip(row)

  row.map do |cell|
    case cell
    when String then cell.strip
    else cell
    end
  end
end