module HashDiffSym

This module provides methods to diff two hash, patch and unpatch hash

Constants

VERSION

Public Class Methods

best_diff(obj1, obj2, options = {}, &block) click to toggle source

Best diff two objects, which tries to generate the smallest change set using different similarity values.

HashDiffSym.best_diff is useful in case of comparing two objects which include similar hashes in arrays.

@param [Array, Hash] obj1 @param [Array, Hash] obj2 @param [Hash] options the options to use when comparing

* :strict (Boolean) [true] whether numeric values will be compared on type as well as value.  Set to false to allow comparing Fixnum, Float, BigDecimal to each other
* :delimiter (String) ['.'] the delimiter used when returning nested key references
* :numeric_tolerance (Numeric) [0] should be a positive numeric value.  Value by which numeric differences must be greater than.  By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value.
* :strip (Boolean) [false] whether or not to call #strip on strings before comparing

@yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison.

@return [Array] an array of changes.

e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']]

@example

a = {'x' => [{'a' => 1, 'c' => 3, 'e' => 5}, {'y' => 3}]}
b = {'x' => [{'a' => 1, 'b' => 2, 'e' => 5}] }
diff = HashDiffSym.best_diff(a, b)
diff.should == [['-', 'x[0].c', 3], ['+', 'x[0].b', 2], ['-', 'x[1].y', 3], ['-', 'x[1]', {}]]

@since 0.0.1

# File lib/hashdiff_sym/diff.rb, line 27
def self.best_diff(obj1, obj2, options = {}, &block)
  options[:comparison] = block if block_given?

  opts = { :similarity => 0.3 }.merge!(options)
  diffs_1 = diff(obj1, obj2, opts)
  count_1 = count_diff diffs_1

  opts = { :similarity => 0.5 }.merge!(options)
  diffs_2 = diff(obj1, obj2, opts)
  count_2 = count_diff diffs_2

  opts = { :similarity => 0.8 }.merge!(options)
  diffs_3 = diff(obj1, obj2, opts)
  count_3 = count_diff diffs_3

  count, diffs = count_1 < count_2 ? [count_1, diffs_1] : [count_2, diffs_2]
  diffs = count < count_3 ? diffs : diffs_3
end
comparable?(obj1, obj2, strict = true) click to toggle source

@private

check if objects are comparable

# File lib/hashdiff_sym/util.rb, line 108
def self.comparable?(obj1, obj2, strict = true)
  [Array, Hash].each do |type|
    return true if obj1.is_a?(type) && obj2.is_a?(type)
  end
  return true if !strict && obj1.is_a?(Numeric) && obj2.is_a?(Numeric)
  obj1.is_a?(obj2.class) && obj2.is_a?(obj1.class)
end
compare_values(obj1, obj2, options = {}) click to toggle source

@private

check for equality or “closeness” within given tolerance

# File lib/hashdiff_sym/util.rb, line 86
def self.compare_values(obj1, obj2, options = {})
  if (options[:numeric_tolerance].is_a? Numeric) &&
      [obj1, obj2].all? { |v| v.is_a? Numeric }
    return (obj1 - obj2).abs <= options[:numeric_tolerance]
  end

  if options[:strip] == true
    obj1 = obj1.strip if obj1.respond_to?(:strip)
    obj2 = obj2.strip if obj2.respond_to?(:strip)
  end

  if options[:case_insensitive] == true
    obj1 = obj1.downcase if obj1.respond_to?(:downcase)
    obj2 = obj2.downcase if obj2.respond_to?(:downcase)
  end

  obj1 == obj2
end
count_diff(diffs) click to toggle source

@private

count node differences

# File lib/hashdiff_sym/util.rb, line 23
def self.count_diff(diffs)
  diffs.inject(0) do |sum, item|
    old_change_count = count_nodes(item[2])
    new_change_count = count_nodes(item[3])
    sum += (old_change_count + new_change_count)
  end
end
count_nodes(obj) click to toggle source

@private

count total nodes for an object

# File lib/hashdiff_sym/util.rb, line 34
def self.count_nodes(obj)
  return 0 unless obj

  count = 0
  if obj.is_a?(Array)
    obj.each {|e| count += count_nodes(e) }
  elsif obj.is_a?(Hash)
    obj.each {|k, v| count += count_nodes(v) }
  else
    return 1
  end

  count
end
custom_compare(method, key, obj1, obj2) click to toggle source

@private

try custom comparison

# File lib/hashdiff_sym/util.rb, line 119
def self.custom_compare(method, key, obj1, obj2)
  if method
    res = method.call(key, obj1, obj2)

    # nil != false here
    if res == false
      return [['~', key, obj1, obj2]]
    elsif res == true
      return []
    end
  end
end
decode_property_path(path, delimiter='.') click to toggle source

@private

decode property path into an array @param [String] path Property-string @param [String] delimiter Property-string delimiter

e.g. “a.b.c” => ['a', 'b', 3, 'c']

# File lib/hashdiff_sym/util.rb, line 56
def self.decode_property_path(path, delimiter='.')
  parts = path.split(delimiter).collect do |part|
    if part =~ /^(.*)\[(\d+)\]$/
      if $1.size > 0
        [import_key($1), $2.to_i]
      else
        $2.to_i
      end
    else
      import_key(part)
    end
  end

  parts.flatten
end
diff(obj1, obj2, options = {}, &block) click to toggle source

Compute the diff of two hashes or arrays

@param [Array, Hash] obj1 @param [Array, Hash] obj2 @param [Hash] options the options to use when comparing

* :strict (Boolean) [true] whether numeric values will be compared on type as well as value.  Set to false to allow comparing Fixnum, Float, BigDecimal to each other
* :similarity (Numeric) [0.8] should be between (0, 1]. Meaningful if there are similar hashes in arrays. See {best_diff}.
* :delimiter (String) ['.'] the delimiter used when returning nested key references
* :numeric_tolerance (Numeric) [0] should be a positive numeric value.  Value by which numeric differences must be greater than.  By default, numeric values are compared exactly; with the :tolerance option, the difference between numeric values must be greater than the given value.
* :strip (Boolean) [false] whether or not to call #strip on strings before comparing

@yield [path, value1, value2] Optional block is used to compare each value, instead of default #==. If the block returns value other than true of false, then other specified comparison options will be used to do the comparison.

@return [Array] an array of changes.

e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']]

@example

a = {"a" => 1, "b" => {"b1" => 1, "b2" =>2}}
b = {"a" => 1, "b" => {}}

diff = HashDiffSym.diff(a, b)
diff.should == [['-', 'b.b1', 1], ['-', 'b.b2', 2]]

@since 0.0.1

# File lib/hashdiff_sym/diff.rb, line 70
def self.diff(obj1, obj2, options = {}, &block)
  opts = {
    :prefix      =>   '',
    :similarity  =>   0.8,
    :delimiter   =>   '.',
    :strict      =>   true,
    :strip       =>   false,
    :numeric_tolerance => 0
  }.merge!(options)

  opts[:comparison] = block if block_given?

  # prefer to compare with provided block
  result = custom_compare(opts[:comparison], opts[:prefix], obj1, obj2)
  return result if result

  if obj1.nil? and obj2.nil?
    return []
  end

  if obj1.nil?
    return [['~', opts[:prefix], nil, obj2]]
  end

  if obj2.nil?
    return [['~', opts[:prefix], obj1, nil]]
  end

  unless comparable?(obj1, obj2, opts[:strict])
    return [['~', opts[:prefix], obj1, obj2]]
  end

  result = []
  if obj1.is_a?(Array)
    changeset = diff_array(obj1, obj2, opts) do |lcs|
      # use a's index for similarity
      lcs.each do |pair|
        result.concat(diff(obj1[pair[0]], obj2[pair[1]], opts.merge(:prefix => "#{opts[:prefix]}[#{pair[0]}]")))
      end
    end

    changeset.each do |change|
      if change[0] == '-'
        result << ['-', "#{opts[:prefix]}[#{change[1]}]", change[2]]
      elsif change[0] == '+'
        result << ['+', "#{opts[:prefix]}[#{change[1]}]", change[2]]
      end
    end
  elsif obj1.is_a?(Hash)
    if opts[:prefix].empty?
      prefix = ""
    else
      prefix = "#{opts[:prefix]}#{opts[:delimiter]}"
    end

    deleted_keys = obj1.keys - obj2.keys
    common_keys = obj1.keys & obj2.keys
    added_keys = obj2.keys - obj1.keys

    # add deleted properties
    deleted_keys.to_s_sort.each do |k|
      custom_result = custom_compare(opts[:comparison], "#{prefix}#{export_key(k)}", obj1[k], nil)

      if custom_result
        result.concat(custom_result)
      else
        result << ['-', "#{prefix}#{export_key(k)}", obj1[k]]
      end
    end

    # recursive comparison for common keys
    common_keys.to_s_sort.each {|k| result.concat(diff(obj1[k], obj2[k], opts.merge(:prefix => "#{prefix}#{export_key(k)}"))) }

    # added properties
    added_keys.to_s_sort.each do |k|
      unless obj1.key?(k)
        custom_result = custom_compare(opts[:comparison], "#{prefix}#{export_key(k)}", nil, obj2[k])

        if custom_result
          result.concat(custom_result)
        else
          result << ['+', "#{prefix}#{export_key(k)}", obj2[k]]
        end
      end
    end
  else
    return [] if compare_values(obj1, obj2, opts)
    return [['~', opts[:prefix], obj1, obj2]]
  end

  result
end
diff_array(a, b, options = {}) { |links| ... } click to toggle source

@private

diff array using LCS algorithm

# File lib/hashdiff_sym/diff.rb, line 166
def self.diff_array(a, b, options = {})
  opts = {
    :prefix      =>   '',
    :similarity  =>   0.8,
    :delimiter   =>   '.'
  }.merge!(options)

  change_set = []
  if a.size == 0 and b.size == 0
    return []
  elsif a.size == 0
    b.each_index do |index|
      change_set << ['+', index, b[index]]
    end
    return change_set
  elsif b.size == 0
    a.each_index do |index|
      i = a.size - index - 1
      change_set << ['-', i, a[i]]
    end
    return change_set
  end

  links = lcs(a, b, opts)

  # yield common
  yield links if block_given?

  # padding the end
  links << [a.size, b.size]

  last_x = -1
  last_y = -1
  links.each do |pair|
    x, y = pair

    # remove from a, beginning from the end
    (x > last_x + 1) and (x - last_x - 2).downto(0).each do |i|
      change_set << ['-', last_y + i + 1, a[i + last_x + 1]]
    end

    # add from b, beginning from the head
    (y > last_y + 1) and 0.upto(y - last_y - 2).each do |i|
      change_set << ['+', last_y + i + 1, b[i + last_y + 1]]
    end

    # update flags
    last_x = x
    last_y = y
  end

  change_set
end
export_key(key) click to toggle source
# File lib/hashdiff_sym/util.rb, line 132
def self.export_key(key)
  if key.class == Symbol
    ":#{key}"
  else
    key
  end
end
import_key(key) click to toggle source
# File lib/hashdiff_sym/util.rb, line 140
def self.import_key(key)
  if key[0] == ":"
    key[1..-1].to_sym
  else
    key
  end
end
lcs(a, b, options = {}) click to toggle source

@private

caculate array difference using LCS algorithm en.wikipedia.org/wiki/Longest_common_subsequence_problem

# File lib/hashdiff_sym/lcs.rb, line 6
def self.lcs(a, b, options = {})
  opts = { :similarity => 0.8 }.merge!(options)

  opts[:prefix] = "#{opts[:prefix]}[*]"

  return [] if a.size == 0 or b.size == 0

  a_start = b_start = 0
  a_finish = a.size - 1
  b_finish = b.size - 1
  vector = []

  lcs = []
  (b_start..b_finish).each do |bi|
    lcs[bi] = [] 
    (a_start..a_finish).each do |ai|
      if similar?(a[ai], b[bi], opts)
        topleft = (ai > 0 and bi > 0)? lcs[bi-1][ai-1][1] : 0
        lcs[bi][ai] = [:topleft, topleft + 1]
      elsif
        top = (bi > 0)? lcs[bi-1][ai][1] : 0
        left = (ai > 0)? lcs[bi][ai-1][1] : 0
        count = (top > left) ? top : left

        direction = :both
        if top > left
          direction = :top
        elsif top < left
          direction = :left
        else
          if bi == 0
            direction = :top
          elsif ai == 0
            direction = :left
          else
            direction = :both
          end
        end

        lcs[bi][ai] = [direction, count]
      end
    end
  end

  x = a_finish
  y = b_finish
  while x >= 0 and y >= 0 and lcs[y][x][1] > 0
    if lcs[y][x][0] == :both
      x -= 1
    elsif lcs[y][x][0] == :topleft
      vector.insert(0, [x, y])
      x -= 1
      y -= 1
    elsif lcs[y][x][0] == :top
      y -= 1
    elsif lcs[y][x][0] == :left
      x -= 1
    end
  end

  vector
end
node(hash, parts) click to toggle source

@private

get the node of hash by given path parts

# File lib/hashdiff_sym/util.rb, line 75
def self.node(hash, parts)
  temp = hash
  parts.each do |part|
    temp = temp[part]
  end
  temp
end
patch!(obj, changes, options = {}) click to toggle source

Apply patch to object

@param [Hash, Array] obj the object to be patched, can be an Array or a Hash @param [Array] changes e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']] @param [Hash] options supports following keys:

* :delimiter (String) ['.'] delimiter string for representing nested keys in changes array

@return the object after patch

@since 0.0.1

# File lib/hashdiff_sym/patch.rb, line 16
def self.patch!(obj, changes, options = {})
  delimiter = options[:delimiter] || '.'

  changes.each do |change|
    parts = decode_property_path(change[1], delimiter)
    last_part = parts.last

    parent_node = node(obj, parts[0, parts.size-1])

    if change[0] == '+'
      if last_part.is_a?(Fixnum)
        parent_node.insert(last_part, change[2])
      else
        parent_node[last_part] = change[2]
      end
    elsif change[0] == '-'
      if last_part.is_a?(Fixnum)
        parent_node.delete_at(last_part)
      else
        parent_node.delete(last_part)
      end
    elsif change[0] == '~'
      parent_node[last_part] = change[3]
    end
  end

  obj
end
similar?(a, b, options = {}) click to toggle source

@private

judge whether two objects are similar

# File lib/hashdiff_sym/util.rb, line 6
def self.similar?(a, b, options = {})
  opts = { :similarity => 0.8 }.merge(options)

  count_a = count_nodes(a)
  count_b = count_nodes(b)
  diffs = count_diff diff(a, b, opts)

  if count_a + count_b == 0
    return true
  else
    (1 - diffs.to_f/(count_a + count_b).to_f) >= opts[:similarity]
  end
end
unpatch!(obj, changes, options = {}) click to toggle source

Unpatch an object

@param [Hash, Array] obj the object to be unpatched, can be an Array or a Hash @param [Array] changes e.g. [[ '+', 'a.b', '45' ], [ '-', 'a.c', '5' ], [ '~', 'a.x', '45', '63']] @param [Hash] options supports following keys:

* :delimiter (String) ['.'] delimiter string for representing nested keys in changes array

@return the object after unpatch

@since 0.0.1

# File lib/hashdiff_sym/patch.rb, line 55
def self.unpatch!(obj, changes, options = {})
  delimiter = options[:delimiter] || '.'

  changes.reverse_each do |change|
    parts = decode_property_path(change[1], delimiter)
    last_part = parts.last

    parent_node = node(obj, parts[0, parts.size-1])

    if change[0] == '+'
      if last_part.is_a?(Fixnum)
        parent_node.delete_at(last_part)
      else
        parent_node.delete(last_part)
      end
    elsif change[0] == '-'
      if last_part.is_a?(Fixnum)
        parent_node.insert(last_part, change[2])
      else
        parent_node[last_part] = change[2]
      end
    elsif change[0] == '~'
      parent_node[last_part] = change[2]
    end
  end

  obj
end