class Dyph::TwoWayDiffers::HeckelDiff

Constants

ChangeData

Public Class Methods

diff(left, right) click to toggle source

Two-way diff based on the algorithm by P. Heckel. @param [in] left Array of anything implementing hash and equals @param [in] right Array of anything implementing hash and equals

# File lib/dyph/two_way_differs/heckel_diff.rb, line 17
def self.diff(left, right)
  differ = HeckelDiff.new(left,right)
  differ.perform_diff
end
execute_diff(old_text_array, new_text_array) click to toggle source

Algorithm adapted from www.rad.upenn.edu/sbia/software/basis/apidoc/v1.2/diff3_8py_source.html

# File lib/dyph/two_way_differs/heckel_diff.rb, line 7
def self.execute_diff(old_text_array, new_text_array)
  raise ArgumentError, "Argument is not an array." unless old_text_array.is_a?(Array) && new_text_array.is_a?(Array)
  diff_result = diff(old_text_array, new_text_array)
  # convert to typed differ's output (wrapped with change types eg. Add, Delete, Change)
  HeckelDiffWrapper.new(old_text_array, new_text_array, diff_result).convert_to_typed_ouput
end
new(left, right) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 22
def initialize(left, right)
  @left = left
  @right = right
end

Public Instance Methods

perform_diff() click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 27
def perform_diff
  unique_positions = identify_unique_postions
  unique_positions.sort!{ |a, b| a[0] <=> b[0] }    # sort by the line in which the line was found in a
  left_change_pos, right_change_pos = find_next_change
  init_changes = ChangeData.new(left_change_pos, right_change_pos, [])
  final_changes = unique_positions.reduce(init_changes, &method(:get_differences))
  final_changes.change_ranges
end

Private Instance Methods

append_change_range(changes_ranges, left_lo, left_hi, right_lo, right_hi) click to toggle source

given the calculated bounds of the 2 way diff, create the proper change type and add it to the queue.

# File lib/dyph/two_way_differs/heckel_diff.rb, line 94
def append_change_range(changes_ranges, left_lo, left_hi, right_lo, right_hi)
  if left_lo <= left_hi && right_lo <= right_hi # for this change, the bounds are both 'normal'.  the beginning of the change is before the end.
    changes_ranges << [:change, left_lo + 1, left_hi + 1, right_lo + 1, right_hi + 1]
  elsif left_lo <= left_hi
    changes_ranges << [:delete, left_lo + 1, left_hi + 1, right_lo + 1, right_lo]
  elsif right_lo <= right_hi
    changes_ranges << [:add, left_lo + 1, left_lo, right_lo + 1, right_hi + 1]
  end
  changes_ranges
end
find_next_change(left_start_pos=0, right_start_pos=0) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 54
def find_next_change(left_start_pos=0, right_start_pos=0)
  l_arr, r_arr = (@left[left_start_pos..-1] || []), (@right[right_start_pos..-1] || [])
  offset = mismatch_offset l_arr, r_arr
  [ left_start_pos + offset, right_start_pos + offset]
end
find_prev_change(left_lo, right_lo, left_hi, right_hi) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 61
def find_prev_change(left_lo, right_lo, left_hi, right_hi)
  if left_lo > left_hi || right_lo > right_hi
    [left_lo, left_hi, right_lo, right_hi]
  else
    l_arr, r_arr = (@left[left_lo .. left_hi].reverse || []), (@right[right_lo .. right_hi].reverse || [])
    offset = mismatch_offset l_arr, r_arr
    [left_lo, left_hi - offset, right_lo, right_hi - offset]
  end
end
find_unique(array) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 84
def find_unique(array)
  flagged_uniques = array.each_with_index.reduce({}) do |hash, item_index|
    item, pos = item_index
    hash[item] = {pos: pos, unique: hash[item].nil?}
    hash
  end
  flagged_uniques.select { |_, v| v[:unique] }.map { |k, v| [k, v[:pos]] }.to_h
end
get_differences(change_data, unique_positions) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 40
def get_differences(change_data, unique_positions)
  left_pos, right_pos = change_data.left_change_pos, change_data.right_change_pos
  left_uniq_pos, right_uniq_pos = unique_positions
  if left_uniq_pos < left_pos || right_uniq_pos < right_pos
    change_data
  else
    left_lo, left_hi, right_lo, right_hi  = find_prev_change(left_pos, right_pos, left_uniq_pos-1, right_uniq_pos-1)
    next_left_pos, next_right_pos         = find_next_change(left_uniq_pos+1, right_uniq_pos+1)

    updated_ranges = append_change_range(change_data.change_ranges, left_lo, left_hi, right_lo, right_hi)
    ChangeData.new(next_left_pos, next_right_pos, updated_ranges)
  end
end
identify_unique_postions() click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 76
def identify_unique_postions
  left_uniques =   find_unique(@left)
  right_uniques =  find_unique(@right)
  shared_keys = left_uniques.keys & right_uniques.keys
  uniq_ranges = shared_keys.map { |k| [left_uniques[k], right_uniques[k]] }
  uniq_ranges.unshift([ @left.length, @right.length])
end
mismatch_offset(l_arr, r_arr) click to toggle source
# File lib/dyph/two_way_differs/heckel_diff.rb, line 71
def mismatch_offset(l_arr, r_arr)
  _ , index  = l_arr.zip(r_arr).each_with_index.detect { |pair, _| pair[0] != pair[1] }
  index || [l_arr.length, r_arr.length].min
end