class RexleDiff

Public Class Methods

new(source1, source2, fuzzy_match: false) click to toggle source
# File lib/rexle-diff.rb, line 63
# Builds a hashed document from each source, diffs the two and keeps the
# newer document (annotated in place by #compare) for later querying.
#
# source1     - the older document; handed unchanged to Rexle.new
# source2     - the newer document; handed unchanged to Rexle.new
# fuzzy_match - when true, #compare uses #fuzzy_match to tell modified
#               entries apart from newly added ones
def initialize(source1, source2, fuzzy_match: false)
  @fuzzy_match = fuzzy_match

  # Both sources go through the same pipeline, older one first.
  older, newer = [source1, source2].map do |source|
    HashedDoc.new(Rexle.new(source).root).to_doc
  end

  compare(older.root, newer.root)
  @doc = newer
end

Public Instance Methods

changed() click to toggle source
# File lib/rexle-diff.rb, line 75
# Queries the root of the diffed document for child elements that carry a
# last_modified attribute and returns whatever the xpath call yields.
def changed
  root = @doc.root
  root.xpath('*[@last_modified]')
end
Also aliased as: updated
created() click to toggle source
# File lib/rexle-diff.rb, line 81
# Queries the root of the diffed document for child elements that carry a
# created attribute and returns whatever the xpath call yields.
def created
  root = @doc.root
  root.xpath('*[@created]')
end
to_doc() click to toggle source
# File lib/rexle-diff.rb, line 85
# Returns the document stored by the constructor, i.e. the newer document
# after it has been passed through the comparison.
def to_doc
  @doc
end
updated()
Alias for: changed

Private Instance Methods

added(hxlist, hxlist2) click to toggle source

Returns an array of indexes of the nodes from the newer document which have been added or changed

# File lib/rexle-diff.rb, line 95
# Returns the indexes (ascending) of the entries in +hxlist2+ which were
# added or changed relative to +hxlist+.
#
# hxlist  - hash values of the older node's children
# hxlist2 - hash values of the newer node's children
#
# Each value is combined with its position via <tt>value + index</tt>, so
# the values must support +Integer+ addition. An entry of +hxlist2+ counts
# as unchanged only when an entry of +hxlist+ yields the same combined key.
def added(hxlist, hxlist2)
  old_keys = {}
  hxlist.each_with_index { |x, i| old_keys[x + i] = true }

  # Test every position of the newer list directly. Looking the combined
  # key up again with Array#index always returns the first match, so two
  # positions sharing a key were reported as the same index twice; it also
  # made the method quadratic.
  hxlist2.each_index.reject { |i| old_keys.key?(hxlist2[i] + i) }
end
compare(node, node2) click to toggle source

The main method for comparing the newest document node with the older document node

# File lib/rexle-diff.rb, line 110
# Recursively compares the child elements of +node+ (from the older
# document) with those of +node2+ (from the newer document) and annotates
# the elements of +node2+ in place with a :created attribute.
#
# node  - element from the older document
# node2 - the corresponding element from the newer document
#
# The index lists produced by #added, #deleted and #unchanged are 0-based
# positions in the hash lists; every element lookup below uses index + 1.
# NOTE(review): presumably the elements accessor is 1-based - confirm.
def compare(node, node2)

  # one hash value per child element of each node
  hxlist, hxlist2 = hashedxml(node), hashedxml(node2)   
  
  # elements which may have been modified are also
  #                                         added to the added_indexes list
  
  added_or_changed_indexes = added(hxlist, hxlist2)

  # with fuzzy matching enabled the list is split into added and updated
  # entries; otherwise every entry is treated as added and none as updated
  added_indexes, updated_indexes  = @fuzzy_match ? \
                 fuzzy_match(added_or_changed_indexes, node, node2) : \
                                                 [added_or_changed_indexes, []]
  
  # TODO: if there are updated indexes, search for differences with the
  # original
  
  # stamp each added element, and every element yielded by its traverse
  # call, with a :created time unless one is already present
  added_indexes.each do |i|
    
    attributes = node2.elements[i+1].attributes
    attributes[:created] ||= Time.now.to_s
    
    node2.elements[i+1].traverse do |e|

      e.attributes[:created] ||= Time.now.to_s

    end
  end

  # NOTE(review): deleted_indexes is computed but not read again in this
  # method
  deleted_indexes = deleted(hxlist, hxlist2)
  
  # pairs of [index in node, index in node2]
  unchanged_indexes = unchanged(hxlist, hxlist2)    

  # updated entries sit at the same index in both nodes; descend into them
  updated_indexes.each do |i|
    compare(node.elements[i+1], node2.elements[i+1])
  end
  
  unchanged_indexes.each do |i, i2|                  

    # descend only when the older element has child elements of its own
    compare(node.elements[i+1], node2.elements[i2+1]) if node\
                                                 .elements[i+1].has_elements?
    attributes2 = node2.elements[i2+1].attributes
    
    # carry the older element's :created value over when the newer element
    # has none
    if attributes2[:created].nil? then
     attributes = node.elements[i+1].attributes
     attributes2[:created] = attributes[:created] if attributes[:created]
    end
  end

end
deleted(list, list2) click to toggle source

Returns an array of indexes pointing to the nodes which were removed from the original document's relative parent node

# File lib/rexle-diff.rb, line 163
# Returns the indexes (ascending) of the entries in +list+ whose value does
# not occur anywhere in +list2+.
#
# list  - hash values of the older node's children
# list2 - hash values of the newer node's children
def deleted(list, list2)
  surviving = {}
  list2.each { |x| surviving[x] = true }

  # Select positions directly. Mapping the removed values back through
  # Array#index returned the first occurrence for every duplicate, so
  # several identical removed siblings were all reported at one index.
  list.each_index.reject { |i| surviving.key?(list[i]) }
end
fuzzy_match(added_or_changed_indexes, node, node2) click to toggle source
# File lib/rexle-diff.rb, line 170
# Splits +added_or_changed_indexes+ into entries that are new and entries
# that are modifications of the element at the same index in +node+.
#
# An index counts as a modification when +node+ has an element at that
# position and FuzzyMatch, searching the older element's text for the newer
# element's text, returns a result whose first score is at least 0.5.
# Modified elements in +node2+ receive a :last_modified time and inherit
# the older element's :created attribute when it has one.
#
# Returns [added_indexes, updated_indexes].
def fuzzy_match(added_or_changed_indexes, node, node2)
  modified, brand_new = added_or_changed_indexes.partition do |idx|
    original = node.elements[idx + 1]

    # no counterpart in the older node: the block yields nil, so the index
    # lands in the "new" partition
    if original
      matcher = FuzzyMatch.new([original.text])
      found, score, _other_score =
        matcher.find_with_score(node2.elements[idx + 1].text)

      found && score >= 0.5
    end
  end

  modified.each do |idx|
    new_attrs = node2.elements[idx + 1].attributes
    new_attrs[:last_modified] = Time.now.to_s

    old_attrs = node.elements[idx + 1].attributes
    new_attrs[:created] = old_attrs[:created] if old_attrs[:created]
  end

  [brand_new, modified]
end
hashedxml(node) click to toggle source

Returns an array of MD5 hashed strings, one for each child element of the given node

# File lib/rexle-diff.rb, line 201
# Collects the value returned by #hashed for every entry of +node.elements+,
# in order.
def hashedxml(node)
  node.elements.map { |child| child.hashed }
end
unchanged(list, list2) click to toggle source

Returns an array of index pairs (index in the original node, index in the new node), each pair identifying a node which did not change.

# File lib/rexle-diff.rb, line 210
# Returns one [index_in_list, index_in_list2] pair for every distinct value
# present in both lists. Each index is that of the value's first occurrence
# in the respective list.
def unchanged(list, list2)
  shared = list & list2
  shared.map { |value| [list.index(value), list2.index(value)] }
end