module Build::Text::Merge

Constants

Difference
LCSNode

Public Class Methods

combine(old_text, new_text) click to toggle source
# File lib/build/text/merge.rb, line 27
def self.combine(old_text, new_text)
        lcs = lcs(old_text, new_text)
        changes = []
        
        n = 0; o = 0; l = 0
        while o < old_text.size and n < new_text.size and l < lcs.size
                if !similar(old_text[o], lcs[l])
                        changes << Difference.new(:old, old_text[o])
                        o += 1
                elsif !similar(new_text[n], lcs[l])
                        changes << Difference.new(:new, new_text[n])
                        n += 1
                else
                        changes << Difference.new(:both, lcs[l])
                        o += 1; n += 1; l += 1
                end
        end
        
        while o < old_text.size
                changes << Difference.new(:old, old_text[o])
                o += 1
        end
        
        while n < new_text.size
                changes << Difference.new(:old, new_text[n])
                n += 1
        end
        
        changes.map do |change|
                change.value
        end
end
lcs(x, y) click to toggle source

Find the Longest Common Subsequence in the given sequences x, y.

# File lib/build/text/merge.rb, line 107
def self.lcs(x, y)
        # Create the lcs matrix:
        m = Array.new(x.length + 1) do
                Array.new(y.length + 1) do
                        LCSNode.new(nil, nil)
                end
        end

        # LCS(i, 0) and LCS(0, j) are always 0:
        for i in 0..x.length do m[i][0].value = 0 end
        for j in 0..y.length do m[0][j].value = 0 end

        # Main algorithm, solve row by row:
        for i in 1..x.length do
                for j in 1..y.length do
                        if similar(x[i-1], y[j-1])
                                # Value is based on maximizing the length of the matched strings:
                                m[i][j].value = m[i-1][j-1].value + (x[i-1].chomp.length + y[j-1].chomp.length) / 2.0
                                m[i][j].previous = [-1, -1]
                        else
                                if m[i-1][j].value >= m[i][j-1].value
                                        m[i][j].value = m[i-1][j].value
                                        m[i][j].previous = [-1, 0]
                                else
                                        m[i][j].value = m[i][j-1].value
                                        m[i][j].previous = [0, -1]
                                end
                        end
                end
        end

        # Get the solution by following the path backwards from m[x.length][y.length]
        lcs = []
        
        i = x.length; j = y.length
        until m[i][j].previous == nil do
                if m[i][j].previous == [-1, -1]
                        lcs << x[i-1]
                end
                
                i, j = i + m[i][j].previous[0], j + m[i][j].previous[1]
        end

        return lcs.reverse!
end
levenshtein_distance(s, t) click to toggle source

This code is based directly on the Text gem implementation Returns a value representing the “cost” of transforming str1 into str2

# File lib/build/text/merge.rb, line 62
def self.levenshtein_distance(s, t)
        n = s.length
        m = t.length

        return m if n == 0
        return n if m == 0

        d = (0..m).to_a
        x = nil

        n.times do |i|
                e = i+1

                m.times do |j|
                        cost = (s[i] == t[j]) ? 0 : 1
                        x = [
                                d[j+1] + 1, # insertion
                                e + 1,      # deletion
                                d[j] + cost # substitution
                        ].min
                        d[j] = e
                        e = x
                end

                d[m] = x
        end

        return x
end
similar(s, t, factor = 0.15) click to toggle source

Calculate the similarity of two sequences, return true if they are with factor% similarity.

# File lib/build/text/merge.rb, line 93
def self.similar(s, t, factor = 0.15)
        return true if s == t
        
        distance = levenshtein_distance(s, t)
        average_length = (s.length + t.length) / 2.0

        proximity = (distance.to_f / average_length)
        
        return proximity <= factor
end