class BomDB::Diff::Aligner
Constants
- DIFF_RE
- INSERT_RE
- VERSE_RE
- WS_INSERT_RE
Public Class Methods
parse(diff_text)
click to toggle source
# File lib/bomdb/diff/aligner.rb, line 53
#
# Rewrites a diff-annotated text so that verse headings found inside
# deletions are re-emitted on their own lines.
#
# Walks +diff_text+ with a StringScanner, looking for DIFF_RE matches:
#
# * text between matches is copied through unchanged;
# * a '+' match (insertion) contributes its inner text;
# * a '-' match (deletion) is dropped, unless VERSE_RE finds a verse
#   heading inside it. In that case an immediately following
#   WS_INSERT_RE match (if any) is consumed as well, and
#   parse_verse_heading decides what goes before and after the heading.
#   The result is appended as: before, newline, verse, space, after.
#
# Finally, runs of spaces are collapsed to one space and spaces at the
# end of each line are removed.
#
# @param diff_text [String] the diff-annotated text to process
# @return [String] the realigned text
def self.parse(diff_text)
  scanner = StringScanner.new(diff_text)
  output = ''.dup

  until scanner.eos?
    # Byte offset at which the not-yet-emitted text starts.
    segment_start = scanner.pos

    unless scanner.scan_until(DIFF_RE)
      # No further diff markers: the remainder is plain text.
      output << scanner.rest
      break
    end

    # StringScanner#pos is a *byte* offset, while String#[] indexes by
    # character. Slice by bytes so that multibyte characters earlier in
    # the text do not cause the start of this segment to be skipped.
    output << scanner.pre_match.byteslice(segment_start..-1)

    diff_match = DIFF_RE.match(scanner.matched)
    case diff_match[1]
    when '-'
      # this is a deletion
      delete_inner = diff_match[2] # e.g. ", [|1 Nephi 1:1|] I"

      # the only deletions we care about are those with verse headings
      # inside them
      verse_match = VERSE_RE.match(delete_inner)
      if verse_match
        # an insertion directly after the deletion (if present) is the
        # replacement text that has to be arranged around the heading
        insert_inner =
          if scanner.scan(WS_INSERT_RE)
            WS_INSERT_RE.match(scanner.matched)[1]
          end

        before, verse, after =
          parse_verse_heading(verse_match, delete_inner, insert_inner)
        output << before << "\n" << verse << ' ' << after
      end
    when '+'
      # this is an insertion
      output << diff_match[2]
    end
  end

  output.gsub(/ +/, ' ').gsub(/ +$/, '')
end
parse_verse_heading(verse_match, deletion, insertion = nil)
click to toggle source
# File lib/bomdb/diff/aligner.rb, line 12
#
# Decides how the text of an insertion should be arranged around a verse
# heading that was found inside a deletion.
#
# The deleted text on either side of the heading is used as a guide:
#
# * no insertion, or nothing deleted on either side: nothing is placed
#   around the heading;
# * nothing deleted before the heading: the whole insertion goes after it;
# * nothing deleted after the heading: the whole insertion goes before it;
# * otherwise every split point of the insertion is scored by the sum of
#   Levenshtein.distance(deleted-before, head) and
#   Levenshtein.distance(deleted-after, tail), plus a penalty of 1 when the
#   head would end in a space; the lowest score wins, ties going to the
#   earliest split point.
#
# @param verse_match [MatchData] match of the verse heading; group 0 is the
#   full heading markup, group 1 the verse text, e.g. "1 Nephi 1:1"
# @param deletion [String] the deleted text the heading was found in
# @param insertion [String, nil] the text inserted right after the
#   deletion, if any
# @return [Array(String, String, String)] before, verse, after
# @raise [RuntimeError] if a split is required but +insertion+ is empty
def self.parse_verse_heading(verse_match, deletion, insertion = nil)
  # the text of the verse, e.g. "1 Nephi 1:1"
  verse = verse_match[1]
  return ['', verse, ''] unless insertion

  # partition always yields three strings, so neither side can be nil
  del_before, _heading, del_after = deletion.partition(verse_match[0])
  del_before = del_before.strip
  del_after = del_after.strip

  if del_before.empty? && del_after.empty?
    # nothing was deleted around the heading: leave it bare
    ['', verse, '']
  elsif del_before.empty?
    # the entire insertion goes after the verse heading
    ['', verse, insertion.chomp]
  elsif del_after.empty?
    # the entire insertion goes before the verse heading
    [insertion.chomp, verse, '']
  else
    # we have to use some heuristics to figure out where to split the
    # insertion. Including the index in the key makes the choice
    # deterministic when several split points score equally.
    split_at = (0...insertion.size).min_by do |i|
      score = Levenshtein.distance(del_before, insertion[0..i]) +
              Levenshtein.distance(del_after, insertion[(i + 1)..-1]) +
              (insertion[i] == ' ' ? 1 : 0)
      [score, i]
    end

    if split_at.nil?
      raise "Unable to find candidate split for #{del_before.inspect}, #{del_after.inspect} on #{insertion.inspect}"
    end

    [insertion[0..split_at].chomp, verse, insertion[(split_at + 1)..-1].chomp]
  end
end