class PDF::Reader::TextRun
A value object that represents one or more consecutive characters on a page.
Attributes
Public Class Methods
Source
# File lib/pdf/reader/text_run.rb, line 13
# Builds a text run from its position, width, font size and text.
#
# x, y      - coordinates stored as given.
# width     - width stored as given.
# font_size - stored after calling #floor on it, so any fractional part is dropped.
# text      - the characters this run holds, stored as given.
def initialize(x, y, width, font_size, text)
  @x, @y, @width = x, y, width
  @font_size = font_size.floor
  @text = text
end
Public Instance Methods
Source
# File lib/pdf/reader/text_run.rb, line 53
# Merges this run with +other+ and returns a new TextRun.
#
# The new run keeps this run's x, y and font_size, and its width spans from
# this run's x to other.endx. When the horizontal gap between the end of this
# run and the start of +other+ is less than 20% of the font size, the two
# texts are joined directly; otherwise a single space is placed between them.
#
# Raises ArgumentError when mergable?(other) is false.
def +(other)
  unless mergable?(other)
    raise ArgumentError, "#{other} cannot be merged with this run"
  end

  gap = other.x - endx
  combined_text =
    if gap < font_size * 0.2
      text + other.text
    else
      "#{text} #{other.text}"
    end

  TextRun.new(x, y, other.endx - x, font_size, combined_text)
end
Source
# File lib/pdf/reader/text_run.rb, line 23
# Orders two runs by position.
#
# Returns 0 when both x and y are equal. Otherwise a run with a larger y
# sorts before one with a smaller y; when y does not decide, a run with a
# smaller x sorts first. Returns nil if none of the comparisons holds.
def <=>(other)
  return 0 if x == other.x && y == other.y

  # y is compared first, and in reverse: a larger y sorts earlier.
  return 1 if y < other.y
  return -1 if y > other.y

  # Same y: fall back to ascending x.
  return -1 if x < other.x
  return 1 if x > other.x

  nil
end
Allows collections of TextRun objects to be sorted. They are sorted by their position on a Cartesian plane, from top left to bottom right.
Source
# File lib/pdf/reader/text_run.rb, line 63
# Returns a compact one-line description of the run: its text, followed by
# its width ("w:"), font size ("f:") and position ("@x,y").
def inspect
  dimensions = "w:#{width} f:#{font_size}"
  position = "@#{x},#{y}"
  "#{text} #{dimensions} #{position}"
end
Source
# File lib/pdf/reader/text_run.rb, line 67
# Returns true when this run and +other_run+ overlap on both axes.
# The comparisons are inclusive, so runs that only touch at an edge count as
# intersecting.
def intersect?(other_run)
  # Horizontal extents must overlap before the vertical ones are checked.
  return false unless x <= other_run.endx && endx >= other_run.x

  endy >= other_run.y && y <= other_run.endy
end
Source
# File lib/pdf/reader/text_run.rb, line 73
# Returns the area shared with +other_run+ divided by this run's own area.
#
# Returns the Integer 0 when the runs do not intersect; otherwise the result
# of a Float division of the overlap area by +area+.
def intersection_area_percent(other_run)
  return 0 unless intersect?(other_run)

  # Edges of the overlapping rectangle.
  right = [endx, other_run.endx].min
  left = [x, other_run.x].max
  top = [endy, other_run.endy].min
  bottom = [y, other_run.y].max

  overlap = (right - left) * (top - bottom)
  overlap.to_f / area
end
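Returns the proportion of this text run's area that is overlapped by another run, as a ratio (intersection area divided by this run's area) rather than a value scaled to 100. Returns 0 when the runs do not intersect.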
Source
# File lib/pdf/reader/text_run.rb, line 45
# Returns the run's width divided by its character count, i.e. the average
# width of one character in this run.
def mean_character_width
  characters = character_count
  @width / characters
end
Source
# File lib/pdf/reader/text_run.rb, line 49
# Returns true when +other+ can be merged onto the end of this run.
#
# All three conditions must hold: both runs have the same y once truncated
# with #to_i, both have the same font_size, and other.x lies inside
# mergable_range.
def mergable?(other)
  return false unless y.to_i == other.y.to_i
  return false unless font_size == other.font_size

  mergable_range.include?(other.x)
end
Private Instance Methods
Source
# File lib/pdf/reader/text_run.rb, line 95
# Returns the size of the run's text as a Float, so that callers dividing by
# it get floating-point division.
def character_count
  Float(@text.size)
end
Assume string encoding is marked correctly and we can trust String#size to return a character count
Source
# File lib/pdf/reader/text_run.rb, line 89
# Returns the inclusive range of x positions at which a following run may
# start and still be merged: from 3 units before this run's endx to one
# font_size past it. The range is computed once and memoized.
def mergable_range
  @mergable_range ||= ((endx - 3)..(endx + font_size))
end