class HocrTurtletext::Textangle

A DSL syntax for text extraction. Modified from the original at github.com/tardate/pdf-reader-turtletext

Attributes

above[W]
below[W]
inclusive[W]
left_of[W]
reader[R]
right_of[W]

Public Class Methods

new(hocr_turtletext_reader) { |self| ... } click to toggle source

hocr_turtletext_reader is a HocrTurtletext::Reader

# File lib/hocr_turtletext/textangle.rb, line 8
# Builds a textangle bound to +hocr_turtletext_reader+ with the +inclusive+
# flag initially off.
#
# An optional block configures the new instance. A block that declares
# exactly one parameter receives the textangle as that argument; any other
# block is evaluated with the textangle as +self+ so the DSL methods can be
# called without a receiver.
def initialize(hocr_turtletext_reader, &block)
  @reader = hocr_turtletext_reader
  @inclusive = false
  return unless block_given?

  # Arity decides the calling convention: explicit receiver vs. DSL style.
  if block.arity == 1
    yield self
  else
    instance_eval(&block)
  end
end

Public Instance Methods

above(*args) click to toggle source
# File lib/hocr_turtletext/textangle.rb, line 40
# Combined getter/setter for the +above+ boundary.
#
# When the first argument is truthy it is stored in @above; a missing, +nil+
# or +false+ argument leaves the stored value untouched. Extra arguments are
# ignored. Always returns the current @above.
def above(*args)
  candidate, = args
  @above = candidate if candidate
  @above
end
below(*args) click to toggle source
# File lib/hocr_turtletext/textangle.rb, line 48
# Combined getter/setter for the +below+ boundary.
#
# When the first argument is truthy it is stored in @below; a missing, +nil+
# or +false+ argument leaves the stored value untouched. Extra arguments are
# ignored. Always returns the current @below.
def below(*args)
  candidate, = args
  @below = candidate if candidate
  @below
end
exclusive!() click to toggle source

Command: sets +inclusive+ to false.

# File lib/hocr_turtletext/textangle.rb, line 35
# Command: clears the +inclusive+ flag by setting @inclusive to false.
# #text forwards this flag as the last argument to reader.text_in_region.
# Returns false (the value assigned).
def exclusive!
  @inclusive = false
end
inclusive(*args) click to toggle source
# File lib/hocr_turtletext/textangle.rb, line 22
# Combined getter/setter for the +inclusive+ flag.
#
# With no argument, or with +nil+, the stored value is returned unchanged.
# Any other first argument is stored - including +false+, so the DSL form
# <tt>inclusive false</tt> really switches the flag off (a plain truthiness
# test would silently ignore it). Extra arguments are ignored.
#
# Returns the current value of @inclusive.
def inclusive(*args)
  value = args.first
  # Only nil means "no value supplied"; false is a legitimate setting.
  @inclusive = value unless value.nil?
  @inclusive
end
inclusive!() click to toggle source

Command: sets +inclusive+ to true.

# File lib/hocr_turtletext/textangle.rb, line 30
# Command: raises the +inclusive+ flag by setting @inclusive to true.
# #text forwards this flag as the last argument to reader.text_in_region.
# Returns true (the value assigned).
def inclusive!
  @inclusive = true
end
left_of(*args) click to toggle source
# File lib/hocr_turtletext/textangle.rb, line 56
# Combined getter/setter for the +left_of+ boundary.
#
# When the first argument is truthy it is stored in @left_of; a missing,
# +nil+ or +false+ argument leaves the stored value untouched. Extra
# arguments are ignored. Always returns the current @left_of.
def left_of(*args)
  candidate, = args
  @left_of = candidate if candidate
  @left_of
end
right_of(*args) click to toggle source
# File lib/hocr_turtletext/textangle.rb, line 64
# Combined getter/setter for the +right_of+ boundary.
#
# When the first argument is truthy it is stored in @right_of; a missing,
# +nil+ or +false+ argument leaves the stored value untouched. Extra
# arguments are ignored. Always returns the current @right_of.
def right_of(*args)
  candidate, = args
  @right_of = candidate if candidate
  @right_of
end
text() click to toggle source

Returns the text array found within the defined region. Each line of text is an array of the separate text elements found on that line.

[["first line first text", "first line last text"],["second line text"]]
# File lib/hocr_turtletext/textangle.rb, line 74
# Returns whatever reader.text_in_region yields for the configured region,
# or +nil+ when no reader is set.
#
# Each boundary is resolved independently:
# * +right_of+ gives the lower x limit (default 0)
# * +left_of+ gives the upper x limit (default 99999)
# * +below+ gives the lower y limit (default 0)
# * +above+ gives the upper y limit (default 99999)
#
# NOTE(review): +above+ feeding the *upper* y limit suggests y grows
# downwards on the page - confirm against the reader's coordinate system.
#
# A boundary may be any Numeric (used as-is) or a value handed to
# reader.text_position, whose +:x+ / +:y+ entry becomes the limit. If
# text_position finds nothing, that limit is +nil+ and is passed on
# unchanged to text_in_region.
def text
  return unless reader

  xmin = resolve_boundary(right_of, :x, 0)
  xmax = resolve_boundary(left_of, :x, 99999) # TODO: figure out the actual limit?
  ymax = resolve_boundary(above, :y, 99999)
  ymin = resolve_boundary(below, :y, 0) # TODO: figure out the actual limit?

  reader.text_in_region(xmin, xmax, ymin, ymax, inclusive)
end

# Resolves one region boundary to a coordinate: +fallback+ when +anchor+ is
# unset, +anchor+ itself when numeric, otherwise the +axis+ entry of the
# position the reader reports for +anchor+ (nil when it reports none).
private def resolve_boundary(anchor, axis, fallback)
  return fallback unless anchor
  # is_a?(Numeric) instead of an exact class match so that Rational,
  # BigDecimal and Integer/Float subclasses are accepted as coordinates.
  return anchor if anchor.is_a?(Numeric)

  position = reader.text_position(anchor)
  position[axis] if position
end