class PDF::Reader::PageTextReceiver

Builds a UTF-8 string of all the text on a single page by processing all the operators in a content stream.

Constants

SPACE

Attributes

options[R]
state[R]

Public Instance Methods

content() click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 50
# Returns the text of the current page: the characters collected so far are
# handed to a PageLayout together with the device-space media box, and the
# layout's string form is returned.
def content
  layout = PageLayout.new(@characters, @device_mediabox)
  layout.to_s
end
invoke_xobject(label) click to toggle source

XObjects

# File lib/pdf/reader/page_text_receiver.rb, line 86
# Asks the page state to invoke the XObject registered under +label+.
#
# The state yields the object it found. A PDF::Reader::FormXObject is walked
# with this receiver, so the operators inside the form are delivered to the
# same callbacks; any other kind of object is ignored.
def invoke_xobject(label)
  @state.invoke_xobject(label) do |xobject|
    case xobject
    when PDF::Reader::FormXObject
      xobject.walk(self)
    end
  end
end
move_to_next_line_and_show_text(str) click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 72
# Callback for the ' operator: first advances the text state to the start of
# the next line, then shows +str+ through #show_text.
def move_to_next_line_and_show_text(str) # '
  @state.move_to_start_of_next_line
  show_text(str)
end
page=(page) click to toggle source

starting a new page

# File lib/pdf/reader/page_text_receiver.rb, line 40
# Prepares the receiver for a new page.
#
# Creates a fresh PageState for +page+, clears the collected content and
# characters, and resolves the page's :MediaBox attribute through
# page.objects. The first two and last two entries of the media box are then
# each passed through the state's CTM transform, and the first and last
# values of both results are stored as @device_mediabox.
def page=(page)
  @state = PageState.new(page)
  @content = []
  @characters = []
  @mediabox = page.objects.deref(page.attributes[:MediaBox])

  lower_left  = @state.ctm_transform(@mediabox[0], @mediabox[1])
  upper_right = @state.ctm_transform(@mediabox[2], @mediabox[3])
  @device_mediabox = [lower_left, upper_right].flat_map { |corner| [corner.first, corner.last] }
end
set_spacing_next_line_show_text(aw, ac, string) click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 77
# Callback for the " operator: stores +aw+ as the word spacing and +ac+ as the
# character spacing on the text state, then moves to the next line and shows
# +string+ via #move_to_next_line_and_show_text.
def set_spacing_next_line_show_text(aw, ac, string) # "
  @state.set_word_spacing(aw)
  @state.set_character_spacing(ac)
  move_to_next_line_and_show_text(string)
end
show_text(string) click to toggle source

Text Showing Operators

record text that is drawn on the page

# File lib/pdf/reader/page_text_receiver.rb, line 58
# Callback for the Tj operator: records +string+ as text drawn on the page by
# delegating to #internal_show_text.
def show_text(string) # Tj (AWAY)
  internal_show_text(string)
end
show_text_with_positioning(params) click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 62
# Callback for the TJ operator. +params+ mixes strings with other values:
# each String is shown as text, while every other element is passed to the
# text state as the second argument of process_glyph_displacement (with a
# width of 0 and the space flag set to false).
def show_text_with_positioning(params) # TJ [(A) 120 (WA) 20 (Y)]
  params.each do |element|
    case element
    when String
      internal_show_text(element)
    else
      @state.process_glyph_displacement(0, element, false)
    end
  end
end

Private Instance Methods

internal_show_text(string) click to toggle source
# File lib/pdf/reader/page_text_receiver.rb, line 97
# Records each glyph of +string+ as a positioned TextRun and advances the
# text state past it.
#
# string - the operand of a text-showing operator; the current font unpacks
#          it into glyph codes and converts each code to UTF-8.
#
# Glyphs whose UTF-8 form equals SPACE are not stored in @characters, but
# they still move the text position (and are flagged as a space when the
# displacement is applied).
#
# Raises PDF::Reader::MalformedPDFError when the state has no current font.
def internal_show_text(string)
  font = @state.current_font
  raise PDF::Reader::MalformedPDFError, "current font is invalid" if font.nil?

  # The run width is multiplied by a fixed factor of 1.
  # NOTE(review): presumably a placeholder for horizontal text scaling - confirm.
  th = 1

  font.unpack(string).each do |glyph_code|
    # position at which the current glyph is painted
    newx, newy = @state.trm_transform(0, 0)
    utf8_chars = font.to_utf8(glyph_code)
    is_space = utf8_chars == SPACE

    # glyph_width is the font's width for this glyph divided by 1000
    glyph_width = font.glyph_width(glyph_code) / 1000.0

    unless is_space
      scaled_glyph_width = glyph_width * @state.font_size * th
      @characters << TextRun.new(newx, newy, scaled_glyph_width, @state.font_size, utf8_chars)
    end

    # apply the glyph displacement for the current glyph so the next
    # glyph will appear in the correct position
    @state.process_glyph_displacement(glyph_width, 0, is_space)
  end
end