class HocrReader::Reader
class reader
Attributes
parts[RW]
Public Class Methods
new(str)
click to toggle source
# File lib/hocr_reader/reader.rb, line 13
# Builds a reader around a markup string.
#
# str - the raw text to parse (presumably hOCR markup, going by the
#       module name; confirm against callers).
#
# Keeps the raw input in @string and the document returned by
# Nokogiri::HTML in @html.
def initialize(str)
  @string = str
  @html = Nokogiri::HTML(str)
end
Public Instance Methods
convert_to_string()
click to toggle source
rubocop:enable Metrics/MethodLength, Metrics/AbcSize
# File lib/hocr_reader/reader.rb, line 52
# Joins the text of every entry in @parts into a single string.
#
# Each part's text is followed by one space, so a non-empty result always
# ends with a trailing space (kept for backward compatibility).
#
# Returns a String; it is empty when @parts is empty or has not been
# populated yet.
def convert_to_string
  # @parts stays nil until something assigns it, so fall back to an empty
  # list instead of raising NoMethodError on nil.
  (@parts || []).map { |part| "#{part.text} " }.join
end
extract_parts(part_name)
click to toggle source
rubocop:disable Metrics/MethodLength, Metrics/AbcSize
# File lib/hocr_reader/reader.rb, line 34
# Finds every node in @html matching the selector registered for
# +part_name+ in TAGS, skips nodes whose text is blank, wraps each remaining
# node in a Part and stores the resulting list in @parts.
#
# part_name - key into TAGS that selects the CSS selector to query.
#
# Returns the Array of Part objects (the same list assigned to @parts).
# Raises ArgumentError when TAGS has no entry for +part_name+.
def extract_parts(part_name)
  tag = TAGS[part_name]
  raise ArgumentError, "unknown part name: #{part_name.inspect}" unless tag

  # The selector group repeats the tag, as the original code did,
  # e.g. 'span.ocrx_word, span.ocrx_word'.
  tag_pair = "#{tag}, #{tag}"
  nodes = @html.css(tag_pair).reject { |part| part.text.strip.empty? }

  @parts = nodes.map do |part|
    # Node#[] returns nil for an absent attribute, so a node without a
    # title yields an empty attribute list instead of raising NoMethodError.
    title_attributes = part['title'].to_s.split(';')
    # nil when the node carries no lang attribute.
    language_attribute = part['lang']
    Part.new(part_name, part, title_attributes, language_attribute)
  end
end
method_missing(name, *args, &block)
click to toggle source
Calls superclass method
# File lib/hocr_reader/reader.rb, line 18
# Treats any method name that has a truthy entry in TAGS as a request to
# extract the corresponding parts; every other name is passed to the
# superclass implementation.
#
# name  - the name of the method that was called.
# args  - call arguments; not used when the name is found in TAGS.
# block - call block; not used when the name is found in TAGS.
#
# Returns whatever extract_parts returns for a name found in TAGS.
def method_missing(name, *args, &block)
  return super unless TAGS[name]

  extract_parts(name)
end
respond_to_missing?(name, *)
click to toggle source
Calls superclass method
# File lib/hocr_reader/reader.rb, line 26
# Reports whether a dynamically handled method name is supported.
#
# Returns true for any name with a truthy entry in TAGS, so respond_to?
# agrees with method_missing; every other name is delegated to the
# superclass implementation.
def respond_to_missing?(name, *)
  # The branch for known tags used to be empty and therefore returned nil,
  # which made respond_to? answer false for them.
  TAGS[name] ? true : super
end