class Shear::WordCollection

Attributes

words[R]

Public Class Methods

build_from_url(_url, _google_vision_api_key) click to toggle source
# File lib/shear/word_collection.rb, line 19
# Builds a collection from the word layout that VisionUtils.get_word_layout
# returns for the given URL and API key.
#
# Every layout entry is destructured as (text, bounding box, confidence) and
# appended to the new collection through #push_word.
#
# @param _url forwarded unchanged to VisionUtils.get_word_layout
# @param _google_vision_api_key forwarded unchanged to VisionUtils.get_word_layout
# @return the freshly built collection
def self.build_from_url(_url, _google_vision_api_key)
  layout = VisionUtils.get_word_layout(_url, _google_vision_api_key)
  collection = new

  layout.each do |text, box, conf|
    collection.push_word(text, bounding_box: box, confidence: conf)
  end

  collection
end
new(words = []) click to toggle source
# File lib/shear/word_collection.rb, line 30
# Creates a collection backed by the given word list.
#
# The list is stored as-is (it is not copied), so words pushed later are
# appended to the very object the caller passed in.
#
# @param words list of word entries; defaults to a fresh empty array
def initialize(words = [])
  @words = words
end

Public Instance Methods

bounding_box(_index) click to toggle source
# File lib/shear/word_collection.rb, line 54
# Looks up the bounding box of the word stored at the given position.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :bounding_box key
def bounding_box(_index)
  entry = @words[_index]
  entry[:bounding_box]
end
clone() click to toggle source
# File lib/shear/word_collection.rb, line 88
# Returns an independent copy of this collection.
#
# Each word entry is duplicated together with its :bounding_box and
# :original_bounding_box vertex lists. Without that, the copy would share the
# entry hashes with the receiver, and in-place changes made through the copy
# (#transform! rewriting vertices, #read flagging :deleted) would leak back
# into this collection.
#
# @return a new instance of the receiver's class holding the copied words
def clone
  self.class.new.tap do |coll_clone|
    @words.each do |w|
      copy = w.dup
      %i[bounding_box original_bounding_box].each do |key|
        # Duplicate the vertex list and every vertex; absent keys stay absent.
        copy[key] = copy[key].map(&:dup) if copy[key]
      end
      coll_clone.push_word_raw(copy)
    end
  end
end
confidence(_index) click to toggle source
# File lib/shear/word_collection.rb, line 46
# Looks up the confidence recorded for the word at the given position.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :conf key
def confidence(_index)
  entry = @words[_index]
  entry[:conf]
end
count() click to toggle source
# File lib/shear/word_collection.rb, line 66
# Number of word entries currently held, including entries flagged :deleted
# (no filtering is applied).
#
# @return [Integer]
def count
  @words.length
end
deleted(_index) click to toggle source
# File lib/shear/word_collection.rb, line 62
# Looks up the :deleted flag of the word at the given position.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :deleted key
def deleted(_index)
  entry = @words[_index]
  entry[:deleted]
end
location(_index) click to toggle source
# File lib/shear/word_collection.rb, line 42
# Returns the first vertex of the word's current bounding box.
#
# @param _index position of the word inside the collection
# @return element 0 of the entry's :bounding_box
def location(_index)
  box = @words[_index][:bounding_box]
  box[0]
end
original_bounding_box(_index) click to toggle source
# File lib/shear/word_collection.rb, line 58
# Looks up the bounding box the word was originally stored with.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :original_bounding_box key
def original_bounding_box(_index)
  entry = @words[_index]
  entry[:original_bounding_box]
end
original_location(_index) click to toggle source
# File lib/shear/word_collection.rb, line 50
# Returns the first vertex of the word's original bounding box.
#
# @param _index position of the word inside the collection
# @return element 0 of the entry's :original_bounding_box
def original_location(_index)
  box = @words[_index][:original_bounding_box]
  box[0]
end
push_word(_word, bounding_box:, confidence: 1.0) click to toggle source
# File lib/shear/word_collection.rb, line 70
# Appends a word entry to the collection.
#
# The entry records the raw text, its transliterated upper-case form
# (I18n.transliterate + upcase), the confidence, the bounding box as given,
# a clone of that bounding box kept as :original_bounding_box, and a
# :deleted flag initialised to false.
#
# @param _word text of the word
# @param bounding_box vertex list of the word; stored by reference
# @param confidence confidence value, 1.0 when omitted
# @return the internal word list after the append
def push_word(_word, bounding_box:, confidence: 1.0)
  normalized = I18n.transliterate(_word).upcase
  pristine_box = bounding_box.clone

  entry = {
    word: _word,
    tl_word: normalized,
    conf: confidence,
    bounding_box: bounding_box,
    original_bounding_box: pristine_box,
    deleted: false
  }

  @words.push(entry)
end
read(_upper_left_pt, _lower_right_pt, line_height: 2.0, exclusion: Set[], delete: false, min_confidence: 0) click to toggle source
# File lib/shear/word_collection.rb, line 107
# Reads the text found inside the box spanned by the two given points.
#
# Words are gathered with #select_inside_box, then peeled off line by line
# with #partition_by_line. Within a line, words are ordered by the first
# coordinate of their first bounding-box vertex and joined with spaces;
# lines are joined with "\n".
#
# @param _upper_left_pt forwarded to #select_inside_box as the box minimum
# @param _lower_right_pt forwarded to #select_inside_box as the box maximum
# @param line_height threshold handed to #partition_by_line
# @param exclusion collection of transliterated words to skip
# @param delete when true, the words read are flagged as deleted
# @param min_confidence minimum confidence a word needs to be read
# @return [ReadString] built from the text and the lowest confidence among
#   the words read (1.0 when nothing was read)
def read(_upper_left_pt, _lower_right_pt, line_height: 2.0, exclusion: Set[], delete: false,
  min_confidence: 0)
  pending = select_inside_box(_upper_left_pt, _lower_right_pt, min_confidence, exclusion, delete)

  confidences = pending.map { |w| w[:conf] }
  lowest_confidence = confidences.min || 1.0

  text_lines = []
  until pending.empty?
    current_line, pending = partition_by_line(pending, line_height)

    left_to_right = current_line.sort_by { |w| w[:bounding_box][0][0] }
    text_lines << left_to_right.map { |w| w[:word] }.join(' ')
  end

  ReadString.new(text_lines.join("\n"), lowest_confidence)
end
tl_word(_index) click to toggle source
# File lib/shear/word_collection.rb, line 38
# Looks up the transliterated form of the word at the given position.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :tl_word key
def tl_word(_index)
  entry = @words[_index]
  entry[:tl_word]
end
transform!(_matrix) click to toggle source
# File lib/shear/word_collection.rb, line 96
# Recomputes every word's :bounding_box by applying the given matrix to the
# vertices of its :original_bounding_box.
#
# Each vertex is extended with a trailing 1.0, multiplied by the matrix as a
# column vector, and the first two components of the product are written
# into the same position of the existing :bounding_box array. Because the
# source is always :original_bounding_box, repeated calls do not accumulate.
#
# @param _matrix matrix multiplied with each extended vertex
# @return self
def transform!(_matrix)
  @words.each do |entry|
    target_box = entry[:bounding_box]

    entry[:original_bounding_box].each_with_index do |vertex, position|
      homogeneous = Matrix.column_vector(vertex + [1.0])
      projected = (_matrix * homogeneous).transpose.to_a.first
      target_box[position] = projected[0..1]
    end
  end

  self
end
word(_index) click to toggle source
# File lib/shear/word_collection.rb, line 34
# Looks up the raw text of the word at the given position.
#
# @param _index position of the word inside the collection
# @return the value stored under the entry's :word key
def word(_index)
  entry = @words[_index]
  entry[:word]
end

Protected Instance Methods

partition_by_line(_words, _line_height) click to toggle source
# File lib/shear/word_collection.rb, line 149
# Splits the given words into the topmost text line and everything else.
#
# The topmost word is the one whose first bounding-box vertex has the
# smallest second coordinate. A word belongs to the line when its own
# coordinate is less than +_line_height+ away from that of the topmost word.
#
# The topmost word is always placed in the line, even when +_line_height+
# is zero or negative. This guarantees that a caller that keeps
# partitioning the remainder until it is empty (as #read does) always makes
# progress instead of looping forever.
#
# @param _words word entries to split
# @param _line_height distance threshold for belonging to the line
# @return [Array(Array, Array)] the line's words and the remaining words
def partition_by_line(_words, _line_height)
  upper_word = _words.min_by { |w| w[:bounding_box][0][1] }

  _words.partition do |word|
    word.equal?(upper_word) ||
      word[:bounding_box][0][1] - upper_word[:bounding_box][0][1] < _line_height
  end
end
push_word_raw(_raw) click to toggle source
# File lib/shear/word_collection.rb, line 130
# Appends an already-built word entry to the collection without any
# processing or copying.
#
# @param _raw word entry to append as-is
# @return the internal word list after the append
def push_word_raw(_raw)
  @words.push(_raw)
end
select_inside_box(_upper_left_pt, _lower_right_pt, _min_confidence, _exclusion, _delete) click to toggle source
# File lib/shear/word_collection.rb, line 134
# Collects the words that fall inside the given box.
#
# A word is selected when its transliterated form is not in +_exclusion+,
# it is not flagged :deleted, its confidence is at least +_min_confidence+,
# and BoundingBoxUtils.collides? reports that its bounding box collides
# with the box. When +_delete+ is truthy, every selected word is flagged
# :deleted.
#
# The box hash is built once per call rather than once per word, and the
# :deleted flag is checked before the collision test so that already
# consumed words never reach BoundingBoxUtils.
#
# @param _upper_left_pt point passed as the box's :min
# @param _lower_right_pt point passed as the box's :max
# @param _min_confidence minimum :conf a word needs to be selected
# @param _exclusion collection of transliterated words to skip
# @param _delete whether to flag the selected words as deleted
# @return [Array] the selected word entries, in collection order
def select_inside_box(_upper_left_pt, _lower_right_pt, _min_confidence, _exclusion, _delete)
  aabb = { min: _upper_left_pt, max: _lower_right_pt }

  @words.each_with_object([]) do |w, inside_box|
    next if _exclusion.include?(w[:tl_word]) || w[:deleted]
    next unless w[:conf] >= _min_confidence &&
                BoundingBoxUtils.collides?(aabb, w[:bounding_box])

    inside_box << w
    w[:deleted] = true if _delete
  end
end