class CooCoo::DataSources::Xournal::TrainingDocument::DocumentReader

Public Class Methods

new() click to toggle source
# File lib/coo-coo/data_sources/xournal/training_document/document_reader.rb, line 9
# Builds a reader. Nothing is initialised here: the constructor takes no
# arguments and sets no instance variables.
def initialize; end

Public Instance Methods

load(xournal) click to toggle source
# File lib/coo-coo/data_sources/xournal/training_document/document_reader.rb, line 12
# Converts a Xournal document into a TrainingDocument.
#
# The grid dimensions come from #read_meta_label; every layer of every page
# is then handed to #process_layer, which adds the examples it finds to the
# new document.
#
# @param xournal the document to read; must respond to +each_page+, and each
#   page must respond to +each_layer+
# @return [TrainingDocument] the document populated by #process_layer
# @raise [ArgumentError] when #read_meta_label yields no column or row count
def load(xournal)
  # The version and cells-per-example values are parsed but not used here.
  _version, columns, rows, _cells_per_example = read_meta_label(xournal)

  if columns.nil? || rows.nil?
    raise ArgumentError, "Xournal lacks a Text element with '#{META_LABEL} VERSION: COLS ROWS CELLS_PER_EXAMPLE'"
  end

  TrainingDocument.new.tap do |doc|
    xournal.each_page do |page|
      page.each_layer { |layer| process_layer(doc, page, layer, columns, rows) }
    end
  end
end
process_layer(doc, page, layer, columns, rows) click to toggle source
# File lib/coo-coo/data_sources/xournal/training_document/document_reader.rb, line 58
# Splits one layer into grid cells and adds an example to +doc+ for every
# cell that holds a label, strokes, or both.
#
# The page is divided into +columns+ x +rows+ equally sized cells. Text
# elements are assigned to a cell by rounding their position to the nearest
# grid index, while strokes are assigned by truncating the first point
# returned from +stroke.minmax+. Text matching the meta label and strokes
# whose colour equals PARSED_GRID_COLOR are ignored.
#
# @param doc receives +add_example(label_text, strokes)+ per non-empty cell
# @param page provides +width+ and +height+
# @param layer provides +each_text+ and +each_stroke+
# @param columns [Integer] number of grid columns
# @param rows [Integer] number of grid rows
# @return [Integer] +rows+ (the result of +rows.times+)
def process_layer(doc, page, layer, columns, rows)
  cell_width = page.width / columns.to_f
  cell_height = page.height / rows.to_f

  # Both lookups are keyed by [row, column] and create their bucket on demand.
  texts_by_cell = Hash.new { |cells, key| cells[key] = [] }
  strokes_by_cell = Hash.new { |cells, key| cells[key] = [] }

  layer.each_text do |txt|
    next if txt.text =~ /^#{META_LABEL}/

    cell = [(txt.y / cell_height).round.to_i, (txt.x / cell_width).round.to_i]
    texts_by_cell[cell] << txt
  end

  layer.each_stroke do |stroke|
    stroke_color = ChunkyPNG::Color.parse(stroke.color)
    # A distance of exactly zero means the stroke is drawn in the grid colour.
    next if ChunkyPNG::Color.euclidean_distance_rgba(stroke_color, PARSED_GRID_COLOR) == 0.0

    lower_corner, _upper_corner = stroke.minmax
    cell = [(lower_corner[1] / cell_height).to_i, (lower_corner[0] / cell_width).to_i]
    strokes_by_cell[cell] << stroke
  end

  rows.times do |row|
    origin_y = (row * cell_height).floor

    columns.times do |column|
      origin_x = (column * cell_width).floor
      cell = [row, column]
      # Only the first label found in a cell is used.
      label = texts_by_cell[cell].first
      cell_strokes = strokes_by_cell[cell]
      next if cell_strokes.empty? && label.nil?

      label_text = label ? label.text : nil
      # Move each stroke to the cell's origin, then scale by the cell size.
      # NOTE(review): the third scale factor presumably applies to stroke
      # width/pressure -- confirm against the stroke class.
      normalized = cell_strokes.map do |stroke|
        shifted = stroke.translate(-origin_x, -origin_y)
        shifted.scale(1.0 / cell_width, 1.0 / cell_height, 1.0 / cell_width)
      end
      doc.add_example(label_text, normalized)
    end
  end
end
read_meta_label(xournal) click to toggle source
# File lib/coo-coo/data_sources/xournal/training_document/document_reader.rb, line 30
# Finds the meta label in +xournal+ and parses the grid description from it.
#
# A meta label is a Text element with a line beginning with META_LABEL. The
# scan stops at the first such element within a layer but still visits the
# remaining layers and pages, so when several layers carry a meta label the
# one from the last such layer is used.
#
# @param xournal the document to scan; must respond to +each_page+, each page
#   to +each_layer+ and each layer to +each_text+
# @return [Array] +[version, columns, rows, cells_per_example]+ as
#   +[Float, Integer, Integer, Integer]+, with +cells_per_example+ defaulting
#   to 1 when the fourth capture is absent. All four entries are nil when no
#   meta label exists or when it does not match META_LABEL_REGEX.
def read_meta_label(xournal)
  meta = nil

  xournal.each_page do |page|
    page.each_layer do |layer|
      layer.each_text do |txt|
        if txt.text =~ /^#{META_LABEL}/
          meta = txt.text
          break
        end
      end
    end
  end

  # A label that starts with META_LABEL but is otherwise malformed yields no
  # match. Treat it like a missing label instead of calling [] on nil, so the
  # caller can report the problem.
  match = meta && meta.match(META_LABEL_REGEX)
  return [nil, nil, nil, nil] unless match

  [match[1].to_f, match[2].to_i, match[3].to_i, (match[4] || 1).to_i]
end