class Embulk::Parser::Mahout

Public Class Methods

transaction(config) { |task, columns| ... }
# File lib/embulk/parser/mahout.rb, line 7
def self.transaction(config, &control)
  # configuration code:
  task = {
    "command" => config.param("command", :string, default: "recommenditembased"),                     # integer, required
    "schema" => config.param("schema", :array)
  }

  columns = task["schema"].each_with_index.map do |col, index|
    Column.new(index, col["name"], col["type"].to_sym)
  end

  yield(task, columns)
end

Public Instance Methods

init()
# File lib/embulk/parser/mahout.rb, line 21
def init
  @command = task["command"]
  @col = task["columns"]
end
run(file_input)
# File lib/embulk/parser/mahout.rb, line 26
def run(file_input)
  while file = file_input.next_file

    text = file.read
    text.each_line do |row|

      record = []

      user_id = row.match(/^\d*/)[0]
      record.push(user_id)
      recommend_items = row.scan(/(\d*):(\d*\.\d*)/)
      recommend_items.each do |item|
        record += [item[0], item[1]]
      end
      page_builder.add(record)

    end

  end
  page_builder.finish
end