class RP::EMR::Step::Pig

Public Instance Methods

to_hash() click to toggle source
# File lib/rp/emr/step/pig.rb, line 20
# Returns the hash form of the underlying step (+step.to_hash+), cached in
# +@hash+ after the first successful call.
#
# On the first call the script is uploaded via +upload_script!+ before the
# step is built, unless +dry_run+ is truthy.
def to_hash
  return @hash if @hash

  upload_script! unless dry_run
  @hash = step.to_hash
end

Private Instance Methods

formatted_params() click to toggle source
# File lib/rp/emr/step/pig.rb, line 69
# Builds the trailing command-line arguments for the Pig invocation:
# '--args', then '-f' with the script URL, then one '-p key=value' pair for
# every entry of +pig_params+ whose value is not nil.
def formatted_params
  script_flags = ['--args', '-f', script_url]

  param_flags = pig_params.flat_map do |key, value|
    # Entries with a nil value contribute nothing to the argument list.
    value.nil? ? [] : ['-p', "#{key}=#{value}"]
  end

  script_flags + param_flags
end
hadoop_jar_base_args() click to toggle source
# File lib/rp/emr/step/pig.rb, line 60
# Leading arguments handed to the jar: the pig-script launcher location,
# its base path, the requested +pig_version+, and the flag that tells the
# launcher to run a Pig script.
def hadoop_jar_base_args
  launcher  = 's3://us-east-1.elasticmapreduce/libs/pig/pig-script'
  base_path = 's3://us-east-1.elasticmapreduce/libs/pig/'

  [launcher, '--base-path', base_path, '--pig-versions', pig_version, '--run-pig-script']
end
s3() click to toggle source
# File lib/rp/emr/step/pig.rb, line 78
# Instantiates AWS::S3 with no arguments. The result is not cached, so every
# call returns a new object.
# NOTE(review): credentials/region presumably come from global AWS
# configuration set elsewhere -- confirm.
def s3
  client = AWS::S3.new
  client
end
script() click to toggle source
# File lib/rp/emr/step/pig.rb, line 34
# Returns the contents of the file at +script_path+ as a String, cached in
# +@script+ after the first read.
#
# Raises whatever File.read raises when the file cannot be opened
# (e.g. Errno::ENOENT for a missing path).
def script
  # File.read opens, reads and closes the file in a single call. The earlier
  # File.open(...).read form never closed the handle, leaving the descriptor
  # open until the File object happened to be garbage collected.
  @script ||= File.read(script_path)
end
script_key() click to toggle source
# File lib/rp/emr/step/pig.rb, line 38
# Object key under which the script is stored: the script's file name
# (without the '.pig' extension) suffixed with the MD5 hex digest of the
# script contents, placed under 'scripts/emr_gem/'. Cached in +@script_key+.
def script_key
  return @script_key if @script_key

  digest = Digest::MD5.hexdigest(script)
  stem = File.basename(script_path, '.pig')
  @script_key = "scripts/emr_gem/#{stem}_#{digest}.pig"
end
script_url() click to toggle source
# File lib/rp/emr/step/pig.rb, line 45
# s3:// URL of the script, composed from +script_bucket+ and +script_key+.
def script_url
  bucket = script_bucket
  key = script_key
  "s3://#{bucket}/#{key}"
end
step() click to toggle source
# File lib/rp/emr/step/pig.rb, line 49
# Builds a new RP::EMR::Step that runs script-runner.jar with the
# concatenation of +hadoop_jar_base_args+, +args+ and +formatted_params+ as
# its arguments. A new step object is constructed on every call.
def step
  step_name = name
  failure_action = action_on_failure
  jar_args = hadoop_jar_base_args + args + formatted_params

  RP::EMR::Step.new(
    name: step_name,
    action_on_failure: failure_action,
    hadoop_jar_step: {
      jar: 's3://us-east-1.elasticmapreduce/libs/script-runner/script-runner.jar',
      args: jar_args,
    }
  )
end
upload_script!() click to toggle source
# File lib/rp/emr/step/pig.rb, line 29
# Writes the script contents to the object at +script_key+ inside the
# +script_bucket+ bucket, using the client returned by +s3+. Returns
# whatever the object's +write+ call returns.
def upload_script!
  bucket = s3.buckets[script_bucket]
  target = bucket.objects[script_key]
  target.write(script)
end