class TurbotRunner::Runner

Constants

RC_OK
RC_SCRAPER_FAILED
RC_TRANSFORMER_FAILED

Attributes

base_directory[R]

Public Class Methods

new(directory, options={}) click to toggle source
# File lib/turbot_runner/runner.rb, line 13
# Builds a runner for the bot stored in +directory+.
#
# directory - absolute path of the bot; its manifest.json is loaded into
#             @config.
# options   - Hash; the keys read here are :record_handler, :log_to_file,
#             :timeout, :scraper_provided and :output_directory.
#
# When :output_directory is given it must be absolute; otherwise output goes
# to an "output" directory underneath the bot directory.
def initialize(directory, options={})
  assert_absolute_path(directory)
  @base_directory = directory
  @config = load_config(directory)

  @record_handler, @log_to_file, @timeout, @scraper_provided =
    options.values_at(:record_handler, :log_to_file, :timeout, :scraper_provided)

  requested_output = options[:output_directory]
  assert_absolute_path(requested_output) if requested_output
  @output_directory = requested_output || File.join(@base_directory, 'output')
end

Public Instance Methods

process_output() click to toggle source
# File lib/turbot_runner/runner.rb, line 71
# Feeds the existing output of the scraper, and then of each transformer,
# through process_script_output. No script is executed here.
def process_output
  base = { :base_directory => @base_directory }

  process_script_output(scraper_config)
  transformers.each { |transformer_config| process_script_output(transformer_config.merge(base)) }
end
run() click to toggle source
# File lib/turbot_runner/runner.rb, line 29
# Runs the scraper (unless its output was provided up front) and then every
# transformer, passing the scraper's output file to each transformer as its
# input file.
#
# Returns RC_SCRAPER_FAILED if the scraper failed, otherwise
# RC_TRANSFORMER_FAILED if any transformer failed, otherwise RC_OK.
def run
  set_up_output_directory

  scraper_succeeded = @scraper_provided ? true : run_script(scraper_config)

  # Run the transformers even if the scraper fails. All of them are run
  # before the results are combined, so one failure does not stop the rest.
  transformer_results = transformers.map do |transformer_config|
    config = transformer_config.merge({
      :base_directory => @base_directory,
      # :duplicates_allowed => duplicates_allowed,
    })
    # The input file is a plain positional argument; the previous
    # `input_file=...` spelling only assigned an unused local variable.
    run_script(config, scraper_output_file)
  end

  return RC_SCRAPER_FAILED unless scraper_succeeded
  return RC_TRANSFORMER_FAILED unless transformer_results.all?

  RC_OK
end
set_up_output_directory() click to toggle source
# File lib/turbot_runner/runner.rb, line 57
# Creates the output directory if needed and deletes the .out/.err files
# left by an earlier run. The scraper's files are kept when the scraper
# output was provided, because nothing will regenerate them.
def set_up_output_directory
  FileUtils.mkdir_p(@output_directory)

  remove_outputs = lambda do |script|
    ['.out', '.err'].each { |extension| FileUtils.rm_f(output_file(script, extension)) }
  end

  remove_outputs.call('scraper') unless @scraper_provided
  transformers.each { |transformer_config| remove_outputs.call(transformer_config[:file]) }
end

Private Instance Methods

additional_args() click to toggle source
# File lib/turbot_runner/runner.rb, line 156
# Extra interpreter argument for the configured language, or nil when the
# language is neither "ruby" nor "python".
def additional_args
  case language
  when 'ruby'
    # Pre-load the prerun.rb that sits next to this file.
    "-r#{File.expand_path('../prerun.rb', __FILE__)}"
  when 'python'
    '-u'
  end
end
assert_absolute_path(path) click to toggle source

# The :duplicates_allowed entry of the loaded manifest (nil when absent).
def duplicates_allowed
  @config[:duplicates_allowed]
end

# File lib/turbot_runner/runner.rb, line 201
# Raises a RuntimeError unless +path+ is absolute; returns nil otherwise.
def assert_absolute_path(path)
  return if Pathname.new(path).absolute?

  raise "#{path} must be an absolute path"
end
build_command(script, input_file=nil) click to toggle source
# File lib/turbot_runner/runner.rb, line 136
# Builds the shell command line that runs +script+ with the configured
# interpreter.
#
# script     - script file name; its extension must match the configured
#              language.
# input_file - optional path redirected to the script's stdin.
#
# stdout is redirected to the script's .out file and, when @log_to_file is
# set, stderr to its .err file.
#
# Every word is shell-escaped, so directories or manifest-supplied file names
# containing spaces or shell metacharacters cannot break or alter the
# command. Words made only of ordinary path characters are unchanged.
#
# Raises RuntimeError if the script's extension does not match the language.
def build_command(script, input_file=nil)
  raise "Could not run #{script} with #{language}" unless script_extension == File.extname(script)

  # Standard library; required here so this method does not rely on a
  # file-level require being present.
  require 'shellwords'

  words = [full_interpreter_path, additional_args, script].compact
  command = words.map { |word| Shellwords.escape(word) }.join(' ')
  command << " >#{Shellwords.escape(output_file(script))}"
  command << " 2>#{Shellwords.escape(output_file(script, '.err'))}" if @log_to_file
  command << " <#{Shellwords.escape(input_file)}" unless input_file.nil?
  command
end
full_interpreter_path() click to toggle source
# File lib/turbot_runner/runner.rb, line 80
# Interpreter used to launch scripts.
def full_interpreter_path
  configured = language

  # Anything other than ruby is used as the command name itself, i.e. the
  # first match in PATH (in practice: python).
  return configured unless configured == "ruby"

  # Use the same ruby as the current interpreter when creating a subshell.
  # Necessary for the OSX packaged version.
  RbConfig.ruby
end
language() click to toggle source
# File lib/turbot_runner/runner.rb, line 185
# The manifest's :language value, lower-cased.
def language
  configured = @config[:language]
  configured.downcase
end
load_config(directory) click to toggle source
# File lib/turbot_runner/runner.rb, line 91
# Reads and parses manifest.json from +directory+.
#
# Returns the parsed manifest as a Hash with symbol keys.
# Raises RuntimeError if the manifest is missing or is not valid JSON.
def load_config(directory)
  manifest_path = File.join(directory, 'manifest.json')
  raise "Could not find #{manifest_path}" unless File.exist?(manifest_path)

  begin
    # File.read rather than Kernel#open: Kernel#open treats a leading "|" as
    # a command to run, and File.read also closes the file for us.
    JSON.parse(File.read(manifest_path), :symbolize_names => true)
  rescue JSON::ParserError
    # TODO provide better error message
    raise "Could not parse #{manifest_path} as JSON"
  end
end
output_file(script, extension='.out') click to toggle source
# File lib/turbot_runner/runner.rb, line 144
# Path inside the output directory for +script+: the script's base name with
# the language's script extension removed and +extension+ appended.
def output_file(script, extension='.out')
  stem = File.basename(script, script_extension)
  destination = File.join(@output_directory, stem)
  destination + extension
end
process_script_output(script_config) click to toggle source
# File lib/turbot_runner/runner.rb, line 118
# Passes every line of a script's existing output file to a Processor so the
# record handler sees the records again.
#
# script_config - Hash describing the script; :file selects the output file.
#
# A missing output file is skipped silently. Any other error is propagated,
# including Errno::ENOENT raised while processing a line (for instance when
# a schema file is missing).
def process_script_output(script_config)
  # The first argument to the Processor constructor is a nil
  # Runner. This is because no running behaviour
  # (e.g. interruptions etc) is required; we just want to do
  # record handling.
  processor = Processor.new(nil, script_config, @record_handler)
  file = output_file(script_config[:file])

  # Only the open call is rescued. The old code compared the exception
  # message, whose wording differs between Ruby versions
  # ("... @ rb_sysopen - path"), so a missing file was re-raised.
  begin
    output = File.open(file)
  rescue Errno::ENOENT
    return
  end

  begin
    output.each_line { |line| processor.process(line) }
  ensure
    output.close
  end
end
run_script(script_config, input_file=nil) click to toggle source
# File lib/turbot_runner/runner.rb, line 105
# Runs one script through a ScriptRunner.
#
# script_config - Hash describing the script; :file names the script.
# input_file    - optional path handed to build_command as the input file.
#
# Returns whatever ScriptRunner#run returns (a boolean indicating success).
def run_script(script_config, input_file=nil)
  script = script_config[:file]

  runner = ScriptRunner.new(
    build_command(script, input_file),
    output_file(script),
    script_config,
    :record_handler => @record_handler,
    :timeout => @timeout
  )
  runner.run
end
scraper_config() click to toggle source
# File lib/turbot_runner/runner.rb, line 163
# Script configuration Hash for the scraper, assembled from the bot directory
# and the manifest-derived values.
def scraper_config
  config = { :base_directory => @base_directory }
  config[:file] = scraper_script
  config[:data_type] = scraper_data_type
  config[:identifying_fields] = scraper_identifying_fields
  # :duplicates_allowed => duplicates_allowed,
  config
end
scraper_data_type() click to toggle source
# File lib/turbot_runner/runner.rb, line 189
# The :data_type entry of the loaded manifest (@config); nil when the
# manifest has no such key.
def scraper_data_type
  @config[:data_type]
end
scraper_identifying_fields() click to toggle source
# File lib/turbot_runner/runner.rb, line 193
# The :identifying_fields entry of the loaded manifest (@config); nil when
# the manifest has no such key.
def scraper_identifying_fields
  @config[:identifying_fields]
end
scraper_output_file() click to toggle source
# File lib/turbot_runner/runner.rb, line 181
# Path of "scraper.out" inside the output directory. #run passes this path to
# every transformer as its input file.
def scraper_output_file
  File.join(@output_directory, 'scraper.out')
end
scraper_script() click to toggle source
# File lib/turbot_runner/runner.rb, line 173
# File name of the scraper script: "scraper" followed by the extension for
# the configured language.
def scraper_script
  extension = script_extension
  "scraper#{extension}"
end
script_extension() click to toggle source
# File lib/turbot_runner/runner.rb, line 149
# File extension used by scripts in the configured language, or nil when the
# language is neither "ruby" nor "python".
def script_extension
  case language
  when 'ruby' then '.rb'
  when 'python' then '.py'
  end
end
transformers() click to toggle source
# File lib/turbot_runner/runner.rb, line 177
# Transformer configurations listed in the manifest; an empty Array when the
# manifest does not define any.
def transformers
  configured = @config[:transformers]
  return [] unless configured

  configured
end