class Spark::Config

Common configuration for RubySpark and Spark

Constants

TYPES

Public Class Methods

new() click to toggle source

Initialize java SparkConf and load default configuration.

# File lib/spark/config.rb, line 16
# Build a fresh java SparkConf, apply the built-in defaults and then
# overlay values from Spark::DEFAULT_CONFIG_FILE (when it exists).
# NOTE(review): the +true+ passed to SparkConf.new presumably enables
# loading of java system defaults — confirm against the SparkConf API.
def initialize
  @spark_conf = SparkConf.new(true)
  set_default
  from_file(Spark::DEFAULT_CONFIG_FILE)
end

Public Instance Methods

[](key) click to toggle source
# File lib/spark/config.rb, line 31
# Sugar for #get — read a configuration value (nil when the key is
# missing or cannot be parsed).
def [](key)
  get(key)
end
[]=(key, value) click to toggle source
# File lib/spark/config.rb, line 35
# Sugar for #set — write a configuration value.  Raises
# Spark::ConfigurationError when the configuration is read only.
def []=(key, value)
  set(key, value)
end
contains?(key) click to toggle source
# File lib/spark/config.rb, line 104
# True when +key+ is present in the underlying SparkConf.
# The key is stringified first, so symbols work too.
def contains?(key)
  name = key.to_s
  spark_conf.contains(name)
end
default_executor_command() click to toggle source

Command template which is applied when Scala wants to create a Ruby process (e.g. master, home request). The command is represented by '%s'.

Example:

bash --norc -i -c "export HOME=/home/user; cd; source .bashrc; %s"
# File lib/spark/config.rb, line 169
# Command template used when Scala spawns a ruby process (e.g. master,
# home request); the actual command is substituted for '%s'.
# Overridable through SPARK_RUBY_EXECUTOR_COMMAND, e.g.:
#
#   bash --norc -i -c "export HOME=/home/user; cd; source .bashrc; %s"
def default_executor_command
  ENV.fetch('SPARK_RUBY_EXECUTOR_COMMAND', '%s')
end
default_executor_options() click to toggle source

Options for every worker.

Example:

-J-Xmx512m
# File lib/spark/config.rb, line 178
# Extra options passed to every worker (e.g. -J-Xmx512m).
# Overridable through SPARK_RUBY_EXECUTOR_OPTIONS; defaults to none.
def default_executor_options
  ENV.fetch('SPARK_RUBY_EXECUTOR_OPTIONS', '')
end
default_serializer() click to toggle source
# File lib/spark/config.rb, line 151
# Serializer name, overridable through SPARK_RUBY_SERIALIZER.
# The block keeps the library default lazily evaluated, exactly like
# the original `||` short-circuit.
def default_serializer
  ENV.fetch('SPARK_RUBY_SERIALIZER') { Spark::Serializer::DEFAULT_SERIALIZER_NAME }
end
default_serializer_batch_size() click to toggle source
# File lib/spark/config.rb, line 159
# Serializer batch size, overridable through
# SPARK_RUBY_SERIALIZER_BATCH_SIZE.  Note: an ENV override is returned
# as a String (same as the original), callers stringify/parse it.
def default_serializer_batch_size
  ENV.fetch('SPARK_RUBY_SERIALIZER_BATCH_SIZE') { Spark::Serializer::DEFAULT_BATCH_SIZE }
end
default_serializer_compress() click to toggle source
# File lib/spark/config.rb, line 155
# Serializer compression flag, overridable through
# SPARK_RUBY_SERIALIZER_COMPRESS.
def default_serializer_compress
  ENV.fetch('SPARK_RUBY_SERIALIZER_COMPRESS') { Spark::Serializer::DEFAULT_COMPRESS }
end
default_worker_type() click to toggle source

Type of worker.

Options:

process

(default)

thread

(experimental)

# File lib/spark/config.rb, line 198
# Worker type, overridable through SPARK_RUBY_WORKER_TYPE.
#
# Options:
#   process - default
#   thread  - experimental
def default_worker_type
  ENV.fetch('SPARK_RUBY_WORKER_TYPE', 'process')
end
from_file(file) click to toggle source
# File lib/spark/config.rb, line 22
# Merge configuration from a java properties file.  A nil or missing
# file is a silent no-op.  Raises Spark::ConfigurationError when the
# configuration is already read only.
def from_file(file)
  check_read_only

  return unless file && File.exist?(file)

  RubyUtils.loadPropertiesFile(spark_conf, File.expand_path(file))
end
get(key) click to toggle source

Rescue from NoSuchElementException

# File lib/spark/config.rb, line 85
# Fetch +key+ from SparkConf and coerce it according to TYPES.
# Returns nil when the key is missing — the underlying lookup raises
# (NoSuchElementException on the java side) and is swallowed here.
def get(key)
  raw = spark_conf.get(key.to_s)

  case TYPES[key]
  when :boolean then parse_boolean(raw)
  when :integer then parse_integer(raw)
  else raw
  end
rescue
  nil
end
getAll()

Aliases

Alias for: get_all
get_all() click to toggle source
# File lib/spark/config.rb, line 100
# Dump the whole configuration as a plain ruby Hash (SparkConf#getAll
# yields scala tuples; _1 is the key, _2 the value).
def get_all
  spark_conf.getAll.each_with_object({}) do |tuple, hash|
    hash[tuple._1] = tuple._2
  end
end
Also aliased as: getAll
load_executor_envs() click to toggle source

Load environment variables for executor from ENV.

Examples:

SPARK_RUBY_EXECUTOR_ENV_KEY1="1"
SPARK_RUBY_EXECUTOR_ENV_KEY2="2"
# File lib/spark/config.rb, line 208
# Copy every SPARK_RUBY_EXECUTOR_ENV_* variable from ENV into the
# configuration as spark.ruby.executor.env.*.
#
# Examples:
#
#   SPARK_RUBY_EXECUTOR_ENV_KEY1="1"
#   SPARK_RUBY_EXECUTOR_ENV_KEY2="2"
def load_executor_envs
  prefix = 'SPARK_RUBY_EXECUTOR_ENV_'

  ENV.each do |name, value|
    next unless name.start_with?(prefix)

    # Keep only the part after the prefix as the config suffix.
    suffix = name[prefix.size..-1]
    set("spark.ruby.executor.env.#{suffix}", value)
  end
end
parse_boolean(value) click to toggle source
# File lib/spark/config.rb, line 121
# Convert the SparkConf string 'true'/'false' to a real boolean.
# Any other value (including nil) maps to nil.
def parse_boolean(value)
  return true  if value == 'true'
  return false if value == 'false'

  nil
end
parse_integer(value) click to toggle source
# File lib/spark/config.rb, line 130
# Coerce a SparkConf string to Integer via String#to_i.
# NOTE(review): #to_i silently returns 0 for non-numeric input — confirm
# callers accept that before tightening to Integer().
def parse_integer(value)
  value.to_i
end
read_only?() click to toggle source
# File lib/spark/config.rb, line 80
# The configuration becomes read only once the Spark context has started
# (mutations then raise via #check_read_only).
def read_only?
  Spark.started?
end
set(key, value) click to toggle source
# File lib/spark/config.rb, line 108
# Set a configuration key.  Both key and value are stringified before
# being stored in the java SparkConf.  Raises Spark::ConfigurationError
# when the configuration is read only.
def set(key, value)
  check_read_only
  spark_conf.set(key.to_s, value.to_s)
end
setAppName(name)
Alias for: set_app_name
setMaster(master)
Alias for: set_master
set_app_name(name) click to toggle source
# File lib/spark/config.rb, line 113
# Convenience setter for 'spark.app.name' (also aliased as setAppName).
def set_app_name(name)
  set('spark.app.name', name)
end
Also aliased as: setAppName
set_default() click to toggle source

Defaults

# File lib/spark/config.rb, line 137
# Populate the configuration with built-in defaults.  Called from
# #initialize before the default config file is loaded, so file values
# can still override these.
def set_default
  set_app_name('RubySpark')
  set_master('local[*]')
  set('spark.ruby.driver_home', Spark.home)
  set('spark.ruby.serializer', default_serializer)
  set('spark.ruby.serializer.compress', default_serializer_compress)
  set('spark.ruby.serializer.batch_size', default_serializer_batch_size)
  set('spark.ruby.executor.command', default_executor_command)
  set('spark.ruby.executor.options', default_executor_options)
  set('spark.ruby.worker.type', default_worker_type)
  load_executor_envs
  # set('spark.ruby.executor.install', default_executor_install)
end
set_master(master) click to toggle source
# File lib/spark/config.rb, line 117
# Convenience setter for 'spark.master' (also aliased as setMaster).
def set_master(master)
  set('spark.master', master)
end
Also aliased as: setMaster
spark_conf() click to toggle source
# File lib/spark/config.rb, line 39
# The SparkConf currently in effect: the running context's live
# configuration once Spark has started, the locally built one before.
def spark_conf
  return Spark.context.jcontext.conf if Spark.started?

  @spark_conf
end
valid!() click to toggle source
# File lib/spark/config.rb, line 48
# Validate the configuration and raise Spark::ConfigurationError listing
# every problem found: missing app name or master URL, an unknown
# serializer, or a malformed executor command template.
def valid!
  errors = []

  unless contains?('spark.app.name')
    errors << 'An application name must be set in your configuration.'
  end

  unless contains?('spark.master')
    errors << 'A master URL must be set in your configuration.'
  end

  if Spark::Serializer.find(get('spark.ruby.serializer')).nil?
    # Fixed typo in the user-facing message ('Unknow' -> 'Unknown').
    errors << 'Unknown serializer.'
  end

  # The executor command is a template: exactly one '%s' placeholder is
  # later substituted with the actual worker command.
  scanned = get('spark.ruby.executor.command').scan('%s')

  if scanned.empty?
    errors << "Executor command must contain '%s'."
  end

  if scanned.size > 1
    errors << "Executor command can contain only one '%s'."
  end

  if errors.any?
    errors.map!{|error| "- #{error}"}

    raise Spark::ConfigurationError, "Configuration is not valid:\r\n#{errors.join("\r\n")}"
  end
end

Private Instance Methods

check_read_only() click to toggle source
# File lib/spark/config.rb, line 227
# Guard used by every mutating method: once the configuration is read
# only (i.e. the Spark context has started) any further change raises.
# Fixed typo in the error message ('ready only' -> 'read only').
def check_read_only
  if read_only?
    raise Spark::ConfigurationError, 'Configuration is read only'
  end
end