class Spark::Config
Common configuration for RubySpark and Spark
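A minimal usage sketch, using only the methods documented below; the app name, master URL, and batch size are illustrative values, and the require path is an assumption:

  require 'ruby-spark'

  config = Spark::Config.new
  config.set_app_name('MyApp')                        # alias: setAppName
  config.set_master('local[2]')                       # alias: setMaster
  config['spark.ruby.serializer.batch_size'] = 2048   # []= delegates to set
  config.valid!                                       # raises Spark::ConfigurationError when invalid

Configuration must happen before the context starts; afterwards the object becomes read-only (see read_only? and check_read_only).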
Constants
- TYPES
Public Class Methods
new()
Initialize the Java SparkConf and load the default configuration.
  # File lib/spark/config.rb, line 16
  def initialize
    @spark_conf = SparkConf.new(true)
    set_default
    from_file(Spark::DEFAULT_CONFIG_FILE)
  end
Public Instance Methods
[](key)
  # File lib/spark/config.rb, line 31
  def [](key)
    get(key)
  end
[]=(key, value)
  # File lib/spark/config.rb, line 35
  def []=(key, value)
    set(key, value)
  end
contains?(key)
  # File lib/spark/config.rb, line 104
  def contains?(key)
    spark_conf.contains(key.to_s)
  end
default_executor_command()
Command template which is applied when Scala wants to create a Ruby process (e.g. for a master or home request). The command itself is represented by '%s'.
Example:

  bash --norc -i -c "export HOME=/home/user; cd; source .bashrc; %s"
  # File lib/spark/config.rb, line 169
  def default_executor_command
    ENV['SPARK_RUBY_EXECUTOR_COMMAND'] || '%s'
  end
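A sketch of overriding the template through the environment, reusing the bash command from the example above; the variable must be exported before the config is created, because set_default copies it into 'spark.ruby.executor.command':

  ENV['SPARK_RUBY_EXECUTOR_COMMAND'] =
    'bash --norc -i -c "export HOME=/home/user; cd; source .bashrc; %s"'

  config = Spark::Config.new
  config['spark.ruby.executor.command'] # => the template above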
default_executor_options()
default_serializer()
  # File lib/spark/config.rb, line 151
  def default_serializer
    ENV['SPARK_RUBY_SERIALIZER'] || Spark::Serializer::DEFAULT_SERIALIZER_NAME
  end
default_serializer_batch_size()
  # File lib/spark/config.rb, line 159
  def default_serializer_batch_size
    ENV['SPARK_RUBY_SERIALIZER_BATCH_SIZE'] || Spark::Serializer::DEFAULT_BATCH_SIZE
  end
default_serializer_compress()
  # File lib/spark/config.rb, line 155
  def default_serializer_compress
    ENV['SPARK_RUBY_SERIALIZER_COMPRESS'] || Spark::Serializer::DEFAULT_COMPRESS
  end
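A sketch of driving the serializer defaults from the environment; 'marshal' is assumed here to be a registered serializer name:

  ENV['SPARK_RUBY_SERIALIZER'] = 'marshal'           # assumed to be registered
  ENV['SPARK_RUBY_SERIALIZER_BATCH_SIZE'] = '2048'

  config = Spark::Config.new
  config['spark.ruby.serializer']            # => 'marshal'
  config['spark.ruby.serializer.batch_size'] # => 2048, assuming TYPES marks the key :integer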
default_worker_type()
from_file(file)
  # File lib/spark/config.rb, line 22
  def from_file(file)
    check_read_only

    if file && File.exist?(file)
      file = File.expand_path(file)
      RubyUtils.loadPropertiesFile(spark_conf, file)
    end
  end
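A sketch assuming the file uses standard Java .properties syntax (it is loaded through the gem's RubyUtils Java helper); the path and keys below are illustrative:

  # ~/spark.properties (hypothetical contents):
  #   spark.app.name  MyApp
  #   spark.master    local[4]

  config = Spark::Config.new
  config.from_file('~/spark.properties') # path is expanded before loading
  config['spark.master']                 # => 'local[4]'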
get(key)
Rescues from NoSuchElementException; returns nil for missing keys.
  # File lib/spark/config.rb, line 85
  def get(key)
    value = spark_conf.get(key.to_s)

    case TYPES[key]
    when :boolean
      parse_boolean(value)
    when :integer
      parse_integer(value)
    else
      value
    end
  rescue
    nil
  end
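A sketch of the typed lookup, assuming TYPES maps 'spark.ruby.serializer.compress' to :boolean:

  config = Spark::Config.new
  config.set('spark.ruby.serializer.compress', true) # stored as the string 'true'
  config.get('spark.ruby.serializer.compress')       # => true (parsed back by parse_boolean)
  config.get('spark.does.not.exist')                 # => nil (the lookup exception is rescued)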
get_all()
  # File lib/spark/config.rb, line 100
  def get_all
    Hash[spark_conf.getAll.map{|tuple| [tuple._1, tuple._2]}]
  end
Also aliased as: getAll
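The Scala tuples come back as a plain Ruby Hash, so the defaults are directly inspectable:

  config = Spark::Config.new
  config.get_all['spark.app.name'] # => 'RubySpark' (default from set_default)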
load_executor_envs()
Load environment variables for the executor from ENV.
Examples:

  SPARK_RUBY_EXECUTOR_ENV_KEY1="1"
  SPARK_RUBY_EXECUTOR_ENV_KEY2="2"
  # File lib/spark/config.rb, line 208
  def load_executor_envs
    prefix = 'SPARK_RUBY_EXECUTOR_ENV_'

    envs = ENV.select{|key, _| key.start_with?(prefix)}
    envs.each do |key, value|
      key = key.dup # ENV keys are frozen
      key.slice!(0, prefix.size)
      set("spark.ruby.executor.env.#{key}", value)
    end
  end
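A sketch of the round trip; the variable must exist before Spark::Config.new, since set_default calls load_executor_envs during initialization:

  ENV['SPARK_RUBY_EXECUTOR_ENV_KEY1'] = '1'

  config = Spark::Config.new
  config['spark.ruby.executor.env.KEY1'] # => '1' (prefix stripped, remainder kept as-is)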
parse_boolean(value)
  # File lib/spark/config.rb, line 121
  def parse_boolean(value)
    case value
    when 'true'
      true
    when 'false'
      false
    end
  end
parse_integer(value)
  # File lib/spark/config.rb, line 130
  def parse_integer(value)
    value.to_i
  end
read_only?()
  # File lib/spark/config.rb, line 80
  def read_only?
    Spark.started?
  end
set(key, value)
  # File lib/spark/config.rb, line 108
  def set(key, value)
    check_read_only
    spark_conf.set(key.to_s, value.to_s)
  end
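Keys and values are coerced with to_s, so symbols and numbers are accepted; a short sketch with illustrative keys:

  config = Spark::Config.new
  config.set(:'spark.app.name', 'MyApp') # same as the string key
  config.set('spark.cores.max', 4)       # stored as '4'
  # Once Spark.started? is true, check_read_only raises Spark::ConfigurationError.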
set_app_name(name)
  # File lib/spark/config.rb, line 113
  def set_app_name(name)
    set('spark.app.name', name)
  end
Also aliased as: setAppName
set_default()
Defaults
  # File lib/spark/config.rb, line 137
  def set_default
    set_app_name('RubySpark')
    set_master('local[*]')
    set('spark.ruby.driver_home', Spark.home)
    set('spark.ruby.serializer', default_serializer)
    set('spark.ruby.serializer.compress', default_serializer_compress)
    set('spark.ruby.serializer.batch_size', default_serializer_batch_size)
    set('spark.ruby.executor.command', default_executor_command)
    set('spark.ruby.executor.options', default_executor_options)
    set('spark.ruby.worker.type', default_worker_type)
    load_executor_envs
    # set('spark.ruby.executor.install', default_executor_install)
  end
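Since initialize calls set_default, a fresh config already carries these values:

  config = Spark::Config.new
  config['spark.app.name'] # => 'RubySpark'
  config['spark.master']   # => 'local[*]'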
set_master(master)
  # File lib/spark/config.rb, line 117
  def set_master(master)
    set('spark.master', master)
  end
Also aliased as: setMaster
spark_conf()
  # File lib/spark/config.rb, line 39
  def spark_conf
    if Spark.started?
      # Get latest configuration
      Spark.context.jcontext.conf
    else
      @spark_conf
    end
  end
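Before the context starts this returns the locally built SparkConf; afterwards it delegates to the live context, so reads always reflect the running session:

  config = Spark::Config.new
  config.spark_conf # the local SparkConf instance
  # After Spark.start it returns Spark.context.jcontext.conf instead.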
valid!()
  # File lib/spark/config.rb, line 48
  def valid!
    errors = []

    if !contains?('spark.app.name')
      errors << 'An application name must be set in your configuration.'
    end

    if !contains?('spark.master')
      errors << 'A master URL must be set in your configuration.'
    end

    if Spark::Serializer.find(get('spark.ruby.serializer')).nil?
      errors << 'Unknown serializer.'
    end

    scanned = get('spark.ruby.executor.command').scan('%s')

    if scanned.size == 0
      errors << "Executor command must contain '%s'."
    end

    if scanned.size > 1
      errors << "Executor command can contain only one '%s'."
    end

    if errors.any?
      errors.map!{|error| "- #{error}"}
      raise Spark::ConfigurationError, "Configuration is not valid:\r\n#{errors.join("\r\n")}"
    end
  end
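A sketch of a failing validation, using the executor-command rule from the source above:

  config = Spark::Config.new
  config.set('spark.ruby.executor.command', 'echo') # missing the '%s' placeholder

  begin
    config.valid!
  rescue Spark::ConfigurationError => e
    puts e.message # includes "- Executor command must contain '%s'."
  end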
Private Instance Methods
check_read_only()
  # File lib/spark/config.rb, line 227
  def check_read_only
    if read_only?
      raise Spark::ConfigurationError, 'Configuration is read only'
    end
  end