module Spark

Spark::Accumulator::Server

Server for handling Accumulator updates

Spark::JavaBridge::Base

Parent for all adapters (Ruby - Java)

Random Generators

Samplers

Constants

DEFAULT_CONFIG_FILE
VERSION

Public Class Methods

clear_config()

Destroys the current configuration. Useful when you want to reset the configuration and set new values. It has no effect if the context is already started.

# File lib/spark.rb, line 75
def self.clear_config
  @config = nil
end
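
A minimal usage sketch (the app names are illustrative): reset the configuration to start from scratch before the context is created.

Spark.config.set('spark.app.name', 'FirstApp')
Spark.clear_config                              # drop the old Spark::Config
Spark.config.set('spark.app.name', 'SecondApp') # a fresh config is created
Spark.start                                     # 'SecondApp' is used from here on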
config(&block)

Returns the current configuration. The configuration can be changed until the context is initialized; after that it is locked for reading only.

The configuration can be changed in several ways:

Spark.config.set('spark.app.name', 'RubySpark')

Spark.config['spark.app.name'] = 'RubySpark'

Spark.config do
  set 'spark.app.name', 'RubySpark'
end
# File lib/spark.rb, line 63
def self.config(&block)
  @config ||= Spark::Config.new

  if block_given?
    @config.instance_eval(&block)
  else
    @config
  end
end
context()

Returns the currently active context, or nil if none has been started.

TODO: Run `start` if context is nil?

# File lib/spark.rb, line 83
def self.context
  @context
end
Also aliased as: sc
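
A short sketch of the accessor around the context lifecycle:

Spark.context   # => nil, nothing started yet
Spark.start
Spark.sc        # => the running Spark::Context (sc is an alias)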
home()
Alias for: root
java_bridge()
# File lib/spark.rb, line 218
def self.java_bridge
  @java_bridge
end
Also aliased as: jb
jb()
Alias for: java_bridge
load_defaults()

Loads the default configuration for Spark and RubySpark. By default the values are stored at ~/.ruby-spark.conf. The file is created automatically if it does not exist.

# File lib/spark.rb, line 120
def self.load_defaults
  unless File.exist?(DEFAULT_CONFIG_FILE)
    save_defaults_to(DEFAULT_CONFIG_FILE)
  end

  load_defaults_from(DEFAULT_CONFIG_FILE)
end
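
For illustration, a generated ~/.ruby-spark.conf looks roughly like this (the target path is machine-specific; see save_defaults_to for how it is written):

# Directory where Spark will be saved
gem.target   /home/user/.ruby-spark.5b3f...

# You can also define Spark properties
# spark.master                       spark://master:7077
# spark.ruby.serializer              marshal
# spark.ruby.serializer.batch_size   2048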
load_defaults_from(file_path)

Clears the previous settings and loads new ones from a file.

# File lib/spark.rb, line 129
def self.load_defaults_from(file_path)
  # Parse values
  values = File.readlines(file_path)
  values.map!(&:strip)
  values.select!{|value| value.start_with?('gem.')}
  values.map!{|value| value.split(nil, 2)}
  values = Hash[values]

  # Clear prev values
  @target_dir = nil
  @ruby_spark_jar = nil
  @spark_home = nil

  # Load new
  @target_dir = values['gem.target']
end
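
Only lines starting with gem. are consumed here; spark.* properties in the same file are handled elsewhere. A sketch (the file path and contents are illustrative):

# Given a file containing:
#   gem.target     /opt/ruby-spark
#   spark.master   spark://master:7077
Spark.load_defaults_from(File.join(Dir.home, '.ruby-spark.conf'))
Spark.target_dir   # => "/opt/ruby-spark" (only the gem.target line was used)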
load_lib(target=nil)

Loads dependent libraries. Can be used only once. Cannot be called before CLI::install.

Parameters:

target

path to a directory containing Spark's .jar files, or to a single Spark jar

# File lib/spark.rb, line 208
def self.load_lib(target=nil)
  return if @java_bridge

  target ||= Spark.target_dir

  @java_bridge = JavaBridge.init(target)
  @java_bridge.import_all
  nil
end
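
A usage sketch (the jar directory is hypothetical); once the bridge exists, further calls are no-ops:

Spark.load_lib('/opt/spark/jars')   # or a path to a single Spark jar
Spark.jb                            # => the initialized JavaBridge adapter
Spark.load_lib                      # no-op, the bridge is already loaded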
logger()

Returns the global logger, creating a Spark::Logger on first use.

# File lib/spark.rb, line 170
def self.logger
  @logger ||= Spark::Logger.new
end
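
The same instance is shared across the gem; stop, for example, logs through it. A minimal sketch (the message is illustrative):

Spark.logger.info('Context is ready')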
root()

Root of the gem

# File lib/spark.rb, line 175
def self.root
  @root ||= File.expand_path('..', File.dirname(__FILE__))
end
Also aliased as: home
ruby_spark_jar()
# File lib/spark.rb, line 189
def self.ruby_spark_jar
  @ruby_spark_jar ||= File.join(target_dir, 'ruby-spark.jar')
end
save_defaults_to(file_path)

Creates the target directory and a new config file.

# File lib/spark.rb, line 147
def self.save_defaults_to(file_path)
  dir = File.join(Dir.home, ".ruby-spark.#{SecureRandom.uuid}")

  if Dir.exist?(dir)
    save_defaults_to(file_path)
  else
    Dir.mkdir(dir, 0700)
    file = File.open(file_path, 'w')
    file.puts "# Directory where will be Spark saved"
    file.puts "gem.target   #{dir}"
    file.puts ""
    file.puts "# You can also defined spark properties"
    file.puts "# spark.master                       spark://master:7077"
    file.puts "# spark.ruby.serializer              marshal"
    file.puts "# spark.ruby.serializer.batch_size   2048"
    file.close
  end
end
sc()
Alias for: context
spark_ext_dir()
# File lib/spark.rb, line 193
def self.spark_ext_dir
  @spark_ext_dir ||= File.join(root, 'ext', 'spark')
end
start()

Initializes the Spark context if it is not already started. The configuration is loaded automatically by the constructor. From that point on, `config` uses the configuration of the running Spark and is locked for reading only.

# File lib/spark.rb, line 90
def self.start
  # Create the context only if it does not exist yet
  @context ||= Spark::Context.new
end
started?()
# File lib/spark.rb, line 109
def self.started?
  !!@context
end
stop()
# File lib/spark.rb, line 98
def self.stop
  @context.stop
  RubyWorker.stopServer
  logger.info('Workers were stopped')
rescue
  # Ignore any error raised during shutdown
  nil
ensure
  # Always drop the context and reset the configuration
  @context = nil
  clear_config
end
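
A typical lifecycle tying the pieces together (the app name is illustrative):

Spark.config['spark.app.name'] = 'RubySpark'
Spark.start
Spark.started?   # => true
# ... work with Spark.sc ...
Spark.stop       # stops workers, drops the context, clears the config
Spark.started?   # => false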
target_dir()

Default directory for Java extensions

# File lib/spark.rb, line 180
def self.target_dir
  @target_dir ||= File.join(root, 'target')
end
worker_dir()

Directory where worker.rb is located

# File lib/spark.rb, line 185
def self.worker_dir
  @worker_dir ||= File.join(root, 'lib', 'spark', 'worker')
end