class DNN::Layers::BatchNormalization
Attributes
axis[R]
beta[R]
eps[RW]
gamma[R]
momentum[RW]
running_mean[R]
running_var[R]
Public Class Methods
new(axis: 0, momentum: 0.9, eps: 1e-7)
click to toggle source
@param [Integer] axis The axis along which to normalize. @param [Float] momentum Momentum for the exponential moving averages of mean and variance. @param [Float] eps Small value added to the variance to avoid division by zero.
Calls superclass method
DNN::Layers::TrainableLayer::new
# File lib/dnn/core/layers/normalizations.rb, line 18
# Build a batch-normalization layer.
# @param [Integer] axis The axis along which to normalize.
# @param [Float] momentum Momentum for the exponential moving averages of mean and variance.
# @param [Float] eps Small value added to the variance to avoid division by zero.
def initialize(axis: 0, momentum: 0.9, eps: 1e-7)
  super()
  @axis = axis
  @momentum = momentum
  @eps = eps
  # Learnable scale/shift placeholders; real shapes are assigned in #build.
  @gamma = Param.new(nil, Xumo::SFloat[0])
  @beta = Param.new(nil, Xumo::SFloat[0])
  # Running statistics used at inference time, updated during training.
  @running_mean = Param.new
  @running_var = Param.new
end
Public Instance Methods
backward_node(dy)
click to toggle source
# File lib/dnn/core/layers/normalizations.rb, line 54 def backward_node(dy) batch_size = dy.shape[@axis] if @trainable @beta.grad = dy.sum(axis: @axis, keepdims: true) @gamma.grad = (@xn * dy).sum(axis: @axis, keepdims: true) end dxn = @gamma.data * dy dxc = dxn / @std dstd = -((dxn * @xc) / (@std**2)).sum(axis: @axis, keepdims: true) dvar = 0.5 * dstd / @std dxc += (2.0 / batch_size) * @xc * dvar dmean = dxc.sum(axis: @axis, keepdims: true) dxc - dmean / batch_size end
build(input_shape)
click to toggle source
Calls superclass method
DNN::Layers::Layer#build
# File lib/dnn/core/layers/normalizations.rb, line 29
# Allocate parameter arrays once the input shape is known:
# gamma starts at 1 (identity scale), beta and the running statistics at 0.
def build(input_shape)
  super
  shape = @output_shape
  @gamma.data = Xumo::SFloat.ones(*shape)
  @beta.data = Xumo::SFloat.zeros(*shape)
  @running_mean.data = Xumo::SFloat.zeros(*shape)
  @running_var.data = Xumo::SFloat.zeros(*shape)
end
forward_node(x)
click to toggle source
# File lib/dnn/core/layers/normalizations.rb, line 37
# Forward pass. During training, normalize with the batch statistics and
# update the running mean/variance; at inference, normalize with the
# stored running statistics. Caches @xc, @std and @xn for #backward_node.
def forward_node(x)
  if DNN.learning_phase
    mean = x.mean(axis: @axis, keepdims: true)
    @xc = x - mean
    var = (@xc**2).mean(axis: @axis, keepdims: true)
    @std = Xumo::NMath.sqrt(var + @eps)
    @xn = @xc / @std
    xn = @xn
    # Exponential moving averages for inference.
    @running_mean.data = @momentum * @running_mean.data + (1 - @momentum) * mean
    @running_var.data = @momentum * @running_var.data + (1 - @momentum) * var
  else
    xn = (x - @running_mean.data) / Xumo::NMath.sqrt(@running_var.data + @eps)
  end
  @gamma.data * xn + @beta.data
end
get_params()
click to toggle source
# File lib/dnn/core/layers/normalizations.rb, line 77
# Expose the layer's learnable parameters and running statistics.
# @return [Hash] Parameter name (Symbol) => Param.
def get_params
  {
    gamma: @gamma,
    beta: @beta,
    running_mean: @running_mean,
    running_var: @running_var
  }
end
load_hash(hash)
click to toggle source
# File lib/dnn/core/layers/normalizations.rb, line 73
# Restore the layer's configuration from a serialized hash.
# Fix: #to_hash serializes :eps, but it was previously dropped here, so a
# deserialized layer silently reverted to the default eps. Fall back to the
# constructor default when :eps is absent (hashes saved by older versions).
def load_hash(hash)
  initialize(axis: hash[:axis], momentum: hash[:momentum], eps: hash[:eps] || 1e-7)
end
to_hash()
click to toggle source
Calls superclass method
DNN::Layers::Layer#to_hash
# File lib/dnn/core/layers/normalizations.rb, line 69 def to_hash super(axis: @axis, momentum: @momentum, eps: @eps) end