class Chainer::Functions::Loss::SoftmaxCrossEntropy
Public Class Methods
check_class_weight_option(class_weight)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 167
def self.check_class_weight_option(class_weight)
  return if class_weight.nil?

  xm = Chainer.get_array_module(class_weight)
  if class_weight.ndim != 1
    raise ArgumentError, 'class_weight.ndim should be 1'
  elsif (class_weight.class != xm::DFloat) and (class_weight.class != xm::SFloat)
    raise ArgumentError, "The dtype of class_weight should be 'DFloat' or 'SFloat'"
  elsif class_weight.kind_of?(Chainer::Variable)
    raise ArgumentError, 'class_weight should be a NArray, not a chainer.Variable'
  end
end
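A minimal usage sketch (the Numo backend and the weight values are assumptions, not taken from the source): a 1-D SFloat/DFloat vector passes silently, anything else raises ArgumentError.

  require 'chainer'

  SCE = Chainer::Functions::Loss::SoftmaxCrossEntropy

  SCE.check_class_weight_option(Numo::SFloat[0.5, 1.0, 2.0])   # ok: 1-D float vector
  SCE.check_class_weight_option(nil)                           # ok: nil means "no weighting"

  begin
    SCE.check_class_weight_option(Numo::Int32[1, 2, 3])        # wrong dtype
  rescue ArgumentError => e
    puts e.message   # => The dtype of class_weight should be 'DFloat' or 'SFloat'
  end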
check_reduce_option(reduce)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 180
def self.check_reduce_option(reduce)
  unless ['mean', 'no'].include?(reduce)
    raise ArgumentError, "only 'mean' and 'no' are valid for 'reduce', but #{reduce} is given"
  end
end
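For illustration only (the calls below simply exercise the guard shown above):

  Chainer::Functions::Loss::SoftmaxCrossEntropy.check_reduce_option('mean')   # ok
  Chainer::Functions::Loss::SoftmaxCrossEntropy.check_reduce_option('no')     # ok
  Chainer::Functions::Loss::SoftmaxCrossEntropy.check_reduce_option('sum')
  # => ArgumentError: only 'mean' and 'no' are valid for 'reduce', but sum is given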
double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 13
def self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  if t.is_a?(Chainer::Variable)
    t = t.data
  end

  self.check_class_weight_option(class_weight)
  self.check_reduce_option(reduce)

  loss = -Activation::LogSoftmax.log_softmax(x)
  if class_weight
    shape = x.ndim.times.map { |d| d != 1 ? 1 : class_weight.shape[-1] }
    class_weight = Chainer::Functions::Array::BroadcastTo.broadcast_to(class_weight.reshape(*shape), x.shape)
    loss = loss * class_weight
  end

  dtype = x.is_a?(Chainer::Variable) ? x.dtype : x.class
  in_use = t.ne(ignore_label).cast_to(dtype)

  loss = Chainer::Functions::Array::Rollaxis.rollaxis(loss, 1, start: loss.ndim)
  # TODO: loss = chainer.functions.reshape(loss, (-1, loss.shape[-1]))
  shape = loss.shape
  last_shape = shape.pop
  loss = Chainer::Functions::Array::Reshape.reshape(loss, [shape.inject(:*), last_shape])

  # Replace ignore_label value with one valid for F.select_item below.
  t = t.clip(0, loss.shape[1] - 1)

  loss = Chainer::Functions::Array::SelectItem.select_item(loss, t.flatten.dup)
  loss = Chainer::Functions::Array::Reshape.reshape(loss, t.shape)
  loss = loss * in_use

  if reduce == "mean"
    count = normalize ? in_use.sum : x.shape.first
    count = [count, 1.0].max
    loss = loss * (1.0 / count)
    return Chainer::Functions::Math::Sum.sum(loss)
  else
    return loss
  end
end
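This is the path taken when enable_double_backprop is requested. A short sketch, with made-up shapes and Numo inputs assumed:

  x = Chainer::Variable.new(Numo::SFloat.new(4, 3).rand)   # logits: batch of 4, 3 classes
  t = Numo::Int32[0, 2, 1, 0]                              # integer class labels
  loss = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(
    x, t, enable_double_backprop: true)
  loss.backward                                            # gradients w.r.t. x flow through the graph

Because the loss here is assembled from differentiable function objects (LogSoftmax, BroadcastTo, SelectItem, Sum, ...), the resulting graph itself can be differentiated again, unlike the hand-written backward further down.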
new(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean')
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 57
def initialize(normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean')
  @normalize = normalize
  @cache_score = cache_score
  self.class.check_class_weight_option(class_weight)
  @class_weight = class_weight

  @ignore_label = ignore_label
  self.class.check_reduce_option(reduce)
  @reduce = reduce
end
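A sketch of constructing and applying the function object directly (Numo inputs and the shapes are assumptions; softmax_cross_entropy below wraps the same steps for you):

  fn = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(ignore_label: -1, reduce: 'mean')
  x = Numo::SFloat.new(4, 3).rand    # logits
  t = Numo::Int32[0, 2, -1, 1]       # the -1 entry is skipped via ignore_label
  loss = fn.(x, t)                   # mean cross-entropy over the three labelled samples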
softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 5
def self.softmax_cross_entropy(x, t, normalize: true, cache_score: true, class_weight: nil, ignore_label: -1, reduce: 'mean', enable_double_backprop: false)
  if enable_double_backprop
    self.double_backward_softmax_cross_entropy(x, t, normalize, class_weight, ignore_label, reduce)
  else
    self.new(normalize: normalize, cache_score: cache_score, class_weight: class_weight, ignore_label: ignore_label, reduce: reduce).(x, t)
  end
end
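Typical use, sketched with assumed shapes and Numo arrays:

  x = Chainer::Variable.new(Numo::SFloat.new(32, 10).rand)  # logits for 32 samples, 10 classes
  t = Numo::Int32.new(32).rand(10)                          # labels in 0...10
  loss = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(x, t)
  loss.backward                                             # populates x.grad

  # reduce: 'no' keeps the per-sample losses instead of averaging them
  per_sample = Chainer::Functions::Loss::SoftmaxCrossEntropy.softmax_cross_entropy(x, t, reduce: 'no')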
Public Instance Methods
backward(inputs, grad_outputs)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 107
def backward(inputs, grad_outputs)
  xm = Chainer.get_array_module(*(inputs + grad_outputs))
  x, t = inputs
  gloss = grad_outputs[0]

  if self.instance_variable_defined?(:'@y')
    y = @y.dup
  else
    y = Activation._log_softmax(x)
    y = xm::NMath.exp(y)
  end

  if y.ndim == 2
    gx = y
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    t.class.new(t.shape[0]).seq(0).to_a.zip(t.class.maximum(t, 0).to_a).each { |v| gx[*v] -= 1 }

    if @class_weight
      shape = x.ndim.times.map { |d| d == 1 ? true : 1 }
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c[t.class.new(t.shape[0]).seq, t.class.maximum(t, 0)].diagonal.dup
      gx *= Chainer::Utils::Array.broadcast_to(c.expand_dims(1), gx.shape)
    end
    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1)
    end
  else
    # in the case where y.ndim is higher than 2,
    # we think that a current implementation is inefficient
    # because it yields two provisional arrays for indexing.
    n_unit = t.size / t.shape[0]
    gx = y.reshape(y.shape[0], y.shape[1], true)
    fst_index = xm::Int32.new(t.size).seq(0) / n_unit
    trd_index = xm::Int32.new(t.size).seq(0) % n_unit
    # TODO(sonots): Avoid to_a especially in Cumo to improve performance
    fst_index.to_a.zip(t.class.maximum(t.flatten.dup, 0).to_a, trd_index.to_a).each { |v| gx[*v] -= 1 }

    if @class_weight
      shape = x.ndim.times.map { |d| d == 1 ? true : 1 }
      c = Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
      c = c.reshape(*gx.shape)
      c = c[fst_index, t.class.maximum(t.flatten.dup, 0), trd_index].diagonal.diagonal.dup
      c = c.reshape(y.shape[0], 1, true)
      gx *= Chainer::Utils::Array.broadcast_to(c, gx.shape)
    end
    if @ignore_label
      gx *= (t.ne @ignore_label).reshape(t.shape[0], 1, true)
    end
    gx = gx.reshape(*y.shape)
  end

  if @reduce == 'mean'
    gx *= gloss * @coeff
  else
    gx *= gloss[true, :-, false]
  end
  return [gx, nil]
end
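backward is normally driven by the autograd machinery after the function object is applied; the manual invocation below is only a sketch (Numo inputs, unit upstream gradient, and shapes are assumptions):

  fn = Chainer::Functions::Loss::SoftmaxCrossEntropy.new
  x = Numo::SFloat.new(4, 3).rand
  t = Numo::Int32[0, 2, 1, 0]
  fn.forward([x, t])                               # caches @y and the mean coefficient @coeff
  gx, gt = fn.backward([x, t], [Numo::SFloat[1]])  # gx ~ softmax(x) with 1 subtracted at each label, scaled by @coeff
  # gt is nil: no gradient flows into the integer labels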
forward(inputs)
# File lib/chainer/functions/loss/softmax_cross_entropy.rb, line 69
def forward(inputs)
  xm = Chainer.get_array_module(*inputs)
  x, t = inputs
  log_y = Activation._log_softmax(x)

  if @cache_score
    @y = xm::NMath.exp(log_y)
  end
  if @class_weight
    shape = x.ndim.times.map { |e| e == 1 ? true : 1 }
    log_y *= Chainer::Utils::Array.broadcast_to(@class_weight.reshape(*shape), x.shape)
  end
  log_yd = Chainer::Utils::Array.rollaxis(log_y, 1)
  begin
    log_yd = log_yd.reshape(log_yd.shape[0], true)
  rescue ArgumentError
  end

  log_p = log_yd[t.class.maximum(t.flatten, 0), t.class.new(t.size).seq].diagonal

  if @ignore_label
    t_valid = t.ne(@ignore_label)
    log_p *= t_valid.flatten
  end

  if @reduce == 'mean'
    if @normalize and t_valid
      @coeff = 1.0 / log_p.class.maximum(Chainer::Utils::Array.force_array(t_valid.count), 1)
    else
      count = x.shape[0]
      @coeff = 1.0 / [count, 1].max
    end
    y = log_p.sum(keepdims: true) * (-@coeff)
    [y.class.cast(y[0])]
  else
    [-log_p.reshape(*t.shape)]
  end
end
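A sketch of calling the raw forward pass (usually you go through softmax_cross_entropy instead; the shapes and label values here are made up):

  fn = Chainer::Functions::Loss::SoftmaxCrossEntropy.new(reduce: 'no')
  x = Numo::SFloat.new(4, 3).rand
  t = Numo::Int32[0, 2, 1, -1]          # the last sample hits ignore_label
  losses = fn.forward([x, t]).first     # Numo array of 4 per-sample losses; the ignored entry is 0.0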