module Benchmark::Experiment::MannWhitneyUTest

Public Class Methods

calculate_U(x, y) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 6
# Computes the Mann-Whitney U statistic for each of the two samples.
#
# x - first sample (collection of comparable values).
# y - second sample (collection of comparable values).
#
# Returns a two-element Array [u_x, u_y].
def self.calculate_U(x, y)
  # Pool both samples, assign ordinal ranks, then average the ranks of ties.
  pooled = concatenate_and_label(x, y)
  rank!(pooled)
  adjust_ties!(pooled)

  # Each sample's U is derived from its own size, the other sample's size
  # and the sum of the ranks carrying its label.
  [[x, y, :x], [y, x, :y]].map do |sample, other, label|
    calculate_u_sample(sample.size, other.size, rank_sum(pooled, label))
  end
end
calculate_probability_z(z, two_sided = true) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 46
# Converts a z score into a p-value under the standard normal distribution.
#
# z         - Numeric z statistic; only its magnitude is used.
# two_sided - when true (the default) the single-tail probability is doubled.
#
# Returns the p-value as a Float.
def self.calculate_probability_z(z, two_sided = true)
  # P(Z > |z|) equals 0.5 * erfc(|z| / sqrt(2)). Evaluating the upper tail
  # directly avoids the cancellation in `1.0 - cdf(|z|)`, which collapses to
  # exactly 0.0 once |z| grows past roughly 8.3.
  prob = 0.5 * Math.erfc(z.abs / Math.sqrt(2.0))
  prob *= 2.0 if two_sided
  prob
end
calculate_z(x, y) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 25
# Computes the normal-approximation z score of the Mann-Whitney U test.
#
# x - first sample.
# y - second sample.
#
# The smaller of the two U statistics is standardised with the mean
# n_x * n_y / 2 and a standard deviation that is corrected for ties when
# ties?(x, y) reports any.
#
# Returns the z score as a Float.
def self.calculate_z(x, y)
  n_x = x.size.to_f
  n_y = y.size.to_f
  n = n_x + n_y
  n_xy = n_x * n_y

  u = calculate_U(x, y).min.to_f

  has_ties, tie_correction = ties?(x, y)

  mu_u = n_xy / 2.0

  sigma_u =
    if has_ties
      # Tie-corrected variance: n_x*n_y / (n*(n - 1)) * ((n^3 - n)/12 - sum T).
      # The denominator is n*(n - 1); with a zero correction the expression
      # then reduces to the untied formula in the else branch.
      Math.sqrt(n_xy / (n * (n - 1.0)) * ((n**3 - n) / 12.0 - tie_correction))
    else
      Math.sqrt(n_xy * (n + 1.0) / 12.0)
    end

  (u - mu_u) / sigma_u
end
is_null_hypothesis_rejected?(pvalue, significance_level) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 52
# Decides whether the null hypothesis is rejected.
#
# pvalue             - p-value obtained from the test.
# significance_level - threshold (alpha) the p-value is compared against.
#
# Returns true when the p-value is strictly below the threshold.
def self.is_null_hypothesis_rejected?(pvalue, significance_level)
  rejected = pvalue < significance_level
  rejected
end

Private Class Methods

adjust_ties!(ranked) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 91
# Replaces the rank of every tied score with the mean rank of its tie group.
#
# ranked - Array of entries whose first element is the score and whose last
#          element is the rank; entries are modified in place.
#
# Returns ranked when at least one tie group exists, nil otherwise.
def self.adjust_ties!(ranked)
  mean_rank = {}

  ranked.group_by { |entry| entry.first }.each do |score, group|
    next if group.size < 2

    rank_total = group.inject(0) { |sum, entry| sum + entry.last }
    mean_rank[score] = rank_total / group.size.to_f
  end

  return nil if mean_rank.empty?

  # Constant-time hash lookup per entry instead of scanning the key list.
  ranked.map! do |entry|
    entry[-1] = mean_rank[entry.first] if mean_rank.key?(entry.first)
    entry
  end
end
calculate_u_sample(n, n_other, r) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 109
# Computes the U statistic of one sample from its rank sum.
#
# n       - size of the sample.
# n_other - size of the other sample.
# r       - sum of the ranks assigned to the sample.
#
# Returns n * n_other + n * (n + 1) / 2 - r as a Float.
def self.calculate_u_sample(n, n_other, r)
  pair_count = n * n_other
  rank_sum_ceiling = n * (n + 1) / 2.0
  pair_count + rank_sum_ceiling - r
end
concatenate_and_label(x, y) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 69
# Pools both samples into one list, tagging each value with its origin.
#
# x - first sample; its values are tagged :x.
# y - second sample; its values are tagged :y.
#
# Returns a new Array of [value, label] pairs, x entries first.
def self.concatenate_and_label(x, y)
  labelled_x = x.map { |value| [value, :x] }
  labelled_y = y.map { |value| [value, :y] }

  labelled_x + labelled_y
end
rank!(ranked) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 76
# Sorts the pooled entries in place and appends a 1-based ordinal rank to
# each of them.
#
# ranked - Array of [value, label] entries; sorted and extended in place.
#
# Returns the next unused rank (number of entries + 1).
def self.rank!(ranked)
  ranked.sort!

  ranked.each_with_index do |entry, position|
    entry << position + 1
  end

  ranked.size + 1
end
rank_sum(ranked, label) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 85
# Adds up the ranks of all entries carrying the given label.
#
# ranked - Array of entries with the label at index 1 and the rank last.
# label  - label to select.
#
# Returns the sum of the matching ranks (0 when nothing matches).
def self.rank_sum(ranked, label)
  ranked.inject(0) do |total, entry|
    entry[1] == label ? total + entry.last : total
  end
end
ties?(x, y) click to toggle source
# File lib/benchmark/lab/mann_whitney_u_test.rb, line 58
# Detects tied values across both samples and computes the tie correction
# term sum((t^3 - t) / 12), where t is the number of values in a tie group.
#
# x - first sample.
# y - second sample.
#
# Returns a two-element Array [found_ties, correction]; correction is 0 when
# no value occurs more than once.
def self.ties?(x, y)
  # Work on the groups themselves: injecting over the grouped Hash would
  # yield [key, group] pairs, whose size is always 2.
  tie_groups = (x + y).group_by { |e| e }.values.reject { |group| group.size < 2 }

  correction = tie_groups.inject(0) do |acc, group|
    acc + (group.size**3 - group.size) / 12.0
  end

  [!tie_groups.empty?, correction]
end