class Statistics::StatisticalTest::MannWhitneyU

Public Instance Methods

perform(alpha, tails, group_one, group_two) click to toggle source

Steps to perform the calculation are based on www.mit.edu/~6.s085/notes/lecture5.pdf

# File lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb, line 22
# Runs the Mann-Whitney U test over two samples using a normal approximation.
#
# @param alpha [Numeric] significance level the p-value is compared against
# @param tails [Symbol] +:two_tail+ doubles the p-value; any other value keeps
#   the one-sided p-value
# @param group_one [Array] first sample (must respond to +size+ and +++)
# @param group_two [Array] second sample
# @return [Hash] with the keys +:probability+, +:u+, +:z+, +:p_value+, +:alpha+,
#   +:null+, +:alternative+ and +:confidence_level+
def perform(alpha, tails, group_one, group_two)
  size_one = group_one.size
  size_two = group_two.size

  # Rank the pooled observations once, then add up the ranks of each sample.
  ranking = rank(group_one + group_two)
  rank_sum_one = ranked_sum_for(ranking, group_one)
  rank_sum_two = ranked_sum_for(ranking, group_two)

  # The U statistic is the smaller of the two per-group magnitudes.
  u_one = (size_one * (size_one + 1) / 2.0) - rank_sum_one
  u_two = (size_two * (size_two + 1) / 2.0) - rank_sum_two
  u_statistic = [u_one.abs, u_two.abs].min

  median_u = (size_one * size_two) / 2.0

  # Values that occur more than once require the tie-corrected deviation.
  tied = ranking.values.select { |entry| entry[:counter] > 1 }
  std_u =
    if tied.empty?
      Math.sqrt((size_one * size_two * (size_one + size_two + 1)) / 12.0)
    else
      corrected_sigma(tied, size_one, size_two)
    end

  z = (u_statistic - median_u) / std_u

  # The literature is rarely specific about which normal distribution to use.
  # Trials with both Normal(median_u, std_u) and Normal(0, 1) showed the latter
  # to agree better with reference results, so the standard normal is used.
  probability = Distribution::StandardNormal.new.cumulative_function(z.abs)
  one_sided = 1 - probability
  p_value = tails == :two_tail ? one_sided * 2 : one_sided

  {
    probability: probability,
    u: u_statistic,
    z: z,
    p_value: p_value,
    alpha: alpha,
    null: alpha < p_value,
    alternative: p_value <= alpha,
    confidence_level: 1 - alpha
  }
end
rank(elements) click to toggle source
# File lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb, line 4
# Ranks the given elements by their sorted (1-based) position.
#
# Each distinct value maps to a record holding how many times it occurs
# (+:counter+) and the sum of every position it occupies (+:rank+). Keeping the
# sum and the count separately lets callers resolve ties by dividing one by
# the other.
#
# @param elements [Array] sortable values
# @return [Hash] shaped as <tt>{ value => { counter: occurrences, rank: position_sum } }</tt>
def rank(elements)
  elements.sort.each.with_index(1).each_with_object({}) do |(value, position), ranking|
    record = ranking[value]

    if record
      # Repeated value: accumulate so the tie can be averaged later.
      record[:counter] += 1
      record[:rank] += position
    else
      ranking[value] = { counter: 1, rank: position }
    end
  end
end

Private Instance Methods

corrected_sigma(ties, total_group_one, total_group_two) click to toggle source

Formula extracted from www.statstutor.ac.uk/resources/uploaded/mannwhitney.pdf

# File lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb, line 69
        # Standard deviation of U adjusted for tied observations.
        #
        # @param ties [Array<Hash>] records exposing a +:counter+ entry (size of each tie)
        # @param total_group_one [Integer] number of observations in the first group
        # @param total_group_two [Integer] number of observations in the second group
        # @return [Float] the tie-corrected standard deviation
        def corrected_sigma(ties, total_group_one, total_group_two)
          n = total_group_one + total_group_two

          # Sum of (t^3 - t) / 12 over every group of tied values.
          tie_adjustment = ties.inject(0) do |acc, tie|
            tie_size = tie[:counter]
            acc + ((tie_size**3) - tie_size) / 12.0
          end

          pair_factor = (total_group_one * total_group_two) / (n * (n - 1)).to_r
          spread = (((n**3) - n) / 12.0) - tie_adjustment

          Math.sqrt(pair_factor * spread)
        end
ranked_sum_for(total, group) click to toggle source
# File lib/statistics/statistical_test/wilcoxon_rank_sum_test.rb, line 82
        # Adds up the ranks of every element in +group+.
        #
        # The rank of an element is the sum of the positions recorded for its
        # value divided by its number of occurrences, so tied values share
        # their average position.
        #
        # @param total [Hash] ranking shaped as <tt>{ value => { counter:, rank: } }</tt>
        # @param group [Array] elements whose ranks are summed; each must be a key of +total+
        # @return [Numeric] the rank sum (0 when +group+ is empty)
        def ranked_sum_for(total, group)
          average_ranks = group.map do |element|
            record = total[element]
            record[:rank] / record[:counter].to_r
          end

          average_ranks.inject(0, :+)
        end