class DataMining::PageRank

PageRank Algorithm to measure the importance of nodes in a graph

Attributes

graph[R]
ranks[R]

Public Class Methods

new(graph, damping_factor = 0.85, iterations = 100) click to toggle source

Measure importance of nodes

Arguments:

graph: (array of arrays, like:
  [[:p1, [:p2]], [:p2, [:p1, :p3]], [:p3, [:p2]]]
damping_factor: (double between 0 and 1)
# File lib/data_mining/page_rank.rb, line 11
def initialize(graph, damping_factor = 0.85, iterations = 100)
  @graph      = graph.to_h
  # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
  @outlinks   = Hash.new { |_, key| @graph[key].size }
  # { :p1 => 1, :p2 => 2, :p3 => 1 }
  @inlinks    = Hash.new { |_, key| inlinks(key) }
  # { :p1 => [:p2], :p2 => [:p1,:p3], :p3 => [:p2] }
  @ranks      = Hash.new(1.0 / @graph.size)
  # { :p1 => 1/3, :p2 => 1/3, ... }
  @sinknodes  = @graph.select { |_, v| v.empty? }.keys
  # sinknodes aka dead-ends, have no outlink at all

  @damper     = damping_factor
  @iterations = iterations
end

Public Instance Methods

rank!() click to toggle source
# File lib/data_mining/page_rank.rb, line 27
def rank!
  pagerank
end

Private Instance Methods

next_state() click to toggle source
# File lib/data_mining/page_rank.rb, line 41
def next_state
  current_term = term
  @graph.each_with_object({}) do |(node, _), ranks|
    ranks[node] = current_term + @damper * sum_incoming_scores(node)
  end
end
pagerank() click to toggle source
# File lib/data_mining/page_rank.rb, line 37
def pagerank
  @iterations.times { @ranks = next_state }
end
pagerank_of_sinknodes() click to toggle source
# File lib/data_mining/page_rank.rb, line 56
def pagerank_of_sinknodes
  return 0 if @sinknodes.empty?
  @ranks.select { |k, _| @sinknodes.include?(k) }.values.inject(:+).to_f
end
sum_incoming_scores(node) click to toggle source
# File lib/data_mining/page_rank.rb, line 48
def sum_incoming_scores(node)
  @inlinks[node].map { |id| @ranks[id] / @outlinks[id] }.inject(:+).to_f
end
term() click to toggle source
# File lib/data_mining/page_rank.rb, line 52
def term
  ((1 - @damper + @damper * pagerank_of_sinknodes) / @graph.size)
end