class DataMining::DBScan

Density-based clustering and outlier-detection algorithm (DBSCAN).

Public Class Methods

new(data, radius, min_points) click to toggle source

Sets up a DBSCAN run over the given data. Call cluster! afterwards to find the clusters and outliers.

Arguments:

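data: (array of [id, value] pairs, like [[:id, value], [:id2, value2]]; each value is an array of numeric coordinates, and all values must have the same length)
radius: (integer) maximum distance at which two points count as neighbors
min_points: (integer) minimum number of points, the point itself included, that must lie within radius for a point to start or extend a cluster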
# File lib/data_mining/dbscan.rb, line 10
# Prepares a DBSCAN run; no clustering happens until #cluster! is called.
#
# data::       array of [id, value] pairs; each pair is wrapped in a
#              DataMining::Point
# radius::     stored as the distance threshold used by the neighbor check
# min_points:: stored as the density threshold used by the core-object check
def initialize(data, radius, min_points)
  @radius             = radius
  @min_points         = min_points
  @clusters           = {}
  @current_cluster_id = 0
  @data               = data.map { |id, value| DataMining::Point.new(id, value) }
  # Working list in shuffled order, so the visiting order is random per run.
  @unvisited_points   = @data.shuffle
end

Public Instance Methods

cluster!() click to toggle source
# File lib/data_mining/dbscan.rb, line 19
# Runs the clustering pass by delegating to the private #dbscan method.
# The outcome is read afterwards through #clusters and #outliers.
def cluster!
  dbscan
end
clusters() click to toggle source
# File lib/data_mining/dbscan.rb, line 27
# Lists the clusters collected so far.
#
# Returns an array with one single-entry hash per cluster, mapping the
# cluster id to the ids of the points in that cluster, e.g.
# [{ 1 => [:a, :b] }, { 2 => [:c] }]. The array is empty as long as no
# cluster has been created.
def clusters
  # map, not each: each would discard the ids and hand back the points.
  @clusters.map { |cluster_id, points| { cluster_id => points.map(&:id) } }
end
outliers() click to toggle source
# File lib/data_mining/dbscan.rb, line 23
# Points that do not belong to any cluster.
#
# Returns the input points (as point objects, in input order) for which
# assigned_to_cluster? is not truthy.
def outliers
  @data.reject(&:assigned_to_cluster?)
end

Private Instance Methods

core_object?(neighborhood) click to toggle source
# File lib/data_mining/dbscan.rb, line 75
# Density test for a point, given the list of its neighbors.
#
# Returns true when the neighborhood holds at least @min_points - 1
# entries, false otherwise.
def core_object?(neighborhood)
  neighbor_count = neighborhood.size
  neighbor_count >= @min_points - 1
end
create_cluster(point, neighborhood) click to toggle source
# File lib/data_mining/dbscan.rb, line 48
# Opens a new cluster seeded with +point+ and grows it from +neighborhood+.
#
# The cluster id counter is advanced first, the seed point is marked as
# assigned and stored under the new id, and only then is the neighborhood
# handed to #fill_current_cluster.
def create_cluster(point, neighborhood)
  @current_cluster_id += 1
  point.assign_to_cluster!

  members = (@clusters[@current_cluster_id] ||= [])
  members << point

  fill_current_cluster(neighborhood)
end
dbscan() click to toggle source
# File lib/data_mining/dbscan.rb, line 33
# Main loop: keeps taking points from the unvisited list until none is
# left. Each taken point is marked as visited, and when its neighborhood
# passes the core-object test a new cluster is started from it.
def dbscan
  until unvisited_points.size.zero?
    candidate = unvisited_points.pop
    candidate.visit!

    nearby = get_neighborhood(candidate)
    create_cluster(candidate, nearby) if core_object?(nearby)
  end
end
elaborate(point) click to toggle source
# File lib/data_mining/dbscan.rb, line 62
# Processes one point while the current cluster is being grown.
#
# The point is marked as visited and, unless it is already assigned to a
# cluster, appended to the current cluster. If its own neighborhood passes
# the core-object test, the cluster keeps growing through that neighborhood.
def elaborate(point)
  point.visit!
  unless point.assigned_to_cluster?
    @clusters[@current_cluster_id] << point
  end

  reachable = get_neighborhood(point)
  return unless core_object?(reachable)

  fill_current_cluster(reachable)
end
fill_current_cluster(neighborhood) click to toggle source
# File lib/data_mining/dbscan.rb, line 55
# Pulls every point of +neighborhood+ into the current cluster and expands
# the cluster through the neighbors that have not been visited yet.
#
# A neighbor that is not yet assigned to any cluster is appended to the
# current cluster's member list before it is marked as assigned. This also
# covers points that were visited earlier and left unassigned: they join
# this cluster instead of being flagged as assigned without appearing in
# any cluster's member list.
#
# Returns +neighborhood+ (the result of each).
def fill_current_cluster(neighborhood)
  neighborhood.each do |neighbor|
    @clusters[@current_cluster_id] << neighbor unless neighbor.assigned_to_cluster?
    neighbor.assign_to_cluster!
    # Expansion runs after the assignment, so the neighbor is never
    # appended a second time while its own neighborhood is processed.
    elaborate(neighbor) unless neighbor.visited?
  end
end
get_neighborhood(point) click to toggle source
# File lib/data_mining/dbscan.rb, line 69
# Collects the neighbors of +point+.
#
# Returns a new array with every element of @data for which
# neighbors?(element, point) is truthy, in @data order.
def get_neighborhood(point)
  @data.select { |candidate| neighbors?(candidate, point) }
end
neighbors?(p1, p2) click to toggle source
# File lib/data_mining/dbscan.rb, line 80
# Tells whether two distinct points lie within @radius of each other.
#
# Returns true when p1 != p2 and the absolute euclidean distance between
# them is at most @radius, false otherwise.
#
# Raises ArgumentError when valid_points? rejects the pair.
def neighbors?(p1, p2)
  raise ArgumentError, 'Wrong point coordinates' unless valid_points?(p1, p2)

  p1 != p2 && euclidean_distance(p1, p2).abs <= @radius
end
unvisited_points() click to toggle source
# File lib/data_mining/dbscan.rb, line 43
# Drops every already-visited point from the working list in place.
#
# Returns the (same) @unvisited_points array, whether or not anything was
# removed.
def unvisited_points
  @unvisited_points.delete_if(&:visited?)
end
valid_points?(p1, p2) click to toggle source
# File lib/data_mining/dbscan.rb, line 86
# Checks that two points can be compared.
#
# Returns false when the two value lists differ in length; otherwise
# returns whether every entry of both value lists is a Numeric.
def valid_points?(p1, p2)
  coords1 = p1.value
  coords2 = p2.value
  return false unless coords1.length == coords2.length

  (coords1 + coords2).all? { |coordinate| coordinate.is_a?(Numeric) }
end