class DataMining::DBScan
Density-Based clustering / Outlier-Detection Algorithm
Public Class Methods
new(data, radius, min_points)
click to toggle source
Find clusters and outliers
Arguments:
data: (array of arrays, like [[:id, value], [:id2, value2]]); radius: (integer); min_points: (integer)
# File lib/data_mining/dbscan.rb, line 10
# Sets up a clustering run over +data+.
#
# data:       array of [id, value] pairs; every pair is wrapped in a
#             DataMining::Point
# radius:     kept as @radius; neighbors? compares point distances against it
# min_points: kept as @min_points; core_object? uses it as the density threshold
def initialize(data, radius, min_points)
  @radius = radius
  @min_points = min_points
  @current_cluster_id = 0
  @clusters = {}
  @data = data.map { |id, value| DataMining::Point.new(id, value) }
  # Randomly ordered work list that #dbscan pops points from.
  @unvisited_points = @data.shuffle
end
Public Instance Methods
cluster!()
click to toggle source
# File lib/data_mining/dbscan.rb, line 19
# Public entry point: runs the clustering pass by delegating to the private
# #dbscan method and returns whatever that call returns.
def cluster!
  dbscan
end
clusters()
click to toggle source
# File lib/data_mining/dbscan.rb, line 27
# Lists the clusters collected in @clusters.
#
# Returns an array holding one single-entry hash per cluster, each mapping the
# cluster id to the array of points stored for that cluster. The values are
# the point objects themselves, not their ids; callers that need ids can map
# over the points. Returns [] while @clusters is empty.
def clusters
  @clusters.map { |cluster_id, points| { cluster_id => points } }
end
outliers()
click to toggle source
# File lib/data_mining/dbscan.rb, line 23
# Returns the points from @data that are not flagged as assigned to a cluster,
# in their original order. Before any clustering has run this is every point.
def outliers
  @data.reject(&:assigned_to_cluster?)
end
Private Instance Methods
core_object?(neighborhood)
click to toggle source
# File lib/data_mining/dbscan.rb, line 75
# Tells whether a point with the given neighborhood is dense enough to seed or
# extend a cluster.
#
# neighborhood: collection of neighboring points (anything responding to #size)
#
# Returns true when the neighborhood holds at least @min_points - 1 entries.
# The "- 1" accounts for the point itself, which neighbors? never reports as
# its own neighbor.
def core_object?(neighborhood)
  neighborhood.size >= @min_points - 1
end
create_cluster(point, neighborhood)
click to toggle source
# File lib/data_mining/dbscan.rb, line 48
# Opens a new cluster seeded with +point+ and grows it from +neighborhood+.
#
# point:        the point that becomes the first member of the new cluster
# neighborhood: the neighbors of +point+, handed on to fill_current_cluster
def create_cluster(point, neighborhood)
  # Every new cluster gets the next sequential id.
  @current_cluster_id += 1
  point.assign_to_cluster!
  members = (@clusters[@current_cluster_id] ||= [])
  members << point
  fill_current_cluster(neighborhood)
end
dbscan()
click to toggle source
# File lib/data_mining/dbscan.rb, line 33
# Main loop: keeps taking points off the unvisited list until none remain.
# Each point is marked visited and, when its neighborhood passes
# core_object?, used to start a new cluster via create_cluster.
def dbscan
  until unvisited_points.empty?
    point = unvisited_points.pop
    point.visit!
    nearby = get_neighborhood(point)
    create_cluster(point, nearby) if core_object?(nearby)
  end
end
elaborate(point)
click to toggle source
# File lib/data_mining/dbscan.rb, line 62
# Processes one point while the current cluster is being grown.
#
# Marks +point+ as visited, stores it in the current cluster unless it is
# already flagged as assigned, and continues the expansion through its own
# neighborhood when that neighborhood passes core_object?.
def elaborate(point)
  point.visit!
  @clusters[@current_cluster_id].push(point) unless point.assigned_to_cluster?

  nearby = get_neighborhood(point)
  return unless core_object?(nearby)

  fill_current_cluster(nearby)
end
fill_current_cluster(neighborhood)
click to toggle source
# File lib/data_mining/dbscan.rb, line 55
# Adds every point of +neighborhood+ that is not yet assigned to a cluster to
# the current cluster, and expands through the ones not visited before.
#
# neighborhood: the neighbors of a core point of the current cluster
#
# Returns +neighborhood+ (the result of #each).
def fill_current_cluster(neighborhood)
  neighborhood.each do |neighbor|
    # A neighbor may already be visited but unassigned: it was examined
    # earlier, was not a core object, and was left without a cluster. It must
    # be stored here; flagging it as assigned without storing it would make it
    # vanish from both #clusters and #outliers.
    unless neighbor.assigned_to_cluster?
      neighbor.assign_to_cluster!
      @clusters[@current_cluster_id] << neighbor
    end
    # elaborate skips its own append for points already flagged as assigned,
    # so the point is stored exactly once.
    elaborate(neighbor) unless neighbor.visited?
  end
end
get_neighborhood(point)
click to toggle source
# File lib/data_mining/dbscan.rb, line 69
# Collects the neighbors of +point+.
#
# Returns a new array with every element of @data for which
# neighbors?(element, point) holds, in @data order. Scans all of @data on
# every call.
def get_neighborhood(point)
  @data.select { |candidate| neighbors?(candidate, point) }
end
neighbors?(p1, p2)
click to toggle source
# File lib/data_mining/dbscan.rb, line 80
# Tells whether two distinct points lie within @radius of each other.
#
# Returns true when +p1+ and +p2+ are not equal and their euclidean_distance
# is at most @radius; a point is never its own neighbor.
#
# Raises ArgumentError when valid_points? rejects the pair.
def neighbors?(p1, p2)
  raise ArgumentError, 'Wrong point coordinates' unless valid_points?(p1, p2)

  # The inequality check comes first so no distance is computed for equal points.
  p1 != p2 && euclidean_distance(p1, p2).abs <= @radius
end
unvisited_points()
click to toggle source
# File lib/data_mining/dbscan.rb, line 43
# Prunes already-visited points from @unvisited_points in place and returns
# that same array, so callers can pop from it directly.
def unvisited_points
  # delete_if always returns the receiver, even when nothing was removed.
  @unvisited_points.delete_if(&:visited?)
end
valid_points?(p1, p2)
click to toggle source
# File lib/data_mining/dbscan.rb, line 86
# Checks that two points can be compared with each other.
#
# Returns true only when both points' +value+ collections have the same
# length and every entry of both is a Numeric; false otherwise.
def valid_points?(p1, p2)
  first = p1.value
  second = p2.value
  return false unless first.length == second.length

  (first + second).all? { |coordinate| coordinate.is_a?(Numeric) }
end