class Apriori::Algorithm
Attributes
min_confidence[R]
min_support[R]
Public Class Methods
new(min_support = 0.15, min_confidence = 0.6)
click to toggle source
# File lib/apriori/algorithm.rb, line 15 def initialize(min_support = 0.15, min_confidence = 0.6) @min_support = min_support @min_confidence = min_confidence end
Public Instance Methods
analyze(transactions)
click to toggle source
transactions: string[]
# File lib/apriori/algorithm.rb, line 21 def analyze(transactions) # AnalysisResult frequencies = {} # {item_set: count} frequent_item_sets = {} # {item_set_size: FrequentItemSet[]} one_element_item_sets = to_one_element_item_sets(transactions) # string[][] one_c_item_sets = find_item_sets_min_support_satisfied( one_element_item_sets, transactions, min_support, frequencies) # FrequentItemSet[] current_l_item_sets = one_c_item_sets # FrequentItemSet[] item_set_size = 1 while current_l_item_sets.length != 0 frequent_item_sets[item_set_size] = current_l_item_sets joined_sets = to_fixed_size_joined_sets(current_l_item_sets.map { |_| _.item_set }, item_set_size + 1) current_l_item_sets = find_item_sets_min_support_satisfied(joined_sets, transactions, min_support, frequencies) item_set_size += 1 end found_sub_sets = [] # string[][] association_rules = [] # AssociationRule[] frequent_item_sets.each do |item_set_size, item_sets| item_sets = item_sets.map { |_| _.item_set } next if item_sets.length == 0 || item_sets[0].length <= 1 item_sets.each do |item_set| to_all_sub_sets(item_set).each do |subset_item_set| diff_item_set = ((item_set - subset_item_set) + (subset_item_set - item_set)).uniq if diff_item_set.length > 0 item_support = calculate_support(item_set, frequencies, transactions) subset_support = calculate_support(subset_item_set, frequencies, transactions) confidence = item_support / subset_support if !is_the_rule_already_found(found_sub_sets, subset_item_set) && confidence >= min_confidence found_sub_sets << subset_item_set association_rules << Apriori::AssociationRule.new(subset_item_set, diff_item_set, confidence) end end end end end Apriori::AnalysisResult.new(frequent_item_sets, association_rules) end
Private Instance Methods
calculate_support(item_set, frequencies, transactions)
click to toggle source
# File lib/apriori/algorithm.rb, line 128 def calculate_support(item_set, frequencies, transactions) frequency = frequencies[item_set] frequency ? (frequency.to_f / transactions.length) : 0 end
find_item_sets_min_support_satisfied(item_sets, transactions, min_support, frequencies)
click to toggle source
# File lib/apriori/algorithm.rb, line 65 def find_item_sets_min_support_satisfied(item_sets, transactions, min_support, frequencies) local_frequencies = {} # {item_set: count} item_sets.each do |item_set| transactions.each do |transaction| # just optimization for performance is_subset = item_set.length == 1 ? transaction.include?(item_set.first) : item_set.to_set.subset?(transaction.to_set) if is_subset frequencies[item_set] = 0 unless frequencies[item_set] local_frequencies[item_set] = 0 unless local_frequencies[item_set] frequencies[item_set] += 1 local_frequencies[item_set] += 1 end end end filtered_item_sets = [] # FrequentItemSet[] local_frequencies.each do |item_set, local_count| support = local_count.to_f / transactions.length if support >= min_support already_added = false filtered_item_sets.each do |frequent_item_set| unless already_added diff_set = ((frequent_item_set.item_set - item_set) + (frequent_item_set.item_set - item_set)).uniq already_added = diff_set.length == 0 end end unless already_added filtered_item_sets << Apriori::FrequentItemSet.new(item_set, support) end end end filtered_item_sets end
is_the_rule_already_found(found_sub_sets, item_set)
click to toggle source
# File lib/apriori/algorithm.rb, line 133 def is_the_rule_already_found(found_sub_sets, item_set) found_sub_sets.any? { |sub_set| sub_set == item_set } end
to_all_sub_sets(item_set)
click to toggle source
# File lib/apriori/algorithm.rb, line 124 def to_all_sub_sets(item_set) (1...(item_set.length)).flat_map { |n| item_set.to_a.combination(n) }.first end
to_fixed_size_joined_sets(item_sets, length)
click to toggle source
# File lib/apriori/algorithm.rb, line 111 def to_fixed_size_joined_sets(item_sets, length) joined_set_array = [] item_sets.each do |item_set_a| item_sets.each do |item_set_b| joined_set = ((item_set_a - item_set_b) + (item_set_b - item_set_a)).uniq if joined_set.length > 0 && joined_set.length == length joined_set_array << joined_set end end end joined_set_array.uniq end
to_one_element_item_sets(transactions)
click to toggle source
# File lib/apriori/algorithm.rb, line 101 def to_one_element_item_sets(transactions) nested_array_of_item = [] transactions.each do |transaction| transaction.reject { |_| _.nil? }.each do |item| nested_array_of_item << [item] end end nested_array_of_item.uniq end