class Evoc::Algorithm
Public Class Methods
cached_rule_range(min,max,tx_store:,query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 39 def self.cached_rule_range(min,max,tx_store:,query:) tag = tx_store.hash+query.hash rule_store = Evoc::RuleStore.new(query: query) if @@rule_range_cache[tag].empty? @@rule_range_cache.clear Evoc::Algorithm.rule_range(min,max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules| @@rule_range_cache[tag][size] = rules end else # calculate missing range missing_range = (min..max).to_a - @@rule_range_cache[tag].keys if !missing_range.empty? Evoc::Algorithm.rule_range(missing_range.min,missing_range.max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules| @@rule_range_cache[tag][size] = rules end end end (min..max).each do |antecedent_size| @@rule_range_cache[tag][antecedent_size].each do |rule| rule_store << rule end end logger.debug "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}" return rule_store end
co_change(tx_store:, query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 226 def self.co_change(tx_store:, query:) self.cached_rule_range(1,1,tx_store: tx_store, query: query) end
execute(tx_store:,query:,algorithm:)
click to toggle source
# File lib/evoc/algorithm.rb, line 6 def self.execute(tx_store:,query:,algorithm:) if algorithm.nil? raise ArgumentError.new, "Algorithm was equal to nil" end # tarmaq2 => call tarmaq(2) if match = /tarmaq(?<num>\d+)/.match(algorithm) Evoc::Algorithm.tarmaq(match[:num].to_i,tx_store:tx_store,query:query) elsif match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(algorithm) Evoc::TopK.topk(k: match[:k].to_i, min: match[:min].to_i, max: match[:max].to_i, tx_store: tx_store, query: query) elsif match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm) Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query) elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm) Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query) elsif Evoc::Algorithm.respond_to?(algorithm) Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query) else raise ArgumentError.new, "#{algorithm} is not an available algorithm" end end
hybrid(tx_store:,query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 141 def self.hybrid(tx_store:,query:) overlaps = self.largest_overlaps(tx_store: tx_store, query: query) if overlaps.empty? return Evoc::RuleStore.new(query: query) # no rules else if overlaps.size == 1 if overlaps.first.size == query.size # execute rose return self.rose(tx_store: tx_store,query: query) else return self.tarmaq(0,tx_store: tx_store,query: overlaps.first) # execute tarmaq end else store = Evoc::RuleStore.new(query: query) overlaps.each do |overlap| if overlap.size == 1 # execute co change part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap) part_store.each {|r| store << r} else # execute tarmaq part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap) part_store.each {|r| store << r} end end store.rules = store.select {|r| !query.include?(r.rhs.first)} return store end end end
largest_overlaps(tx_store:,query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 169 def self.largest_overlaps(tx_store:,query:) largest_match = [] #initial filter, we consider all txes where something in the query changed query_changed_in = tx_store.transactions_of_list(query) # now find what subsets of the query changed in each tx query_changed_in.each do |tx_id| tx = tx_store.get_tx(id:tx_id,id_type: :index) largest_match_in_query = (query & tx.items) match_size = largest_match_in_query.size remainder_in_tx = tx.items - largest_match_in_query if remainder_in_tx.size > 0 if match_size > largest_match.size largest_match = largest_match_in_query end end end if largest_match.empty? #no more matches return [] else query_remainder = query - largest_match return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder) end end
largest_rules(tx_store:,query:)
click to toggle source
Find the largest rules for each unique consequent
# File lib/evoc/algorithm.rb, line 110 def self.largest_rules(tx_store:,query:) #initial filter, we consider all txes where something in the query changed query_changed_in = tx_store.transactions_of_list(query) # now find what subsets of the query changed in each tx rules = Hash.new query_changed_in.each do |tx_id| tx = tx_store.get_tx(id:tx_id,id_type: :index) antecedent = (query & tx.items) consequents = (tx.items - antecedent) if consequents.size != 0 consequents.each do |consequent| if rules[consequent].nil? rules[consequent] = Set.new([antecedent]) # new consequent elsif antecedent.size > rules[consequent].first.size # larger antecedent rules[consequent] = Set.new([antecedent]) elsif antecedent.size == rules[consequent].first.size # equally large antecedent rules[consequent] << antecedent end end end end # now generate rules rule_store = Evoc::RuleStore.new(query: query) rules.each do |consequent,antecedents| antecedents.each do |antecedent| rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store) end end return rule_store end
rose(tx_store:,query:)
click to toggle source
rose
# File lib/evoc/algorithm.rb, line 221 def self.rose(tx_store:,query:) qs = query.size self.cached_rule_range(qs,qs,tx_store: tx_store, query: query) end
rule_cache()
click to toggle source
Targeted Association Rule
Mining implementations
# File lib/evoc/algorithm.rb, line 34 def self.rule_cache return @@rule_range_cache end
rule_range(min,max,tx_store:,query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 65 def self.rule_range(min,max,tx_store:,query:) rule_store = Evoc::RuleStore.new(query: query) unique_rules = Set.new # min must be equal or smaller to max if min <= max # can only create rules where the antecedent is equal to or smaller in size than the query if min <= query.size #initial filter, we consider all txes where something in the query changed query_changed_in = tx_store.transactions_of_list(query) # create rules query_changed_in.each do |tx_id| tx = tx_store.get_tx(id:tx_id,id_type: :index) # the tx size must be larger than the min size # (so that we have items left for the consequent) if (tx.size > min) # CONSTRUCT RULES in the desired range # 1. Find overlap query_overlap = (query & tx.items) consequents = (tx.items - query_overlap) # 2. Ignore exact overlaps (no items left for consequent) if consequents.size != 0 # 3. Construct rules for overlaps that are at least min in size if query_overlap.size >= min # 4. Generate all permutations of query_overlap in the desired range antecedents = (min..max).map {|size| query_overlap.combination(size).to_a}.flatten(1) # 5. Add one rule for each antecedent/consequent combination antecedents.each do |antecedent| consequents.each do |consequent| unique_rules << [antecedent,[consequent]] end end end end end end end end unique_rules.each {|lhs,rhs| rule_store << Evoc::Rule.new(lhs: lhs,rhs: rhs,tx_store:tx_store)} return rule_store end
tarmaq(depth,tx_store:,query:)
click to toggle source
TARMAQ find largest subsets in @query with evidence in @tx_store version
# File lib/evoc/algorithm.rb, line 196 def self.tarmaq(depth,tx_store:,query:) largest_match = 0 #initial filter, we consider all txes where something in the query changed query_changed_in = tx_store.transactions_of_list(query) # now find what subsets of the query changed in each tx query_changed_in.each do |tx_id| tx = tx_store.get_tx(id:tx_id,id_type: :index) largest_match_in_query = (query & tx.items) match_size = largest_match_in_query.size remainder_in_tx = tx.items - largest_match_in_query if remainder_in_tx.size > 0 if match_size > largest_match largest_match = match_size end end end # now generate rules max = largest_match min = ((max - depth) <= 0) ? 1 : max - depth self.cached_rule_range(min,max,tx_store: tx_store,query: query) end
tcharm(tx_store:, query:)
click to toggle source
# File lib/evoc/algorithm.rb, line 230 def self.tcharm(tx_store:, query:) Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query) end