class Evoc::Algorithm

Public Class Methods

cached_rule_range(min,max,tx_store:,query:) click to toggle source
# File lib/evoc/algorithm.rb, line 39
def self.cached_rule_range(min,max,tx_store:,query:)
  tag = tx_store.hash+query.hash
  rule_store = Evoc::RuleStore.new(query: query)
  if @@rule_range_cache[tag].empty?
    @@rule_range_cache.clear
    Evoc::Algorithm.rule_range(min,max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
      @@rule_range_cache[tag][size] = rules 
    end
  else
    # calculate missing range
    missing_range = (min..max).to_a - @@rule_range_cache[tag].keys
    if !missing_range.empty?
      Evoc::Algorithm.rule_range(missing_range.min,missing_range.max,tx_store:tx_store,query:query).group_by {|r| r.lhs.size}.each do |size,rules|
        @@rule_range_cache[tag][size] = rules 
      end
    end
  end
  (min..max).each do |antecedent_size|
    @@rule_range_cache[tag][antecedent_size].each do |rule|
      rule_store << rule
    end
  end
  logger.debug  "Algorithm (#{min},#{max}) generated #{rule_store.size} rules for query of size #{query.size}"
  return rule_store
end
co_change(tx_store:, query:) click to toggle source
# File lib/evoc/algorithm.rb, line 226
def self.co_change(tx_store:, query:)
  self.cached_rule_range(1,1,tx_store: tx_store, query: query)
end
execute(tx_store:,query:,algorithm:) click to toggle source
# File lib/evoc/algorithm.rb, line 6
def self.execute(tx_store:,query:,algorithm:)
  if algorithm.nil?
    raise ArgumentError.new, "Algorithm was equal to nil" 
  end
    # tarmaq2 => call tarmaq(2)
    if match = /tarmaq(?<num>\d+)/.match(algorithm)
        Evoc::Algorithm.tarmaq(match[:num].to_i,tx_store:tx_store,query:query)
    elsif match = /topk_(?<k>\d+)_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::TopK.topk(k: match[:k].to_i,
                      min: match[:min].to_i,
                      max: match[:max].to_i,
                      tx_store: tx_store,
                      query: query)
    elsif match = /crule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::Algorithm.cached_rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
    elsif match = /rule_range_(?<min>\d+)_(?<max>\d+)/.match(algorithm)
      Evoc::Algorithm.rule_range(match[:min].to_i,match[:max].to_i,tx_store:tx_store,query:query)
    elsif Evoc::Algorithm.respond_to?(algorithm)
        Evoc::Algorithm.method(algorithm).call(tx_store:tx_store,query:query)
    else raise ArgumentError.new, "#{algorithm} is not an available algorithm"
    end
end
hybrid(tx_store:,query:) click to toggle source
# File lib/evoc/algorithm.rb, line 141
def self.hybrid(tx_store:,query:)
  overlaps = self.largest_overlaps(tx_store: tx_store, query: query)
  if overlaps.empty?
    return Evoc::RuleStore.new(query: query)                             # no rules
  else
    if overlaps.size == 1
      if overlaps.first.size == query.size                               # execute rose
        return self.rose(tx_store: tx_store,query: query)
      else
        return self.tarmaq(0,tx_store: tx_store,query: overlaps.first)   # execute tarmaq
      end
    else
      store = Evoc::RuleStore.new(query: query)
      overlaps.each do |overlap|
        if overlap.size == 1                                             # execute co change
          part_store = Evoc::Algorithm.co_change(tx_store: tx_store, query: overlap)
          part_store.each {|r| store << r}
        else                                                             # execute tarmaq
          part_store = Evoc::Algorithm.tarmaq(0,tx_store: tx_store, query: overlap)
          part_store.each {|r| store << r}
        end
      end
      store.rules = store.select {|r| !query.include?(r.rhs.first)}
      return store
    end
  end
end
largest_overlaps(tx_store:,query:) click to toggle source
# File lib/evoc/algorithm.rb, line 169
def self.largest_overlaps(tx_store:,query:)
    largest_match = []
    #initial filter, we consider all txes where something in the query changed
    query_changed_in = tx_store.transactions_of_list(query)
    # now find what subsets of the query changed in each tx
    query_changed_in.each do |tx_id|
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        largest_match_in_query = (query & tx.items)
        match_size = largest_match_in_query.size
        remainder_in_tx = tx.items - largest_match_in_query
        if remainder_in_tx.size > 0
          if match_size > largest_match.size
            largest_match = largest_match_in_query
          end
        end
    end
    if largest_match.empty? #no more matches
      return []
    else
      query_remainder = query - largest_match
      return [largest_match] + self.largest_overlaps(tx_store: tx_store,query: query_remainder)
    end
end
largest_rules(tx_store:,query:) click to toggle source

Find the largest rules for each unique consequent

# File lib/evoc/algorithm.rb, line 110
def self.largest_rules(tx_store:,query:)
  #initial filter, we consider all txes where something in the query changed
  query_changed_in = tx_store.transactions_of_list(query)
  # now find what subsets of the query changed in each tx
  rules = Hash.new
  query_changed_in.each do |tx_id|
    tx = tx_store.get_tx(id:tx_id,id_type: :index)
    antecedent = (query & tx.items)
    consequents = (tx.items - antecedent)
    if consequents.size != 0
      consequents.each do |consequent|
        if rules[consequent].nil?
          rules[consequent] = Set.new([antecedent])             # new consequent
        elsif antecedent.size > rules[consequent].first.size   # larger antecedent
          rules[consequent] = Set.new([antecedent])
        elsif antecedent.size == rules[consequent].first.size  # equally large antecedent
          rules[consequent] << antecedent
        end
      end
    end
  end
  # now generate rules
  rule_store = Evoc::RuleStore.new(query: query)
  rules.each do |consequent,antecedents|
    antecedents.each do |antecedent|
      rule_store << Evoc::Rule.new(lhs: antecedent,rhs: consequent,tx_store:tx_store)
    end
  end
  return rule_store
end
rose(tx_store:,query:) click to toggle source

rose

# File lib/evoc/algorithm.rb, line 221
def self.rose(tx_store:,query:)
  qs = query.size
  self.cached_rule_range(qs,qs,tx_store: tx_store, query: query)
end
rule_cache() click to toggle source

Targeted Association Rule Mining implementations

# File lib/evoc/algorithm.rb, line 34
def self.rule_cache
  return @@rule_range_cache
end
rule_range(min,max,tx_store:,query:) click to toggle source
# File lib/evoc/algorithm.rb, line 65
def self.rule_range(min,max,tx_store:,query:)
  rule_store = Evoc::RuleStore.new(query: query)
  unique_rules = Set.new
  # min must be equal or smaller to max
  if min <= max
    # can only create rules where the antecedent is equal to or smaller in size than the query
    if min <= query.size
      #initial filter, we consider all txes where something in the query changed
      query_changed_in = tx_store.transactions_of_list(query)
      # create rules
      query_changed_in.each do |tx_id| 
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        # the tx size must be larger than the min size
        # (so that we have items left for the consequent)
        if (tx.size > min) 
          # CONSTRUCT RULES in the desired range

          # 1. Find overlap
          query_overlap = (query & tx.items)
          consequents = (tx.items - query_overlap)
          # 2. Ignore exact overlaps (no items left for consequent)
          if consequents.size != 0
            # 3. Construct rules for overlaps that are at least min in size
            if query_overlap.size >= min
              # 4. Generate all permutations of query_overlap in the desired range
              antecedents = (min..max).map {|size| query_overlap.combination(size).to_a}.flatten(1)
              # 5. Add one rule for each antecedent/consequent combination
              antecedents.each do |antecedent|
                consequents.each do |consequent|
                  unique_rules << [antecedent,[consequent]]
                end
              end
            end
          end
        end
      end
    end
  end
  unique_rules.each {|lhs,rhs| rule_store << Evoc::Rule.new(lhs: lhs,rhs: rhs,tx_store:tx_store)}
  return rule_store
end
tarmaq(depth,tx_store:,query:) click to toggle source

TARMAQ find largest subsets in @query with evidence in @tx_store version

# File lib/evoc/algorithm.rb, line 196
def self.tarmaq(depth,tx_store:,query:)
    largest_match = 0
    #initial filter, we consider all txes where something in the query changed
    query_changed_in = tx_store.transactions_of_list(query)
    # now find what subsets of the query changed in each tx
    query_changed_in.each do |tx_id|
        tx = tx_store.get_tx(id:tx_id,id_type: :index)
        largest_match_in_query = (query & tx.items)
        match_size = largest_match_in_query.size
        remainder_in_tx = tx.items - largest_match_in_query
        if remainder_in_tx.size > 0
          if match_size > largest_match
            largest_match = match_size
          end
        end
    end
    # now generate rules
    max = largest_match
    min = ((max - depth) <= 0) ? 1 : max - depth
    self.cached_rule_range(min,max,tx_store: tx_store,query: query)
end
tcharm(tx_store:, query:) click to toggle source
# File lib/evoc/algorithm.rb, line 230
def self.tcharm(tx_store:, query:)
  Evoc::ClosedRules.closed_rules(tx_store: tx_store,query: query)
end