class Evoc::RuleStore
Attributes
Public Class Methods
# File lib/evoc/rule_store.rb, line 6
# Creates a rule store and assigns its three attributes through their writers.
#
# @param rules [Array] the initial rules; a fresh empty array when omitted
# @param query [Object, nil] handed to the `query=` writer unchanged
# @param aggregator [Object, nil] handed to the `aggregator=` writer unchanged
def initialize(rules = [], query: nil, aggregator: nil)
  self.rules      = rules
  self.query      = query
  self.aggregator = aggregator
end
CLASS METHODS
# File lib/evoc/rule_store.rb, line 16
# Builds a rule store from a CSV file that has a header row.
#
# Each row becomes a Hash with symbolized keys; every value except :lhs and
# :rhs is converted with Evoc::InterestingnessMeasures::VALUE_TYPE before the
# Hash is handed to Evoc::Rule.new.
#
# @param path_to_rules [String] path of the CSV file to read
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_file(path_to_rules)
  store = Evoc::RuleStore.new
  CSV.foreach(path_to_rules, headers: true) do |csv_row|
    attributes = csv_row.to_h.symbolize_keys.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(attributes)
  end
  store
end
# File lib/evoc/rule_store.rb, line 26
# Builds a rule store from CSV text that has a header row.
#
# Each row becomes a Hash with symbolized keys; every value except :lhs and
# :rhs is converted with Evoc::InterestingnessMeasures::VALUE_TYPE before the
# Hash is handed to Evoc::Rule.new.
#
# @param string [String] the CSV document
# @return [Evoc::RuleStore] a new store holding one rule per CSV row
def self.parse_string(string)
  store = Evoc::RuleStore.new
  CSV.parse(string, headers: true) do |csv_row|
    attributes = csv_row.to_h.symbolize_keys.convert_values(
      except: [:lhs, :rhs],
      converter: Evoc::InterestingnessMeasures::VALUE_TYPE
    )
    store << Evoc::Rule.new(attributes)
  end
  store
end
# File lib/evoc/rule_store.rb, line 197
# Sorts the given rules on one or more measures.
#
# The sort key of a rule is an array with one entry per measure: the negated
# measure, or Float::INFINITY when the measure's value is nil. Sorting is
# ascending on that key, so larger measures come first and nil-valued rules
# come last.
# NOTE(review): relies on the measure object supporting unary minus — confirm.
#
# @param rules [#sort_by] the rules to sort
# @param measures [Array] the measures to sort by, most significant first
# @return [Array] the rules in sorted order
def self.sort_on(rules:, measures:)
  rules.sort_by do |rule|
    measures.map do |measure|
      if rule.get_measure(measure).value.nil?
        Float::INFINITY
      else
        -rule.get_measure(measure)
      end
    end
  end
end
Public Instance Methods
# File lib/evoc/rule_store.rb, line 250
# Appends a rule to the store.
#
# @param rule [Object] the rule to add
# @return [Array] the underlying rules collection (not the store itself)
def <<(rule)
  self.rules.push(rule)
end
# File lib/evoc/rule_store.rb, line 329
# Compares two stores by content.
#
# Each store is reduced to a list of strings, one per rule, made of the rule
# name followed by the values of its instantiated measures. The rules are
# ordered by name first, so insertion order does not matter.
#
# @param other [Object] the object to compare with
# @return [Boolean] true when both stores produce the same list; false when
#   they differ or when `other` does not expose `rules`
def ==(other)
  # Comparing with nil or an unrelated object must answer false, not raise.
  return false unless other.respond_to?(:rules)

  fingerprint = lambda do |store|
    store.rules.sort_by { |r| r.name }.map do |r|
      "#{r.name}#{r.instantiated_measures.map { |m| r.get_measure(m).value }}"
    end
  end
  fingerprint.call(self) == fingerprint.call(other)
end
# File lib/evoc/rule_store.rb, line 51
# Reads from the underlying rules collection.
#
# @param index [Object] forwarded unchanged to `@rules[]`
# @return [Object, nil] whatever `@rules[index]` returns
def [](index)
  @rules[index]
end
# File lib/evoc/rule_store.rb, line 55
# Writes into the underlying rules collection.
#
# @param index [Object] forwarded unchanged to `@rules[]=`
# @param value [Object] the rule to store at that position
# @return [Object] the assigned value
def []=(index, value)
  @rules[index] = value
end
Aggregates the current set of rules using the given aggregator over the rule clusters specified by the given block
@param aggregator [Symbol] the name of a defined aggregator function @param measures [Array<String>] the measures to aggregate @param block [Proc] defines the rule clusters which should be aggregated
# File lib/evoc/rule_store.rb, line 88
# Aggregates the rules of this store cluster by cluster.
#
# The block assigns every rule to a cluster (see #group_by). A cluster with
# more than one rule is replaced by a single Evoc::HyperRule built from it;
# a cluster with exactly one rule contributes that rule unchanged.
#
# @param aggregator [Symbol] passed to Evoc::HyperRule.new and to the new store
# @param measures [Array] the measures passed to Evoc::HyperRule.new
# @yield [rule] returns the cluster key of the rule
# @return [Evoc::RuleStore] a new store sharing this store's query
def aggregate_by(aggregator:, measures:, &block)
  clusters = group_by(&block)
  aggregated_store = Evoc::RuleStore.new(query: self.query, aggregator: aggregator)
  clusters.each_value do |members|
    # only clusters with several members are merged into a hyper rule
    aggregated_store << if members.size > 1
                          Evoc::HyperRule.new(members, aggregator, measures)
                        else
                          members.first
                        end
  end
  aggregated_store
end
Calculates the requested measures on the current rule set @param measures [Array<Symbol>] the set of measures to calculate
# File lib/evoc/rule_store.rb, line 70
# Requests every listed measure from every rule in the store.
#
# The return values of `get_measure` are discarded here.
# NOTE(review): presumably `get_measure` computes and caches the measure on
# the rule — confirm.
#
# @param measures [Array<Symbol>] the measures to request
# @raise [ArgumentError] when `measures` is nil
def calculate_measures(measures)
  raise ArgumentError, "Tried to calculate measures, but list of measures was 'nil'" if measures.nil?

  self.each do |rule|
    measures.each { |measure| rule.get_measure(measure) }
  end
end
# File lib/evoc/rule_store.rb, line 343
# Removes every rule from the store.
#
# @return [Object] whatever `rules.clear` returns
def clear
  current_rules = self.rules
  current_rules.clear
end
generate an array suitable for a csv header
# File lib/evoc/rule_store.rb, line 367
# Generates an array suitable for a CSV header.
#
# @return [Array] the keys of #instance_values_for_csv
def csv_header
  exported = self.instance_values_for_csv
  exported.keys
end
required by Enumerable
# File lib/evoc/rule_store.rb, line 41
# Iterates over the rules of the store (required by Enumerable).
#
# Without a block an Enumerator is returned. Previously the no-block branch
# tried to `yield`, which raises LocalJumpError whenever the store is non-empty.
#
# @yield [rule] each rule, in storage order
# @return [Array] the underlying rules collection when a block is given
# @return [Enumerator] when no block is given
def each(&block)
  return enum_for(:each) { @rules.size } unless block_given?

  @rules.each(&block)
end
# File lib/evoc/rule_store.rb, line 339
# Tells whether the store holds no rules.
#
# @return [Boolean] the result of `rules.empty?`
def empty?
  current_rules = self.rules
  current_rules.empty?
end
Evaluate this recommendation using the given evaluators
Note that the hyper coefficient is added as a last tie breaker for aggregation functions called with 'aggregator_hc' Not pretty, sorry..
@param [Array] evaluators the names of the Evoc::Evaluate methods to use for evaluating @param [Array] expected_outcome the list of items to evaluate against @param [Array] measure_combination the list of measures used to first sort the recommendation
# File lib/evoc/rule_store.rb, line 157
# Evaluates this recommendation with each of the given evaluators.
#
# The rules are first converted with #evaluation_format (ranked on
# `measure_combination`, optionally cut to `topk`). Each evaluator is then
# looked up as a method on Evoc::Evaluate and called with the subset of
# `rec:` (the formatted recommendation), `exp:` (size of the expected
# outcome) and `rules:` (this store) that its signature names.
#
# @param evaluators [Array] names of Evoc::Evaluate methods to run
# @param expected_outcome [Array] the list of items to evaluate against
# @param measure_combination [Array] the measures used to rank the rules
# @param topk [Integer, nil] forwarded to #evaluation_format
# @param unique_consequents [Object, nil] accepted but not used by this method
# @return [Hash] evaluator => { 'value' => result, 'time' => seconds }
# @raise [ArgumentError] when `measure_combination` is empty
# @raise [NoMethodError] when Evoc::Evaluate does not respond to an evaluator
def evaluate_with(evaluators:, expected_outcome:, measure_combination:, topk: nil, unique_consequents: nil)
  if measure_combination.empty?
    raise ArgumentError, "Cannot evalute a recommendation without specifying which measures to rank on"
  end
  logger.debug "#{__method__} params: evaluators: #{evaluators}, measure_combination: #{measure_combination}"

  # groups of 0/1 flags (1 = correct item), one group per run of equally ranked rules
  recommendation = self.evaluation_format(measures: measure_combination,
                                          expected_outcome: expected_outcome,
                                          topk: topk)
  potential_params = { rec: recommendation, exp: expected_outcome.size, rules: self }

  results = {}
  evaluators.each do |evaluator|
    t1 = Time.new
    unless Evoc::Evaluate.respond_to?(evaluator)
      raise NoMethodError, "The evaluator you requested (#{evaluator}) has not been implemented in Evoc::Evaluate"
    end

    evaluator_method = Evoc::Evaluate.method(evaluator)
    # `parameters` yields [kind, name] pairs; only the names matter here
    accepted_names = evaluator_method.parameters.map { |_kind, name| name }
    params = potential_params.select { |key, _value| accepted_names.include?(key) }

    results[evaluator] = {}
    # Splat explicitly: since Ruby 3 a positional Hash is no longer converted
    # into keyword arguments.
    results[evaluator]['value'] = evaluator_method.call(**params)
    t2 = Time.new
    results[evaluator]['time'] = TimeDifference.between(t1, t2).in_seconds.round(8)
  end
  results
end
Needed by the Evaluate mixin
# File lib/evoc/rule_store.rb, line 120
# Converts the store into the nested 0/1 list used for evaluation.
#
# The rules are reduced with #unique_by on the first measure, ordered with
# #sort_on and cut to `topk`. Each remaining rule is flagged 1 when every
# item of its rhs is in `expected_outcome`, otherwise 0. Consecutive rules
# whose measure values render to the same tag are placed in one group.
#
# @param measures [Array] the measures to rank on
# @param expected_outcome [Array] the items counted as correct
# @param topk [Integer, nil] number of ranked rules to keep; all when nil
# @return [Array<Array<Integer>>] groups of 0/1 flags in rank order
def evaluation_format(measures:, expected_outcome:, topk: nil)
  limit = topk.nil? ? self.size : topk
  ranked = self.sort_on(measures: measures, rules: self.unique_by(measures.first)).take(limit)

  tagged_hits = ranked.map do |rule|
    hit = (rule.rhs - expected_outcome).empty? ? 1 : 0
    # nil-valued measures are rendered as "INF" so such rules tie with each other
    tag = measures.map do |m|
      rule.get_measure(m).value.nil? ? "INF" : rule.get_measure(m).to_s
    end.join('_')
    [tag, hit]
  end

  tagged_hits.chunk { |tag, _hit| tag }.map do |_tag, members|
    members.map { |_member_tag, hit| hit }
  end
end
@return [True/False/Nil] if the lhs of one of the rules is equal to the query
# File lib/evoc/rule_store.rb, line 233
# Checks whether some rule's lhs equals the query, ignoring item order.
#
# @return [Boolean] true when the sorted lhs of at least one rule equals the
#   sorted query, false when none does
# @return [nil] when the query is nil (a debug message is logged)
def exact_match
  if self.query.nil?
    logger.debug "Tried to calculate exact match, but query was nil "
    return nil
  end

  self.each do |rule|
    return true if rule.lhs.sort == self.query.sort
  end
  false
end
# File lib/evoc/rule_store.rb, line 59
# Groups the rules of the store by the value the block returns for each.
#
# The returned Hash keeps its default block, so reading a missing key
# creates and returns an empty array for that key.
#
# @yield [rule] returns the grouping key of the rule
# @return [Hash{Object => Array}] rules per key, in iteration order
def group_by(&block)
  clusters = Hash.new { |hash, key| hash[key] = [] }
  each { |rule| clusters[block.call(rule)] << rule }
  clusters
end
@return the hyper rules in the store
# File lib/evoc/rule_store.rb, line 114
# Selects the hyper rules held by the store.
#
# @return [Array] the rules that are instances of Evoc::HyperRule
def hyper_rules
  self.select do |rule|
    rule.is_a?(Evoc::HyperRule)
  end
end
# File lib/evoc/rule_store.rb, line 360
# Returns the instance values of the store that belong in a CSV export.
#
# Starts from `instance_values` and drops the 'rules' entry.
# NOTE(review): `instance_values` is not defined in the visible code;
# presumably the ActiveSupport extension that returns a fresh Hash keyed by
# instance variable name without the '@' — confirm.
#
# @return [Hash] the instance values without the 'rules' entry
def instance_values_for_csv
  excluded = ['rules']
  self.instance_values.reject { |name, _value| excluded.include?(name) }
end
@return [Integer] the size of the largest rule, measured by antecedent size
# File lib/evoc/rule_store.rb, line 227
# Finds the largest antecedent (lhs) size among the rules.
#
# @return [Integer, nil] the maximum `lhs.size`, or nil when there are no rules
def largest_antecedent
  antecedent_sizes = self.map { |rule| rule.lhs.size }
  antecedent_sizes.max
end
@return the number of hyper rules in this store
# File lib/evoc/rule_store.rb, line 108
# Counts the hyper rules in the store.
#
# @return [Integer] the size of #hyper_rules
def number_of_hyper_rules
  found = self.hyper_rules
  found.size
end
# File lib/evoc/rule_store.rb, line 254
# Renders the store as CSV text with one row per rule.
#
# The header is 'rule' followed by the union of instantiated measures, taken
# from all rules when no aggregator is set and from the hyper rules only
# otherwise. Each row holds the rule name and its value for every header
# measure.
#
# @return [String] the generated CSV document
def pretty_print
  CSV.generate do |csv|
    measure_source = aggregator.nil? ? self : self.hyper_rules
    defined_measures = measure_source.map { |r| r.instantiated_measures }.array_union

    # write header
    csv << ['rule'] + defined_measures

    self.each do |rule|
      row = CSV::Row.new([], [], false)
      row << rule.name
      defined_measures.each { |m| row << rule.get_measure(m).value }
      csv << row
    end
  end
end
# File lib/evoc/rule_store.rb, line 291
# Writes the store as CSV through `CSV { ... }`, which targets standard
# output when called without arguments.
#
# The header row is always written; measure names lose a leading "m_".
# Rules are written in #sort_on order. The lhs and rhs items are translated
# through the `int_2_name` mapping of the first rule's tx_store, and measures
# that are not instantiated on a rule are written as nil.
#
# @param measures [Array] the measures to include; defaults to Evoc::Rule.measures
# @return [nil]
def print(measures = Evoc::Rule.measures)
  CSV do |out|
    out << ['lhs', 'rhs'] + measures.map { |m| m.to_s.gsub(/^m_/, '') }
  end
  return nil unless self.size > 0

  name_mapping = self.first.tx_store.int_2_name
  self.sort_on(measures: measures).each do |rule|
    row = CSV::Row.new([], [], false)
    row << rule.lhs.map { |item| name_mapping[item] }.join(',')
    row << rule.rhs.map { |item| name_mapping[item] }.join(',')
    measures.each do |m|
      row << (rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil)
    end
    CSV { |out| out << row }
  end
  nil
end
Print the current rule set to a csv file @param measures [Array<String>] the measures to include in output. Default is all measures. @param file [String] the file to write to.
# File lib/evoc/rule_store.rb, line 313
# Writes the current rule set to a CSV file.
#
# The header is 'lhs', 'rhs' and the measures as given. Each rule row holds
# the comma-joined lhs, the comma-joined rhs and one value per measure (nil
# when the measure is not instantiated on that rule).
#
# @param measures [Array] the measures to include; defaults to Evoc::Rule.measures
# @param file [String] the file to write to (opened in "wb" mode)
def print_to_file(measures: Evoc::Rule.measures, file:)
  CSV.open(file, "wb") do |csv|
    # write header
    csv << ['lhs', 'rhs'] + measures

    self.each do |rule|
      row = CSV::Row.new([], [], false)
      row << rule.lhs.join(',')
      row << rule.rhs.join(',')
      measures.each do |m|
        cell = rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil
        row << cell
      end
      csv << row
    end
  end
end
# File lib/evoc/rule_store.rb, line 335
# Counts the rules in the store.
#
# @return [Integer] the result of `rules.size`
def size
  current_rules = self.rules
  current_rules.size
end
Sort rules on one or more measures If a measure is undefined/nil for a rule, we treat it as -infinity for purposes of sorting @param: [Array<String>] measures the list of measures to sort by
# File lib/evoc/rule_store.rb, line 193
# Sorts rules on one or more measures.
#
# The sort key of a rule is an array with one entry per measure: the negated
# measure, or Float::INFINITY when the measure's value is nil. Sorting is
# ascending on that key, so larger measures come first and nil-valued rules
# come last.
# NOTE(review): relies on the measure object supporting unary minus — confirm.
#
# @param rules [#sort_by] the rules to sort; defaults to this store
# @param measures [Array] the measures to sort by, most significant first
# @return [Array] the rules in sorted order
def sort_on(rules: self, measures:)
  rules.sort_by do |rule|
    measures.map do |measure|
      if rule.get_measure(measure).value.nil?
        Float::INFINITY
      else
        -rule.get_measure(measure)
      end
    end
  end
end
generate an array of the current values of <self> converts any array values to a comma separated string representation
# File lib/evoc/rule_store.rb, line 374
# Generates an array of the current values of the store for a CSV row.
#
# Array values are turned into a comma separated string; every other value
# is passed through unchanged.
#
# @return [Array] one entry per value of #instance_values_for_csv
def to_csv_row
  self.instance_values_for_csv.values.map do |val|
    if val.is_a?(Array)
      val.join(',')
    else
      val
    end
  end
end
# File lib/evoc/rule_store.rb, line 347
# Converts the rules of the store into plain hashes.
#
# Each rule becomes a Hash with :lhs, :rhs and one entry per instantiated
# measure whose value is converted with `to_r`.
#
# @return [Array<Hash>] one Hash per rule
# @return [Hash] an empty Hash when `rules` is nil
def to_h
  return {} if self.rules.nil?

  self.rules.map do |rule|
    summary = { lhs: rule.lhs, rhs: rule.rhs }
    rule.instantiated_measures.each do |m|
      summary[m] = rule.get_measure(m).value.to_r
    end
    summary
  end
end
# File lib/evoc/rule_store.rb, line 275
# Renders the store as CSV text.
#
# The header is 'lhs', 'rhs' and every measure in Evoc::Rule.measures. The
# lhs and rhs are comma-joined when they respond to `join` and written as-is
# otherwise. Measures not instantiated on a rule are written as nil.
#
# @return [String] the generated CSV document
def to_s
  CSV.generate do |csv|
    # write header
    csv << ['lhs', 'rhs'] + Evoc::Rule.measures

    self.each do |rule|
      row = CSV::Row.new([], [], false)
      [rule.lhs, rule.rhs].each do |side|
        row << (side.respond_to?(:join) ? side.join(',') : side)
      end
      Evoc::Rule.measures.each do |m|
        row << (rule.measure_instantiated?(m) ? rule.get_measure(m).value : nil)
      end
      csv << row
    end
  end
end
returns the set of unique consequents where each consequent is the strongest given by the input measure
@param: [String] measure the measure used to find the strongest rules
# File lib/evoc/rule_store.rb, line 206
# Keeps, for every consequent, only the rule that is strongest on the measure.
#
# The consequent key is the first item of a rule's rhs. Rules whose measure
# value is nil are ignored. A later rule replaces the stored one only when
# its value is strictly greater, so the first rule wins ties.
#
# @param measure [String] the measure used to find the strongest rules
# @param rules [#each] the rules to reduce; defaults to this store
# @return [Array] the selected rules, one per consequent
def unique_by(measure, rules: self)
  strongest = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseIntToRuby.new : Hash.new
  rules.each do |rule|
    strength = rule.get_measure(measure).value
    next if strength.nil?

    consequent = rule.rhs.first
    entry = strongest[consequent]
    if entry.nil?
      # first rule seen for this consequent
      entry = Evoc::Env::GOOGLE_HASH ? GoogleHashSparseRubyToRuby.new : Hash.new
      entry[:value] = strength
      entry[:rule] = rule
      strongest[consequent] = entry
    elsif strength > entry[:value]
      entry[:value] = strength
      entry[:rule] = rule
    end
  end
  strongest.values.map { |entry| entry[:rule] }
end