class Evoc::TxStore
Attributes
Public Class Methods
# File lib/evoc/tx_store.rb, line 10
# Set up the (empty) containers of the store and optionally load a history file.
#
# @param path [String, nil] history file handed to load_transactions; nothing is loaded when nil
# @param case_id [Object, nil] value stored in the class variable @@case_id, but only
#   while that variable is still unset (or falsy)
# @param granularity [String] forwarded unchanged to load_transactions
def initialize(path: nil, case_id: nil, granularity: 'mixed')
  google_hash = Evoc::Env::GOOGLE_HASH

  @txes = []
  @items = google_hash ? GoogleHashSparseIntToRuby.new : {}
  # position of every tx inside @txes, keyed by tx.index
  @tx_index_mapping = google_hash ? GoogleHashSparseIntToInt.new : {}
  # dictionaries translating between item names and their integer ids;
  # filled while transactions are added
  @name_2_int = google_hash ? GoogleHashSparseRubyToInt.new : {}
  @int_2_name = google_hash ? GoogleHashSparseIntToRuby.new : {}

  load_transactions(path: path, granularity: granularity) unless path.nil?

  # class-level state: the first non-falsy case_id wins
  @@case_id ||= case_id
end
Public Instance Methods
self << tx
adds <tx> and updates @items with which items are changed in which tx
# File lib/evoc/tx_store.rb, line 41
# Append a transaction and register it under each of its items.
#
# @param tx [#items] the transaction to add; it gets index `size` when it has none
# @return [Array] the internal list of transactions (the result of `@txes << tx`)
# @raise [Evoc::Exceptions::NotATransaction] when tx does not respond to #items
def <<(tx)
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = size if tx.index.nil?

  # Items are stored as integers internally; translate names on the way in
  # and extend the name <-> integer dictionaries with unseen names.
  unless tx.items.all? { |entry| entry.is_a?(Integer) }
    tx.items = tx.items.map do |name|
      unless name_2_int.key?(name)
        fresh_id = name_2_int.size
        name_2_int[name] = fresh_id
        int_2_name[fresh_id] = name
      end
      name_2_int[name]
    end
  end

  # item -> transactions lookup
  tx.items.each do |item_id|
    if @items.key?(item_id)
      @items[item_id] << tx
    else
      @items[item_id] = [tx]
    end
  end

  # remember at which position of @txes this tx.index lives
  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
end
# File lib/evoc/tx_store.rb, line 98
# Positional access to the stored transactions; the arguments are passed
# straight to the underlying array (index, start/length or range).
def [](*indexes)
  @txes.slice(*indexes)
end
clear out the currently loaded transactions
# File lib/evoc/tx_store.rb, line 112
# Drop all loaded transactions together with the lookups derived from them.
#
# The index -> position mapping is cleared as well: stale entries would
# otherwise point get_tx at whatever transaction later occupies that
# position. The name <-> integer dictionaries are left untouched.
#
# @return the (now empty) item lookup, i.e. the result of `@items.clear`
def clear
  @txes.clear
  @tx_index_mapping.clear
  @items.clear
end
Return a new tx_store containing the transactions with the specified tx indexes
# File lib/evoc/tx_store.rb, line 330
# Build a new store holding only the transactions whose index is listed.
#
# The name <-> integer dictionaries are shared with the new store (as
# clone_with_subset does); the copied transactions already carry integer
# items, so without the dictionaries the subset could not map them back
# to names.
#
# @param tx_indexes [#include?] the tx.index values to keep
# @return [TxStore] the new store
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = name_2_int
  subset.int_2_name = int_2_name
  each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end
get_cloned_subset
Returns a clone of <self> with transactions equal to the index range defined by
from and including <start_index> to and including <stop_index>
also exclude transactions with size larger than <max_size>
# File lib/evoc/tx_store.rb, line 302
# Copy a positional slice of the transactions into a new store that shares
# this store's name <-> integer dictionaries.
#
# @param start_index [Integer, nil] first position (inclusive) in the tx list
# @param stop_index [Integer, nil] last position (inclusive) in the tx list
# @param max_size [Integer, nil] when given, transactions with a larger size are left out
# @return [TxStore] the new store, or self when all three arguments are nil
# @raise [ArgumentError] when only one of the two indexes is given, or when
#   the range cannot be sliced out of the tx list
def clone_with_subset(start_index, stop_index, max_size = nil)
  copy = TxStore.new
  copy.name_2_int = name_2_int
  copy.int_2_name = int_2_name

  # nothing to restrict: hand back the store itself
  return self if [start_index, stop_index, max_size].all?(&:nil?)

  # exactly one bound given
  if start_index.nil? != stop_index.nil?
    raise ArgumentError.new "You must provide both a start and end index"
  end

  window = txes[start_index..stop_index]
  unless window
    raise ArgumentError.new, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{size}"
  end

  kept = max_size.nil? ? window : window.select { |tx| tx.size <= max_size }
  kept.each { |tx| copy << tx }
  copy
end
implementing each
gives us access to all Enumerable methods select, find_all etc
# File lib/evoc/tx_store.rb, line 80
# Iterate over the stored transactions in list order.
#
# Without a block an Enumerator over the transactions is returned (the
# previous implementation fell through to `yield` and raised LocalJumpError).
#
# @yieldparam tx each stored transaction
# @return [Array, Enumerator] the transaction list when a block is given,
#   otherwise an Enumerator
def each(&block)
  @txes.each(&block)
end
# File lib/evoc/tx_store.rb, line 90
# First stored transaction, or the first +count+ transactions when a count
# is given (mirrors Array#first / Enumerable#first, whose count form a
# zero-argument override would otherwise hide).
#
# @param count [Integer] optional number of transactions to return
# @return the first transaction (nil when empty), or an Array when count is given
def first(*count)
  @txes.first(*count)
end
Retrieve a transaction using the given identifier
# File lib/evoc/tx_store.rb, line 121
# Look up a single transaction.
#
# @param id [Integer, Object] the value to look for
# @param id_type [Symbol] :index to resolve +id+ through the index -> position
#   mapping, :id to search for a transaction whose #id equals +id+
# @return the matching transaction
# @raise [ArgumentError] when an :index lookup is given a non-Integer, or
#   when no transaction matches (including an unknown id_type)
def get_tx(id:, id_type: :index)
  found =
    case id_type
    when :index
      # Integer instead of Fixnum: the Fixnum constant no longer exists in current Rubies
      raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)

      position = @tx_index_mapping[id]
      @txes[position] if position
    when :id
      @txes.find { |candidate| candidate.id == id }
    end

  raise ArgumentError, "No transaction with #{id_type} #{id}" if found.nil?

  found
end
# File lib/evoc/tx_store.rb, line 33
# Translate integer item ids into item names through the int_2_name dictionary.
#
# @param ints [Array<Integer>] item ids
# @return [Array] whatever the dictionary holds for each id, in input order
def ints2names(ints)
  dictionary = int_2_name
  ints.map { |item_id| dictionary[item_id] }
end
# File lib/evoc/tx_store.rb, line 94
# Last stored transaction, or the last +count+ transactions when a count is
# given (mirrors Array#last).
#
# @param count [Integer] optional number of transactions to return
# @return the last transaction (nil when empty), or an Array when count is given
def last(*count)
  @txes.last(*count)
end
read in a JSON file of transactions
TRANSACTIONS ARE LOADED IN REVERSE ORDER!
this implies that the oldest transaction gets index 0 in the txes array and the newest has index txes.size-1 (given that the json file is sorted from newest/top to oldest/bottom)
@param [String] path the path to the json history file @param [String] granularity one of 'mixed', 'file', 'method' or 'file_all'
# File lib/evoc/tx_store.rb, line 196
# Read a history file holding one JSON object per line and add a transaction
# for every usable line. Lines are processed bottom-up, so the last line of
# the file is added first.
#
# Lines that cannot be parsed, have no date, have no (non-nil) changes, or
# end up without items for the chosen granularity are skipped with a warning.
#
# @param path [String, nil] file to read ('.gz' files are gunzipped); nothing happens when nil
# @param before [Object, nil] upper date bound handed to valid_date?
# @param after [Object, nil] lower date bound handed to valid_date?
# @param granularity [String, Symbol] 'mixed', 'file', 'method' or 'file_all'
# @return [nil]
# @raise [ArgumentError] when a line with changes is met and granularity is unknown
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  json = nil
  if File.extname(path) == '.gz'
    Zlib::GzipReader.open(path) { |gz| json = gz.read }
  else
    json = File.read(path, external_encoding: 'iso-8859-1', internal_encoding: 'utf-8')
  end
  granularity = granularity.to_s
  STDERR.puts "Loading transactions using strategy: #{granularity}"

  json.lines.reverse_each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      # argument order follows valid_date?(json_object, after, before)
      next unless valid_date?(json_object, after, before)

      sha = json_object["sha"]
      changes = json_object["changes"]
      unless changes
        logger.warn "#{sha} did not have a \"changes\" field"
        next
      end

      changes = changes.compact
      if changes.empty?
        logger.warn "#{sha} \"changes\" field only contained nil value(s)"
        next
      end

      tx = Evoc::Tx.new(id: sha, date: json_object["date"],
                        items: items_for_granularity(changes, granularity))
      if tx.items.empty?
        logger.warn "#{sha} with granularity #{granularity} filtered out all artifacts"
        next
      end

      self << tx
    rescue JSON::ParserError, Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message # skip this line and carry on with the next
    end
  end
  STDERR.puts "Loaded #{size} transactions from #{path}"
end

# Reduce the (nil-free) change list of one commit to the items of the
# requested granularity.
private def items_for_granularity(changes, granularity)
  case granularity
  when 'mixed'
    changes
  when 'file'
    # unique "<file>:" prefixes (the whole regex match, colon included);
    # entries without such a prefix are dropped
    changes.map { |change| /^(?<parsable_file>.+?):/.match(change).to_s }.uniq.reject(&:empty?)
  when 'method'
    # only entries that have a "<file>:" prefix, grouped per prefix
    changes.group_by { |change| /^(?<parsable_file>.+?):/.match(change).to_s }
           .reject { |file, _| file.empty? }
           .values
           .flatten
  when 'file_all'
    # unique file part of every entry, with or without a ":" suffix;
    # entries the pattern cannot match are skipped
    changes.map { |change| /^(?<file>[^:]+?)(?::|\z)/.match(change) }
           .compact
           .map { |match| match[:file] }
           .uniq
  else
    raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
  end
end
# File lib/evoc/tx_store.rb, line 29
# Translate item names into their integer ids through the name_2_int dictionary.
#
# @param names [Array] item names
# @return [Array] whatever the dictionary holds for each name, in input order
def names2ints(names)
  dictionary = name_2_int
  names.map { |name| dictionary[name] }
end
# File lib/evoc/tx_store.rb, line 368
# Emit one CSV row per transaction, going through the tx list back to front.
# Each row holds the transaction's items as stored; rows are written via the
# Kernel#CSV helper (which targets standard output unless told otherwise).
def pretty_print
  back_to_front = txes.reverse
  back_to_front.each do |tx|
    CSV { |csv| csv << tx.items }
  end
end
Return the list of items that have changed with at least one item from the query
# File lib/evoc/tx_store.rb, line 176
# Collect the items that occur, next to at least one query item, in the
# transactions returned by transactions_of_list(query).
#
# @param query [Array] list of items
# @return the union (Array#array_union) of each such transaction's items minus the query
def relevant_unchanged_items(query)
  leftovers = transactions_of_list(query).map do |tx_index|
    get_tx(id: tx_index).items - query
  end
  leftovers.array_union
end
# File lib/evoc/tx_store.rb, line 180
# Number of transactions currently held by the store.
def size
  @txes.length
end
return a (string) json representation of the tx_store
# File lib/evoc/tx_store.rb, line 353
# Serialise the store as a pretty-printed JSON array of commits, ordered by
# descending tx index. Each commit carries "sha", "date" and
# "changes" => {"all" => items}; items are emitted as stored.
#
# The index itself is left out of the output, as there may be "holes" in the
# numbering (after filtering etc).
#
# @param _args ignored; accepted so the method can also be called the way
#   the JSON generator calls #to_json (with a state argument)
# @return [String] the JSON document
def to_json(*_args)
  # keyed by sha: a later transaction with the same id replaces the earlier one
  commits = @txes.each_with_object({}) do |tx, by_sha|
    by_sha[tx.id] = {
      sha: tx.id,
      date: tx.date,
      index: tx.index,
      changes: { all: tx.items.to_a }
    }
  end

  highest_index_first = commits.values.sort_by { |commit| commit[:index] }.reverse
  JSON.pretty_generate(highest_index_first.map { |commit| commit.reject { |key, _| key == :index } })
end
# File lib/evoc/tx_store.rb, line 102
# Text rendering of the store: one line per transaction, tx list walked back
# to front, each line being the comma-separated names (via int_2_name) of
# the transaction's items.
#
# @return [String]
def to_s
  rows = txes.reverse.map do |tx|
    tx.items.map { |item_id| int_2_name[item_id] }.join(',') + "\n"
  end
  rows.join
end
Given an item, find those transactions where the item has been modified parameters: item: the item to check identifier: how to represent the found transactions, either using :index or :id
# File lib/evoc/tx_store.rb, line 144
# Find the transactions in which an item was modified.
#
# @param item the item to look up in the item -> transactions lookup
# @param identifier [Symbol] method called on every found transaction to
#   represent it in the result (:index or :id)
# @return [Array] one identifier per found transaction; empty when the store
#   is empty or the item is unknown
def transactions_of(item, identifier: :index)
  matches = size.zero? || !items.key?(item) ? [] : items[item]
  matches.map(&identifier)
end
Returns the relevant transactions of the query That is: all the transactions where at least one item from the query were changed
parameters: query: a list of items (optional) strict: if set to true, all the items of the query must have been changed in the transaction for it to be included
# File lib/evoc/tx_store.rb, line 166
# Combine the transactions_of results of several items.
#
# @param items [Array] the items to look up
# @param strict [Boolean] true combines the per-item results with
#   Array#array_intersection, false with Array#array_union
# @param identifier [Symbol] passed on to transactions_of
# @return the combined list of transaction identifiers
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map { |item| transactions_of(item, identifier: identifier) }
  strict ? per_item.array_intersection : per_item.array_union
end
a looser version of between? we also allow nil comparisons if both <after> and <before> are nil we consider the date valid
# File lib/evoc/tx_store.rb, line 272
# Check the "date" of a parsed history entry against optional bounds.
#
# @param json_object [Hash] parsed entry; its "date" (and, for the error
#   message, "sha") fields are read
# @param after [Object, nil] lower bound
# @param before [Object, nil] upper bound
# @return [Boolean] true when no bound is given; with both bounds the result
#   of date.between?(after, before); with one bound a strict > / < comparison
# @raise [Evoc::Exceptions::NoDateInJsonObject] when the entry has no date
def valid_date?(json_object, after, before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  lower_given = !after.nil?
  upper_given = !before.nil?

  verdict =
    if lower_given && upper_given
      date.between?(after, before)
    elsif lower_given
      date > after
    elsif upper_given
      date < before
    else
      true
    end

  verdict ? true : false
end
Private Instance Methods
# File lib/evoc/tx_store.rb, line 373
# Parse a date string with Time.parse, reporting failures on $stderr instead
# of raising.
#
# The error text is interpolated into the message: concatenating the
# exception object itself onto a String raises a TypeError inside the
# rescue handler.
#
# @param date [String, nil] the text to parse
# @return [Time, nil] the parsed time; nil when +date+ is nil or cannot be parsed
def parse_date(date)
  return if date.nil?

  Time.parse(date)
rescue TypeError => e
  # something else than a string was given as input
  $stderr.puts "Unable to parse #{date}, error: #{e.message}"
rescue ArgumentError => e
  # the string does not contain a recognisable date
  $stderr.puts "Unable to parse #{date} for a date, error: #{e.message}"
end