class Evoc::TxStore

Attributes

int_2_name[RW]
items[R]
name_2_int[RW]
tx_index_mapping[R]
txes[R]

Public Class Methods

new(path: nil,case_id: nil, granularity: 'mixed') click to toggle source
# File lib/evoc/tx_store.rb, line 10
# Sets up an empty store and, when +path+ is given, loads transactions from it.
#
# Each container is a GoogleHash sparse map when Evoc::Env::GOOGLE_HASH is
# truthy and a plain Hash otherwise.
#
# @param path [String, nil] forwarded to #load_transactions when not nil
# @param case_id [Object, nil] stored in @@case_id unless that is already set
# @param granularity [String] forwarded to #load_transactions
def initialize(path: nil,case_id: nil, granularity: 'mixed')
  use_google_hash = Evoc::Env::GOOGLE_HASH

  # the stored transactions, in insertion order
  @txes = []
  # item -> list of transactions the item occurs in
  @items = use_google_hash ? GoogleHashSparseIntToRuby.new : Hash.new
  # tx.index -> position of that tx inside @txes
  @tx_index_mapping = use_google_hash ? GoogleHashSparseIntToInt.new : Hash.new
  # dictionaries translating item names to their integer ids and back
  @name_2_int = use_google_hash ? GoogleHashSparseRubyToInt.new : Hash.new
  @int_2_name = use_google_hash ? GoogleHashSparseIntToRuby.new : Hash.new

  load_transactions(path: path, granularity: granularity) unless path.nil?

  # class-level state: only the first non-nil/non-false value sticks
  @@case_id ||= case_id
end

Public Instance Methods

<<(tx) click to toggle source

self << tx

adds <tx> and updates @items with which items are changed in which tx

# File lib/evoc/tx_store.rb, line 41
# Appends +tx+ to the store.
#
# - a tx without an index gets the current store size as its index
# - unless every item already is an Integer, the items are replaced by their
#   integer ids; unseen items get the next free id and are recorded in both
#   name dictionaries
# - @items and @tx_index_mapping are updated for the new tx
#
# Raises Evoc::Exceptions::NotATransaction when +tx+ does not respond to #items.
# Returns the internal transaction array.
def << tx
  raise Evoc::Exceptions::NotATransaction.new(tx) unless tx.respond_to?(:items)

  tx.index = self.size if tx.index.nil?

  # translate item names into their integer ids
  unless tx.items.all? {|i| i.is_a?(Integer)}
    tx.items = tx.items.map do |name|
      unless self.name_2_int.key?(name)
        next_id = self.name_2_int.size
        self.name_2_int[name] = next_id
        self.int_2_name[next_id] = name
      end
      self.name_2_int[name]
    end
  end

  # remember, per item, which transactions it was part of
  tx.items.each do |item|
    if @items.key?(item)
      @items[item] << tx
    else
      @items[item] = [tx]
    end
  end

  # remember where in @txes this tx ends up
  @tx_index_mapping[tx.index] = @txes.size
  @txes << tx
end
[](*indexes) click to toggle source
# File lib/evoc/tx_store.rb, line 98
# Element reference on the internal transaction array; all arguments
# (index, start/length or range) are passed through unchanged.
def [](*indexes)
  @txes.slice(*indexes)
end
clear() click to toggle source

clear out the currently loaded transactions

# File lib/evoc/tx_store.rb, line 112
# Removes all loaded transactions.
#
# Besides the transactions and the item -> transactions map, the
# tx.index -> position map is emptied as well; otherwise stale entries would
# make #get_tx resolve an old index to whatever transaction later occupies
# that position. The name dictionaries are left untouched.
#
# Returns the (now empty) item map.
def clear
  @txes.clear
  @tx_index_mapping.clear
  @items.clear
end
clone_by_indexes(tx_indexes) click to toggle source

Return a new tx_store containing the transactions whose index is included in <tx_indexes>

# File lib/evoc/tx_store.rb, line 330
# Returns a new TxStore holding every transaction of this store whose index
# is included in +tx_indexes+.
#
# The name dictionaries are shared with the new store (as #clone_with_subset
# does): the copied transactions already carry integer item ids, so without
# the dictionaries the subset could not translate them back to names.
#
# @param tx_indexes [#include?] the transaction indexes to keep
def clone_by_indexes(tx_indexes)
  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name
  self.each do |tx|
    subset << tx if tx_indexes.include?(tx.index)
  end
  subset
end
clone_with_subset(start_index,stop_index,max_size = nil) click to toggle source

get_cloned_subset

Returns a clone of <self> containing the transactions in the index range from and including <start_index> to and including <stop_index>.

Transactions with a size larger than <max_size> are also excluded.

# File lib/evoc/tx_store.rb, line 302
# Builds a new store from the transactions in start_index..stop_index
# (both ends included), optionally dropping those larger than +max_size+.
# The new store shares this store's name dictionaries.
#
# Returns self when all three arguments are nil.
# Raises ArgumentError when only one of the two indexes is given, or when
# the range does not select anything from the transaction array.
def clone_with_subset(start_index,stop_index,max_size = nil)
  subset = TxStore.new
  subset.name_2_int = self.name_2_int
  subset.int_2_name = self.int_2_name

  # nothing to restrict on
  return self if [start_index, stop_index, max_size].all?(&:nil?)

  # exactly one of the two bounds is missing
  if start_index.nil? != stop_index.nil?
    raise ArgumentError, "You must provide both a start and end index"
  end

  selected = self.txes[start_index..stop_index]
  unless selected
    raise ArgumentError, "#{start_index}..#{stop_index} was not a valid range on tx_store with size #{self.size}"
  end

  selected = selected.select {|tx| tx.size <= max_size} unless max_size.nil?
  selected.each {|tx| subset << tx}
  subset
end
each() { |tx| ... } click to toggle source

implementing each gives us access to all Enumerable methods select, find_all etc

# File lib/evoc/tx_store.rb, line 80
# Yields every stored transaction in insertion order.
#
# With a block, returns the internal transaction array.
# Without a block, returns an Enumerator over the transactions (previously
# this raised LocalJumpError as soon as the store was non-empty).
def each &block
  return enum_for(:each) { @txes.size } unless block_given?

  @txes.each(&block)
end
first() click to toggle source
# File lib/evoc/tx_store.rb, line 90
# The transaction at position 0 of the internal array, or nil when empty.
def first
  @txes[0]
end
get_tx(id:,id_type: :index) click to toggle source

Retrieve a transaction using the given identifier

# File lib/evoc/tx_store.rb, line 121
# Looks up a single transaction.
#
# @param id the identifier to look for
# @param id_type [Symbol] :index resolves +id+ through the
#   tx.index -> position map, :id searches for a transaction whose #id
#   equals +id+
# @return the matching transaction
# @raise [ArgumentError] when +id_type+ is :index and +id+ is not an Integer,
#   or when no transaction matches
def get_tx(id:,id_type: :index)
  found = nil
  case id_type
  when :index
    # Integer instead of Fixnum: Fixnum was removed from Ruby in 3.2
    raise ArgumentError, "Index must be an Integer, #{id} was #{id.class}" unless id.is_a?(Integer)
    position = @tx_index_mapping[id]
    found = @txes[position] if position
  when :id
    found = @txes.find {|candidate| candidate.id == id }
  end
  raise ArgumentError, "No transaction with #{id_type} #{id}" if found.nil?

  found
end
ints2names(ints) click to toggle source
# File lib/evoc/tx_store.rb, line 33
# Translates a list of integer item ids through the int_2_name dictionary.
# The result has one entry per input id, in the same order.
def ints2names(ints)
  dictionary = self.int_2_name
  ints.map do |id|
    dictionary[id]
  end
end
last() click to toggle source
# File lib/evoc/tx_store.rb, line 94
# The transaction at the end of the internal array, or nil when empty.
def last
  @txes[-1]
end
load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed') click to toggle source

read in a JSON file of transactions

TRANSACTIONS ARE LOADED IN REVERSE ORDER!

this implies that the oldest transaction gets index 0 in the txes array and the newest has index txes.size-1 (given that the json file is sorted from newest/top to oldest/bottom)

@param [String] path the path to the json history file @param [String] granularity one of 'mixed', 'file', 'method' or 'file_all'

# File lib/evoc/tx_store.rb, line 196
# Reads a history file with one JSON object per line and adds every usable
# line to the store as a transaction.
#
# The lines are processed bottom-up, so the last line of the file is added
# first. Files with a '.gz' extension are read through Zlib::GzipReader.
# Lines that cannot be parsed, lack a date, lack usable "changes", or end up
# with no items for the chosen granularity are logged and skipped.
#
# @param path [String, nil] the history file; nothing happens when nil
# @param before [Object, nil] passed to #valid_date? as its +before+ bound
# @param after [Object, nil] passed to #valid_date? as its +after+ bound
# @param granularity [String] one of 'mixed', 'file', 'method' or 'file_all'
# @raise [ArgumentError] when a line is reached with an unknown granularity
# @return [nil]
def load_transactions(path: nil, before: nil, after: nil, granularity: 'mixed')
  return if path.nil?

  json = nil
  if File.extname(path) == '.gz'
    Zlib::GzipReader.open(path) {|gz| json = gz.read }
  else
    json = File.read(path,external_encoding: 'iso-8859-1',internal_encoding: 'utf-8')
  end

  STDERR.puts "Loading transactions using strategy: #{granularity}"
  json.lines.reverse.each do |json_line|
    begin
      json_object = JSON.parse(json_line)
      # valid_date? expects (json_object, after, before), in that order
      next unless valid_date?(json_object, after, before)

      # the sha of this line, not a substring lookup on the raw file content
      sha = json_object["sha"]
      date = json_object["date"]

      changes = json_object["changes"]
      unless changes
        logger.warn "#{sha} did not have a \"changes\" field"
        next
      end
      # nil entries never name an artifact, whatever the granularity
      changes = changes.compact
      if changes.empty?
        logger.warn "#{sha} \"changes\" field only contained nil value(s)"
        next
      end

      items =
        case granularity
        when 'mixed'
          changes
        when 'file'
          # the unique "<file>:" prefixes; items without such a prefix are dropped
          changes.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s }.keys.reject(&:empty?)
        when 'method'
          # only the items that have a "<file>:" prefix, grouped per prefix
          changes.group_by {|i| /^(?<parsable_file>.+?):/.match(i).to_s }
                 .select {|prefix, _| !prefix.empty? }
                 .values
                 .flatten
        when 'file_all'
          # the unique file names; items without a file part are skipped
          # rather than aborting the whole load
          changes.map {|i| i[/^([^:]+?)(?::|\z)/, 1] }.compact.uniq
        else
          raise ArgumentError, "Granularity level must be one of 'mixed', 'file', 'method' or 'file_all', was called with #{granularity}"
        end

      tx = Evoc::Tx.new(id: sha, date: date, items: items)
      if tx.items.empty?
        logger.warn "#{sha} with granularity #{granularity} filtered out all artifacts"
        next
      end
      self << tx
    rescue JSON::ParserError => e
      # not a JSON line, move on to the next one
      logger.warn e.message
    rescue Evoc::Exceptions::NoDateInJsonObject => e
      logger.warn e.message
    end
  end
  STDERR.puts "Loaded #{self.size} transactions from #{path}"
end
names2ints(names) click to toggle source
# File lib/evoc/tx_store.rb, line 29
# Translates a list of item names through the name_2_int dictionary.
# The result has one entry per input name, in the same order.
def names2ints(names)
  dictionary = self.name_2_int
  names.map do |name|
    dictionary[name]
  end
end
pretty_print() click to toggle source
# File lib/evoc/tx_store.rb, line 368
# Emits the items of every transaction as one CSV row through Kernel#CSV,
# walking the transactions from the last stored one to the first.
def pretty_print
  last_to_first = self.txes.reverse
  last_to_first.each do |tx|
    CSV do |csv|
      csv << tx.items
    end
  end
end
relevant_unchanged_items(query) click to toggle source

Return the list of items that have changed with at least one item from the query

# File lib/evoc/tx_store.rb, line 176
# For every transaction returned by #transactions_of_list for +query+, takes
# that transaction's items minus the query items, and combines the
# per-transaction lists with Array#array_union.
def relevant_unchanged_items(query)
  tx_indexes = transactions_of_list(query)
  leftovers = tx_indexes.map do |index|
    get_tx(id: index).items - query
  end
  leftovers.array_union
end
size() click to toggle source
# File lib/evoc/tx_store.rb, line 180
# Number of transactions currently held by the store.
def size
  @txes.length
end
to_json() click to toggle source

return a (string) json representation of the tx_store

# File lib/evoc/tx_store.rb, line 353
# Returns a pretty-printed JSON string describing the store: an array with
# one object per transaction id ("sha", "date" and "changes" => "all"),
# ordered from the highest transaction index to the lowest.
#
# The index itself is only used for ordering and is left out of the output,
# as there might be "holes" in the numbering (after filtering etc).
# When several transactions share an id, the last one wins.
#
# Any arguments are accepted and ignored, so the method can also be invoked
# as to_json(state) by the JSON generator when a store is nested inside
# another structure.
def to_json(*_args)
  commits = {}
  self.each do |tx|
    changed = []
    tx.items.each {|item| changed << item}
    commits[tx.id] = [tx.index, { sha: tx.id, date: tx.date, changes: { all: changed } }]
  end
  highest_index_first = commits.values.sort_by {|index, _commit| index}.reverse
  JSON.pretty_generate(highest_index_first.map {|_index, commit| commit})
end
to_s() click to toggle source
# File lib/evoc/tx_store.rb, line 102
# Renders the store as text: one line per transaction, last stored
# transaction first, each line being the comma-separated int_2_name
# translations of the transaction's items.
def to_s
  self.txes.reverse.each_with_object("") do |tx, text|
    names = tx.items.map {|item| self.int_2_name[item] }
    text << names.join(',') + "\n"
  end
end
transactions_of(item, identifier: :index) click to toggle source

Given an item, find the transactions in which the item has been modified. Parameters: item: the item to check; identifier: how to represent the found transactions, either :index or :id

# File lib/evoc/tx_store.rb, line 144
# Returns the transactions +item+ is part of, each one represented by the
# result of calling +identifier+ on it (e.g. :index or :id).
# An empty store or an unknown item yields an empty list.
def transactions_of(item, identifier: :index)
  matching =
    if self.size.zero? || !self.items.key?(item)
      # no transactions at all, or the item was never seen
      []
    else
      self.items[item]
    end
  matching.map {|tx| tx.public_send(identifier) }
end
transactions_of_list(items, strict: false, identifier: :index) click to toggle source

Returns the relevant transactions of the query, that is, all the transactions in which at least one item from the query was changed

Parameters: items: the query, a list of items; strict (optional): if set to true, all the items of the query must have been changed in a transaction for it to be included; identifier (optional): how to represent the found transactions, :index by default

# File lib/evoc/tx_store.rb, line 166
# Collects, for every item in +items+, the result of #transactions_of and
# combines the per-item lists: with Array#array_intersection when +strict+
# is truthy, with Array#array_union otherwise.
def transactions_of_list(items, strict: false, identifier: :index)
  per_item = items.map do |item|
    transactions_of(item, identifier: identifier)
  end
  return per_item.array_intersection if strict

  per_item.array_union
end
valid_date?(json_object,after,before) click to toggle source

A looser version of between?: nil bounds are also allowed. If both <after> and <before> are nil, the date is considered valid.

# File lib/evoc/tx_store.rb, line 272
# Checks the "date" field of +json_object+ against the optional bounds.
#
# - no bounds:    always true
# - both bounds:  date.between?(after, before)
# - only after:   date > after
# - only before:  date < before
#
# Raises Evoc::Exceptions::NoDateInJsonObject when the object has no
# (truthy) "date" field.
def valid_date?(json_object,after,before)
  date = json_object["date"]
  unless date
    raise Evoc::Exceptions::NoDateInJsonObject.new, "#{json_object["sha"]} had no \"date\" field."
  end

  has_lower = !after.nil?
  has_upper = !before.nil?

  within =
    if has_lower && has_upper
      date.between?(after, before)
    elsif has_lower
      date > after
    elsif has_upper
      date < before
    else
      true
    end
  within ? true : false
end

Private Instance Methods

parse_date(date) click to toggle source
# File lib/evoc/tx_store.rb, line 373
# Parses +date+ with Time.parse.
#
# Returns the parsed Time, or nil when +date+ is nil or cannot be parsed.
# Parse failures are reported on $stderr; the message is interpolated, as
# concatenating the exception object onto a String raises a TypeError itself.
def parse_date date
  return if date.nil?

  Time.parse date
rescue TypeError => e
  # something else than string was given as input
  $stderr.puts "Unable to parse #{date}, error: #{e.message}"
rescue ArgumentError => e
  # unable to parse the string for a date
  $stderr.puts "Unable to parse #{date} for a date, error: #{e.message}"
end