class MemDump::MemoryDump

Constants

COMMON_COLLAPSE_CLASSES
COMMON_COLLAPSE_TYPES

Attributes

address_to_record[R]

Public Class Methods

new(address_to_record) click to toggle source
# File lib/memdump/memory_dump.rb, line 5
# Wrap an already-built mapping of addresses to records.
#
# Both graph caches start out unset.
#
# @param [Hash] address_to_record mapping from a record address to the record
def initialize(address_to_record)
    @forward_graph = @backward_graph = nil
    @address_to_record = address_to_record
end

Public Instance Methods

add_children(roots, with_keepalive_count: false) click to toggle source
# File lib/memdump/memory_dump.rb, line 475
# Build a dump made of the records in +roots+ plus the records they
# directly reference.
#
# References for which #find_by_address returns nothing are skipped.
#
# @param [MemoryDump] roots the records whose children should be added
# @param [Boolean] with_keepalive_count if true, each child is stored as a
#   copy carrying a 'keepalive_count' entry, set to the number of times
#   #depth_first_visit yields when started from that child
# @return [MemoryDump]
def add_children(roots, with_keepalive_count: false)
    expanded = {}
    roots.each_record do |parent|
        expanded[parent['address']] = parent

        parent['references'].each do |child_address|
            child = find_by_address(child_address)
            next unless child

            if with_keepalive_count
                reachable = 0
                depth_first_visit(child_address) { reachable += 1 }
                # annotate a copy so that the record stored in self stays untouched
                child = child.dup
                child['keepalive_count'] = reachable
            end
            expanded[child_address] = child
        end
    end
    MemoryDump.new(expanded)
end
addresses() click to toggle source
# File lib/memdump/memory_dump.rb, line 19
# @return [Array] the keys of {#address_to_record}, i.e. the address of
#   every record in this dump
def addresses
    records = address_to_record
    records.keys
end
clear_graph() click to toggle source

@api private

Force recomputation of the graph representation of the dump the next time it is needed

# File lib/memdump/memory_dump.rb, line 379
# Drop both cached graphs.
#
# #ensure_graphs_computed rebuilds them when the forward graph is unset.
def clear_graph
    @backward_graph = nil
    @forward_graph = nil
end
collapse(entries) click to toggle source

Remove entries from this dump, keeping the transitivity in the remaining graph

@param [MemoryDump] entries entries to remove

@example remove all entries that are of type HASH

collapse(objects_of_type('HASH'))
# File lib/memdump/memory_dump.rb, line 153
# Remove the given entries from this dump, rewiring the remaining records so
# that a record that referred to a removed entry refers to what that entry
# (transitively) referred to instead.
#
# Prints the number of changed entries to standard output after each pass of
# the expansion loop.
#
# NOTE(review): relies on the 'references' values responding to #classify,
# i.e. presumably being Set objects - confirm against the loader.
#
# @param [MemoryDump] entries the entries to remove
# @return the value returned by #find_and_map
def collapse(entries)
    # address of each entry to collapse => private copy of its references
    collapsed_entries = Hash.new
    entries.each_record do |r|
        collapsed_entries[r['address']] = r['references'].dup
    end


    # Remove references in-between the entries to collapse
    #
    # already_expanded[address] is the set of collapsed addresses that have
    # already been substituted into 'address' (seeded with the address
    # itself), so that a given entry is merged into it at most once
    already_expanded = Hash.new { |h, k| h[k] = Set[k] }
    begin
        changed_entries  = Hash.new
        collapsed_entries.each do |address, references|
            # sets[true]: references to other collapsed entries
            # sets[false]: references to entries that are kept
            sets = references.classify { |ref_address| collapsed_entries.has_key?(ref_address) }
            updated_references = sets[false] || Set.new
            if to_collapse = sets[true]
                to_collapse.each do |ref_address|
                    next if already_expanded[address].include?(ref_address)
                    updated_references.merge(collapsed_entries[ref_address])
                end
                already_expanded[address].merge(to_collapse)
                changed_entries[address] = updated_references
            end
        end
        puts "#{changed_entries.size} changed entries"
        collapsed_entries.merge!(changed_entries)
    # Repeat until a pass finds no collapsed entry that still refers to
    # another collapsed entry
    end while !changed_entries.empty?

    find_and_map do |record|
        # Returning nil drops the collapsed entries themselves
        next if collapsed_entries.has_key?(record['address'])

        sets = record['references'].classify do |ref_address|
            collapsed_entries.has_key?(ref_address)
        end
        updated_references = sets[false] || Set.new
        if to_collapse = sets[true]
            # Replace each reference to a collapsed entry by that entry's
            # expanded references
            to_collapse.each do |ref_address|
                updated_references.merge(collapsed_entries[ref_address])
            end
            record = record.dup
            record['references'] = updated_references
        end
        record
    end
end
common_cleanup() click to toggle source

Perform common initial cleanup

It basically removes common classes that usually make a dump analysis more complicated without providing more information

Namely, it collapses internal Ruby node types ROOT and IMEMO, as well as common collection classes {COMMON_COLLAPSE_CLASSES}.

One usually analyses a cleaned-up dump before getting into the full dump

@return [MemoryDump] the filtered dump

# File lib/memdump/memory_dump.rb, line 268
# Remove the records of class 'WeakRef', then collapse every remaining
# record whose class is in COMMON_COLLAPSE_CLASSES, whose type is in
# COMMON_COLLAPSE_TYPES or whose 'method' entry is 'dump_all'.
#
# @return the value returned by #collapse
def common_cleanup
    cleaned = remove(objects_of_class('WeakRef'))
    collapsible = cleaned.find_all do |record|
        next true if COMMON_COLLAPSE_CLASSES.include?(record['class'])
        next true if COMMON_COLLAPSE_TYPES.include?(record['type'])

        record['method'] == 'dump_all'
    end
    cleaned.collapse(collapsible)
end
compute_graphs() click to toggle source

@api private

Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph

# File lib/memdump/memory_dump.rb, line 387
# Build the forward and backward reference graphs of this dump.
#
# The forward graph has one vertex per record address, an edge from each
# record to each address it references, and an extra 'ALL_ROOTS' vertex
# with an edge to every record whose type is 'ROOT'.
#
# The backward graph has the same vertices and every edge reversed. The
# vertices are copied explicitly so that vertices without any edge also
# exist in the backward graph.
#
# @return [(RGL::DirectedAdjacencyGraph,RGL::DirectedAdjacencyGraph)] the
#   forward and backward graphs
def compute_graphs
    forward_graph = RGL::DirectedAdjacencyGraph.new
    forward_graph.add_vertex 'ALL_ROOTS'
    address_to_record.each do |address, record|
        forward_graph.add_vertex(address)

        if record['type'] == 'ROOT'
            forward_graph.add_edge('ALL_ROOTS', address)
        end
        record['references'].each do |ref_address|
            forward_graph.add_edge(address, ref_address)
        end
    end

    backward_graph = RGL::DirectedAdjacencyGraph.new
    # Adding only the reversed edges would leave out isolated vertices
    forward_graph.each_vertex do |vertex|
        backward_graph.add_vertex(vertex)
    end
    forward_graph.each_edge do |u, v|
        backward_graph.add_edge(v, u)
    end
    return forward_graph, backward_graph
end
depth_first_visit(root, &block) click to toggle source
# File lib/memdump/memory_dump.rb, line 408
# Run a depth-first visit on the forward graph, computing it first if
# needed.
#
# @param root the vertex (record address) to start from
# @param block forwarded as-is to the graph's depth_first_visit
# @return the value returned by the graph's depth_first_visit
def depth_first_visit(root, &block)
    ensure_graphs_computed
    graph = @forward_graph
    graph.depth_first_visit(root, &block)
end
diff(to) click to toggle source

Compute the set of records that are not in self but are in to

@param [MemoryDump] to the dump to compare against @return [MemoryDump] the records of to whose address is not in self

# File lib/memdump/memory_dump.rb, line 569
# Compute the records of +to+ whose address is not known to self.
#
# @param [MemoryDump] to the dump to compare against
# @return [MemoryDump] the records that are in +to+ but not in self
def diff(to)
    added = {}
    to.each_record do |record|
        key = record['address']
        added[key] = record unless @address_to_record.include?(key)
    end
    MemoryDump.new(added)
end
dup() click to toggle source
# File lib/memdump/memory_dump.rb, line 496
# Create a new dump that holds every record of this one, by running
# #find_all with a block that accepts everything.
#
# @return [MemoryDump]
def dup
    find_all { |_record| true }
end
each_record(&block) click to toggle source
# File lib/memdump/memory_dump.rb, line 15
# Enumerate the records (the values of {#address_to_record}).
#
# @param block forwarded to Hash#each_value
def each_record(&block)
    records = address_to_record
    records.each_value(&block)
end
ensure_graphs_computed() click to toggle source

@api private

Ensure that @forward_graph and @backward_graph are computed

# File lib/memdump/memory_dump.rb, line 369
# Compute and cache the forward and backward graphs, unless a forward
# graph is already cached.
def ensure_graphs_computed
    return if @forward_graph

    @forward_graph, @backward_graph = compute_graphs
end
find_all() { |r| ... } click to toggle source

Filter the records

@yieldparam record a record @yieldreturn [Boolean] truthy if the record should be included in the

returned dump

@return [MemoryDump]

# File lib/memdump/memory_dump.rb, line 53
# Build a dump with the records for which the block returns a truthy value.
#
# The selected records are stored as-is (not copied), keyed by their
# 'address' entry.
#
# @yieldparam record a record
# @yieldreturn truthy to keep the record
# @return [MemoryDump,Enumerator] an enumerator if no block is given
def find_all
    return enum_for(__method__) unless block_given?

    kept = {}
    each_record do |record|
        next unless yield(record)

        kept[record['address']] = record
    end
    MemoryDump.new(kept)
end
find_and_map() { |dup| ... } click to toggle source

Filter the entries, removing those for which the block returns falsy

@yieldparam record a record @yieldreturn [nil,Object] either a record object, or falsy to remove

this record in the returned dump

@return [MemoryDump]

# File lib/memdump/memory_dump.rb, line 87
# Filter and transform the records in one pass.
#
# The block receives a copy of each record. When it returns a truthy
# value, that value's #to_hash is stored under the address of the original
# record; when it returns a falsy value the record is left out.
#
# @yieldparam record a copy of a record
# @yieldreturn [nil,Object] the replacement record, or falsy to drop it
# @return [MemoryDump,Enumerator] an enumerator if no block is given
def find_and_map
    return enum_for(__method__) unless block_given?

    mapped = {}
    each_record do |record|
        replacement = yield(record.dup)
        mapped[record['address']] = replacement.to_hash if replacement
    end
    MemoryDump.new(mapped)
end
find_by_address(address) click to toggle source
# File lib/memdump/memory_dump.rb, line 27
# Look a record up by address.
#
# @param address the record address
# @return the value stored in {#address_to_record} for this address
def find_by_address(address)
    records = address_to_record
    records[address]
end
group(name, dump, attributes = Hash.new) click to toggle source

Replace all objects in dump by a single “group” object

# File lib/memdump/memory_dump.rb, line 609
# Replace all the records of +dump+ by a single "group" record.
#
# The group record is a copy of +attributes+ whose 'address' is +name+ and
# whose 'references' are the references of the grouped records that point
# outside of the group. In the other records, references to grouped
# records are replaced by a reference to +name+, and 'class_address' /
# 'class' entries that designate a grouped record are replaced by +name+.
#
# @param name the address given to the group record
# @param [MemoryDump] dump the records to group
# @param [Hash] attributes additional entries of the group record
# @return [MemoryDump]
def group(name, dump, attributes = Hash.new)
    members  = Set.new
    outgoing = Set.new
    dump.each_record do |member|
        members << member['address']
        outgoing.merge(member['references'])
    end

    merged = attributes.dup
    merged['address'] = name
    merged['references'] = outgoing - members

    result = { name => merged }
    each_record do |record|
        address = record['address']
        next if members.include?(address)

        rewritten = record.dup
        external = record['references'] - members
        # a size change means that at least one reference pointed into the group
        external << name if external.size != record['references'].size
        rewritten['references'] = external

        %w[class_address class].each do |key|
            rewritten[key] = name if members.include?(rewritten[key])
        end

        result[address] = rewritten
    end

    MemoryDump.new(result)
end
include?(address) click to toggle source
# File lib/memdump/memory_dump.rb, line 11
# @param address a record address
# @return [Boolean] whether {#address_to_record} has an entry for this
#   address
def include?(address)
    address_to_record.key?(address)
end
inspect() click to toggle source
# File lib/memdump/memory_dump.rb, line 31
# Same text as #to_s.
#
# @return the value returned by #to_s
def inspect
    to_s
end
interface_with(dump) click to toggle source

Compute the interface between self and the other dump, that is the elements of self that have a child in dump, and the elements of dump that have a parent in self

# File lib/memdump/memory_dump.rb, line 583
# Compute the border between self and +dump+.
#
# A record of self belongs to the border when it is not itself found in
# +dump+ and at least one of its references is. The referenced records of
# +dump+ are copied into the second returned dump, which is then passed to
# +dump.update_keepalive_count+.
#
# @param [MemoryDump] dump the other dump
# @return [(MemoryDump,MemoryDump)] the border records of self, and
#   the (copied) border records of +dump+
def interface_with(dump)
    parents  = {}
    children = {}
    each_record do |record|
        address = record['address']
        next if dump.find_by_address(address)

        targets = record['references'].map { |ref| dump.find_by_address(ref) }.compact
        next if targets.empty?

        parents[address] = record
        targets.each { |target| children[target['address']] = target.dup }
    end

    self_border = MemoryDump.new(parents)
    dump_border = MemoryDump.new(children)

    dump.update_keepalive_count(dump_border)
    [self_border, dump_border]
end
map() { |dup| ... } click to toggle source

Map the records

@yieldparam record a record @yieldreturn [Object] the record object that should be included in the

returned dump

@return [MemoryDump]

# File lib/memdump/memory_dump.rb, line 71
# Transform every record.
#
# The block receives a copy of each record; the #to_hash of its return
# value is stored under the address of the original record.
#
# @yieldparam record a copy of a record
# @yieldreturn [Object] the record to store in the returned dump
# @return [MemoryDump,Enumerator] an enumerator if no block is given
def map
    return enum_for(__method__) unless block_given?

    mapped = {}
    each_record do |record|
        key = record['address']
        mapped[key] = yield(record.dup).to_hash
    end
    MemoryDump.new(mapped)
end
minimum_spanning_tree(root_dump) click to toggle source
# File lib/memdump/memory_dump.rb, line 345
# Compute a spanning tree of the part of the dump reachable from a root.
#
# Every reference has the same weight, so the breadth-first search tree
# rooted at the root record is used as the spanning tree. The returned dump
# contains a copy of each reachable record, with its references reduced to
# the edges of that tree.
#
# @param [MemoryDump] root_dump a dump holding exactly one record, the root
# @return [MemoryDump]
# @raise [ArgumentError] if root_dump does not hold exactly one record, or
#   if that record's address is not in self
def minimum_spanning_tree(root_dump)
    if root_dump.size != 1
        raise ArgumentError, "there should be exactly one root"
    end
    root_address, _ = root_dump.address_to_record.first
    if !address_to_record[root_address]
        raise ArgumentError, "no record with address #{root_address} in self"
    end

    ensure_graphs_computed

    # The graph vertices are record addresses, not records
    tree = @forward_graph.bfs_search_tree_from(root_address)

    # A root without any reference yields a tree without vertices, so the
    # root is always listed explicitly
    tree_addresses = [root_address]
    tree.each_vertex do |address|
        tree_addresses << address if address != root_address
    end

    result = Hash.new
    tree_addresses.each do |address|
        # Vertices created by dangling references have no record
        record = address_to_record[address]
        next if !record

        record = record.dup
        references = record['references'].dup
        references.delete_if { |ref_address| !tree.has_edge?(address, ref_address) }
        record['references'] = references
        result[address] = record
    end
    MemoryDump.new(result)
end
objects_of_class(name) click to toggle source

Return the records of a given class

@param [String] name the class @return [MemoryDump] the matching entries

@example return all string records

objects_of_class("String")
# File lib/memdump/memory_dump.rb, line 117
# Select the records whose 'class' entry matches +name+.
#
# The match uses +name === value+.
#
# @param name the class to match
# @return the value returned by #find_all
def objects_of_class(name)
    find_all do |record|
        name === record['class']
    end
end
objects_of_type(name) click to toggle source

Return the records of a given type

@param [String] name the type @return [MemoryDump] the matching records

@example return all ICLASS (singleton) records

objects_of_type("ICLASS")
# File lib/memdump/memory_dump.rb, line 106
# Select the records whose 'type' entry matches +name+.
#
# The match uses +name === value+.
#
# @param name the type to match
# @return the value returned by #find_all
def objects_of_type(name)
    find_all do |record|
        name === record['type']
    end
end
parents_of(dump, min: 0, exclude_dump: false) click to toggle source

Return the entries that refer to the entries in the dump

@param [MemoryDump] dump the set of entries whose parents we're looking for @param [Integer] min only return the entries in self that refer to

more than this many entries in 'dump'

@param [Boolean] exclude_dump exclude the entries that are already in

'dump'

@return [(MemoryDump,Hash)] the parent entries, and a mapping from

records in the parent entries to the count of entries in 'dump' they
refer to
# File lib/memdump/memory_dump.rb, line 131
# Find the records of self that refer to records of +dump+.
#
# @param [MemoryDump] dump the entries whose parents are looked for
# @param [Integer] min keep only the records that refer to strictly more
#   than this many entries of +dump+
# @param [Boolean] exclude_dump if true, records whose address is in +dump+
#   are never returned
# @return [(MemoryDump,Hash)] the parents, and a mapping from each parent
#   record to the number of its references that are in +dump+
def parents_of(dump, min: 0, exclude_dump: false)
    targets = dump.addresses.to_set
    counts = {}
    parents = find_all do |record|
        next if exclude_dump && targets.include?(record['address'])

        hits = record['references'].count { |ref| targets.include?(ref) }
        next unless hits > min

        counts[record] = hits
        true
    end
    [parents, counts]
end
remove(objects) click to toggle source

Simply remove the given objects

# File lib/memdump/memory_dump.rb, line 501
# Remove the given objects, as well as every reference to them.
#
# Returns #dup when +objects+ has no address at all.
#
# @param [MemoryDump] objects the records to remove
# @return [MemoryDump]
def remove(objects)
    discarded = objects.addresses.to_set
    return dup if discarded.empty?

    find_and_map do |record|
        # a nil block value drops the record
        next if discarded.include?(record['address'])

        surviving = record['references'].dup
        surviving.delete_if { |ref| discarded.include?(ref) }
        record['references'] = surviving
        record
    end
end
remove_invalid_references() click to toggle source

Remove entries in the reference for which we can't find an object with the matching address

@return [(MemoryDump,Set)] the filtered dump and the set of missing addresses found

# File lib/memdump/memory_dump.rb, line 282
# Drop, in every record, the references that do not match the address of a
# record of this dump.
#
# @return [(MemoryDump,Set)] the filtered dump and the set of referenced
#   addresses that had no matching record
def remove_invalid_references
    known = addresses.to_set
    missing = Set.new
    filtered = map do |record|
        references = record['references']
        valid = known & references
        missing.merge(references - valid) if valid.size != references.size

        cleaned = record.dup
        cleaned['references'] = valid
        cleaned
    end
    [filtered, missing]
end
remove_small_components(max_size: 1) click to toggle source

Remove all components that have at most max_size nodes

It really looks only at the number of nodes reachable from a root (i.e. won't notice if two smaller-than-threshold roots have nodes in common)

# File lib/memdump/memory_dump.rb, line 520
# Remove the components that hold at most +max_size+ records.
#
# A component is the set of records reachable from one root (see
# #root_addresses). Each root is looked at independently, so this does not
# notice when two small components have records in common.
#
# References that do not match any record of this dump are ignored.
#
# @param [Integer] max_size components with at most this many records are
#   removed
# @return the value returned by #without
def remove_small_components(max_size: 1)
    to_remove = Set.new
    root_addresses.each do |root_address|
        component = Set[]
        queue = Set[root_address]
        # Stop exploring as soon as the component is known to be too big
        while !queue.empty? && (component.size <= max_size)
            address = queue.first
            queue.delete(address)
            next if component.include?(address)

            # Dangling reference: there is nothing to count nor to follow
            record = address_to_record[address]
            next if !record

            component << address
            queue.merge(record['references'])
        end

        if component.size <= max_size
            to_remove.merge(component)
        end
    end

    without(find_all { |r| to_remove.include?(r['address']) })
end
replace_class_id_by_class_name(add_reference_to_class: false) click to toggle source
# File lib/memdump/memory_dump.rb, line 653
# Delegate to MemDump.replace_class_address_by_name, applied to self.
#
# @param add_reference_to_class forwarded unchanged to
#   MemDump.replace_class_address_by_name
# @return the value returned by MemDump.replace_class_address_by_name
def replace_class_id_by_class_name(add_reference_to_class: false)
    options = { add_reference_to_class: add_reference_to_class }
    MemDump.replace_class_address_by_name(self, **options)
end
root_addresses() click to toggle source

@api private

Return the set of record addresses that are the addresses of roots in the live graph

@return [Set<String>]

# File lib/memdump/memory_dump.rb, line 451
# Compute the addresses of the records that no record of this dump
# references.
#
# @return [Set] the root addresses
def root_addresses
    candidates = Set.new(addresses)
    each_record { |record| candidates.subtract(record['references']) }
    candidates
end
roots(with_keepalive_count: false) click to toggle source

Returns the set of roots

# File lib/memdump/memory_dump.rb, line 460
# Build a dump holding only the root records (see #root_addresses).
#
# @param [Boolean] with_keepalive_count if true, each root is stored as a
#   copy carrying a 'keepalive_count' entry, set to the number of times
#   #depth_first_visit yields when started from that root
# @return [MemoryDump]
def roots(with_keepalive_count: false)
    selected = {}
    root_addresses.each do |address|
        record = find_by_address(address)
        if with_keepalive_count
            reachable = 0
            depth_first_visit(address) { reachable += 1 }
            # annotate a copy so that the record stored in self stays untouched
            record = record.dup
            record['keepalive_count'] = reachable
        end
        selected[address] = record
    end
    MemoryDump.new(selected)
end
roots_of(dump, root_dump: nil) click to toggle source

Return the graph of object that keeps objects in dump alive

It contains only the shortest paths from the roots to the objects in dump

@param [MemoryDump] dump @return [MemoryDump]

# File lib/memdump/memory_dump.rb, line 304
# Return the graph of objects that keep the objects in +dump+ alive.
#
# For each root, Dijkstra's algorithm is run on the forward graph with a
# weight of 1 on every edge, and the path found from the root to each record
# of +dump+ is added to the result.
#
# @param [MemoryDump] dump the records whose roots are looked for
# @param [MemoryDump,nil] root_dump the records to start from. If nil, the
#   search starts at the 'ALL_ROOTS' vertex of the forward graph
# @return [MemoryDump] the records that are on the selected paths, with
#   their references restricted to addresses that are on those paths
# @raise [ArgumentError] if root_dump is given but holds no record
def roots_of(dump, root_dump: nil)
    # MemoryDump exposes #size, not #empty?
    if root_dump && root_dump.size == 0
        raise ArgumentError, "no roots provided"
    end

    start_addresses =
        if root_dump then root_dump.addresses
        else
            ['ALL_ROOTS']
        end

    ensure_graphs_computed

    result_nodes = Set.new
    dump_addresses = dump.addresses
    start_addresses.each do |root_address|
        visitor = RGL::DijkstraVisitor.new(@forward_graph)
        dijkstra = RGL::DijkstraAlgorithm.new(@forward_graph, Hash.new(1), visitor)
        dijkstra.find_shortest_paths(root_address)
        path_builder = RGL::PathBuilder.new(root_address, visitor.parents_map)

        dump_addresses.each do |record_address|
            if path = path_builder.path(record_address)
                result_nodes.merge(path)
            end
        end
    end

    find_and_map do |record|
        address = record['address']
        next if !result_nodes.include?(address)

        # Prefer records in 'dump' to allow for annotations in the
        # source
        record = dump.find_by_address(address) || record
        record = record.dup
        record['references'] = result_nodes & record['references']
        record
    end
end
sample(ratio) click to toggle source

Get a random sample of the records

The sampling is random, so the returned set might be bigger or smaller than expected. Do not use on small sets.

@param [Float] the ratio of selected samples vs. total samples (0.1

will select approximately 10% of the samples)
# File lib/memdump/memory_dump.rb, line 435
# Pick a random subset of the records.
#
# One random number is drawn per record; the record is kept when that
# number is lower than or equal to +ratio+.
#
# @param [Float] ratio the approximate ratio of records to keep
# @return [MemoryDump]
def sample(ratio)
    picked = {}
    each_record do |record|
        next unless rand <= ratio

        picked[record['address']] = record
    end
    MemoryDump.new(picked)
end
save(io_or_path) click to toggle source
# File lib/memdump/memory_dump.rb, line 35
# Write the dump, one JSON document per record and per line.
#
# @param [#open,String,#puts] io_or_path where to write: an object that
#   responds to #open (e.g. a Pathname), which is opened in 'w' mode; a
#   path given as a string; or an already opened stream that responds to
#   #puts
def save(io_or_path)
    if io_or_path.respond_to?(:open)
        io_or_path.open 'w' do |io|
            save(io)
        end
    elsif io_or_path.respond_to?(:to_str)
        # Plain string paths do not respond to #open and cannot be written
        # to with #puts either
        File.open(io_or_path, 'w') do |io|
            save(io)
        end
    else
        each_record do |r|
            io_or_path.puts JSON.dump(r)
        end
    end
end
size() click to toggle source
# File lib/memdump/memory_dump.rb, line 23
# @return [Integer] the number of entries in {#address_to_record}
def size
    address_to_record.length
end
stats() click to toggle source
# File lib/memdump/memory_dump.rb, line 552
# Count the records per kind.
#
# The kind of a record is the first set entry among 'class', 'type' and
# 'root'.
#
# @return [(Integer,Hash)] the number of records without any of these
#   entries, and a mapping from kind to record count (the mapping returns 0
#   for unknown kinds)
def stats
    unknown = 0
    per_kind = Hash.new(0)
    each_record do |record|
        kind = record['class'] || record['type'] || record['root']
        if kind
            per_kind[kind] += 1
        else
            unknown += 1
        end
    end
    [unknown, per_kind]
end
to_gml(io_or_path) click to toggle source

Write the dump to a GML file that can be loaded by Gephi

@param [Pathname,String,IO] the path or the IO stream into which we should

dump
# File lib/memdump/memory_dump.rb, line 220
# Write the dump in GML format through MemDump.convert_to_gml.
#
# @param [Pathname,String,IO] io_or_path an IO is written to directly;
#   anything else is converted with Pathname() and opened in 'w' mode
# @return [nil]
def to_gml(io_or_path)
    unless io_or_path.kind_of?(IO)
        Pathname(io_or_path).open('w') { |io| to_gml(io) }
        return nil
    end

    MemDump.convert_to_gml(self, io_or_path)
    nil
end
to_s() click to toggle source
# File lib/memdump/memory_dump.rb, line 657
# @return [String] a short description that mentions the number of records
def to_s
    record_count = size
    "#<MemoryDump size=#{record_count}>"
end
update_keepalive_count(dump) click to toggle source
# File lib/memdump/memory_dump.rb, line 643
# Set the 'keepalive_count' entry of each record of +dump+.
#
# The count is the number of times a depth-first visit of the graph of
# self yields when started from the record's address. The records of
# +dump+ are modified in place.
#
# @param [MemoryDump] dump the records to annotate; their addresses are
#   looked up in the graph of self
# @return the value returned by +dump.each_record+
def update_keepalive_count(dump)
    ensure_graphs_computed
    dump.each_record do |record|
        count = 0
        # Visit the graph of self: 'dump' only holds the records to annotate
        depth_first_visit(record['address']) { count += 1 }
        record['keepalive_count'] = count
    end
end
validate_references() click to toggle source

Validate that all reference entries have a matching dump entry

@raise [RuntimeError] if a record references an address that has no matching entry in the dump

# File lib/memdump/memory_dump.rb, line 416
# Check that every reference matches the address of a record of this dump.
#
# @return [nil]
# @raise [RuntimeError] on the first record that has a reference without a
#   matching record
def validate_references
    known = addresses.to_set
    each_record do |record|
        references = record['references']
        valid = known & references
        next if valid.size == references.size

        dangling = references - valid
        raise "#{record} references #{dangling.to_a.sort.join(", ")} which do not exist"
    end
    nil
end
without(entries) click to toggle source

Remove entries from the dump, and all references to them

@param [MemoryDump] the set of entries to remove, as e.g. returned by

{#objects_of_class}

@return [MemoryDump] the filtered dump

# File lib/memdump/memory_dump.rb, line 203
# Remove entries from the dump, as well as every reference to them.
#
# A record is copied only when some of its references get removed; its
# new references are then stored as a Set.
#
# @param [MemoryDump] entries the entries to remove
# @return the value returned by #find_and_map
def without(entries)
    find_and_map do |record|
        next if entries.include?(record['address'])

        original_refs = record['references']
        kept = original_refs.reject { |ref| entries.include?(ref) }
        next record if kept.size == original_refs.size

        pruned = record.dup
        pruned['references'] = kept.to_set
        pruned
    end
end