class MemDump::MemoryDump
Constants
- COMMON_COLLAPSE_CLASSES
- COMMON_COLLAPSE_TYPES
Attributes
Public Class Methods
# File lib/memdump/memory_dump.rb, line 5
# Wrap a mapping from record address to record.
#
# @param address_to_record the address => record mapping backing this dump
def initialize(address_to_record)
  # The graph views start out unset (nil).
  @forward_graph = @backward_graph = nil
  @address_to_record = address_to_record
end
Public Instance Methods
# File lib/memdump/memory_dump.rb, line 475
# Build a dump made of the given roots plus the records of self that they
# directly reference.
#
# @param roots a dump-like object (must respond to each_record)
# @param with_keepalive_count [Boolean] when true, each referenced record is
#   copied and annotated with 'keepalive_count', the number of vertices
#   visited by a depth-first traversal of self starting at that record
# @return [MemoryDump]
def add_children(roots, with_keepalive_count: false)
  collected = {}
  roots.each_record do |parent|
    collected[parent['address']] = parent
    parent['references'].each do |child_address|
      child = find_by_address(child_address)
      # References without a matching record in self are ignored
      next unless child

      if with_keepalive_count
        reachable = 0
        depth_first_visit(child_address) { reachable += 1 }
        # Annotate a copy so that the record stored in self is left untouched
        child = child.dup
        child['keepalive_count'] = reachable
      end
      collected[child_address] = child
    end
  end
  MemoryDump.new(collected)
end
# File lib/memdump/memory_dump.rb, line 19
# List the addresses of all the records in this dump.
#
# @return [Array] the keys of the address => record mapping
def addresses
  records_by_address = address_to_record
  records_by_address.keys
end
@api private
Force recomputation of the graph representation of the dump the next time it is needed
# File lib/memdump/memory_dump.rb, line 379
# Drop both cached graph views by resetting them to nil.
#
# @return [nil]
def clear_graph
  @forward_graph = @backward_graph = nil
end
Remove entries from this dump, keeping the transitivity in the remaining graph
@param [MemoryDump] entries entries to remove
@example remove all entries that are of type HASH
collapse(objects_of_type('HASH'))
# File lib/memdump/memory_dump.rb, line 153
# Remove the given entries from the dump while keeping reachability: a record
# that referenced a removed entry ends up referencing what that entry
# referenced instead.
#
# NOTE(review): 'references' is used through #classify and #merge, so it is
# presumably a Set -- confirm against the code that loads records.
#
# @param entries [MemoryDump] the entries to remove
# @return [MemoryDump] the dump without the collapsed entries
def collapse(entries)
  collapsed = {}
  entries.each_record do |entry|
    collapsed[entry['address']] = entry['references'].dup
  end

  # First pass: expand references between collapsed entries until no
  # collapsed entry refers to another one. 'expanded' remembers what was
  # already substituted for each entry so that cycles terminate.
  expanded = Hash.new { |h, k| h[k] = Set[k] }
  loop do
    changed = {}
    collapsed.each do |address, references|
      by_membership = references.classify { |ref| collapsed.has_key?(ref) }
      kept = by_membership[false] || Set.new
      inner = by_membership[true]
      next unless inner

      inner.each do |ref|
        kept.merge(collapsed[ref]) unless expanded[address].include?(ref)
      end
      expanded[address].merge(inner)
      changed[address] = kept
    end
    puts "#{changed.size} changed entries"
    collapsed.merge!(changed)
    break if changed.empty?
  end

  # Second pass: drop the collapsed entries and rewrite the references of the
  # remaining records
  find_and_map do |record|
    next if collapsed.has_key?(record['address'])

    by_membership = record['references'].classify { |ref| collapsed.has_key?(ref) }
    inner = by_membership[true]
    if inner
      kept = by_membership[false] || Set.new
      inner.each { |ref| kept.merge(collapsed[ref]) }
      record = record.dup
      record['references'] = kept
    end
    record
  end
end
Perform common initial cleanup
It basically removes common classes that usually make a dump analysis more complicated without providing more information
Namely, it collapses internal Ruby node types ROOT and IMEMO, as well as common collection classes {COMMON_COLLAPSE_CLASSES}.
One usually analyses a cleaned-up dump before getting into the full dump
@return [MemoryDump] the filtered dump
# File lib/memdump/memory_dump.rb, line 268
# Remove the WeakRef objects, then collapse the records whose class is in
# COMMON_COLLAPSE_CLASSES, whose type is in COMMON_COLLAPSE_TYPES, or whose
# 'method' field is 'dump_all'.
#
# @return [MemoryDump] the filtered dump
def common_cleanup
  pruned = remove(objects_of_class('WeakRef'))
  collapsible = pruned.find_all do |record|
    COMMON_COLLAPSE_CLASSES.include?(record['class']) ||
      COMMON_COLLAPSE_TYPES.include?(record['type']) ||
      record['method'] == 'dump_all'
  end
  pruned.collapse(collapsible)
end
@api private
Create two RGL::DirectedAdjacencyGraph, for the forward and backward edges of the graph
# File lib/memdump/memory_dump.rb, line 387
# Build the forward (object -> referenced object) graph and its reverse.
#
# Vertices are record addresses. A synthetic 'ALL_ROOTS' vertex gets an edge
# to every record whose type is 'ROOT'.
#
# @return [(RGL::DirectedAdjacencyGraph, RGL::DirectedAdjacencyGraph)] the
#   forward and backward graphs
def compute_graphs
  forward = RGL::DirectedAdjacencyGraph.new
  forward.add_vertex 'ALL_ROOTS'
  address_to_record.each do |address, record|
    forward.add_vertex(address)
    forward.add_edge('ALL_ROOTS', address) if record['type'] == 'ROOT'
    record['references'].each { |ref_address| forward.add_edge(address, ref_address) }
  end

  # The backward graph is built from the forward graph's edges only
  backward = RGL::DirectedAdjacencyGraph.new
  forward.each_edge { |from, to| backward.add_edge(to, from) }

  [forward, backward]
end
# File lib/memdump/memory_dump.rb, line 408
# Run a depth-first traversal of the forward graph starting at root,
# forwarding the given block to the graph's own depth_first_visit.
#
# @param root the address of the vertex to start from
def depth_first_visit(root, &block)
  ensure_graphs_computed
  graph = @forward_graph
  graph.depth_first_visit(root, &block)
end
Compute the set of records that are not in self but are in to
@param [MemoryDump] to the dump whose records are looked up in self @return [MemoryDump] the records of 'to' that are not in self
# File lib/memdump/memory_dump.rb, line 569
# Select the records of 'to' whose address is not present in self.
#
# @param to [MemoryDump] the dump whose records are looked up in self
# @return [MemoryDump] the records that only exist in 'to'
def diff(to)
  only_in_other = {}
  to.each_record do |record|
    key = record['address']
    only_in_other[key] = record unless @address_to_record.include?(key)
  end
  MemoryDump.new(only_in_other)
end
# File lib/memdump/memory_dump.rb, line 496
# Create a new dump that contains every record of this one.
#
# The records themselves are shared with self (find_all does not copy them).
#
# @return [MemoryDump]
def dup
  find_all { |_record| true }
end
# File lib/memdump/memory_dump.rb, line 15
# Enumerate the records of this dump.
#
# The block is forwarded as-is to each_value on the address => record
# mapping.
#
# @yieldparam record a record
def each_record(&block)
  records_by_address = address_to_record
  records_by_address.each_value(&block)
end
@api private
Ensure that @forward_graph and @backward_graph are computed
# File lib/memdump/memory_dump.rb, line 369
# Compute @forward_graph and @backward_graph unless a forward graph is
# already cached.
def ensure_graphs_computed
  return if @forward_graph

  @forward_graph, @backward_graph = compute_graphs
end
Filter the records
@yieldparam record a record @yieldreturn [Boolean] truthy if the record should be included in the
returned dump
@return [MemoryDump]
# File lib/memdump/memory_dump.rb, line 53
# Select the records for which the block returns a truthy value.
#
# Without a block, an enumerator for this method is returned instead.
#
# @yieldparam record a record (the object stored in the dump, not a copy)
# @yieldreturn truthy to keep the record
# @return [MemoryDump] a dump sharing the selected records with self
def find_all
  return enum_for(__method__) unless block_given?

  selected = {}
  each_record do |record|
    selected[record['address']] = record if yield(record)
  end
  MemoryDump.new(selected)
end
Filter the entries, removing those for which the block returns falsy
@yieldparam record a record @yieldreturn [nil,Object] either a record object, or falsy to remove
this record in the returned dump
@return [MemoryDump]
# File lib/memdump/memory_dump.rb, line 87
# Filter and transform the records in one pass.
#
# The block receives a shallow copy of each record. A falsy return value
# drops the record; any other value is converted with to_hash and stored
# under the address of the original record.
#
# Without a block, an enumerator for this method is returned instead.
#
# @yieldparam record a copy of a record
# @yieldreturn [nil,Object] the replacement record, or falsy to drop it
# @return [MemoryDump]
def find_and_map
  return enum_for(__method__) unless block_given?

  mapped = {}
  each_record do |record|
    outcome = yield(record.dup)
    mapped[record['address']] = outcome.to_hash if outcome
  end
  MemoryDump.new(mapped)
end
# File lib/memdump/memory_dump.rb, line 27
# Look up a record by its address.
#
# @param address the address to look for
# @return the value stored under that address in the address => record
#   mapping
def find_by_address(address)
  records_by_address = address_to_record
  records_by_address[address]
end
Replace all objects in dump by a single “group” object
# File lib/memdump/memory_dump.rb, line 609
# Replace all the records of 'dump' by a single "group" record.
#
# The group record uses 'name' as its address and references everything the
# grouped records referenced outside of the group. In the other records,
# references to grouped records are replaced by a reference to the group, and
# 'class_address' / 'class' values that designate a grouped record are
# replaced by 'name'.
#
# @param name the address of the group record
# @param dump a dump-like object (must respond to each_record) listing the
#   records to group
# @param attributes [Hash] extra fields for the group record
# @return [MemoryDump]
def group(name, dump, attributes = Hash.new)
  member_addresses = Set.new
  outgoing = Set.new
  dump.each_record do |member|
    member_addresses << member['address']
    outgoing.merge(member['references'])
  end

  group_record = attributes.dup
  group_record['address'] = name
  group_record['references'] = outgoing - member_addresses

  updated = { name => group_record }
  each_record do |record|
    next if member_addresses.include?(record['address'])

    copy = record.dup
    remaining = copy['references'] - member_addresses
    # A size change means at least one reference pointed into the group
    remaining << name if remaining.size != record['references'].size
    copy['references'] = remaining

    %w[class_address class].each do |field|
      copy[field] = name if member_addresses.include?(copy[field])
    end
    updated[copy['address']] = copy
  end

  MemoryDump.new(updated)
end
# File lib/memdump/memory_dump.rb, line 11
# Test whether this dump has a record for the given address.
#
# @param address the address to look for
# @return [Boolean]
def include?(address)
  address_to_record.key?(address)
end
# File lib/memdump/memory_dump.rb, line 31
# Same text as #to_s.
#
# @return [String]
def inspect
  description = to_s
  description
end
Compute the interface between self and the other dump, that is the elements of self that have a child in dump, and the elements of dump that have a parent in self
# File lib/memdump/memory_dump.rb, line 583
# Compute the border between self and another dump.
#
# The self border is made of the records of self that are not in 'dump' but
# reference at least one record of 'dump'. The dump border is made of copies
# of the records of 'dump' referenced that way; 'dump' is then asked to
# update their keepalive count.
#
# @param dump [MemoryDump]
# @return [(MemoryDump, MemoryDump)] the self border and the dump border
def interface_with(dump)
  parents = {}
  children = {}
  each_record do |record|
    next if dump.find_by_address(record['address'])

    hits = record['references'].map { |address| dump.find_by_address(address) }.compact
    next if hits.empty?

    parents[record['address']] = record
    hits.each { |child| children[child['address']] = child.dup }
  end

  self_border = MemoryDump.new(parents)
  dump_border = MemoryDump.new(children)
  dump.update_keepalive_count(dump_border)
  [self_border, dump_border]
end
Map the records
@yieldparam record a record @yieldreturn [Object] the record object that should be included in the
returned dump
@return [MemoryDump]
# File lib/memdump/memory_dump.rb, line 71
# Transform every record.
#
# The block receives a shallow copy of each record; its return value is
# converted with to_hash and stored under the address of the original record.
#
# Without a block, an enumerator for this method is returned instead.
#
# @yieldparam record a copy of a record
# @yieldreturn [Object] the record to store in the returned dump
# @return [MemoryDump]
def map
  return enum_for(__method__) unless block_given?

  mapped = {}
  each_record do |record|
    mapped[record['address']] = yield(record.dup).to_hash
  end
  MemoryDump.new(mapped)
end
# File lib/memdump/memory_dump.rb, line 345
# Extract the records that belong to the spanning tree rooted at the single
# record of root_dump.
#
# Each returned record is a copy whose references are restricted to
# addresses that are vertices of the tree. Tree vertices that have no record
# in self (such as the synthetic 'ALL_ROOTS' vertex of the forward graph) are
# skipped.
#
# @param root_dump [MemoryDump] a dump containing exactly one record, the root
# @return [MemoryDump] the records of the tree
# @raise [ArgumentError] if root_dump does not contain exactly one record, or
#   if that record's address is not in self
def minimum_spanning_tree(root_dump)
  if root_dump.size != 1
    raise ArgumentError, "there should be exactly one root"
  end

  root_address, _ = root_dump.address_to_record.first
  unless address_to_record[root_address]
    raise ArgumentError, "no record with address #{root_address} in self"
  end

  ensure_graphs_computed

  # The graph vertices are addresses, so the tree must be rooted at the
  # address and not at the record.
  # NOTE(review): this relies on the forward graph responding to
  # #minimum_spanning_tree and on the result responding to #each_vertex and
  # #has_vertex? -- confirm against the RGL version in use.
  mst = @forward_graph.minimum_spanning_tree(root_address)

  tree_records = {}
  mst.each_vertex do |address|
    record = address_to_record[address]
    next unless record

    # Copy both the record and its references so that self is left untouched
    record = record.dup
    references = record['references'].dup
    references.delete_if { |ref_address| !mst.has_vertex?(ref_address) }
    record['references'] = references
    tree_records[address] = record
  end
  MemoryDump.new(tree_records)
end
Return the records of a given class
@param [String] name the class @return [MemoryDump] the matching entries
@example return all string records
objects_of_class("String")
# File lib/memdump/memory_dump.rb, line 117
# Select the records whose 'class' field matches name.
#
# The match is done with name === value, so anything implementing === can be
# given.
#
# @param name the class to match
# @return [MemoryDump] the matching records
def objects_of_class(name)
  find_all do |record|
    name === record['class']
  end
end
Return the records of a given type
@param [String] name the type @return [MemoryDump] the matching records
@example return all ICLASS (singleton) records
objects_of_type("ICLASS")
# File lib/memdump/memory_dump.rb, line 106
# Select the records whose 'type' field matches name.
#
# The match is done with name === value, so anything implementing === can be
# given.
#
# @param name the type to match
# @return [MemoryDump] the matching records
def objects_of_type(name)
  find_all do |record|
    name === record['type']
  end
end
Return the entries that refer to the entries in the dump
@param [MemoryDump] dump the set of entries whose parents we're looking for @param [Integer] min only return the entries in self that refer to
more than this many entries in 'dump'
@param [Boolean] exclude_dump exclude the entries that are already in
'dump'
@return [(MemoryDump,Hash)] the parent entries, and a mapping from
records in the parent entries to the count of entries in 'dump' they refer to
# File lib/memdump/memory_dump.rb, line 131
# Find the records of self that reference records of 'dump'.
#
# @param dump [MemoryDump] the records whose parents are looked for
# @param min [Integer] only keep the records that reference strictly more
#   than this many records of 'dump'
# @param exclude_dump [Boolean] when true, records that are themselves in
#   'dump' are never returned
# @return [(MemoryDump, Hash)] the parents, and a mapping from each parent
#   record to the number of its references that are in 'dump'
def parents_of(dump, min: 0, exclude_dump: false)
  child_addresses = dump.addresses.to_set
  counts = {}
  parents = find_all do |record|
    next if exclude_dump && child_addresses.include?(record['address'])

    hits = record['references'].count { |ref_address| child_addresses.include?(ref_address) }
    next unless hits > min

    counts[record] = hits
    true
  end
  [parents, counts]
end
Simply remove the given objects
# File lib/memdump/memory_dump.rb, line 501
# Remove the given records, and the references to them, without
# reconnecting anything.
#
# @param objects [MemoryDump] the records to remove
# @return [MemoryDump] the filtered dump (a plain #dup when there is nothing
#   to remove)
def remove(objects)
  doomed = objects.addresses.to_set
  return dup if doomed.empty?

  find_and_map do |record|
    next if doomed.include?(record['address'])

    # find_and_map yields a shallow copy, so the references are copied
    # before being filtered
    pruned = record['references'].dup
    pruned.delete_if { |address| doomed.include?(address) }
    record['references'] = pruned
    record
  end
end
Remove entries in the reference for which we can't find an object with the matching address
@return [(MemoryDump,Set)] the filtered dump and the set of missing addresses found
# File lib/memdump/memory_dump.rb, line 282
# Drop the references that point to addresses without a record in this dump.
#
# @return [(MemoryDump, Set)] the cleaned-up dump, and the set of referenced
#   addresses that had no matching record
def remove_invalid_references
  known = addresses.to_set
  missing = Set.new
  cleaned = map do |record|
    references = record['references']
    valid = known & references
    missing.merge(references - valid) if valid.size != references.size

    copy = record.dup
    copy['references'] = valid
    copy
  end
  [cleaned, missing]
end
Remove all components that are smaller than the given number of nodes
It really looks only at the number of nodes reachable from a root (i.e. won't notice if two smaller-than-threshold roots have nodes in common)
# File lib/memdump/memory_dump.rb, line 520
# Remove the components that have at most max_size records.
#
# A component is the set of records reachable from a root, a root being a
# record that no record references. Only the size of what is reachable from
# each root is considered, so two small components sharing records are both
# removed. Records that are only part of cycles (no root) are never removed.
#
# References to addresses that have no record in this dump are ignored
# instead of raising.
#
# @param max_size [Integer] the biggest component size that gets removed
# @return [MemoryDump] the filtered dump
def remove_small_components(max_size: 1)
  roots = addresses.to_set
  each_record { |record| roots.subtract(record['references']) }

  to_remove = Set.new
  roots.each do |root_address|
    component = Set.new
    queue = Set[root_address]
    # Stop exploring as soon as the component is known to be too big
    while !queue.empty? && component.size <= max_size
      address = queue.first
      queue.delete(address)
      next if component.include?(address)

      record = address_to_record[address]
      # Dangling reference: there is no such record in this dump
      next unless record

      component << address
      queue.merge(record['references'])
    end

    to_remove.merge(component) if component.size <= max_size
  end

  without(find_all { |record| to_remove.include?(record['address']) })
end
# File lib/memdump/memory_dump.rb, line 653
# Delegate to MemDump.replace_class_address_by_name for this dump.
#
# @param add_reference_to_class [Boolean] forwarded unchanged to
#   MemDump.replace_class_address_by_name
# @return whatever MemDump.replace_class_address_by_name returns
def replace_class_id_by_class_name(add_reference_to_class: false)
  MemDump.replace_class_address_by_name(
    self,
    add_reference_to_class: add_reference_to_class
  )
end
@api private
Return the set of record addresses that are the addresses of roots in the live graph
@return [Set<String>]
# File lib/memdump/memory_dump.rb, line 451
# Compute the addresses of the records that no record of this dump
# references.
#
# @return [Set] the root addresses
def root_addresses
  unreferenced = addresses.to_set
  each_record do |record|
    unreferenced.subtract(record['references'])
  end
  unreferenced
end
Returns the set of roots
# File lib/memdump/memory_dump.rb, line 460
# Build a dump made of the root records (see #root_addresses).
#
# @param with_keepalive_count [Boolean] when true, each root is copied and
#   annotated with 'keepalive_count', the number of vertices visited by a
#   depth-first traversal starting at that root
# @return [MemoryDump]
def roots(with_keepalive_count: false)
  selected = {}
  root_addresses.each do |address|
    record = find_by_address(address)
    if with_keepalive_count
      reachable = 0
      depth_first_visit(address) { reachable += 1 }
      # Annotate a copy so that the record stored in self is left untouched
      record = record.dup
      record['keepalive_count'] = reachable
    end
    selected[address] = record
  end
  MemoryDump.new(selected)
end
Return the graph of object that keeps objects in dump alive
It contains only the shortest paths from the roots to the objects in dump
@param [MemoryDump] dump @return [MemoryDump]
# File lib/memdump/memory_dump.rb, line 304
# Extract the part of the graph that keeps the records of 'dump' alive.
#
# For each root, the shortest path (every edge weighting 1) to each record of
# 'dump' is computed; the result is made of the records found on these
# paths, with their references restricted to the nodes on these paths.
#
# @param dump [MemoryDump] the records whose roots are looked for
# @param root_dump [MemoryDump,nil] the roots to start from. When nil, the
#   synthetic 'ALL_ROOTS' vertex of the forward graph is used.
# @return [MemoryDump]
# @raise [ArgumentError] if root_dump is given but contains no record
def roots_of(dump, root_dump: nil)
  # Uses #size: this class shows no #empty? method to rely on
  if root_dump && root_dump.size.zero?
    raise ArgumentError, "no roots provided"
  end

  start_addresses =
    if root_dump then root_dump.addresses
    else ['ALL_ROOTS']
    end

  ensure_graphs_computed

  result_nodes = Set.new
  dump_addresses = dump.addresses
  start_addresses.each do |root_address|
    visitor = RGL::DijkstraVisitor.new(@forward_graph)
    dijkstra = RGL::DijkstraAlgorithm.new(@forward_graph, Hash.new(1), visitor)
    dijkstra.find_shortest_paths(root_address)
    path_builder = RGL::PathBuilder.new(root_address, visitor.parents_map)

    dump_addresses.each do |record_address|
      path = path_builder.path(record_address)
      result_nodes.merge(path) if path
    end
  end

  find_and_map do |record|
    address = record['address']
    next unless result_nodes.include?(address)

    # Prefer records in 'dump' to allow for annotations in the source
    record = (dump.find_by_address(address) || record).dup
    record['references'] = result_nodes & record['references']
    record
  end
end
Get a random sample of the records
The sampling is random, so the returned set might be bigger or smaller than expected. Do not use on small sets.
@param [Float] ratio the ratio of selected samples vs. total samples (0.1
will select approximately 10% of the samples)
# File lib/memdump/memory_dump.rb, line 435
# Pick a random subset of the records.
#
# Each record is kept independently when a fresh rand value is lower than or
# equal to ratio, so the size of the result varies from one call to the
# next.
#
# @param ratio [Float] the approximate ratio of records to keep
# @return [MemoryDump] a dump sharing the selected records with self
def sample(ratio)
  picked = {}
  each_record do |record|
    picked[record['address']] = record if rand <= ratio
  end
  MemoryDump.new(picked)
end
# File lib/memdump/memory_dump.rb, line 35
# Write the records, one JSON document per line.
#
# @param io_or_path where to write: an object responding to #open (such as a
#   Pathname), which is opened for writing; a String path, which is opened
#   with File.open; or any other object, which is written to with #puts
def save(io_or_path)
  if io_or_path.respond_to?(:open)
    io_or_path.open('w') { |io| save(io) }
  elsif io_or_path.respond_to?(:to_str)
    # A String does not publicly respond to #open (Kernel#open is private),
    # so plain string paths have to be opened explicitly
    File.open(io_or_path.to_str, 'w') { |io| save(io) }
  else
    each_record do |record|
      io_or_path.puts JSON.dump(record)
    end
  end
end
# File lib/memdump/memory_dump.rb, line 23
# Count the records in this dump.
#
# @return [Integer] the size of the address => record mapping
def size
  records_by_address = address_to_record
  records_by_address.size
end
# File lib/memdump/memory_dump.rb, line 552
# Count the records per class.
#
# A record is counted under its 'class' field, falling back on 'type' and
# then on 'root' when the previous ones are missing.
#
# @return [(Integer, Hash)] the number of records that have none of these
#   fields, and the mapping from class (or type, or root) to record count
def stats
  unknown = 0
  per_class = Hash.new(0)
  each_record do |record|
    label = record['class'] || record['type'] || record['root']
    if label
      per_class[label] += 1
    else
      unknown += 1
    end
  end
  [unknown, per_class]
end
Write the dump to a GML file that can be loaded by Gephi
@param [Pathname,String,IO] io_or_path the path or the IO stream into which we should
dump
# File lib/memdump/memory_dump.rb, line 220
# Write the dump in GML format through MemDump.convert_to_gml.
#
# @param io_or_path [Pathname,String,IO] an IO to write to, or anything
#   accepted by Pathname() designating the file to create
# @return [nil]
def to_gml(io_or_path)
  unless io_or_path.kind_of?(IO)
    # Not an IO: treat it as a path, and come back here with the opened file
    Pathname(io_or_path).open('w') { |io| to_gml(io) }
    return
  end

  MemDump.convert_to_gml(self, io_or_path)
  nil
end
# File lib/memdump/memory_dump.rb, line 657
# Short description of the dump, showing only its size.
#
# @return [String]
def to_s
  record_count = size
  "#<MemoryDump size=#{record_count}>"
end
# File lib/memdump/memory_dump.rb, line 643
# Annotate the records of 'dump' with their keepalive count in self.
#
# For each record of 'dump', 'keepalive_count' is set (in place) to the
# number of vertices visited by a depth-first traversal of self's graph
# starting at the record's address.
#
# The traversal is done on self, not on 'dump': 'dump' is typically a small
# subset of self (see #interface_with), so traversing it would not follow the
# references into the rest of self.
#
# @param dump [MemoryDump] the records to annotate
# @return whatever dump.each_record returns
def update_keepalive_count(dump)
  ensure_graphs_computed
  dump.each_record do |record|
    count = 0
    depth_first_visit(record['address']) { |_address| count += 1 }
    record['keepalive_count'] = count
    record
  end
end
Validate that all reference entries have a matching dump entry
@raise [RuntimeError] if references to addresses that have no matching entry have been found
# File lib/memdump/memory_dump.rb, line 416
# Check that every reference points to an address that has a record in this
# dump.
#
# @return [nil]
# @raise [RuntimeError] on the first record that references addresses
#   without a matching record
def validate_references
  known = addresses.to_set
  each_record do |record|
    references = record['references']
    valid = known & references
    next if valid.size == references.size

    missing = references - valid
    raise "#{record} references #{missing.to_a.sort.join(", ")} which do not exist"
  end
  nil
end
Remove entries from the dump, and all references to them
@param [MemoryDump] entries the set of entries to remove, as e.g. returned by
{#objects_of_class}
@return [MemoryDump] the filtered dump
# File lib/memdump/memory_dump.rb, line 203
# Remove the given entries from the dump, as well as every reference to
# them.
#
# @param entries [MemoryDump] the entries to remove (only #include? is
#   called on it)
# @return [MemoryDump] the filtered dump
def without(entries)
  find_and_map do |record|
    next if entries.include?(record['address'])

    original_refs = record['references']
    kept = original_refs.find_all { |address| !entries.include?(address) }
    # Only rewrite the references when something was actually dropped
    if kept.size != original_refs.size
      record = record.dup
      record['references'] = kept.to_set
    end
    record
  end
end