class Seafoam::BGV::BGVParser

A parser for BGV files. It's a push-pull streaming interface that you need to drive with what you want next from the file. It's slightly complicated and some code is duplicated in order to support skipping over parts of the file that you don't need.

Constants

BEGIN_DOCUMENT
BEGIN_GRAPH
BEGIN_GROUP
CLOSE_GROUP
ENUM_KLASS
KLASS
MAGIC

File format constants.

POOL_CLASS
POOL_ENUM
POOL_FIELD
POOL_METHOD
POOL_NEW
POOL_NODE
POOL_NODE_CLASS
POOL_NODE_SOURCE_POSITION
POOL_NULL
POOL_SIGNATURE
POOL_STRING
PROPERTY_ARRAY
PROPERTY_DOUBLE
PROPERTY_FALSE
PROPERTY_FLOAT
PROPERTY_INT
PROPERTY_LONG
PROPERTY_POOL
PROPERTY_SUBGRAPH
PROPERTY_TRUE
SUPPORTED_VERSIONS

Public Class Methods

new(file) click to toggle source
# File lib/seafoam/bgv/bgv_parser.rb, line 11
# Create a parser for the BGV file at the path +file+.
#
# The whole file is loaded into memory. If the data starts with the two
# bytes 0x1f 0x8b it is decompressed with Zlib.gunzip before being wrapped
# in the binary reader. The group stack, constant pool and graph index all
# start out empty.
def initialize(file)
  # Read in binary mode: BGV (and gzip) data is not text, and a text-mode
  # read can apply newline/EOF translation on platforms such as Windows.
  data = File.binread(file)
  data = Zlib.gunzip(data) if data[0..1].bytes == [0x1f, 0x8b]
  @reader = Binary::IOBinaryReader.new(StringIO.new(data))
  @group_stack = []
  @pool = {}
  @index = 0
end

Public Instance Methods

graph_name(graph_header) click to toggle source

Produce a flat graph name from a header.

# File lib/seafoam/bgv/bgv_parser.rb, line 157
# Produce a flat graph name from a header.
#
# +graph_header+ is a Hash with +:group+ (an array of group hashes, each
# with a +:short_name+), +:format+ (a string) and +:args+ (an array).
# Every +%s+ in the format string is replaced, in order, by the matching
# entry of +:args+ (converted to a string; a missing entry becomes empty).
# The group short names and the formatted name are then joined with '/'.
def graph_name(graph_header)
  group_names = graph_header[:group].map { |group| group[:short_name] }
  args = graph_header[:args]
  position = -1
  # gsub rather than sub so that every placeholder consumes its own argument.
  name = graph_header[:format].gsub(/%s/) { args[position += 1] }
  (group_names + [name]).join('/')
end
read_document_props() click to toggle source
# File lib/seafoam/bgv/bgv_parser.rb, line 36
# Read the document properties, if there are any at the current position.
#
# Only looks for them when the major version is at least 7 and the next
# token (peeked, not consumed) is BEGIN_DOCUMENT. Returns the properties
# Hash, or nil when no document properties were read.
def read_document_props
  return nil unless @major >= 7
  return nil unless @reader.peek_sint8 == BEGIN_DOCUMENT

  # Consume the BEGIN_DOCUMENT token that was only peeked at above.
  @reader.skip_int8
  read_props
end
read_file_header(version_check: true) click to toggle source

Read the file header and return the version.

# File lib/seafoam/bgv/bgv_parser.rb, line 23
# Read the file header and return the version as [major, minor].
#
# Raises EncodingError when the first four bytes are not MAGIC. When
# +version_check+ is true, raises NotImplementedError for a version that
# is not listed in SUPPORTED_VERSIONS. The major and minor numbers are
# also remembered on the parser.
def read_file_header(version_check: true)
  magic = @reader.read_bytes(4)
  raise EncodingError, 'does not appear to be a BGV file - missing header' if magic != MAGIC

  @major = @reader.read_sint8
  @minor = @reader.read_sint8
  version = [@major, @minor]
  return version unless version_check
  return version if SUPPORTED_VERSIONS.include?(version)

  raise NotImplementedError, "unsupported BGV version #{@major}.#{@minor}"
end
read_graph() click to toggle source

Read a graph having either read or skipped its headers, producing a Graph object.

# File lib/seafoam/bgv/bgv_parser.rb, line 98
# Read a graph body and build a Graph from it. The caller must already have
# consumed BEGIN_GRAPH, the id, format, args, and props; the Graph is
# created from the most recently stored graph properties.
#
# Nodes are read first. Edges are only materialised once every node exists,
# because an edge may refer to a node that appears later in the stream.
# Raises EncodingError when an edge refers to a node id that was never read.
def read_graph
  graph = Graph.new(@graph_props)
  pending_edges = []
  node_count = @reader.read_sint32
  node_count.times do
    id = @reader.read_sint32
    node_class = read_pool_object
    has_predecessor = read_bool
    node_props = read_props
    node_props.update(id: id, node_class: node_class, has_predecessor: has_predecessor)
    node = graph.create_node(id, node_props)
    [true, false].each do |inputs|
      pending_edges.concat(read_edges(node, node_class, inputs))
    end
  end

  pending_edges.each do |pending|
    node = pending[:node]
    # nil ids mark absent edges and are dropped before the lookup.
    targets = pending[:ids].compact.map do |id|
      graph.nodes[id] || raise(EncodingError, "BGV edge with unknown node #{id}")
    end
    edge_props = pending[:edge]
    targets.each_with_index do |other, index|
      # Each edge gets its own copy of the properties as they're annotated separately.
      edge_props = edge_props.dup
      edge_props[:index] = index
      if pending[:inputs]
        graph.create_edge other, node, edge_props
      else
        graph.create_edge node, other, edge_props
      end
    end
  end

  # Block information follows the nodes.
  block_count = @reader.read_sint32
  block_count.times do
    block_id = @reader.read_sint32
    block_nodes = @reader.read_sint32.times.map { @reader.read_sint32 }
    # Followers are read to advance the stream; they aren't used but could be.
    @reader.read_sint32.times { @reader.read_sint32 }
    graph.create_block block_id, block_nodes
  end
  graph
end
read_graph_header() click to toggle source

Read a graph's headers, having just read its ID. This gives you the graph's properties.

# File lib/seafoam/bgv/bgv_parser.rb, line 83
# Read a graph's headers, having already read BEGIN_GRAPH and the graph ID.
#
# Returns a Hash with a copy of the current group stack (+:group+), the
# format string (+:format+), the arguments (+:args+) and the properties
# (+:props+). The properties are also remembered for a following read_graph.
def read_graph_header
  format = read_string
  args = read_args
  @graph_props = read_props
  { group: @group_stack.dup, format: format, args: args, props: @graph_props }
end
read_graph_preheader() click to toggle source

Move to the next graph in the file, and return its index and ID, or nil if there are no more graphs.

# File lib/seafoam/bgv/bgv_parser.rb, line 59
# Move to the next graph in the file.
#
# Returns nil when read_groups finds no further graph. Otherwise the
# BEGIN_GRAPH token has been consumed and the graph ID is read next; the
# result is [index, id], where index counts the graphs seen so far, or
# [nil, nil] if no ID could be read.
def read_graph_preheader
  found_graph = read_groups
  return nil unless found_graph

  graph_id = @reader.read_sint32
  return [nil, nil] unless graph_id

  position = @index
  @index = position + 1
  [position, graph_id]
end
skip_document_props() click to toggle source
# File lib/seafoam/bgv/bgv_parser.rb, line 47
# Skip over the document properties, if there are any at the current
# position. They are only looked for when the major version is at least 7
# and the next token (peeked, not consumed) is BEGIN_DOCUMENT.
def skip_document_props
  return unless @major >= 7
  return unless @reader.peek_sint8 == BEGIN_DOCUMENT

  # Consume the BEGIN_DOCUMENT token that was only peeked at above.
  @reader.skip_int8
  skip_props
end
skip_graph() click to toggle source

Skip over a graph, having read or skipped its headers.

# File lib/seafoam/bgv/bgv_parser.rb, line 143
# Skip over a graph body. The caller must already have read or skipped
# BEGIN_GRAPH, the id, format, args, and props.
#
# The node class of every node is still read from the pool, because it
# describes how many edges follow and therefore how much to skip.
def skip_graph
  node_count = @reader.read_sint32
  node_count.times do
    @reader.skip_int32
    node_class = read_pool_object
    skip_bool
    skip_props
    [true, false].each { |inputs| skip_edges node_class, inputs }
  end
  skip_blocks
end
skip_graph_header() click to toggle source

Skip over a graph's headers, having just read its ID.

# File lib/seafoam/bgv/bgv_parser.rb, line 74
# Skip over a graph's headers, having already read BEGIN_GRAPH and the
# graph ID. The format string and arguments are skipped, but the
# properties are still read and remembered for a following read_graph.
def skip_graph_header
  skip_string
  skip_args
  props = read_props
  @graph_props = props
end

Private Instance Methods

read_args() click to toggle source

Read arguments.

# File lib/seafoam/bgv/bgv_parser.rb, line 223
# Read arguments: a 32-bit count followed by that many property values.
# Returns the values as an Array.
def read_args
  arg_count = @reader.read_sint32
  arg_count.times.map { read_prop_object }
end
read_begin_group() click to toggle source

Read the opening of a group.

# File lib/seafoam/bgv/bgv_parser.rb, line 192
# Read the opening of a group, having already read BEGIN_GROUP, and push
# the group onto the group stack.
#
# The group is a Hash with +:name+, +:short_name+, +:method+, +:bci+ and
# +:props+, read from the stream in that order.
def read_begin_group
  # Hash literal entries are evaluated top to bottom, which is the order
  # the fields appear in the stream.
  group = {
    name: read_pool_object,
    short_name: read_pool_object,
    method: read_pool_object,
    bci: @reader.read_sint32,
    props: read_props
  }
  @group_stack.push group
end
read_bool() click to toggle source

Read a boolean value.

# File lib/seafoam/bgv/bgv_parser.rb, line 568
# Read a boolean value stored as a single unsigned byte: 0 is false and
# 1 is true. Any other value raises EncodingError.
def read_bool
  byte = @reader.read_uint8
  return false if byte == 0
  return true if byte == 1

  raise ::EncodingError, "unknown BGV boolean value 0x#{byte.to_s(16)}"
end
read_close_group() click to toggle source

Read the closing of a group.

# File lib/seafoam/bgv/bgv_parser.rb, line 210
# Close the innermost open group by popping it off the group stack.
# Returns the popped group, or nil when the stack is empty.
def read_close_group
  # Already read CLOSE_GROUP
  @group_stack.pop
end
read_edges(node, node_class, inputs) click to toggle source

Read edges, producing an array of edge hashes.

# File lib/seafoam/bgv/bgv_parser.rb, line 249
# Read the edges of one node, producing an array of edge hashes.
#
# +node+ is stored in each result as-is, +node_class+ supplies the edge
# descriptions (+:inputs+ or +:outputs+, chosen by +inputs+). A direct edge
# has exactly one node ID; any other edge is preceded by a 16-bit count.
# An ID of -1 means "no node" and is returned as nil.
#
# Each result is a Hash with +:node+, +:edge+ (the edge description),
# +:ids+ and +:inputs+. Raises RuntimeError for a node ID below -1.
def read_edges(node, node_class, inputs)
  edge_infos = inputs ? node_class[:inputs] : node_class[:outputs]
  edge_infos.map do |edge|
    count = edge[:direct] ? 1 : @reader.read_sint16
    ids = count.times.map do
      id = @reader.read_sint32
      # Same exception class as before (RuntimeError), but with a message
      # that says what was wrong instead of a bare "unhandled exception".
      raise "invalid BGV edge node ID #{id}" if id < -1

      id == -1 ? nil : id
    end
    { node: node, edge: edge, ids: ids, inputs: inputs }
  end
end
read_edges_info(inputs) click to toggle source

Read information about edges.

# File lib/seafoam/bgv/bgv_parser.rb, line 530
# Read the edge descriptions of a node class: a 16-bit count followed by
# that many entries.
#
# Each entry is returned as a Hash with +:direct+ (the negation of the
# "indirect" flag in the stream), +:name+ and +:type+. A type is only
# present in the stream when +inputs+ is true; otherwise +:type+ is nil.
def read_edges_info(inputs)
  edge_count = @reader.read_sint16
  edge_count.times.map do
    indirect = read_bool
    name = read_pool_object
    type = inputs ? read_pool_object : nil
    { direct: !indirect, name: name, type: type }
  end
end
read_groups() click to toggle source

Read through group declarations to get to the start of the next graph.

# File lib/seafoam/bgv/bgv_parser.rb, line 172
# Read through group declarations to get to the start of the next graph.
#
# Returns true once a BEGIN_GRAPH token has been consumed, or nil when the
# end of the file is reached first. Group openings and closings update the
# group stack on the way; document properties found here are skipped.
# Raises EncodingError for any other token.
def read_groups
  until @reader.eof?
    token = @reader.read_sint8
    return true if token == BEGIN_GRAPH

    case token
    when BEGIN_GROUP then read_begin_group
    when CLOSE_GROUP then read_close_group
    when BEGIN_DOCUMENT
      # But what should we do with them?
      skip_props
    else
      raise EncodingError, "unknown token 0x#{token.to_s(16)} beginning BGV object"
    end
  end
  nil
end
read_pool_entry() click to toggle source

Read a new entry to the pool.

# File lib/seafoam/bgv/bgv_parser.rb, line 419
# Read a new entry to the pool, having already read POOL_NEW.
#
# The entry starts with its 16-bit pool ID and a type token; what follows
# depends on the type. The decoded object is stored in the pool through
# set_pool_entry and returned.
#
# Raises EncodingError for an unknown pool type, an unknown class token, or
# an enum ordinal outside the range of its enum class.
def read_pool_entry
  # Already read POOL_NEW
  id = @reader.read_uint16
  type = @reader.read_sint8
  case type
  when POOL_STRING
    object = read_string
  when POOL_ENUM
    enum_class = read_pool_object
    enum_ordinal = @reader.read_sint32
    if enum_ordinal.negative? || enum_ordinal >= enum_class.size
      raise EncodingError, "unknown BGV enum ordinal #{enum_ordinal} in #{enum_class}"
    end

    object = enum_class[enum_ordinal]
  when POOL_CLASS
    type_name = read_string
    token = @reader.read_sint8
    case token
    when ENUM_KLASS
      # An enum class is represented by the list of its values.
      values = @reader.read_sint32.times.map do
        read_pool_object
      end
      object = values
    when KLASS
      # Any other class is represented by its name.
      object = type_name
    else
      raise EncodingError, "unknown BGV pool class token 0x#{token.to_s(16)}"
    end
  when POOL_METHOD
    declaring_class = read_pool_object
    method_name = read_pool_object
    signature = read_pool_object
    modifiers = @reader.read_sint32
    # A length of -1 means no bytes follow; otherwise they are skipped.
    bytes_length = @reader.read_sint32
    @reader.skip bytes_length if bytes_length != -1
    object = {
      declaring_class: declaring_class,
      method_name: method_name,
      signature: signature,
      modifiers: modifiers
    }
  when POOL_NODE_CLASS
    node_class = read_pool_object
    name_template = read_string
    inputs = read_edges_info(true)
    outputs = read_edges_info(false)
    object = {
      node_class: node_class,
      name_template: name_template,
      inputs: inputs,
      outputs: outputs
    }
  when POOL_FIELD
    field_class = read_pool_object
    name = read_pool_object
    type_name = read_pool_object
    modifiers = @reader.read_sint32
    object = {
      field_class: field_class,
      name: name,
      type_name: type_name,
      modifiers: modifiers
    }
  when POOL_SIGNATURE
    args = @reader.read_sint16.times.map do
      read_pool_object
    end
    ret = read_pool_object
    object = {
      args: args,
      ret: ret
    }
  when POOL_NODE_SOURCE_POSITION
    method = read_pool_object
    bci = @reader.read_sint32
    locs = []
    # Locations are read until a nil URI is encountered. The URI itself is
    # not kept in the result.
    loop do
      uri = read_pool_object
      break unless uri

      location = read_string
      loc_line = @reader.read_sint32
      loc_start = @reader.read_sint32
      loc_end = @reader.read_sint32
      locs.push [location, loc_line, loc_start, loc_end]
    end
    caller = read_pool_object
    object = {
      method: method,
      bci: bci,
      locs: locs,
      caller: caller
    }
  when POOL_NODE
    node_id = @reader.read_sint32
    node_class = read_pool_object
    object = {
      node_id: node_id,
      node_class: node_class
    }
  else
    raise EncodingError, "unknown BGV pool type 0x#{type.to_s(16)}"
  end
  set_pool_entry id, object
  # Return the object explicitly: set_pool_entry is a hook that may be
  # overridden, so its return value cannot be relied on here.
  object
end
read_pool_object() click to toggle source

Read an object from the pool.

# File lib/seafoam/bgv/bgv_parser.rb, line 400
# Read an object from the pool.
#
# The leading token decides what follows: POOL_NULL yields nil, POOL_NEW
# introduces a new pool entry that is read and returned, and any of the
# pool type tokens is followed by the 16-bit ID of an existing entry.
# Raises EncodingError for an unknown token or when the referenced entry
# is missing from the pool.
def read_pool_object
  token = @reader.read_sint8
  return nil if token == POOL_NULL
  return read_pool_entry if token == POOL_NEW

  reference_tokens = [
    POOL_STRING, POOL_ENUM, POOL_CLASS, POOL_METHOD, POOL_NODE_CLASS,
    POOL_FIELD, POOL_SIGNATURE, POOL_NODE_SOURCE_POSITION, POOL_NODE
  ]
  unless reference_tokens.include?(token)
    raise EncodingError, "unknown token 0x#{token.to_s(16)} in BGV pool object"
  end

  id = @reader.read_uint16
  @pool[id] || raise(EncodingError, "unknown BGV pool object #{token}")
end
read_prop_object() click to toggle source

Read a single property value.

# File lib/seafoam/bgv/bgv_parser.rb, line 342
# Read a single property value.
#
# The leading token selects the kind of value: a pool object, a 32- or
# 64-bit integer, a 32- or 64-bit float, a boolean, an array (of pool
# objects, 32-bit integers or 64-bit floats), or a nested graph. Raises
# EncodingError for an unknown token or an unknown array element type.
def read_prop_object
  token = @reader.read_sint8
  case token
  when PROPERTY_POOL then read_pool_object
  when PROPERTY_INT then @reader.read_sint32
  when PROPERTY_LONG then @reader.read_sint64
  when PROPERTY_DOUBLE then @reader.read_float64
  when PROPERTY_FLOAT then @reader.read_float32
  when PROPERTY_TRUE then true
  when PROPERTY_FALSE then false
  when PROPERTY_ARRAY
    element_type = @reader.read_sint8
    # The element type is validated before the length is read.
    read_element = case element_type
                   when PROPERTY_POOL then -> { read_pool_object }
                   when PROPERTY_INT then -> { @reader.read_sint32 }
                   when PROPERTY_DOUBLE then -> { @reader.read_float64 }
                   else
                     raise EncodingError, "unknown BGV property array type 0x#{element_type.to_s(16)}"
                   end
    length = @reader.read_sint32
    length.times.map { read_element.call }
  when PROPERTY_SUBGRAPH
    # A nested graph: its properties replace the stored graph properties
    # before its body is read.
    @graph_props = read_props
    read_graph
  else
    raise EncodingError, "unknown BGV property 0x#{token.to_s(16)}"
  end
end
read_props() click to toggle source

Read a set of properties, producing a Hash.

# File lib/seafoam/bgv/bgv_parser.rb, line 295
# Read a set of properties, producing a Hash.
#
# The stream holds a 16-bit count followed by that many pairs of a key
# (a pool object) and a property value.
def read_props
  prop_count = @reader.read_sint16
  prop_count.times.each_with_object({}) do |_, props|
    key = read_pool_object
    props[key] = read_prop_object
  end
end
read_string() click to toggle source

Read a UTF-8 string.

# File lib/seafoam/bgv/bgv_parser.rb, line 550
# Read a UTF-8 string: a 32-bit length followed by the string data.
#
# A length of -1 stands for a nil string. Raises EncodingError when the
# string contains a null byte.
def read_string
  length = @reader.read_sint32
  return nil if length == -1

  text = @reader.read_utf8(length)
  raise EncodingError, 'null byte in BGV string' if text.include?("\0")

  text
end
set_pool_entry(id, object) click to toggle source

Hook method that can be overridden for debugging.

# File lib/seafoam/bgv/bgv_parser.rb, line 525
# Store +object+ in the pool under +id+ and return it. This is a separate
# method so that it can be overridden, for example for debugging.
def set_pool_entry(id, object)
  @pool.store(id, object)
end
skip_args() click to toggle source

Skip over arguments.

# File lib/seafoam/bgv/bgv_parser.rb, line 216
# Skip over arguments: a 32-bit count followed by that many property values.
def skip_args
  arg_count = @reader.read_sint32
  arg_count.times { skip_prop_object }
end
skip_blocks() click to toggle source

Skip over blocks in a graph.

# File lib/seafoam/bgv/bgv_parser.rb, line 278
# Skip over blocks in a graph: a 32-bit block count, then for every block
# its ID, a counted list of node IDs and a counted list of follower IDs.
def skip_blocks
  block_count = @reader.read_sint32
  block_count.times do
    @reader.skip_int32 # block ID
    node_count = @reader.read_sint32
    @reader.skip_int32 node_count
    follower_count = @reader.read_sint32
    @reader.skip_int32 follower_count
  end
end
skip_bool() click to toggle source

Skip over a boolean value.

# File lib/seafoam/bgv/bgv_parser.rb, line 563
# Skip over a boolean value, which occupies a single byte. Unlike
# read_bool, the byte is not checked to be 0 or 1.
def skip_bool
  @reader.skip_int8
end
skip_edges(node_class, inputs) click to toggle source

Skip over edges.

# File lib/seafoam/bgv/bgv_parser.rb, line 230
# Skip over the edges of one node.
#
# +node_class+ supplies the edge descriptions (+:inputs+ or +:outputs+,
# chosen by +inputs+). A direct edge has exactly one node ID; any other
# edge is preceded by a 16-bit count of IDs.
def skip_edges(node_class, inputs)
  edge_infos = node_class[inputs ? :inputs : :outputs]
  edge_infos.each do |info|
    id_count = info[:direct] ? 1 : @reader.read_sint16
    id_count.times { @reader.skip_int32 }
  end
end
skip_pool_object() click to toggle source

Skip over an object from the pool.

# File lib/seafoam/bgv/bgv_parser.rb, line 386
# Skip over an object from the pool.
#
# A reference to an existing entry is skipped without looking it up. A new
# entry (POOL_NEW) is still read in full, so that it is added to the pool
# for later references. Raises EncodingError for an unknown token.
def skip_pool_object
  token = @reader.read_sint8
  return nil if token == POOL_NULL
  return read_pool_entry if token == POOL_NEW

  reference_tokens = [
    POOL_STRING, POOL_ENUM, POOL_CLASS, POOL_METHOD, POOL_NODE_CLASS,
    POOL_FIELD, POOL_SIGNATURE, POOL_NODE_SOURCE_POSITION, POOL_NODE
  ]
  return @reader.skip_int16 if reference_tokens.include?(token)

  raise EncodingError, "unknown token 0x#{token.to_s(16)} in BGV pool object"
end
skip_prop_object() click to toggle source

Skip over a single property value.

# File lib/seafoam/bgv/bgv_parser.rb, line 304
# Skip over a single property value.
#
# The leading token selects the kind of value to skip, mirroring
# read_prop_object. Raises EncodingError for an unknown token or an
# unknown array element type.
def skip_prop_object
  token = @reader.read_sint8
  case token
  when PROPERTY_POOL then skip_pool_object
  when PROPERTY_INT then @reader.skip_int32
  when PROPERTY_LONG then @reader.skip_int64
  when PROPERTY_DOUBLE then @reader.skip_float64
  when PROPERTY_FLOAT then @reader.skip_float32
  when PROPERTY_TRUE, PROPERTY_FALSE
    # Booleans are fully described by their token; nothing follows.
    nil
  when PROPERTY_ARRAY
    element_type = @reader.read_sint8
    case element_type
    when PROPERTY_POOL
      element_count = @reader.read_sint32
      element_count.times { skip_pool_object }
    when PROPERTY_INT
      element_count = @reader.read_sint32
      @reader.skip_int32 element_count
    when PROPERTY_DOUBLE
      element_count = @reader.read_sint32
      @reader.skip_float64 element_count
    else
      raise EncodingError, "unknown BGV property array type 0x#{element_type.to_s(16)}"
    end
  when PROPERTY_SUBGRAPH
    # A nested graph: its properties, then its body.
    skip_props
    skip_graph
  else
    raise EncodingError, "unknown BGV property 0x#{token.to_s(16)}"
  end
end
skip_props() click to toggle source

Skip over a set of properties.

# File lib/seafoam/bgv/bgv_parser.rb, line 287
# Skip over a set of properties: a 16-bit count followed by that many
# pairs of a key (a pool object) and a property value.
def skip_props
  prop_count = @reader.read_sint16
  prop_count.times do
    skip_pool_object
    skip_prop_object
  end
end
skip_string() click to toggle source

Skip over a UTF-8 string.

# File lib/seafoam/bgv/bgv_parser.rb, line 544
# Skip over a UTF-8 string: a 32-bit length followed by that many bytes.
# A length of -1 stands for a nil string, which has no data to skip.
def skip_string
  byte_length = @reader.read_sint32
  return nil if byte_length == -1

  @reader.skip byte_length
end