class Arv::Collection::StreamManifest

Public Class Methods

new(name) click to toggle source

Build a manifest text for a single stream, without substreams. The manifest includes files in the order they're added. If you want a normalized manifest, add files in lexical order by name.

# File lib/arvados/collection.rb, line 482
# Set up an empty manifest for the stream called +name+.
#
# name - the stream name, stored as given.
def initialize(name)
  @name = name
  # Locator ranges collected so far, and the offset at which the next
  # appended range will start.
  @loc_ranges, @loc_range_start = [], 0
  # File tokens, kept in the order they are added.
  @file_specs = []
end

Public Instance Methods

add_file(coll_file) click to toggle source
# File lib/arvados/collection.rb, line 489
# Add every segment of +coll_file+ to this stream, in the order that
# coll_file.each_segment yields them.
#
# coll_file - object responding to each_segment and name.
#
# Returns whatever coll_file.each_segment returns.
def add_file(coll_file)
  coll_file.each_segment { |seg| extend_file_specs(coll_file.name, seg) }
end
to_s() click to toggle source
# File lib/arvados/collection.rb, line 495
# Render this stream as one line of manifest text.
#
# Returns "" when no file specs have been recorded. Otherwise returns
# the escaped stream name, the locators of @loc_ranges and the file
# specs, each group separated by a single space, followed by a newline.
def to_s
  return "" if @file_specs.empty?

  stream_name = escape_name(@name)
  locator_list = @loc_ranges.collect(&:locator).join(" ")
  spec_list = @file_specs.join(" ")
  "%s %s %s\n" % [stream_name, locator_list, spec_list]
end

Private Instance Methods

escape_name(name) click to toggle source
# File lib/arvados/collection.rb, line 547
# Escape a stream or file name for inclusion in manifest text.
#
# Every backslash is doubled, then every character matched by /\s/ is
# replaced by a backslash followed by the three-digit octal value of
# each of its bytes (a space becomes \040).
#
# name - the String to escape.
#
# Returns a new String.
def escape_name(name)
  backslashes_doubled = name.gsub(/\\/, "\\\\\\\\")
  backslashes_doubled.gsub(/\s/) do |whitespace|
    whitespace.bytes.map { |byte| format("\\%03o", byte) }.join("")
  end
end
extend_file_specs(filename, segment) click to toggle source
# File lib/arvados/collection.rb, line 507
# Record one segment of a file in this stream: make sure the segment's
# locators are present in @loc_ranges, then append a
# "position:length:name" token to @file_specs.
#
# filename - the file's name; escaped with escape_name before storing.
# segment  - object responding to locators (an array whose elements are
#            compared against LocatorRange#locator), start_pos and
#            length.
#
# Returns nil when an earlier run of locators is reused; otherwise
# returns the result of appending to @file_specs.
def extend_file_specs(filename, segment)
  locators = segment.locators

  # Look for a prefix of the segment's locators that is also a suffix
  # of the existing @loc_ranges. Candidate sizes are tried from one
  # locator upward and the first match wins. The matched ranges are
  # dropped here and added back by the append step at the bottom, so
  # the shared locators are not listed twice.
  # NOTE(review): because the scan stops at the first (shortest) match,
  # a longer overlap is not used when locators repeat. Scanning from
  # the largest candidate down would give a shorter locator list, but
  # would change the manifest text produced for such input.
  overlap = (1..locators.length).find do |count|
    @loc_ranges.length >= count &&
      @loc_ranges.last(count).collect(&:locator) == locators.first(count)
  end

  if overlap
    overlap.times do
      dropped = @loc_ranges.pop
      # Rewind the running offset by the width of the dropped range.
      @loc_range_start -= (dropped.end - dropped.begin)
    end
  elsif locators.length < @loc_ranges.length
    # No overlap at the tail: check whether the full set of locators
    # already appears somewhere in @loc_ranges. If so, point the file
    # spec at that run instead of appending the same locators again.
    reuse_at = @loc_ranges.each_index.find do |ri|
      @loc_ranges[ri..ri + locators.length - 1].collect(&:locator) == locators
    end
    if reuse_at
      @file_specs << "#{segment.start_pos + @loc_ranges[reuse_at].begin}:#{segment.length}:#{escape_name(filename)}"
      return
    end
  end

  # Normal case: append the segment's locators at the current end of
  # the stream and point the file spec at where they start.
  segment_start = @loc_range_start
  locators.each do |loc_s|
    r = LocatorRange.new(loc_s, @loc_range_start)
    @loc_ranges << r
    @loc_range_start = r.end
  end
  @file_specs << "#{segment.start_pos + segment_start}:#{segment.length}:#{escape_name(filename)}"
end