class Keep::Manifest
Constants
- EMPTY_DIR_TOKEN_REGEXP
- FILE_NAME_REGEXP
- FILE_TOKEN_REGEXP
- NON_8BIT_ENCODED_CHAR
- STREAM_NAME_REGEXP
- STREAM_TOKEN_REGEXP
Public Class Methods
new(manifest_text)
click to toggle source
Class to parse a manifest text and provide common views of that data.
# File lib/arvados/keep.rb, line 114
# Wrap a manifest text so the other methods of this class can parse it.
#
# @param manifest_text [String] the manifest text; stored unchanged
def initialize(manifest_text)
  @text = manifest_text
  # Cache filled in lazily by #files; nil means "not computed yet".
  @files = nil
end
unescape(s)
click to toggle source
# File lib/arvados/keep.rb, line 140
# Parse backslash escapes in a Keep manifest stream or file name.
#
# Two escape forms are recognised: a doubled backslash, which becomes a
# single backslash, and a backslash followed by exactly three octal digits,
# which becomes the character with that code (via Integer#chr).
#
# @param s [String, nil] the escaped token
# @return [String, nil] the unescaped string, or nil when +s+ is nil
def self.unescape(s)
  return nil if s.nil?

  s.gsub(/\\(\\|[0-7]{3})/) do
    escape = Regexp.last_match(1)
    escape == '\\' ? '\\' : escape.to_i(8).chr
  end
end
valid?(manifest)
click to toggle source
# File lib/arvados/keep.rb, line 303
# Non-raising counterpart of validate!.
#
# @param manifest [String, nil] manifest text to check
# @return [Boolean] true when validate! returns normally, false when it
#   raises ArgumentError (any other exception propagates)
def self.valid?(manifest)
  validate!(manifest)
  true
rescue ArgumentError
  false
end
validate!(manifest)
click to toggle source
Verify that a given manifest is valid according to https://arvados.org/projects/arvados/wiki/Keep_manifest_format
# File lib/arvados/keep.rb, line 252
# Verify that a given manifest is valid according to
# https://arvados.org/projects/arvados/wiki/Keep_manifest_format
#
# Every line ("stream") is checked in order for: one stream name token,
# at least one block locator token, at least one file token, and no
# trailing space. Checking stops at the first problem found.
#
# @param manifest [String, nil] manifest text to check
# @return [true] when the manifest is empty or every line passes
# @raise [ArgumentError] when the manifest is nil/false, does not end with
#   a newline, or a line fails one of the checks above
def self.validate!(manifest)
  raise ArgumentError, "No manifest found" if !manifest
  return true if manifest.empty?
  raise ArgumentError, "Invalid manifest: does not end with newline" if !manifest.end_with?("\n")

  manifest.each_line.with_index(1) do |line, line_count|
    prefix = "Manifest invalid for stream #{line_count}"

    # Drop the line's last character (the newline), then split on single spaces.
    words = line[0..-2].split(/ /)
    raise ArgumentError, "#{prefix}: missing stream name" if words.empty?

    # --- stream name token ---
    word = words.shift
    if word =~ NON_8BIT_ENCODED_CHAR
      raise ArgumentError, "#{prefix}: >8-bit encoded chars not allowed on stream token #{word.inspect}"
    end
    unescaped_word = unescape(word)
    stream_name_ok = word =~ STREAM_TOKEN_REGEXP &&
                     unescaped_word =~ STREAM_NAME_REGEXP &&
                     unescaped_word !~ /\/\.\.?(\/|$)/
    unless stream_name_ok
      raise ArgumentError, "#{prefix}: missing or invalid stream name #{word.inspect if word}"
    end

    # --- block locator tokens (at least one) ---
    count = 0
    word = words.shift
    while word =~ Locator::LOCATOR_REGEXP
      word = words.shift
      count += 1
    end
    raise ArgumentError, "#{prefix}: missing or invalid locator #{word.inspect if word}" if count == 0

    # --- file tokens (at least one) ---
    count = 0
    loop do
      # Check EVERY file token before unescaping it, not only the first one:
      # unescape() turns an octal escape into a character with Integer#chr,
      # which raises RangeError (not ArgumentError) for codes above 255.
      if word =~ NON_8BIT_ENCODED_CHAR
        raise ArgumentError, "#{prefix}: >8-bit encoded chars not allowed on file token #{word.inspect}"
      end

      # A file token is either an empty-directory placeholder or a regular
      # file token whose unescaped path has no '.' or '..' component.
      valid_file_token =
        unescape(word) =~ EMPTY_DIR_TOKEN_REGEXP ||
        (word =~ FILE_TOKEN_REGEXP &&
         unescape(word) =~ FILE_NAME_REGEXP &&
         (Regexp.last_match(1).split('/') & ['..', '.']).empty?)
      break unless valid_file_token

      word = words.shift
      count += 1
    end

    if word
      # A token is left over that is neither a locator nor a valid file token.
      raise ArgumentError, "#{prefix}: invalid file token #{word.inspect}"
    elsif count == 0
      raise ArgumentError, "#{prefix}: no file tokens"
    end

    # Ruby's split() method silently drops trailing empty tokens
    # (which are not allowed by the manifest format) so we have to
    # check trailing spaces manually.
    raise ArgumentError, "#{prefix}: trailing space" if line.end_with?(" \n")
  end
  true
end
Public Instance Methods
each_file_spec() { |stream_name| ... }
click to toggle source
# File lib/arvados/keep.rb, line 166
# Yield one file spec per file token in the manifest text.
#
# The first token of each line is the (unescaped) stream name. Tokens after
# it are skipped while Locator.valid? accepts them; from the first token it
# rejects onwards, every token on that line is treated as a file token.
#
# When a file name contains '/', its directory part is moved onto the end
# of the yielded stream name and only the last path component is kept as
# the file name.
#
# @yieldparam spec [Array] [stream_name, start_pos, file_size, file_name]
# @return [Enumerator] when called without a block
# @return [true] after iterating, when called with a block
def each_file_spec
  return to_enum(__method__) unless block_given?

  @text.each_line do |line|
    stream_name = nil
    in_file_tokens = false
    line.scan(/\S+/) do |token|
      if stream_name.nil?
        stream_name = unescape(token)
      elsif in_file_tokens || !Locator.valid?(token)
        in_file_tokens = true
        start_pos, file_size, file_name = split_file_token(token)
        stream_name_adjuster = ''
        if file_name.include?('/')
          # '/' in filename: split at the LAST slash
          dirname, sep, file_name = file_name.rpartition('/')
          stream_name_adjuster = sep + dirname # /dir_parts
        end
        yield [stream_name + stream_name_adjuster, start_pos, file_size, file_name]
      end
    end
  end
  true
end
each_line() { |stream_name, block_tokens, file_tokens| ... }
click to toggle source
# File lib/arvados/keep.rb, line 119
# Yield the parsed contents of each non-blank line of the manifest text.
#
# Within a line, the first token is the stream name. Following tokens are
# collected as block tokens for as long as Locator.valid? accepts them and
# no file token has been seen yet; every remaining token is a file token.
# The stream name and the file tokens are unescaped; block tokens are not.
#
# @yieldparam entry [Array] [stream_name, block_tokens, file_tokens]
# @return [Enumerator] when called without a block
def each_line
  return to_enum(__method__) unless block_given?

  @text.each_line do |line|
    stream_name = nil
    block_tokens = []
    file_tokens = []
    line.scan(/\S+/) do |token|
      if stream_name.nil?
        stream_name = unescape(token)
      elsif file_tokens.empty? && Locator.valid?(token)
        block_tokens << token
      else
        file_tokens << unescape(token)
      end
    end
    # Ignore blank lines
    next if stream_name.nil?

    yield [stream_name, block_tokens, file_tokens]
  end
end
exact_file_count?(want_count)
click to toggle source
# File lib/arvados/keep.rb, line 230
# Tell whether the manifest holds exactly +want_count+ files.
#
# files_count is asked to stop at want_count + 1, so a manifest with more
# files than wanted yields a different number without a full count.
#
# @param want_count [Integer] the expected number of files
# @return [Boolean]
def exact_file_count?(want_count)
  files_count(want_count + 1) == want_count
end
files()
click to toggle source
# File lib/arvados/keep.rb, line 192
# List the files in the manifest with their total sizes.
#
# Sizes of all file specs sharing the same stream name and file name are
# added together. The result is computed once and cached in @files.
#
# @return [Array<Array>] one [stream_name, file_name, size] triple per
#   distinct (stream_name, file_name) pair, in first-seen order
def files
  @files ||= begin
    file_sizes = Hash.new(0)
    each_file_spec do |streamname, _, filesize, filename|
      file_sizes[[streamname, filename]] += filesize
    end
    file_sizes.map { |(streamname, filename), size| [streamname, filename, size] }
  end
end
files_count(stop_after=nil)
click to toggle source
# File lib/arvados/keep.rb, line 205
# Return the number of files represented in this manifest.
#
# If stop_after is provided, files_count will read the manifest
# incrementally, and return immediately when it counts that number of
# files. This can help you avoid parsing the entire manifest if you
# just want to check if a small number of files are specified.
#
# Empty-directory placeholders (file name "." with size 0) are never
# counted. When the file list is already cached in @files, the full count
# is returned and stop_after is ignored.
#
# @param stop_after [Integer, nil] stop counting once this many files are seen
# @return [Integer] the file count, capped at stop_after on the incremental path
def files_count(stop_after=nil)
  if stop_after.nil? || !@files.nil?
    # Avoid counting empty dir placeholders
    return files.reject { |_, name, size| name == '.' && size == 0 }.size
  end

  seen_files = {}
  each_file_spec do |streamname, _, filesize, filename|
    # Avoid counting empty dir placeholders
    next if filename == "." && filesize == 0

    # Keyed by (stream, name) so several specs for one file count once.
    seen_files[[streamname, filename]] = true
    return stop_after if seen_files.size >= stop_after
  end
  seen_files.size
end
files_size()
click to toggle source
# File lib/arvados/keep.rb, line 225
# Return the total size of all files in this manifest.
#
# @return [Integer] the sum of the size element of every entry in #files
#   (0 when there are no files)
def files_size
  files.reduce(0) { |total, (_, _, size)| total + size }
end
has_file?(want_stream, want_file=nil)
click to toggle source
# File lib/arvados/keep.rb, line 238
# Tell whether the manifest contains a given file.
#
# May be called with a stream name and a file name, or with a single path,
# which is split into the two with File.split.
#
# @param want_stream [String] stream name, or a full path when +want_file+
#   is omitted
# @param want_file [String, nil] file name within the stream
# @return [Boolean] true as soon as a file spec with that stream name and
#   file name is found, false if there is none
def has_file?(want_stream, want_file=nil)
  want_stream, want_file = File.split(want_stream) if want_file.nil?

  each_file_spec do |streamname, _, _, name|
    return true if streamname == want_stream && name == want_file
  end
  false
end
minimum_file_count?(want_count)
click to toggle source
# File lib/arvados/keep.rb, line 234
# Tell whether the manifest holds at least +want_count+ files.
#
# files_count is asked to stop at want_count, so counting can end as soon
# as enough files have been seen.
#
# @param want_count [Integer] the minimum number of files required
# @return [Boolean]
def minimum_file_count?(want_count)
  files_count(want_count) >= want_count
end
split_file_token(token)
click to toggle source
# File lib/arvados/keep.rb, line 158
# Break a manifest file token of the form "start:size:name" into its parts.
#
# The token is split on the first two colons only, so the name part may
# itself contain colons. The name is unescaped; the two numeric parts are
# converted with String#to_i.
#
# @param token [String] the raw file token
# @return [Array] [start_pos (Integer), file_size (Integer), file_name (String)]
# @raise [ArgumentError] when the token has fewer than three colon-separated parts
def split_file_token(token)
  start_pos, filesize, filename = token.split(':', 3)
  raise ArgumentError, "Invalid file token '#{token}'" if filename.nil?

  [start_pos.to_i, filesize.to_i, unescape(filename)]
end
unescape(s)
click to toggle source
# File lib/arvados/keep.rb, line 154
# Instance-level convenience wrapper: delegates to the class method of the
# same name so instance methods can call +unescape+ without a receiver.
#
# @param s [String, nil] the escaped token
# @return whatever the class-level unescape returns for +s+
def unescape(s)
  self.class.unescape(s)
end