class Bio::Assembly::Caf

Public Class Methods

new(path) click to toggle source
# File lib/bio-assembly/caf.rb, line 8
# Open the CAF file at +path+ in read mode and keep the handle in @file.
#
# path:: location of the CAF file on disk
#
# The parsing methods read from @file; nothing in this method closes it.
def initialize(path)
  @file = File.open(path, 'r')
end

Public Instance Methods

each_contig() { |contig| ... } click to toggle source

Iterator that returns one contig at a time.

# File lib/bio-assembly/caf.rb, line 12
# Walk through the CAF file and yield every contig as soon as it is complete.
#
# Each line is handed to parse_blocks, which accumulates the current block in
# +feature+. A finished read is converted and attached to the contig being
# built; a finished contig is completed with convert_to_contig, yielded, and a
# fresh empty contig is started for the reads that follow.
#
# When no block is given an Enumerator is returned instead of failing on the
# first +yield+. The enumerator reads from the same @file handle, so it starts
# from wherever that handle currently is.
#
# Yields:: the contig object returned by convert_to_contig
def each_contig
  return enum_for(:each_contig) unless block_given?

  contig = Contig.new
  feature = {}
  @file.each do |line|
    # search the file for CAF blocks like DNA and Sequence
    feature = parse_blocks(line, feature)
    next unless feature[:parsed]

    case feature[:type]
    when :read
      contig.add_read(convert_to_read(feature))
      feature = {}
    when :contig
      yield convert_to_contig(contig, feature)
      contig = Contig.new
      feature = {}
    end
  end
end

Private Instance Methods

convert_to_contig(contig,feature) click to toggle source

convert a generic feature into a Caf::Contig object

# File lib/bio-assembly/caf.rb, line 109
# Fill +contig+ from a parsed contig feature and return it.
#
# contig::  object that already holds the reads of this contig; must respond
#           to name=, seq=, quality= and reads
# feature:: hash with :name, :seq, :qual and, optionally, :af (the raw
#           Assembled_from lines collected by parse_af)
#
# Every Assembled_from line is split on whitespace and read from the end:
# the fifth-from-last token is used as the key into contig.reads, the next two
# tokens become that read's +from+ and +to+.
# NOTE(review): this presumably matches the CAF layout
# "Assembled_from <read> <from> <to> <read_from> <read_to>" - confirm.
#
# A contig without any Assembled_from line has no :af entry; it is treated as
# having no ranges to assign instead of raising on nil.
def convert_to_contig(contig,feature)
  contig.name = feature[:name]
  contig.seq = feature[:seq]
  contig.quality = feature[:qual]
  # Array() turns a missing :af entry into an empty list.
  Array(feature[:af]).each do |af|
    fields = af.split(' ')
    read = contig.reads[fields[-5]]
    read.from = fields[-4]
    read.to = fields[-3]
  end
  contig
end
convert_to_read(feature) click to toggle source

convert a generic feature into a Caf::Read object

# File lib/bio-assembly/caf.rb, line 97
# Build a new Read from a parsed read feature and return it.
#
# feature:: hash with :name, :seq, :qual, :clipping_start, :clipping_end and
#           :orientation; keys that are absent are copied as nil
def convert_to_read(feature)
  Read.new.tap do |r|
    r.name = feature[:name]
    r.seq = feature[:seq]
    r.quality = feature[:qual]
    r.clear_range_from = feature[:clipping_start]
    r.clear_range_to = feature[:clipping_end]
    r.orientation = feature[:orientation]
  end
end
parse_af(feat,line) click to toggle source

parse Assembled_from lines in Contig. These lines also include read alignment positions within the contig

# File lib/bio-assembly/caf.rb, line 88
# Append one raw Assembled_from line to feat[:af], creating the list on first
# use. The line is stored unparsed; convert_to_contig splits it later.
#
# feat:: feature hash being filled
# line:: the Assembled_from line as read from the file
def parse_af(feat,line)
  feat[:af] = [] if feat[:af].nil?
  feat[:af] << line
end
parse_blocks(line,feat) click to toggle source
# File lib/bio-assembly/caf.rb, line 40
# Dispatch on the first whitespace-separated word of +line+.
#
# "DNA" starts a sequence block (parse_dna), "Sequence" starts a metadata
# block (parse_seq); any other line is ignored.
#
# line:: current line of the CAF file
# feat:: feature hash being filled
#
# Returns +feat+, the same hash that was passed in.
def parse_blocks(line,feat)
  block_type = line.split(' ').first
  if block_type == "DNA"
    parse_dna(feat)
  elsif block_type == "Sequence"
    parse_seq(feat, line)
  end
  feat
end
parse_clipping(feat,line) click to toggle source

parse read coordinates for quality clipping

# File lib/bio-assembly/caf.rb, line 76
# Store the quality-clipping coordinates of a read.
#
# The last two whitespace-separated tokens of +line+ are saved, as strings,
# under :clipping_start and :clipping_end.
#
# feat:: feature hash being filled
# line:: a Clipping line from a Sequence block
def parse_clipping(feat,line)
  fields = line.chomp.split(' ')
  clip_from, clip_to = fields.values_at(-2, -1)
  feat[:clipping_start] = clip_from
  feat[:clipping_end] = clip_to
end
parse_dna(feat) click to toggle source

parse DNA sequence and BaseQuality

# File lib/bio-assembly/caf.rb, line 50
# Read the body of a DNA block and, if it follows directly, the BaseQuality
# block, from @file into +feat+.
#
# Called after the "DNA" header line has been consumed. Everything up to the
# next blank line is joined into feat[:seq]. The following line is then read:
# when its first word is "BaseQuality", the text up to the next blank line is
# stored in feat[:qual] with newlines turned into spaces. A following line that
# is not a BaseQuality header is consumed and not looked at again.
#
# Reaching the end of the file (no line after the DNA block, or no body at
# all) is handled: the missing part is simply left out instead of raising.
#
# feat:: feature hash being filled; marked :parsed when its :type is :contig
def parse_dna(feat)
  feat[:seq] = @file.gets("\n\n").to_s.delete("\n")
  header = @file.gets # nil at end of file
  if header && header.split(' ').first == "BaseQuality"
    feat[:qual] = @file.gets("\n\n").to_s.tr("\n", " ").rstrip
  end
  feat[:parsed] = true if feat[:type] == :contig
end
parse_seq(feat,line) click to toggle source

parse Sequence information like Name, Clipping, Strand and Type

# File lib/bio-assembly/caf.rb, line 59
# Parse a Sequence block: its name from the header line, then the attribute
# lines that follow up to the next blank line.
#
# The name is everything after the first ':' of the header with spaces and
# newlines removed, so names that themselves contain ':' or '|' are kept
# intact. Attribute lines are dispatched on their first word: Clipping,
# Strand and Assembled_from go to their parsers, Is_read / Is_contig set
# feat[:type]. Other lines are ignored.
#
# feat:: feature hash being filled; marked :parsed when the block is a read
# line:: the "Sequence : <name>" header line
def parse_seq(feat,line)
  # Limit 2 keeps any further ':' as part of the name.
  feat[:name] = line.split(":", 2)[1].delete(" \n")
  # to_s: a header on the very last line of the file has no body to read.
  sequence_block = @file.gets("\n\n").to_s
  sequence_block.split("\n").each do |l|
    case l.split(' ').first
    when "Clipping" then parse_clipping(feat, l)
    when "Strand" then parse_strand(feat, l)
    when "Assembled_from" then parse_af(feat, l)
    when "Is_read" then feat[:type] = :read
    when "Is_contig" then feat[:type] = :contig
    end
  end
  feat[:parsed] = true if feat[:type] == :read
end
parse_strand(feat,line) click to toggle source

parse sequence strand information

# File lib/bio-assembly/caf.rb, line 83
# Store the strand of a read: the second whitespace-separated token of +line+
# is saved under :orientation.
#
# feat:: feature hash being filled
# line:: a Strand line from a Sequence block
def parse_strand(feat,line)
  strand = line.split(' ')[1]
  feat[:orientation] = strand.delete("\n")
end