class Bio::Assembly::Ace

Public Class Methods

new(path) click to toggle source
# File lib/bio-assembly/ace.rb, line 10
# Opens the ACE file at +path+ for reading and immediately consumes its
# first line (the AS header) through parse_as.
#
# path - location of the ACE file to read.
#
# The handle is stored in @file because the other parsing methods read from
# it on demand; this method does not close it.
def initialize(path)
  @file = File.open(path, 'r')
  parse_as
end

Public Instance Methods

each_contig() { |contig| ... } click to toggle source
# File lib/bio-assembly/ace.rb, line 15
# Yields each contig found in the remaining part of the ACE file, in file
# order.
#
# Identifier lines are scanned with each_identifier. Every 'CO' line starts a
# contig, which parse_contig reads before the contig is yielded; any other
# identifier seen at this level is skipped.
#
# Returns an Enumerator when no block is given, rather than failing with
# LocalJumpError once the first contig has been parsed.
def each_contig
  return enum_for(:each_contig) unless block_given?

  each_identifier do |identifier, attrs|
    next unless identifier == 'CO'

    yield parse_contig(attrs)
  end
end
to_ace() click to toggle source
# File lib/bio-assembly/ace.rb, line 23
# Renders the assembly as ACE text.
#
# The result starts with an "AS <num_contigs> <num_reads>" header followed by
# a blank line, then the to_ace output of every contig yielded by
# each_contig, each terminated by a newline.
#
# The output is built by appending to a single string so the text collected
# so far is not copied again for every contig.
def to_ace
  ace = "AS #{num_contigs} #{num_reads}\n\n"
  each_contig { |contig| ace << contig.to_ace << "\n" }
  ace
end

Private Instance Methods

each_identifier() { |identifier, attrs| ... } click to toggle source

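Iterates over the remaining lines of the file and yields the two-letter identifier and the attribute text of every identifier line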

# File lib/bio-assembly/ace.rb, line 60
# Walks the remaining lines of @file and yields every identifier line.
#
# A line counts as an identifier line when it starts with one letter from
# ABCDQRW, one from ADFOQRST and then a whitespace character. For each such
# line the block receives the first two characters and everything from the
# fourth character onwards (trailing newline included).
def each_identifier
  @file.each do |line|
    next unless line =~ /^[ABCDQRW][ADFOQRST][\s\n].*/

    yield(line[0, 2], line[3..-1])
  end
end
parse_af(contig, attrs) click to toggle source

parse read meta data

# File lib/bio-assembly/ace.rb, line 79
# Builds a Read from the attributes of an AF line and adds it to the contig.
#
# contig - object receiving the read through add_read.
# attrs  - text after the "AF " prefix; its first three whitespace-separated
#          fields become the read's name, orientation and from, all kept as
#          strings.
#
# Returns whatever contig.add_read returns.
def parse_af(contig, attrs)
  read = Read.new
  fields = attrs.split(" ")
  read.name = fields[0]
  read.orientation = fields[1]
  read.from = fields[2]
  contig.add_read(read)
end
parse_as() click to toggle source

parse assembly meta data

# File lib/bio-assembly/ace.rb, line 68
# Reads the next line of @file as the assembly header ("AS <contigs> <reads>")
# and records both totals.
#
# Sets @total_num_contigs and @total_num_reads to the second and third
# whitespace-separated fields, kept as strings. The leading "AS" token is not
# stored.
#
# Returns the array of fields from the header line.
# Raises NoMethodError if @file is already at end of file, because gets then
# returns nil.
def parse_as
  line = @file.gets
  _identifier, @total_num_contigs, @total_num_reads = line.split(" ")
end
parse_bq(contig) click to toggle source

parse contig sequence quality data

# File lib/bio-assembly/ace.rb, line 74
# Reads the contig's base-quality block from @file and stores it on the
# contig.
#
# The block is everything up to and including the next blank line. Its
# whitespace-separated tokens are assigned, as strings, to contig.quality.
# Splitting on any whitespace (rather than deleting line breaks first) keeps
# the last value of one line separate from the first value of the next, even
# when quality lines do not begin with a space.
#
# Returns the array assigned to contig.quality.
def parse_bq(contig)
  contig.quality = @file.gets("\n\n").split
end
parse_bs(contig, attrs) click to toggle source

parse base sequence data

# File lib/bio-assembly/ace.rb, line 86
# Attaches a base segment, described by a BS line, to the read it names.
#
# contig - object used to look the read up via find_read_by_name.
# attrs  - text after the "BS " prefix: from, to and read name, separated by
#          whitespace. The values are passed on as strings.
#
# Returns whatever the read's add_base_sequence returns. Fails with
# NoMethodError if find_read_by_name returns nil.
def parse_bs(contig, attrs)
  fields = attrs.split(" ")
  read_name = fields[2]
  contig.find_read_by_name(read_name).add_base_sequence(fields[0], fields[1], read_name)
end
parse_contig(attrs) click to toggle source
# File lib/bio-assembly/ace.rb, line 32
# Parses one contig, starting right after its CO line, and returns it.
#
# attrs - text after the "CO " prefix. Field 1 becomes contig.name, field 3
#         is stored in @num_reads (as a string) and field 5 becomes
#         contig.orientation; fields 2 and 4 are read but not used.
#
# The consensus sequence is the text up to the next blank line with spaces
# and line breaks removed. After that, identifier lines are dispatched to the
# matching parse_* method until as many RD records as announced in the CO
# line have been handled (or the file ends).
def parse_contig(attrs)
  contig = Contig.new
  name, _base_num, read_count, _base_segments_num, orientation = attrs.split(" ")
  contig.name = name
  @num_reads = read_count
  contig.orientation = orientation

  # number of RD records handled so far for this contig; parse_rd increments it
  @num_rds_parsed = 0

  # consensus sequence: everything up to the blank line, whitespace stripped
  contig.seq = @file.gets("\n\n").tr(" \r\n", "")

  # dispatch on the identifier of each following record (AF, RD, ...)
  each_identifier do |identifier, record_attrs|
    case identifier
    when "BQ"
      parse_bq(contig)
    when "AF"
      parse_af(contig, record_attrs)
    when "BS"
      parse_bs(contig, record_attrs)
    when "RD"
      parse_rd(contig, record_attrs)
      # the last announced read has been parsed, so this contig is complete
      break if @num_rds_parsed == @num_reads.to_i
    when "WR"
      parse_wr(contig, record_attrs)
    when "RT"
      parse_rt(contig, record_attrs)
    when "CT"
      parse_ct(contig, record_attrs)
    when "WA"
      parse_wa(contig, record_attrs)
    end
  end

  contig
end
parse_ct(contig, attrs) click to toggle source

parse run metadata (CT records, dispatched from parse_contig) - ignored

# File lib/bio-assembly/ace.rb, line 134
# Handler for CT records (dispatched from parse_contig).
#
# Intentionally a no-op: both arguments are accepted and ignored, and nil is
# returned.
def parse_ct(contig, attrs)
  nil
end
parse_ds(read, attrs) click to toggle source

parse file data - ignored

# File lib/bio-assembly/ace.rb, line 126
# Handler for DS records (dispatched from parse_rd).
#
# Intentionally a no-op: both arguments are accepted and ignored, and nil is
# returned.
def parse_ds(read, attrs)
  nil
end
parse_qa(read, attrs) click to toggle source

parse a read's clear ranges (the part of the read that contributes to the contig)

# File lib/bio-assembly/ace.rb, line 119
# Records a read's clear range from the attributes of a QA line.
#
# read  - object receiving clear_range_from and clear_range_to.
# attrs - text after the "QA " prefix, four whitespace-separated fields. Only
#         the third and fourth are used (as strings); the first two are
#         ignored.
#
# Returns the value assigned to clear_range_to.
def parse_qa(read, attrs)
  fields = attrs.split(" ")
  read.clear_range_from = fields[2]
  read.clear_range_to = fields[3]
end
parse_rd(contig, attrs) click to toggle source

parse read sequence and position data

# File lib/bio-assembly/ace.rb, line 93
# Parses one RD record: the read's sequence plus any QA/DS lines after it.
#
# contig - contig being built; the read is looked up on it by name, so the
#          matching read must already have been added.
# attrs  - text after the "RD " prefix. Only the first field (read name) is
#          used; the remaining fields are ignored.
#
# Side effects: increments @num_rds_parsed, sets seq and to on the read, and
# consumes lines from @file up to and including the read's DS line (or to end
# of file when no DS line follows).
def parse_rd(contig, attrs)
  # one more RD record handled for the current contig
  @num_rds_parsed += 1

  read_name = attrs.split(" ").first

  # sequence text runs up to the next blank line; strip spaces and line breaks
  bases = @file.gets("\n\n").tr(" \r\n", "")

  read = contig.find_read_by_name(read_name)
  read.seq = bases

  # NOTE(review): the end is computed as from + length, not from + length - 1;
  # confirm which convention Read#to is expected to follow.
  read.to = read.from.to_i + read.seq.length
  # a read running past the end of the contig is clipped to the contig length
  if read.to > contig.seq.length
    read.to = contig.seq.length
  end

  # optional QA line, then the DS line that ends this read's records
  each_identifier do |identifier, record_attrs|
    case identifier
    when "QA"
      parse_qa(read, record_attrs)
    when "DS"
      parse_ds(read, record_attrs)
      break
    end
  end
end
parse_wa(contig, attrs) click to toggle source

parse run metadata (WA records, dispatched from parse_contig) - ignored

# File lib/bio-assembly/ace.rb, line 130
# Handler for WA records (dispatched from parse_contig).
#
# Intentionally a no-op: both arguments are accepted and ignored, and nil is
# returned.
def parse_wa(contig, attrs)
  nil
end