class Bio::Big::FastaEmitter

Public Class Methods

new(fn, max_size = 100000) click to toggle source
# File lib/bigbio/db/emitters/fasta_emitter.rb, line 6
# Stores the settings used later when the file is read.
#
# fn       - path of the FASTA file to read.
# max_size - largest number of sequence characters handed out per yield
#            (defaults to 100000).
def initialize(fn, max_size = 100_000)
  @max_size = max_size
  @fn = fn
end

Public Instance Methods

emit_seq() { |:tail,index,tag,seq| ... } click to toggle source

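Yields each sequence in sections of at most max_size characters instead of loading the whole sequence at once, which keeps memory use low for large records. While a record is still being read, every full section of max_size characters is yielded with the marker :mid; the remaining part of the record is yielded with the marker :tail. Each yield also passes the zero-based record index and the tag (the header line without its leading '>').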

# File lib/bigbio/db/emitters/fasta_emitter.rb, line 15
# Reads the FASTA file at @fn and yields every record in pieces of at most
# @max_size characters, so a whole sequence never has to be held in memory.
#
# Yields four values per call:
#   marker - :mid for a full piece of exactly @max_size characters that is
#            followed by more data of the same record, :tail for the last
#            piece of a record (which may be an empty string).
#   index  - zero-based position of the record in the file.
#   tag    - header line of the record as returned by tag_digest.
#   seq    - the piece of sequence data.
#
# An empty file yields nothing. A header with no sequence lines yields a
# single :tail with an empty string.
#
# Raises ArgumentError when @max_size is not positive (chunking could never
# terminate) and whatever tag_digest raises for a malformed first line.
def emit_seq
  raise ArgumentError, "max_size must be positive (got #{@max_size.inspect})" unless @max_size > 0

  # Block form guarantees the handle is closed, even if the caller's block
  # raises or breaks out early.
  File.open(@fn) do |f|
    header = f.gets
    return if header.nil? # empty file: nothing to emit

    tag = tag_digest(header.strip)
    seq = ''.dup
    index = 0
    f.each_line do |raw|
      line = raw.strip
      if line.start_with?('>')
        # A new header closes the current record.
        yield :tail, index, tag, seq
        tag = tag_digest(line)
        seq = ''.dup
        index += 1
      else
        seq << line
      end
      # Hand out full-size pieces as soon as they are available; the
      # remainder (at most @max_size characters) stays buffered.
      while seq.size > @max_size
        yield :mid, index, tag, seq[0, @max_size]
        seq = seq[@max_size..-1]
      end
    end
    yield :tail, index, tag, seq
  end
end
tag_digest(tag) click to toggle source
# File lib/bigbio/db/emitters/fasta_emitter.rb, line 38
# Returns the header line without its leading '>' marker.
#
# tag - a header line; it must begin with '>'.
#
# Raises RuntimeError ("Tag error in '...'") when the line does not begin
# with '>'.
def tag_digest(tag)
  raise "Tag error in '#{tag}'" unless tag.start_with?('>')

  tag[1..-1]
end