class Bio::Big::FastaEmitter
Public Class Methods
new(fn, max_size = 100000)
click to toggle source
# File lib/bigbio/db/emitters/fasta_emitter.rb, line 6
# Remember which file to read and how large an emitted section may be.
#
# @param fn [String] path of the FASTA file, opened later by the emitter
# @param max_size [Integer] upper bound on the length of one emitted section
def initialize(fn, max_size = 100_000)
  @max_size = max_size
  @fn = fn
end
Public Instance Methods
emit_seq() { |:tail,index,tag,seq| ... }
click to toggle source
Yields sequence information in sections of at most max_size characters. Iterators usually load the full sequence into memory; emitting it in sections instead uses far less memory at no extra cost.
# File lib/bigbio/db/emitters/fasta_emitter.rb, line 15
# Stream the FASTA file at @fn, yielding each record in sections of at most
# @max_size characters so a whole sequence never has to be held in memory.
#
# Every yield passes four values:
#   kind  - :mid for a full-size section with more of the record to follow,
#           :tail for the final (possibly empty) section of a record
#   index - zero-based position of the record in the file
#   tag   - the header line with its leading '>' removed (see tag_digest)
#   seq   - the section's sequence text, each source line stripped
#
# An empty file yields nothing. A record with a header but no sequence lines
# yields a single :tail with an empty string.
#
# NOTE(review): assumes @max_size is positive; with zero the section loop
# below would never shrink the buffer.
#
# @yield [kind, index, tag, seq]
# @return [Object, nil] the value of the last yield, or nil for an empty file
# @raise [RuntimeError] from tag_digest when a header does not start with '>'
def emit_seq
  # Block form guarantees the handle is closed, even if the consumer raises.
  File.open(@fn) do |f|
    header = f.gets
    return if header.nil? # empty file: no records to emit

    tag = tag_digest(header.strip)
    seq = ''.dup
    index = 0

    f.each_line do |raw|
      line = raw.strip
      if line.start_with?('>')
        # A new header closes the record collected so far.
        yield :tail, index, tag, seq
        tag = tag_digest(line)
        seq = ''.dup
        index += 1
      else
        seq << line
      end

      # Flush full-size sections; the remainder stays buffered for the tail.
      while seq.size > @max_size
        yield :mid, index, tag, seq[0, @max_size]
        seq = seq[@max_size..-1]
      end
    end

    yield :tail, index, tag, seq
  end
end
tag_digest(tag)
click to toggle source
# File lib/bigbio/db/emitters/fasta_emitter.rb, line 38
# Strip the leading '>' marker from a FASTA header line.
#
# @param tag [String] header line, expected to begin with '>'
# @return [String] everything after the first character
# @raise [RuntimeError] when the first character is not '>'
def tag_digest(tag)
  marker = tag[0, 1]
  raise "Tag error in '#{tag}'" unless marker == '>'

  tag[1..-1]
end