class Bio::Big::ShortFrameState
The short frame uses the simplest concept to find ORFs. The sequence is immutable, always forward and in frame 0. That makes it easy to reason about. It also returns all ORFs in one go, with the left/right locations.
Attributes
codons[R]
min_size_codons[R]
ntseq_pos[R]
seq[R]
Public Class Methods
new(seq, ntseq_pos, ntmin_size)
click to toggle source
# File lib/bigbio/db/emitters/orf_emitter.rb, line 116
# Build the state for one sequence.
#
# seq        - the sequence; stored as given (upcasing is disabled) and
#              handed to FrameCodonSequence
# ntseq_pos  - position in nucleotides, stored and passed to
#              FrameCodonSequence
# ntmin_size - minimum size in nucleotides; converted to a codon count
#
# NOTE(review): for Integer input, an ntmin_size of 4 or 5 yields a minimum
# of 1 codon, which is lower than the fallback of 2 used for sizes <= 3 --
# confirm this is intended.
def initialize(seq, ntseq_pos, ntmin_size)
  @reversed = nil
  @seq = seq
  # Sizes of 3 nt or less fall back to 2 codons, otherwise we get single
  # STOP codons.
  @min_size_codons = ntmin_size > 3 ? (ntmin_size / 3).to_i : 2
  @codons = FrameCodonSequence.new(seq, ntseq_pos)
  @ntseq_pos = ntseq_pos # nucleotides
end
Public Instance Methods
get_codon_orfs1(splitter_func,do_include_leftmost_orf,do_strip_leading_codon)
click to toggle source
Splitter for one delimiter function. include_leftmost
decides whether the first sequence is returned when it is incomplete. strip_leading
is used to remove the codon shared with the preceding sequence.
# File lib/bigbio/db/emitters/orf_emitter.rb, line 147
# Split @codons on a single delimiter predicate and return the resulting
# ORFs after their positions have been updated against @ntseq_pos.
#
# splitter_func           - callable; truthy for a delimiter codon
# do_include_leftmost_orf - when false, the first ORF is dropped unless its
#                           first codon is itself a delimiter
# do_strip_leading_codon  - when true, an ORF that starts with a delimiter
#                           is replaced by the result of calling shift on it
#
# Returns [] when the split yields nothing; otherwise returns whatever the
# TrackSequenceTrait position updater returns.
def get_codon_orfs1(splitter_func, do_include_leftmost_orf, do_strip_leading_codon)
  orfs = split(@codons, splitter_func)
  return [] if orfs.empty?

  # Drop the first sequence, if there is no match on the first position
  unless do_include_leftmost_orf || splitter_func.call(orfs.first[0])
    orfs.shift
  end

  trimmed = orfs.map do |orf|
    if do_strip_leading_codon && splitter_func.call(orf[0])
      # NOTE(review): relies on shift returning the remaining sequence rather
      # than the removed codon -- confirm against FrameCodonSequence#shift.
      orf.shift
    else
      orf
    end
  end

  # nail against parent
  return TrackSequenceTrait.update_sequence_pos(trimmed, @ntseq_pos) if @reversed.nil?

  TrackSequenceTrait.update_reversed_sequence_pos(trimmed, @ntseq_pos)
end
get_codon_orfs2(splitter_func, start_func)
click to toggle source
Splitter for two delimiter functions
# File lib/bigbio/db/emitters/orf_emitter.rb, line 164
# Split on splitter_func (leftmost ORF included, leading delimiter codon
# stripped) and keep only the ORFs whose first codon satisfies start_func.
#
# splitter_func - callable; truthy for a delimiter codon
# start_func    - callable; truthy for an acceptable first codon
def get_codon_orfs2(splitter_func, start_func)
  candidates = get_codon_orfs1(splitter_func, true, true)
  candidates.find_all do |candidate|
    start_func.call(candidate[0])
  end
end
get_startstop_orfs()
click to toggle source
Return a list of ORFs delimited by START-STOP codons
# File lib/bigbio/db/emitters/orf_emitter.rb, line 137
# Return the ORFs obtained by splitting on STOP_CODONS and keeping those
# whose first codon is in START_CODONS.
def get_startstop_orfs
  is_stop = proc { |codon| STOP_CODONS.include?(codon) }
  is_start = proc { |codon| START_CODONS.include?(codon) }
  get_codon_orfs2(is_stop, is_start)
end
get_stopstop_orfs()
click to toggle source
Return a list of ORFs delimited by STOP codons.
# File lib/bigbio/db/emitters/orf_emitter.rb, line 132
# Return the ORFs obtained by splitting on STOP_CODONS. The leftmost ORF is
# not force-included, and a leading delimiter codon is stripped.
def get_stopstop_orfs
  is_stop = proc { |codon| STOP_CODONS.include?(codon) }
  get_codon_orfs1(is_stop, false, true)
end
split(codons, is_splitter_func)
click to toggle source
Return list of codon sequences, split on the is_splitter
function.
# File lib/bigbio/db/emitters/orf_emitter.rb, line 172
# Cut codons into runs at every codon for which is_splitter_func is truthy.
#
# A delimiter codon closes the current run (it is the run's last codon) and
# also opens the next run (it is that run's first codon). A closed run is
# emitted as a FrameCodonSequence only when its codon count exceeds
# @min_size_codons. Codons after the final delimiter are never emitted, so
# input without any delimiter yields an empty list.
#
# codons           - object responding to each_with_index, yielding codons
# is_splitter_func - callable; truthy for a delimiter codon
#
# Returns an Array of FrameCodonSequence.
def split(codons, is_splitter_func)
  found = []
  current = []
  codons.each_with_index do |codon, index|
    if is_splitter_func.call(codon)
      current << codon
      length = current.size
      if length > @min_size_codons
        # index + 1 - length is the index of the run's first codon
        found << FrameCodonSequence.new(current, index + 1 - length)
      end
      current = []
    end
    current << codon # always push boundary codon
  end
  found
end