class Bio::Big::ShortFrameState

The short frame uses the simplest concept to find ORFs. The sequence is immutable, always forward and in frame 0. That makes it easy to reason about. It also returns all ORFs in one go, with their left/right locations.

Attributes

codons[R]
min_size_codons[R]
ntseq_pos[R]
seq[R]

Public Class Methods

new(seq, ntseq_pos, ntmin_size) click to toggle source
# File lib/bigbio/db/emitters/orf_emitter.rb, line 116
# Set up the frame state for a sequence.
#
# seq::        the sequence; stored unchanged in @seq (it is not upcased)
#              and also handed to FrameCodonSequence.new
# ntseq_pos::  position of the sequence, in nucleotides
# ntmin_size:: minimum size in nucleotides; converted to whole codons
def initialize seq, ntseq_pos, ntmin_size
  @reversed = nil
  @seq = seq
  # Never go below 2 codons, otherwise we get single STOP codons. Taking the
  # maximum also covers an ntmin_size of 4 or 5, which divides down to 1 codon
  # and would otherwise slip under the floor.
  @min_size_codons = [(ntmin_size / 3).to_i, 2].max

  @codons = FrameCodonSequence.new(seq, ntseq_pos)
  @ntseq_pos = ntseq_pos # nucleotides
end

Public Instance Methods

get_codon_orfs1(splitter_func,do_include_leftmost_orf,do_strip_leading_codon) click to toggle source

Splitter for one delimiter function. do_include_leftmost_orf decides whether the first sequence is returned when it is incomplete. do_strip_leading_codon is used to remove the leading codon that a sequence shares with the preceding sequence.

# File lib/bigbio/db/emitters/orf_emitter.rb, line 147
# Split the frame's codons on a single delimiter predicate and pass the
# resulting pieces to TrackSequenceTrait together with @ntseq_pos.
#
# splitter_func::           callable returning true for a delimiter codon
# do_include_leftmost_orf:: when false, the leftmost piece is dropped unless
#                           its first codon is a delimiter
# do_strip_leading_codon::  when true, a piece whose first codon is a
#                           delimiter is replaced by the result of its #shift
#
# Returns [] when the split yields nothing, otherwise whatever the
# TrackSequenceTrait update call returns.
def get_codon_orfs1 splitter_func,do_include_leftmost_orf,do_strip_leading_codon
  pieces = split(@codons, splitter_func)
  return [] if pieces.size.zero?

  # The leftmost piece only survives when asked for, or when it opens on a
  # delimiter codon.
  unless do_include_leftmost_orf || splitter_func.call(pieces.first[0])
    pieces.shift
  end

  pieces = pieces.map do |piece|
    if do_strip_leading_codon && splitter_func.call(piece[0])
      piece.shift
    else
      piece
    end
  end

  # nail against parent
  if @reversed.nil?
    TrackSequenceTrait.update_sequence_pos(pieces, @ntseq_pos)
  else
    TrackSequenceTrait.update_reversed_sequence_pos(pieces, @ntseq_pos)
  end
end
get_codon_orfs2(splitter_func, start_func) click to toggle source

Splitter for two delimiter functions

# File lib/bigbio/db/emitters/orf_emitter.rb, line 164
# Split on +splitter_func+ (leftmost piece included, leading delimiter codon
# stripped) and keep only the pieces whose first codon satisfies +start_func+.
#
# splitter_func:: callable returning true for a delimiter codon
# start_func::    callable returning true for an acceptable first codon
def get_codon_orfs2 splitter_func, start_func
  candidates = get_codon_orfs1(splitter_func, true, true)
  candidates.find_all do |candidate|
    leading_codon = candidate[0]
    start_func.call(leading_codon)
  end
end
get_startstop_orfs() click to toggle source

Return a list of ORFs delimited by START-STOP codons

# File lib/bigbio/db/emitters/orf_emitter.rb, line 137
# Return a list of ORFs delimited by START-STOP codons: pieces are split on
# membership in STOP_CODONS and filtered on membership in START_CODONS.
def get_startstop_orfs
  is_stop = proc { |codon| STOP_CODONS.include?(codon) }
  is_start = proc { |codon| START_CODONS.include?(codon) }
  get_codon_orfs2(is_stop, is_start)
end
get_stopstop_orfs() click to toggle source

Return a list of ORFs delimited by STOP codons.

# File lib/bigbio/db/emitters/orf_emitter.rb, line 132
# Return a list of ORFs delimited by STOP codons. The leftmost piece is not
# force-included, and a leading delimiter codon is stripped from each piece.
def get_stopstop_orfs
  is_stop = proc { |codon| STOP_CODONS.include?(codon) }
  get_codon_orfs1(is_stop, false, true)
end
split(codons, is_splitter_func) click to toggle source

Return list of codon sequences, split on the is_splitter function.

# File lib/bigbio/db/emitters/orf_emitter.rb, line 172
# Return a list of codon sequences, split on the is_splitter function.
#
# Each emitted FrameCodonSequence ends on a delimiter codon, and every piece
# after a delimiter starts with that same delimiter (the boundary codon is
# shared). A piece is emitted only when it holds more than @min_size_codons
# codons; codons after the final delimiter are not emitted.
#
# codons::           enumerable of codons, walked with each_with_index
# is_splitter_func:: callable returning true for a delimiter codon
def split codons, is_splitter_func
  pieces = []
  current = []
  codons.each_with_index do |codon, index|
    unless is_splitter_func.call(codon)
      current << codon
      next
    end

    # Delimiter reached: close the running piece on it.
    current << codon
    length = current.size
    if length > @min_size_codons
      pieces << FrameCodonSequence.new(current, index + 1 - length)
    end
    # The next piece opens with the boundary codon.
    current = [codon]
  end
  pieces
end