class PredictORF

Public Class Methods

new(id, descr, seq, trn_table = nil) click to toggle source
# File lib/bigbio/sequence/predictorf.rb, line 84
# Set up an ORF predictor for a single nucleotide sequence.
#
# id        - identifier, stored as given
# descr     - description, stored as given
# seq       - sequence string; every whitespace character is removed
#             before it is stored
# trn_table - translation table, stored as given. nil (the default) or 0
#             selects START_CODONS; any other value selects the
#             hard-coded codon list below.
def initialize(id, descr, seq, trn_table = nil)
  @id, @descr = id, descr
  @seq = seq.gsub(/\s/, '')
  @trn_table = trn_table
  default_table = trn_table.nil? || trn_table == 0
  # FIXME: this should be linked properly
  @startcodons = default_table ? START_CODONS : %w[ATG TTG CTG AUG UUG CUG]
end

Public Instance Methods

longest_startstop(minsize=0) click to toggle source

Return the longest ORF that has a START codon (see startstop). Return nil if none is found.

# File lib/bigbio/sequence/predictorf.rb, line 136
# Return the longest ORF that begins with a START codon (see startstop),
# or nil when startstop finds none.
#
# minsize - minimum ORF size, passed unchanged to startstop (default 0)
#
# startstop returns its ORFs in the order they were found, so the first
# element is not necessarily the longest one. Select the ORF with the
# longest nucleotide sequence instead.
# NOTE(review): assumes orf.nt.seq responds to #size (it is already
# sliced and upcased like a String in startstop) -- confirm against ORF.
def longest_startstop minsize=0
  startstop(minsize).max_by { | orf | orf.nt.seq.size }
end
startstop(minsize=30) click to toggle source

Return a list of predicted ORFs of at least :minsize amino acids. The ORFs lie between START and STOP codons (ATG, TTG, CTG and AUG, UUG, CUG for now; a later version should use the EMBOSS translation table).

# File lib/bigbio/sequence/predictorf.rb, line 127
# List the ORFs from stopstop whose first three nucleotides, upcased,
# are one of the configured start codons.
#
# minsize - minimum ORF size, passed unchanged to stopstop (default 30)
#
# Returns an Array of ORFs, in the order stopstop produced them.
def startstop minsize=30
  stopstop(minsize).select do | orf |
    leading_codon = orf.nt.seq[0..2].upcase
    @startcodons.include?(leading_codon)
  end
end
stopstop(minsize=30) click to toggle source

Return a list of predicted ORFs of at least :minsize amino acids. The ORFs lie between STOP codons (so sequences without a proper START codon are included).

# File lib/bigbio/sequence/predictorf.rb, line 100
# Collect candidate ORFs from the six-frame translation of @seq.
#
# Each translated frame is split on '*' and every non-empty piece of at
# least minsize residues becomes an ORF. ORFs are numbered consecutively
# from 0 across all frames, in the order they are found.
#
# minsize - minimum number of amino acids in a candidate (default 30)
#
# Returns an Array of ORF objects (empty when nothing qualifies).
def stopstop minsize=30
  orf_type = "XX"
  six_frames = Nucleotide::Translate.new(@trn_table).aa_6_frames(@seq)
  six_frames.each_with_object([]) do | reading, found |
    frame_id = reading[:frame]
    protein  = reading[:sequence]
    offset   = 0  # amino-acid index of the current piece within this frame
    protein.split(/\*/).each do | peptide |
      length = peptide.size
      if length >= minsize && length > 0
        # found.size doubles as the running ORF number
        found << ORF.new(found.size, orf_type, @id, @descr, @seq, frame_id, offset * 3, peptide)
      end
      offset += length + 1  # +1 skips the '*' that ended this piece
    end
  end
end