class BlastSeqExtract

3

Attributes

seqObjs[RW]

class Error < StandardError; end

subject_regions[RW]

class Error < StandardError; end

Public Class Methods

new() click to toggle source
# File lib/bio/blast_seq_extract.rb, line 11
def initialize
  @subject_regions = Hash.new
  @seqObjs = Hash.new
end

Public Instance Methods

getRegion() click to toggle source
# File lib/bio/blast_seq_extract.rb, line 42
def getRegion()
  @gs = Array.new
  @subject_regions.each_pair do |subject, arr|
    s_start, s_end, evalue = arr
    seq = @seqObjs[subject].seq
    if s_start > s_end
      seq = Bio::Sequence.auto(seq).reverse_complement
      s_start, s_end = s_end, s_start
    end
    definition = [subject, arr[0,2].join('-')].flatten.join('_')
    seq = seq[s_start, s_end-s_start].upcase
    g = Bio::FastaFormat.new('>'+definition+"\n"+seq)
    @gs << g
  end
  return(@gs)
end
readBlast(infile, evalue_cutoff=1, max_no_seqs=1) click to toggle source
# File lib/bio/blast_seq_extract.rb, line 16
def readBlast(infile, evalue_cutoff=1, max_no_seqs=1)
  in_fh = File.open(infile, 'r')
  in_fh.each_line do |line|
    #query  subject 98.391  373 6 0 660 1032  373 1 0.0 656
    line.chomp!
    line_arr = line.split("\t")
    query, subject = line_arr[0, 2]
    q_start, q_end, s_start, s_end = line_arr.values_at(6,7,8,9).map(&:to_i)
    evalue = line_arr[10].to_f
    if evalue <= evalue_cutoff
      @subject_regions[subject] = [s_start, s_end, evalue]
      break if @subject_regions.size == max_no_seqs
    end
  end
  in_fh.close
end
readSeq(infile) click to toggle source
# File lib/bio/blast_seq_extract.rb, line 33
def readSeq(infile)
  in_fh = Bio::FlatFile.open(infile, 'r')
  in_fh.each_entry do |f|
    title = f.definition.split(' ')[0]
    @seqObjs[title] = f
  end
  in_fh.close
end