class Qu::Seqcluster::Cluster

Attributes

opts[R]

Public Class Methods

new(opts)
# File lib/qu/seqcluster/cluster.rb, line 7
# Creates a cluster finder and keeps +opts+ for later use.
#
# opts:: options object stored in <tt>@opts</tt>. #find_cluster reads its
#        +in+, +kvalue+ and +cutoff+ attributes.
def initialize(opts)
  @opts = opts
end

Public Instance Methods

find_cluster()
# File lib/qu/seqcluster/cluster.rb, line 11
# Greedily clusters the FASTA records read from the file named by
# <tt>@opts.in</tt>.
#
# Records are ordered longest-first. The longest remaining record seeds a new
# group; every other remaining record is scored against the seed's k-mer set
# (k = <tt>@opts.kvalue</tt>) on both its forward and reverse-complement
# strand, and joins the group when the better of the two scores is at least
# <tt>@opts.cutoff</tt>. Grouped records leave the pool and the process
# repeats until the pool is empty.
#
# Returns an Array of groups. The first element of each group is the seed
# record itself; every following element is a
# <tt>[record, similarity, strand]</tt> triple, where strand is '+' or '-'.
def find_cluster
  # Block form guarantees the input file is closed once the records are loaded.
  records = File.open(@opts.in) do |io|
    Bio::FlatFile.new(Bio::FastaFormat, io).to_a
  end
  records.sort_by! { |r| -r.seq.size }

  groups = []
  until records.empty?
    seed_record = records.shift
    group = [seed_record]
    seed_kmer_set = seed_kmer(seed_record, @opts.kvalue, 1)

    # Single pass: score each remaining record and drop it from the pool in
    # the same step if it joins the current group.
    records.reject! do |record|
      naseq = record.naseq
      plus = cal_similarity(naseq, @opts.kvalue, seed_kmer_set)
      minus = cal_similarity(naseq.reverse_complement, @opts.kvalue, seed_kmer_set)

      # Ties go to the minus strand, as only a strictly greater plus score wins.
      similarity, strand = plus > minus ? [plus, '+'] : [minus, '-']

      joined = similarity >= @opts.cutoff
      group << [record, similarity, strand] if joined
      joined
    end

    groups << group
  end

  groups
end

Private Instance Methods

cal_similarity(seq, k, seed_kmer_set)
# File lib/qu/seqcluster/cluster.rb, line 45
# Scores how much of +seq+ is covered by k-mers found in +seed_kmer_set+.
#
# +seq+ is cut into consecutive, non-overlapping windows of length +k+
# (window_search with window and step both equal to +k+). Each window whose
# symbol is in +seed_kmer_set+ counts as a hit.
#
# seq::           sequence responding to +size+ and +window_search+
# k::             window length
# seed_kmer_set:: collection of k-mer symbols answering +include?+
#
# Returns a Float: (hits * k) divided by the length of +seq+ that the windows
# covered (the trailing remainder returned by window_search is excluded).
# Returns 0.0 when +seq+ is shorter than +k+.
def cal_similarity(seq, k, seed_kmer_set)
  # A sequence shorter than one window yields no k-mers, and window_search
  # may then return no usable remainder; treat it as sharing nothing.
  return 0.0 if seq.size < k

  hits = 0
  remainder = seq.window_search(k, k) do |kmer|
    hits += 1 if seed_kmer_set.include?(kmer.to_sym)
  end

  (hits.to_f * k) / (seq.size - remainder.size)
end
seed_kmer(record, window, step)
# File lib/qu/seqcluster/cluster.rb, line 39
# Collects the k-mers of +record+ into a Set of symbols.
#
# Runs window_search over <tt>record.naseq</tt> with the given +window+
# length and +step+, converting every yielded window to a Symbol.
#
# Returns the resulting Set.
def seed_kmer(record, window, step)
  Set.new.tap do |kmers|
    record.naseq.window_search(window, step) do |kmer|
      kmers << kmer.to_sym
    end
  end
end