class Qu::Seqcluster::Cluster
Attributes
opts[R]
Public Class Methods
new(opts)
click to toggle source
# File lib/qu/seqcluster/cluster.rb, line 7
# Builds a cluster runner around the given options object.
#
# @param opts [Object] options object, stored as-is in @opts; the other
#   methods shown for this class read +in+, +kvalue+ and +cutoff+ from it
def initialize(opts)
  @opts = opts
end
Public Instance Methods
find_cluster()
click to toggle source
# File lib/qu/seqcluster/cluster.rb, line 11
# Greedily groups the FASTA records read from +@opts.in+.
#
# Records are ordered by descending sequence length. The first remaining
# record becomes the seed of a new group; every other remaining record whose
# k-mer similarity to the seed (on either strand) is at least +@opts.cutoff+
# joins that group and is removed from further consideration. This repeats
# until no records are left.
#
# @return [Array<Array>] one array per group. The first element of a group
#   is the bare seed record; each following element is a
#   +[record, similarity, strand]+ triple where +strand+ is '+' or '-'.
def find_cluster
  # Block form closes the input file as soon as all entries have been read.
  records = File.open(@opts.in) do |io|
    Bio::FlatFile.new(Bio::FastaFormat, io).to_a
  end
  records.sort_by! { |r| -r.seq.size }

  groups = []
  until records.empty?
    seed_record = records.shift
    seed_kmer_set = seed_kmer(seed_record, @opts.kvalue, 1)
    group = [seed_record]

    # One pass decides membership and drops members from the pool, instead of
    # re-scanning the whole group for every remaining record afterwards.
    records = records.reject do |record|
      na = record.naseq
      plus = cal_similarity(na, @opts.kvalue, seed_kmer_set)
      minus = cal_similarity(na.reverse_complement, @opts.kvalue, seed_kmer_set)
      # Strict comparison: equal scores are reported on the '-' strand.
      similarity, strand = plus > minus ? [plus, '+'] : [minus, '-']

      member = similarity >= @opts.cutoff
      group << [record, similarity, strand] if member
      member
    end

    groups << group
  end

  groups
end
Private Instance Methods
cal_similarity(seq, k, seed_kmer_set)
click to toggle source
# File lib/qu/seqcluster/cluster.rb, line 45
# Scores how much of +seq+ is made of k-mers present in +seed_kmer_set+.
#
# +seq+ is walked in non-overlapping windows of length +k+ (window = step = k).
# The score is (matching windows * k) divided by the number of bases covered
# by whole windows, i.e. +seq.size+ minus the size of whatever
# +window_search+ returns (presumably the trailing leftover shorter than +k+,
# as in BioRuby's Bio::Sequence -- confirm against the library version in use).
#
# @param seq [#window_search, #size] sequence to score
# @param k [Integer] k-mer length
# @param seed_kmer_set [#include?] k-mers of the seed, stored as Symbols
# @return [Float] fraction of covered bases that fall in matching k-mers;
#   0.0 when +seq+ is shorter than +k+
def cal_similarity(seq, k, seed_kmer_set)
  # A sequence shorter than k contains no complete window. Without this guard
  # the leftover returned by window_search can be nil and +.size+ would raise.
  return 0.0 if seq.size < k

  matches = 0
  remainder = seq.window_search(k, k) do |kmer|
    matches += 1 if seed_kmer_set.include?(kmer.to_sym)
  end

  (matches.to_f * k) / (seq.size - remainder.size)
end
seed_kmer(record, window, step)
click to toggle source
# File lib/qu/seqcluster/cluster.rb, line 39
# Collects the distinct k-mers of a record's nucleotide sequence.
#
# Every window yielded by +record.naseq.window_search(window, step)+ is
# converted to a Symbol and stored in a Set, so duplicates collapse.
#
# @param record [#naseq] record whose +naseq+ responds to +window_search+
# @param window [Integer] window (k-mer) length passed to +window_search+
# @param step [Integer] step passed to +window_search+
# @return [Set<Symbol>] the distinct k-mers seen
def seed_kmer(record, window, step)
  Set.new.tap do |kmers|
    record.naseq.window_search(window, step) { |kmer| kmers << kmer.to_sym }
  end
end