class Phylip

Attributes

numtaxa[R]
seqlen[R]
seqs[R]

Public Class Methods

new(phylipfile) click to toggle source
# File lib/rphylip.rb, line 19
def initialize(phylipfile)
  raise "File #{phylipfile} does not exist" unless File.exists?(phylipfile)
  @filename = phylipfile
  @seqs = File.open(phylipfile).readlines
  @numtaxa, @seqlen = @seqs[0].split.map{|w| w.to_i}
  @seqs.delete_at(0)
  @seqs.delete_if{|l| l=~ /^\s+$/}
  raise "wrong number of seqs,parsed #{@seqs.size} expected ntaxa #{@numtaxa}" unless @seqs.size == @numtaxa
end

Public Instance Methods

expand_with(phylipfile) click to toggle source
# File lib/rphylip.rb, line 116
def expand_with(phylipfile)
  additional_phylip = Phylip.new(phylipfile)
  if additional_phylip.seqlen == self.seqlen then
    additional_phylip.seqs.each do |newseq|
      @seqs << newseq
      @numtaxa += 1
    end
  else
    raise "different sequence lengths for new #{phylipfile}, cannot be expanded"
  end
end
extract_partition(from_pos, to_pos) click to toggle source
# File lib/rphylip.rb, line 127
def extract_partition(from_pos, to_pos)
  @seqlen = to_pos - from_pos  + 1
  newseqs = []
  from = from_pos - 1
  to = to_pos - 1
  @seqs.each do |seq|
    name, info = seq.split
    newseqs << name + " " + info.slice!(from..to) 
  end
  @seqs = newseqs
  self.save_as(@filename + "_from#{from_pos}_to#{to_pos}")
end
generate_base_alignment(numseqs_base) click to toggle source
# File lib/rphylip.rb, line 36
def generate_base_alignment(numseqs_base)
    seqs = @seqs[0...numseqs_base].select{|s| s.split.last.split(//).uniq.size > 1}
    basename = @filename + "_initial"
    puts "Saving base alignment as #{basename} with #{seqs.size} seqs"
    raise "too few seqs" unless seqs.size > 4
    self.save_seqs_as(seqs, basename)
end
generate_update(seqs, update_filename) click to toggle source
# File lib/rphylip.rb, line 43
def generate_update(seqs, update_filename)
    # remove from seqs single char stuff
    seqs = seqs.select{|s| s.split.last.split(//).uniq.size > 1}
    puts "Saving update alignment as #{update_filename} with #{seqs.size} seqs"
    raise "too few seqs" unless seqs.size > 4
    self.save_seqs_as(seqs, update_filename)
end
names() click to toggle source
# File lib/rphylip.rb, line 28
def names
  ali_names = []
  @seqs.each do |seq|
    ali_names << seq.split.first.strip 
  end
  raise "unexpected #names" unless ali_names.size == @numtaxa
  ali_names
end
remove_taxa(taxa, pruned_phylip) click to toggle source
# File lib/rphylip.rb, line 98
def remove_taxa(taxa, pruned_phylip)
  puts "Original size #{@seqs.size}, after removal expect #{@seqs.size - taxa.size}"
  raise "empty list of taxa to prune" if not taxa or taxa.empty?
  taxa.each do |taxon|
    @seqs.delete_if{|l| l.split.first.strip == taxon}
  end
  self.save_as(pruned_phylip)
  puts "Final size #{@seqs.size} saved in #{pruned_phylip}"
end
save_as(newfile) click to toggle source
# File lib/rphylip.rb, line 107
def save_as(newfile)
  self.save_seqs_as(@seqs, newfile)
end
save_seqs_as(seqs, newfile) click to toggle source
# File lib/rphylip.rb, line 110
def save_seqs_as(seqs, newfile)
  File.open(newfile, "w") do |f|
    f.puts "#{seqs.size} #{@seqlen}"
    seqs.each{|seq| f.puts seq}
  end
end
subdivide(numseqs_base, num_subalignments) click to toggle source
# File lib/rphylip.rb, line 50
def subdivide(numseqs_base, num_subalignments)
  # Subdivides the present alignemnt in a file base.phy and num_subalignments (i) subi.phy
  # iterarions generated are of equal size
  if numseqs_base.to_i + num_subalignments.to_i > @numtaxa then
    raise "Wrong input to subdivide phylip file"
  else
    fisher_yates_shuffle(@seqs)
    generate_base_alignment(numseqs_base)
    # Generaute pseudo-new subsets of sequences
    rest = @seqs[numseqs_base...@seqs.size]
    subset_size = rest.size / num_subalignments 
    update_sequences = [] 
    rest.each_slice(subset_size) do |subset|
      # Note the last slice may be shorter unless rest.size % num_subalignments == 0
      if update_sequences.size < num_subalignments
        update_sequences << subset # a new subset
      else
        update_sequences[num_subalignments - 1] += subset # appends to the last subset
      end
    end
    update_sequences.each_with_index do |seqs, i|
      generate_update(seqs, @filename + "_sequpdate_#{i}.phy")
    end
  end
  return update_sequences.size 
end
subdivide_random(conf) click to toggle source
# File lib/rphylip.rb, line 76
def subdivide_random(conf)
  numseqs_base = conf[:initial_seqs]
  raise "too many seqs" if numseqs_base > @numtaxa
  fisher_yates_shuffle(@seqs)
  generate_base_alignment(numseqs_base)
  n = numseqs_base
  i = 0 #iteration id
  srand(12345) # be deterministic
  while n < @seqs.size
    num_newseqs = pseudonormal(conf[:min_size_update], conf[:max_size_update])
    num_newseqs = @seqs.size - n if (@seqs.size - n - num_newseqs < conf[:min_size_update])
    if conf[:updates_as_full_alignments].nil? or not conf[:updates_as_full_alignments]
      start = n
    else
      start = 0
    end
    generate_update(@seqs[start...n+num_newseqs], @filename + "_sequpdate_#{i}.phy")
    n += num_newseqs 
    i += 1
  end
  i
end