class Bio::EBD::Format

Express Beta Diversity input “OTU table” format parser.

Attributes

otu_names[RW]
sample_counts[RW]

Hash mapping each sample name to an array of counts. The counts are floats, listed in the same order as otu_names.

Public Class Methods

new() click to toggle source
# File lib/bio-express_beta_diversity/ebd_format.rb, line 11
# Start with an empty table: no OTU names and no per-sample counts.
def initialize
  @otu_names = Array.new
  @sample_counts = Hash.new
end
parse_from_file(filename) click to toggle source
# File lib/bio-express_beta_diversity/ebd_format.rb, line 16
# Parse an Express Beta Diversity "OTU table" file into a Bio::EBD::Format.
#
# The file is tab-separated. The first line holds a leading cell (ignored)
# followed by the OTU IDs; every later non-empty line holds a sample name
# followed by one count per OTU, e.g.
#
#            100535  1008038
#   sample1  5.0     0
#   sample2  0       1.0
#
# filename:: path of the file to read.
#
# Returns a Bio::EBD::Format whose otu_names is the array of OTU IDs and whose
# sample_counts maps each sample name to an array of Float counts.
#
# Raises RuntimeError when the first line has fewer than two columns, when a
# sample line does not have exactly one count per OTU, when a count is not
# numeric, or when a sample name occurs more than once.
def self.parse_from_file(filename)
  ebd = Bio::EBD::Format.new

  first_line = true
  CSV.foreach(filename, :col_sep => "\t") do |row|
    if first_line
      # First line is the IDs of the OTUs (after the leading cell)
      raise "EBD format file appears to be incorrectly formatted on the first line: #{row.inspect}" if row.length < 2
      ebd.otu_names = row.drop(1)
      first_line = false
      next
    end

    next if row.empty? # Ignore empty lines

    # All other lines are a sample name and then one observation count per OTU
    raise "Parse exception at this row: #{row.inspect}" unless row.length == ebd.otu_names.length + 1

    sample_name = row.first
    raise "Duplicate sample name detected in EBD format: #{sample_name}" if ebd.sample_counts.key?(sample_name)

    ebd.sample_counts[sample_name] = row.drop(1).map do |count|
      begin
        # Float() rejects non-numeric and empty cells, whereas to_f would
        # silently turn them into 0.0 and corrupt the table.
        Float(count)
      rescue ArgumentError, TypeError
        raise "Non-numeric count #{count.inspect} at this row: #{row.inspect}"
      end
    end
  end

  ebd
end

Public Instance Methods

number_of_samples() click to toggle source
# File lib/bio-express_beta_diversity/ebd_format.rb, line 45
# Number of samples currently held, i.e. the number of entries in
# sample_counts.
def number_of_samples
  @sample_counts.size
end