class FastqFile

Attributes

num_seqs[RW]

Public Class Methods

new(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true) click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 24
def initialize(fasta_file_name, mode='r', fastq_type = :sanger, qual_to_array=true, qual_to_phred=true)

   
  if mode.upcase.index('W.GZ')
    @fastq_file = Zlib::GzipWriter.open(fasta_file_name)
  elsif mode.upcase.index('W')
    @fastq_file = File.open(fasta_file_name,'w')
  elsif mode.upcase.index('A')
    if !File.exist?(fasta_file_name)
      raise "File #{fasta_file_name} doesn't exists"
    end

    @fastq_file = File.open(fasta_file_name,'a')
  else #read only
    if !File.exist?(fasta_file_name)
      raise "File #{fasta_file_name} doesn't exists"
    end

    if fasta_file_name.is_a?(IO)
      @fastq_file = fasta_file_name
    else
      if ScbiZcatFile.gz_file?(fasta_file_name)
        #@fastq_file = Zlib::GzipReader.open(fasta_file_name)
        #@fastq_file = MultiGzReader.new(fasta_file_name)
        @fastq_file = ScbiZcatFile.new(fasta_file_name)
        # puts "GZIP file detected"
      else
        @fastq_file = File.open(fasta_file_name,'r')
        # puts "NORMAL file detected"
      end
      
    end
  end

  @mode = mode
  @num_seqs = 0
  @fastq_type=fastq_type

  #  S - Sanger        Phred+33,  raw reads typically (0, 40)
  #  X - Solexa        Solexa+64, raw reads typically (-5, 40)
  #  I - Illumina 1.3+ Phred+64,  raw reads typically (0, 40)
  #  J - Illumina 1.5+ Phred+64,  raw reads typically (3, 40)
  # > >>> def solexa_quality_from_phred(phred_quality) :
  # > ...     return 10*log(10**(phred_quality/10.0) - 1, 10)
  # > ...
  # > >>> solexa_quality_from_phred(90)
  # > 89.999999995657035
  # > >>> solexa_quality_from_phred(50)
  # > 49.99995657033466
  # > >>> solexa_quality_from_phred(10)
  # > 9.5424250943932485
  # > >>> solexa_quality_from_phred(1)
  # > -5.8682532438011537
  # > >>> solexa_quality_from_phred(0.1)
  # > -16.32774717238372
  # >
  # > >>> def phred_quality_from_solexa(solexa_quality) :
  # > ...     return 10*log(10**(solexa_quality/10.0) + 1, 10)
  # > ...
  # > >>> phred_quality_from_solexa(90)
  # > 90.000000004342922
  # > >>> phred_quality_from_solexa(10)
  # > 10.41392685158225
  # > >>> phred_quality_from_solexa(0)
  # > 3.0102999566398116
  # > >>> phred_quality_from_solexa(-20)
  # > 0.043213737826425784


  #sanger by default
  @to_phred = lambda{|q| q - 33}
  @from_phred = lambda{|q| (q+33).chr}

  if @fastq_type == :ilumina
    @to_phred = lambda{|q| q - 64}
    # @from_phred = lambda{|q| (q+64).chr}

  elsif @fastq_type == :solexa
    #
    # solexa to phred quals

    @to_phred = lambda{|q| (10*Math.log(10**(q/10.0)+1,10)).round}
    # @from_phred = lambda{|q| (10*Math.log(10**(q/10.0)-1,10)).round.chr}

    #phred to solexa quals

  end

  @qual_to_array = qual_to_array

  @qual_to_phred = qual_to_phred

end
to_fastq(seq_name,seq_fasta,seq_qual,comments='') click to toggle source

creates fastq otuput in sanger format

# File lib/scbi_fastq/fastq_file.rb, line 176
def self.to_fastq(seq_name,seq_fasta,seq_qual,comments='')

  res=[]

  name = ""

  res << ("@#{seq_name} #{comments}")
  res << (seq_fasta)
  res << ("+")
  #res << ("+#{seq_name} #{comments}")

  if !seq_qual.empty?
    # if @qual_to_phred
      if seq_qual.is_a?(Array)
        res<<(seq_qual.map{|e| (e+33).chr}.join)
      else
        res<<(seq_qual.split(/\s+/).map{|e| (e.to_i+33).chr}.join)
      end
    # else
    #   res << seq_qual
    # end
  else # no qual provided, use a default value
    q='D'*seq_fasta.length;
    res << q
  end

  return res
end

Public Instance Methods

close() click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 118
def close
  @fastq_file.close
end
each() { |n,f,q,c| ... } click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 126
def each

  rewind

  n,f,q,c=next_seq

  while (!n.nil?)
    yield(n,f,q,c)
    n,f,q,c=next_seq
  end

  rewind

end
next_seq() click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 151
def next_seq
  #init variables
  res = read_fastq
  return res
end
rewind() click to toggle source

goto first position in file

# File lib/scbi_fastq/fastq_file.rb, line 142
def rewind
  @num_seqs = 0 ;
  # @fastq_file.pos=0
  @fastq_file.rewind
end
with_qual?() click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 205
def with_qual?
  true
end
write_seq(seq_name,seq_fasta,seq_qual,comments='') click to toggle source

write sequence to file in sanger format

# File lib/scbi_fastq/fastq_file.rb, line 158
def write_seq(seq_name,seq_fasta,seq_qual,comments='')
  name = ""

  @fastq_file.puts("@#{seq_name} #{comments}")
  @fastq_file.puts(seq_fasta)
  @fastq_file.puts("+")
  #@fastq_file.puts("+#{seq_name} #{comments}")

  if seq_qual.is_a?(Array)
    @fastq_file.puts(seq_qual.map{|e| @from_phred.call(e)}.join)
  else
    @fastq_file.puts(seq_qual.split(/\s+/).map{|e| @from_phred.call(e.to_i)}.join)
  end

end

Private Instance Methods

read_fastq() click to toggle source
# File lib/scbi_fastq/fastq_file.rb, line 220
def read_fastq

  seq_name = nil
  seq_fasta = nil
  seq_qual = nil
  comments = nil

  reading = :fasta

  if !@fastq_file.eof

    begin
      #read four lines
      name_line = @fastq_file.readline.chomp
      seq_fasta = @fastq_file.readline.chomp
      name2_line = @fastq_file.readline.chomp
      seq_qual = @fastq_file.readline.chomp
      
      
      # if there is no qual, but there is a fasta
      if seq_qual.empty? && !seq_fasta.empty?
        seq_qual = 'D'*seq_fasta.length
      end


      # parse name
      if name_line =~ /^@\s*([^\s]+)\s*(.*)$/
        # remove comments
        seq_name = $1
        comments=$2
      else
        raise "Invalid sequence name in #{name_line}"
      end

      # parse fasta
      seq_fasta.strip! if !seq_fasta.empty?

      # parse qual_name

      if !seq_name.nil? && !seq_qual.empty?

        @num_seqs += 1

        if @qual_to_phred
          seq_qual=seq_qual.each_char.map{|e| (@to_phred.call(e.ord))}

          if !@qual_to_array
            seq_qual=seq_qual.join(' ')
          end
        end

      end
    rescue EOFError
      raise "Bad format in FastQ file"
    end
  end

  return [seq_name,seq_fasta,seq_qual,comments]
end