class Bio::FastQC::Parser
Public Class Methods
new(fastqc_data_txt)
click to toggle source
# File lib/bio/fastqc/parser.rb, line 5 def initialize(fastqc_data_txt) @data = fastqc_data_txt @module_results = parse_modules @basic_statistics = basic_statistics end
Public Instance Methods
adapter_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 110
# Rows of the "Adapter Content" module, as returned by get_module_matrix
# with one leading row dropped.
def adapter_content
  module_name = "Adapter Content"
  get_module_matrix(module_name, 1)
end
basic_statistics()
click to toggle source
Basic Statistics module
# File lib/bio/fastqc/parser.rb, line 21
# Builds the "Basic Statistics" lookup table from the first parsed module.
#
# Each row contributes its first cell as the key and its second cell as the
# value. Rows are paired individually rather than by flattening the whole
# module, so a row with a missing value (a single cell) maps to nil instead
# of shifting every later key/value pair or raising
# "odd number of arguments for Hash".
#
# Returns a Hash; empty when no module has been parsed.
def basic_statistics
  rows = @module_results[0] || []
  rows.each_with_object({}) do |row, stats|
    next if row.empty? # blank line inside the module

    stats[row[0]] = row[1]
  end
end
encoding()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 37
# Quality encoding method for the input file type, read from the
# "Encoding" entry of the basic statistics (nil when absent).
def encoding
  @basic_statistics.fetch("Encoding", nil)
end
fastqc_version()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 25
# Software version of FastQC, read from the "##FastQC" entry of the basic
# statistics (nil when absent).
def fastqc_version
  @basic_statistics.fetch("##FastQC", nil)
end
file_type()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 33
# Input file type, read from the "File type" entry of the basic statistics
# (nil when absent).
def file_type
  @basic_statistics.fetch("File type", nil)
end
filename()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 29
# Input filename given to the FastQC program, read from the "Filename"
# entry of the basic statistics (nil when absent).
def filename
  @basic_statistics.fetch("Filename", nil)
end
filtered_sequences()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 49
# Number of sequence reads filtered out, as an Integer. A missing
# "Filtered Sequences" entry converts to 0 via nil.to_i.
def filtered_sequences
  raw_count = @basic_statistics["Filtered Sequences"]
  raw_count.to_i
end
get_module_matrix(module_name, num_of_header_rows)
click to toggle source
Other modules
# File lib/bio/fastqc/parser.rb, line 65
# Looks up a parsed module by name and returns its rows without the leading
# header rows.
#
# module_name        - module title without the ">>" prefix.
# num_of_header_rows - number of leading rows to drop from the module.
#
# Modules with no rows at all (e.g. produced by blank content after the last
# ">>END_MODULE" marker) are skipped instead of raising NoMethodError.
#
# Returns an Array of rows (each an Array of cells), or nil when no module
# has a first cell equal to ">>module_name".
def get_module_matrix(module_name, num_of_header_rows)
  title = ">>#{module_name}"
  mod = @module_results.find do |rows|
    first_row = rows[0]
    first_row && first_row[0] == title
  end
  mod.drop(num_of_header_rows) if mod
end
kmer_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 114
# Rows of the "Kmer Content" module, as returned by get_module_matrix with
# one leading row dropped.
def kmer_content
  module_name = "Kmer Content"
  get_module_matrix(module_name, 1)
end
max_length()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 126
# Upper bound of sequence_length as an Integer: a leading "<digits>-"
# prefix is stripped before conversion, so "35-76" yields 76 and "101"
# yields 101.
def max_length
  upper_bound = sequence_length.sub(/^\d+-/,"")
  upper_bound.to_i
end
mean_sequence_length()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 162
# Mean read length computed from the sequence length distribution.
#
# Each row is [length, count]; a length may be a single value ("76") or a
# binned range ("30-39"), in which case the midpoint of the range stands in
# for the bin. The single-row case uses the same midpoint rule, so a lone
# binned row no longer reports only its lower bound.
#
# Returns a Float, or nil when the distribution is missing or has no data
# rows.
def mean_sequence_length
  dist = (sequence_length_distribution || []).drop(1) # drop column header
  return nil if dist.empty?

  # Midpoint of "a-b"; for a plain "a" both substitutions leave "a".
  midpoint = lambda do |length|
    (length.sub(/-\d+$/, "").to_f + length.sub(/^\d+-/, "").to_f) / 2
  end
  return midpoint.call(dist[0][0]) if dist.size == 1

  weighted = dist.map { |length, count| midpoint.call(length) * count.to_f }
  total = dist.map { |_length, count| count.to_f }.reduce(:+)
  weighted.reduce(:+) / total
end
median_sequence_length()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 176
# Median read length estimated from the sequence length distribution.
#
# Walks the [length, count] rows in order, consuming counts until the row
# holding the halfway position is reached, and reports that row's length
# (the midpoint when the length is a binned range such as "30-39"). The
# single-row case uses the same midpoint rule, so a lone binned row no
# longer reports only its lower bound.
#
# Returns a Float (0 when no row is selected), or nil when the distribution
# is missing or has no data rows.
def median_sequence_length
  dist = (sequence_length_distribution || []).drop(1) # drop column header
  return nil if dist.empty?

  # Midpoint of "a-b"; for a plain "a" both substitutions leave "a".
  midpoint = lambda do |length|
    (length.sub(/-\d+$/, "").to_f + length.sub(/^\d+-/, "").to_f) / 2
  end
  return midpoint.call(dist[0][0]) if dist.size == 1

  k = dist.map { |_length, count| count.to_f }.reduce(:+) / 2 # position of median
  median = 0
  dist.each do |length, count|
    c = count.to_f # count of reads in this length range
    if k > c
      k -= c
    else
      median = midpoint.call(length)
      break
    end
  end
  median
end
min_length()
click to toggle source
Custom modules
# File lib/bio/fastqc/parser.rb, line 122
# Lower bound of sequence_length as an Integer: a trailing "-<digits>"
# suffix is stripped before conversion, so "35-76" yields 35 and "101"
# yields 101.
def min_length
  lower_bound = sequence_length.sub(/-\d+$/,"")
  lower_bound.to_i
end
overall_mean_quality_score()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 148
# Overall quality score based on the per-base mean column; delegates to
# overall_quality_score(:mean).
def overall_mean_quality_score
  statistic = :mean
  overall_quality_score(statistic)
end
overall_median_quality_score()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 152
# Overall quality score based on the per-base median column; delegates to
# overall_quality_score(:median).
def overall_median_quality_score
  statistic = :median
  overall_quality_score(statistic)
end
overall_n_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 156
# Unweighted average of the second column of the "Per base N content"
# module across all data rows.
#
# The column header row is dropped first, as the other aggregate methods in
# this class do; otherwise its non-numeric cell converts to 0.0 and is
# counted as an extra position, diluting the average.
#
# Returns a Float, or nil when the module is missing or has no data rows.
def overall_n_content
  rows = (per_base_n_content || []).drop(1) # drop column header
  return nil if rows.empty?

  v = rows.map { |row| row[1].to_f }
  v.reduce(:+) / v.size
end
overall_quality_score(mean_or_median)
click to toggle source
# File lib/bio/fastqc/parser.rb, line 139
# Collapses the per-base quality column selected by mean_or_median into a
# single score: each value q is converted to 10**(q / -10), those values
# are averaged, and the average is converted back with -10 * log10.
#
# mean_or_median - passed to per_base_quality_column to pick the column.
def overall_quality_score(mean_or_median)
  rows = per_base_sequence_quality.drop(1) # skip the column header row
  column = per_base_quality_column(mean_or_median)
  error_probabilities = []
  rows.each do |row|
    score = row[column].to_f
    error_probabilities << (10**(score / -10)).to_f
  end
  mean_probability = error_probabilities.reduce(:+) / error_probabilities.size
  -10 * Math.log10(mean_probability)
end
overrepresented_sequences()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 106
# Rows of the "Overrepresented sequences" module, as returned by
# get_module_matrix with one leading row dropped.
def overrepresented_sequences
  module_name = "Overrepresented sequences"
  get_module_matrix(module_name, 1)
end
parse()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 201
# Collects every reader of this parser into one Hash.
#
# Each key is a Symbol naming a method of this object and each value is
# that method's return value; keys are inserted in the order listed below.
def parse
  fields = [
    :fastqc_version,
    :filename,
    :file_type,
    :encoding,
    :total_sequences,
    :sequences_flagged_as_poor_quality,
    :filtered_sequences,
    :sequence_length,
    :percent_gc,
    :per_base_sequence_quality,
    :per_tile_sequence_quality,
    :per_sequence_quality_scores,
    :per_base_sequence_content,
    :per_sequence_gc_content,
    :per_base_n_content,
    :sequence_length_distribution,
    :total_duplicate_percentage,
    :sequence_duplication_levels,
    :overrepresented_sequences,
    :adapter_content,
    :kmer_content,
    :min_length,
    :max_length,
    :overall_mean_quality_score,
    :overall_median_quality_score,
    :overall_n_content,
    :mean_sequence_length,
    :median_sequence_length
  ]
  fields.each_with_object({}) do |field, report|
    report[field] = send(field)
  end
end
parse_modules()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 11
# Splits the raw report in @data into modules, rows and cells.
#
# Modules are separated by ">>END_MODULE\n", rows by "\n" and cells by "\t".
# Returns an Array of modules, each an Array of rows, each an Array of
# String cells.
def parse_modules
  chunks = @data.split(">>END_MODULE\n")
  chunks.map do |chunk|
    rows = chunk.split("\n")
    rows.map { |row| row.split("\t") }
  end
end
per_base_n_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 90
# Rows of the "Per base N content" module, as returned by get_module_matrix
# with one leading row dropped.
def per_base_n_content
  module_name = "Per base N content"
  get_module_matrix(module_name, 1)
end
per_base_quality_column(mean_or_median)
click to toggle source
# File lib/bio/fastqc/parser.rb, line 130
# Maps a statistic name to a column index: :mean is 1, :median is 2.
# Any other value yields nil.
def per_base_quality_column(mean_or_median)
  column_for = { mean: 1, median: 2 }
  column_for[mean_or_median]
end
per_base_sequence_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 82
# Rows of the "Per base sequence content" module, as returned by
# get_module_matrix with one leading row dropped.
def per_base_sequence_content
  module_name = "Per base sequence content"
  get_module_matrix(module_name, 1)
end
per_base_sequence_quality()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 70
# Rows of the "Per base sequence quality" module, as returned by
# get_module_matrix with one leading row dropped.
def per_base_sequence_quality
  module_name = "Per base sequence quality"
  get_module_matrix(module_name, 1)
end
per_sequence_gc_content()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 86
# Rows of the "Per sequence GC content" module, as returned by
# get_module_matrix with one leading row dropped.
def per_sequence_gc_content
  module_name = "Per sequence GC content"
  get_module_matrix(module_name, 1)
end
per_sequence_quality_scores()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 78
# Rows of the "Per sequence quality scores" module, as returned by
# get_module_matrix with one leading row dropped.
def per_sequence_quality_scores
  module_name = "Per sequence quality scores"
  get_module_matrix(module_name, 1)
end
per_tile_sequence_quality()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 74
# Rows of the "Per tile sequence quality" module, as returned by
# get_module_matrix with one leading row dropped.
def per_tile_sequence_quality
  module_name = "Per tile sequence quality"
  get_module_matrix(module_name, 1)
end
percent_gc()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 57
# Overall percentage of GC content as a Float, read from the "%GC" entry of
# the basic statistics. A missing entry converts to 0.0 via nil.to_f.
def percent_gc
  raw_percentage = @basic_statistics["%GC"]
  raw_percentage.to_f
end
sequence_duplication_levels()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 102
# Rows of the "Sequence Duplication Levels" module, as returned by
# get_module_matrix with two leading rows dropped (total_duplicate_percentage
# reads the second of those rows).
def sequence_duplication_levels
  module_name = "Sequence Duplication Levels"
  get_module_matrix(module_name, 2)
end
sequence_length()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 53
# Sequence length from the "Sequence length" entry of the basic statistics.
# Kept as a String because the value can be a range (nil when absent).
def sequence_length
  @basic_statistics.fetch("Sequence length", nil)
end
sequence_length_distribution()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 94
# Rows of the "Sequence Length Distribution" module, as returned by
# get_module_matrix with one leading row dropped.
def sequence_length_distribution
  module_name = "Sequence Length Distribution"
  get_module_matrix(module_name, 1)
end
sequences_flagged_as_poor_quality()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 45
# Number of sequence reads flagged as poor quality, as an Integer. A
# missing entry converts to 0 via nil.to_i.
def sequences_flagged_as_poor_quality
  raw_count = @basic_statistics["Sequences flagged as poor quality"]
  raw_count.to_i
end
summary()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 197
# Returns exactly what parse returns; kept as a second name for the same
# report.
def summary
  report = parse
  report
end
total_duplicate_percentage()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 98
# Reads the total-percentage figure of the "Sequence Duplication Levels"
# module: the second cell of the first row after the module title row,
# converted to Float.
#
# Returns nil instead of raising NoMethodError when the module is absent
# from the report or has no rows after its title.
def total_duplicate_percentage
  rows = get_module_matrix("Sequence Duplication Levels", 1)
  return nil unless rows && rows[0]

  rows[0][1].to_f
end
total_sequences()
click to toggle source
# File lib/bio/fastqc/parser.rb, line 41
# Total number of sequence reads, as an Integer. A missing
# "Total Sequences" entry converts to 0 via nil.to_i.
def total_sequences
  raw_count = @basic_statistics["Total Sequences"]
  raw_count.to_i
end