class Object
Constants
- CHIMERA
- CODING
- COMPLETE
- C_TERMINAL
- FAILED
- INTERNAL
- MISASSEMBLED
- NCRNA
- N_TERMINAL
- OPERATION
- OTHER
- QUERY
- TARGET
- UNKNOWN
- UNMAPPED
Public Instance Methods
analysis_over_DB_annotated_seqs(seqs_annotation_DB, reptrans_fasta, cluster_file_path, stats_hash, key_stats, pfam_clustering)
click to toggle source
END MAIN FUNCTION
# File lib/full_lengther_next/reptrans.rb, line 67
# Clusters database-annotated sequences, keeps one representative per cluster,
# reports the clusters and writes the representatives to a FASTA file.
#
# seqs_annotation_DB:: sequences to cluster (grouped first with clustering_by_id)
# reptrans_fasta::     FASTA path handed to write_fasta with mode 'w'
# cluster_file_path::  path handed to report_clustering
# stats_hash::         counter hash; stats_hash[key_stats] is increased by the
#                      number of representatives
# key_stats::          key of the counter to update
# pfam_clustering::    when truthy, the representatives are clustered a second
#                      time by their :pfam_id annotation. Defaults to false so
#                      that five-argument callers (see do_blast_with_EST) do not
#                      raise ArgumentError.
#
# NOTE(review): the FASTA file is written with mode 'w', so a second call with
# the same reptrans_fasta replaces the records written by the first call --
# confirm this is intended.
def analysis_over_DB_annotated_seqs(seqs_annotation_DB, reptrans_fasta, cluster_file_path, stats_hash, key_stats, pfam_clustering = false)
  clusters_seqs_annot_DB = clustering_by_id(seqs_annotation_DB)
  representative_seqs_annot_DB = select_representative(clusters_seqs_annot_DB)
  if pfam_clustering
    # TODO: take the annotation key from my_worker_manager_fln
    # (@@func_annot_type) instead of hard-coding :pfam_id here.
    clusters_seqs_annot_DB = clustering_by_annot(representative_seqs_annot_DB, :pfam_id)
    # Merge the clusters built by id with the clusters built by pfam.
    representative_seqs_annot_DB = select_representative(clusters_seqs_annot_DB)
  end
  stats_hash[key_stats] += representative_seqs_annot_DB.length
  report_clustering(cluster_file_path, clusters_seqs_annot_DB, representative_seqs_annot_DB)
  write_fasta(representative_seqs_annot_DB, reptrans_fasta, 'w')
end
artifact?(seq, query, db_name, db_path, options, new_seqs)
click to toggle source
MAIN FUNCTION
# File lib/full_lengther_next/artifacts.rb, line 9
# MAIN FUNCTION
# Decides whether +seq+ is an artifact (unmapped, misassembled or chimera) and
# tags it accordingly.
#
# seq::      sequence under analysis; its hit, type, db_name, save_fasta and
#            ignore attributes may be modified
# query::    BLAST query result for the sequence, or nil when there is none
# db_name::  database name stored in seq.db_name when an artifact is found
# db_path::  forwarded to search_chimeras
# options::  option hash; chimera search is skipped when options[:chimera]
#            includes 'd'
# new_seqs:: array that receives the sequences returned by search_chimeras
#
# Returns true when the sequence was flagged as an artifact, false otherwise.
# Uses the true/false literals: the TRUE/FALSE constants are deprecated since
# Ruby 2.4 and are not defined by current Ruby releases.
def artifact?(seq, query, db_name, db_path, options, new_seqs)
  artifact = false

  # UNMAPPED CONTIG DETECTION
  # No BLAST result and the sequence reports itself as unmapped.
  if query.nil? && seq.unmapped?
    seq.hit = nil
    artifact = true
    seq.type = UNMAPPED
  end

  unless query.nil?
    # MISASSEMBLED DETECTION
    # A misassembled sequence stops the chimera analysis below.
    if !artifact && misassembled_detection(query)
      seq.hit = query.hits.first
      artifact = true
      seq.type = MISASSEMBLED
      seq.warnings('ERROR#1')
    end

    # OVERLAPPING HSPS ON SUBJECT DETECTION (disabled in the original source)
    # if !artifact
    #   hit_reference = query.hits.first.dup
    #   query, overlapping = overlapping_hsps_on_subject(query)
    #   if overlapping
    #     if query.hits.first.nil?
    #       seq.hit = hit_reference
    #     else
    #       seq.hit = query.hits.first
    #     end
    #     artifact = true
    #     seq.type = OTHER
    #     seq.warnings('ERROR#2')
    #   end
    # end

    # MULTIPLE HSP DETECTION
    # Only a warning: the sequence is not marked as an artifact here.
    if !artifact && multiple_hsps(query, 3)
      seq.hit = query.hits.first
      seq.warnings('ERROR#3')
    end

    # CHIMERA DETECTION
    if !artifact && !options[:chimera].include?('d')
      chimera = search_chimeras(seq, query, options, db_name, db_path)
      unless chimera.nil?
        new_seqs.concat(chimera)
        seq.db_name = db_name
        seq.type = CHIMERA
        artifact = true
      end
    end
  end

  if artifact
    puts seq.prot_annot_calification if $verbose > 1
    seq.db_name = db_name
    seq.save_fasta = false
    seq.ignore = true
  end
  artifact
end
clean_by_identity(blast_result, ident)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 137
# For every query of +blast_result+, keeps only the hits whose identity is
# strictly greater than +ident+. Queries whose first hit is nil are not
# filtered. When no hit survives, hits is set to [nil].
# full_query_length is always coerced with to_i, working around a scbi_blast
# bug that returns it as a String instead of an Integer.
def clean_by_identity(blast_result, ident)
  blast_result.querys.each do |query|
    unless query.hits.first.nil?
      kept = query.hits.select { |hit| hit.ident > ident }
      # [nil] marks "no hit left" for this query.
      query.hits = kept.empty? ? [nil] : kept
    end
    query.full_query_length = query.full_query_length.to_i
  end
end
clean_by_query_length_match(blast_result, min_len_nt)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 148
# For every query of +blast_result+, keeps only the hits whose alignment
# length multiplied by 3 is strictly greater than +min_len_nt+. Queries whose
# first hit is nil are not filtered. When no hit survives, hits is set to [nil].
# full_query_length is always coerced with to_i, working around a scbi_blast
# bug that returns it as a String instead of an Integer.
def clean_by_query_length_match(blast_result, min_len_nt)
  blast_result.querys.each do |query|
    unless query.hits.first.nil?
      long_enough = query.hits.select { |hit| hit.align_len * 3 > min_len_nt }
      # [nil] marks "no hit left" for this query.
      query.hits = long_enough.empty? ? [nil] : long_enough
    end
    query.full_query_length = query.full_query_length.to_i
  end
end
clean_hsp_by_identity(hit, identity)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 298
# Removes in place the HSPs of +hit+ whose identity is below +identity+ and
# returns the same (mutated) array.
def clean_hsp_by_identity(hit, identity)
  hit.keep_if { |hsp| hsp.ident >= identity }
end
clean_overlapping_hsps(blast_result, keep_if_diff_sense = FALSE)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 161
# Removes, for every query with more than one hit, the hits that
# same_query_hsp reports as overlapping an earlier-visited hit.
#
# blast_result::       object whose querys expose a mutable hits array
# keep_if_diff_sense:: when truthy, an overlapping hit is removed only if
#                      same_sense? is also true for the pair
#
# The default uses the false literal: the FALSE constant is deprecated since
# Ruby 2.4 and is not defined by current Ruby releases.
def clean_overlapping_hsps(blast_result, keep_if_diff_sense = false)
  blast_result.querys.each do |query|
    next unless query.hits.length > 1

    query.hits.each_with_index do |hit, j|
      next if hit.nil? # already discarded by a previous comparison

      query.hits.each_with_index do |second_hit, i|
        next if second_hit.nil? || i == j # discarded, or the same hit
        next unless same_query_hsp(hit, second_hit)

        # Discarded hits are blanked first and compacted afterwards so that
        # indexes stay stable while both loops run.
        query.hits[i] = nil if !keep_if_diff_sense || same_sense?(hit, second_hit)
      end
    end
    query.hits.compact!
  end
end
clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
click to toggle source
COMPLEMENTARY FUNCTIONS
# File lib/full_lengther_next/blast_functions.rb, line 246
# COMPLEMENTARY FUNCTIONS
# Cleans the HSPs of one hit with subject_overlapping_hsps (only when the hit
# has more than one HSP) and appends the result to +cleaned_hits+.
#
# Returns [hsps, overlapping]; overlapping is nil when the hit has at most one
# HSP, because subject_overlapping_hsps is not called in that case.
def clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
  overlapping = nil
  complete_hit, overlapping = subject_overlapping_hsps(complete_hit) if complete_hit.length > 1
  cleaned_hits.concat(complete_hit)
  [complete_hit, overlapping]
end
cluster_hsps(hsps)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 303
# Groups consecutive HSPs that share the same accession (acc).
# Returns an array of arrays; a new group starts every time the accession
# differs from the one of the previous HSP, so non-adjacent HSPs with the same
# accession end up in separate groups.
def cluster_hsps(hsps)
  previous_acc = ''
  hsps.each_with_object([]) do |hsp, groups|
    current_acc = hsp.acc
    if current_acc == previous_acc
      groups.last << hsp
    else
      groups << [hsp]
    end
    previous_acc = current_acc
  end
end
clustering_by_annot(seqs_with_hit, annotation_type)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 165
# Groups sequences that carry the same functional annotation.
#
# seqs_with_hit::   sequences responding to functional_annotations (a Hash)
# annotation_type:: key looked up in functional_annotations (e.g. :pfam_id)
#
# The annotation is split on ';', sorted and joined again, so the order of the
# ids inside the annotation does not matter. Sequences whose annotation is nil
# or '-' each get a cluster of their own; those clusters are placed after the
# annotated ones. Annotated clusters keep first-appearance order.
#
# A Hash (insertion ordered) replaces the original Array#index lookup, which
# was quadratic in the number of distinct annotations.
def clustering_by_annot(seqs_with_hit, annotation_type)
  annotated = {}
  unannotated = []
  seqs_with_hit.each do |seq|
    raw = seq.functional_annotations[annotation_type]
    key = raw.nil? ? nil : raw.split(';').sort.join(';')
    if key.nil? || key == '-'
      unannotated << [seq]
    else
      (annotated[key] ||= []) << seq
    end
  end
  annotated.values.concat(unannotated)
end
clustering_by_id(seqs_with_hit)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 150
# Groups sequences by the value of get_acc.
# Returns an array of clusters (arrays of sequences) ordered by the first
# appearance of each accession; sequences keep their input order inside a
# cluster.
#
# group_by (backed by an insertion-ordered Hash) replaces the original
# Array#index lookup, which was quadratic in the number of distinct ids.
def clustering_by_id(seqs_with_hit)
  seqs_with_hit.group_by { |seq| seq.get_acc }.values
end
count_cpu(options)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 206
# Derives a CPU count from options[:workers]: when it is an Array the result
# is its length plus one, otherwise the value is returned unchanged.
def count_cpu(options)
  workers = options[:workers]
  workers.class.to_s == 'Array' ? workers.length + 1 : workers
end
do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 101
# Runs the EST analysis through a second ScbiMapreduce server and processes
# the sequences that obtained an EST annotation.
#
# putative_seqs::             sequences handed to MyWorkerManagerEst
# options::                   option hash (:server_ip, :port, :workers,
#                             :chunk_size are read; :chimera is set to nil)
# reptrans_fasta::            FASTA path forwarded to
#                             analysis_over_DB_annotated_seqs
# blast_path::                forwarded to MyWorkerManagerEst.init_work_manager
# cluster_EST_annotated_path:: cluster report path
# stats_hash::                counter hash, updated under 'est_annotated'
#
# Returns the second array given by MyWorkerManagerEst.get_array_seqs.
#
# The original call passed only five arguments to
# analysis_over_DB_annotated_seqs, whose sixth parameter (pfam_clustering) is
# required, so it raised ArgumentError. false is now passed explicitly.
#
# NOTE(review): analysis_over_DB_annotated_seqs writes reptrans_fasta with
# mode 'w'; if that file already holds the protein-annotated representatives
# they are replaced here -- confirm this is intended.
def do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash)
  # Second server to representative transcriptome
  $LOG.info 'Starting server for EST analysis'
  custom_worker_file = File.join(File.dirname(ROOT_PATH), 'lib', 'full_lengther_next', 'classes', 'my_worker_EST.rb')
  options[:chimera] = nil # Chimera system is inactive on RepTrans; this reduces the BLAST output
  MyWorkerManagerEst.init_work_manager(putative_seqs, options, blast_path)
  server_EST = ScbiMapreduce::Manager.new(options[:server_ip], options[:port], options[:workers], MyWorkerManagerEst, custom_worker_file, STDOUT, FULL_LENGTHER_NEXT_INIT)
  server_EST.chunk_size = options[:chunk_size]
  server_EST.start_server
  $LOG.info 'Closing server for EST analysis'

  seqs_with_EST, putative_seqs = MyWorkerManagerEst.get_array_seqs
  unless seqs_with_EST.empty?
    analysis_over_DB_annotated_seqs(seqs_with_EST, reptrans_fasta, cluster_EST_annotated_path, stats_hash, 'est_annotated', false)
  end
  putative_seqs
end
do_makeblastdb(seqs, output, dbtype)
click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 35
# Builds a BLAST database by piping +seqs+ into the makeblastdb executable.
#
# seqs::   text written to makeblastdb's standard input ("-in -")
# output:: database path; its basename is used as the database title
# dbtype:: value for -dbtype
#
# The makeblastdb output is printed with puts. The command is given to
# IO.popen as an argument array, so no shell is involved and paths containing
# spaces or shell metacharacters are passed through untouched. true replaces
# the TRUE constant, which is deprecated since Ruby 2.4 and not defined by
# current Ruby releases.
def do_makeblastdb(seqs, output, dbtype)
  cmd = ['makeblastdb', '-in', '-', '-out', output.to_s,
         '-title', File.basename(output.to_s), '-dbtype', dbtype.to_s, '-parse_seqids']
  IO.popen(cmd, 'w+') do |makedb|
    makedb.sync = true
    makedb.write(seqs)
    makedb.close_write # signal end of input before reading the output
    puts makedb.readlines
  end
end
filter_hits(query, select_hits=10)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 4
# Selects the best hits of +query+.
#
# When the first hit is not nil, HSPs are grouped with cluster_hsps, the groups
# at indexes 0..select_hits are kept and then filtered by identity and by
# subject coverage. If nothing survives, the search is repeated with
# select_hits + 10; once select_hits reaches the number of HSPs or 100 the
# first clustered hit is returned instead, which stops the recursion.
def filter_hits(query, select_hits=10)
  hits = query.hits
  unless hits.first.nil?
    candidates = cluster_hsps(hits)[0..select_hits]
    hits = select_hits_by_coverage_subject(select_hits_by_identity_query(candidates))
  end
  return hits unless hits.empty?

  if select_hits >= query.hits.length || select_hits >= 100
    [cluster_hsps(query.hits).first]
  else
    filter_hits(query, select_hits + 10)
  end
end
find_hit(hit_acc, ar_hits)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 317
# Returns the first hit (array of HSPs) of +ar_hits+ whose first HSP has the
# accession +hit_acc+, or nil when there is none.
def find_hit(hit_acc, ar_hits)
  ar_hits.find { |hit| hit.first.acc == hit_acc }
end
get_coverage_subject(hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 22
# Returns the percentage of the subject covered by the alignment
# (align_len * 100 / s_len).
#
# For ExoBlastHit objects with frameshifts, a value above 100 is corrected:
# a frameshift can add a gap (an extra residue) to the alignment, so the
# alignment length is reduced by 1, 2, ... up to the number of frameshifts
# until the percentage is no longer above 100.
def get_coverage_subject(hit)
  coverage = hit.align_len * 100.0 / hit.s_len
  return coverage unless coverage > 100 && hit.class.to_s == 'ExoBlastHit' && !hit.q_frameshift.empty?

  1.upto(hit.q_frameshift.length) do |removed|
    coverage = (hit.align_len - removed) * 100.0 / hit.s_len
    break if coverage <= 100
  end
  coverage
end
go_for_graph(sequences_by_ontologies, fpkm = {})
click to toggle source
# File lib/full_lengther_next/go_methods.rb, line 1
# Builds, for each GO ontology, a table ready to be plotted.
#
# sequences_by_ontologies:: Hash mapping a GO name prefixed with 'F:', 'C:' or
#                           'P:' to the list of sequence names annotated with it
# fpkm::                    optional Hash mapping a sequence name to an array
#                           whose first element is a number. When empty, each
#                           GO term is valued by its number of sequences;
#                           otherwise by the sum of those numbers, and terms
#                           whose sum is not positive are dropped.
#
# Returns a Hash with the keys :function_go, :component_go and :process_go.
# Each value is an array whose first row is [key name, 'GO'], followed by
# [label, value] rows sorted by decreasing value, or a single ['No_data', 1]
# row when the ontology has no term.
#
# Changes over the original: the sum is seeded with 0, so a term with an empty
# sequence list no longer fails on `nil > 0`; only the leading prefix is
# removed from the label (the original gsub removed every occurrence).
def go_for_graph(sequences_by_ontologies, fpkm = {})
  container = {}
  go_data = [
    [:function_go, 'F:'],
    [:component_go, 'C:'],
    [:process_go, 'P:']
  ]
  go_data.each do |key, prefix|
    go_table = []
    sequences_by_ontologies.each do |go_name, seq_names|
      next unless go_name.start_with?(prefix)

      go_label = go_name[prefix.length..-1]
      if fpkm.empty?
        go_table << [go_label, seq_names.length]
      else
        total = seq_names.inject(0) { |acc, seq_name| acc + fpkm[seq_name].first }
        go_table << [go_label, total] if total > 0
      end
    end

    header = [key.to_s, 'GO']
    if go_table.empty?
      container[key] = [header, ['No_data', 1]]
    else
      go_table.sort! { |v1, v2| v2[1] <=> v1[1] }
      container[key] = go_table.unshift(header)
    end
  end
  container
end
hsps_relationship_subject(hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 271
# Lists the pairs of HSPs of +hit+ for which same_subject_hsp is true.
# Every ordered pair of different positions is checked; a pair is stored only
# if neither it nor its inverse has been stored already.
def hsps_relationship_subject(hit)
  related = []
  hit.each_with_index do |hsp, j|
    hit.each_with_index do |second_hsp, i|
      next if i == j # same HSP
      next unless same_subject_hsp(hsp, second_hsp)

      pair = [hsp, second_hsp]
      related << pair unless related.include?(pair) || related.include?(pair.reverse)
    end
  end
  related
end
load_cd_hit_sequences_names(file)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 120
# Reads a FASTA file and returns the header lines (those starting with '>')
# with the line ending and every '>' character removed.
#
# File.foreach closes the file when done; the original
# File.open(file).readlines never closed the handle it opened.
def load_cd_hit_sequences_names(file)
  names = []
  File.foreach(file) do |line|
    names << line.chomp.gsub('>', '') if line.start_with?('>')
  end
  names
end
load_isoform_hash(file)
click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 3
# Loads a FASTA file through ScbiZcatFile and stores every record in a Hash
# by means of load_seq_in_hash. Returns an empty Hash when +file+ does not
# exist.
#
# Changes over the original:
# * File.exist? replaces File.exists?, which was removed in Ruby 3.2.
# * The last record is stored only if a header was read, so a file without
#   any header no longer passes a nil name to load_seq_in_hash.
# * The unused local filtered_fasta was dropped.
def load_isoform_hash(file)
  isoform_hash = {}
  return isoform_hash unless File.exist?(file)

  fasta = ScbiZcatFile.new(file)
  seq_name = nil
  seq = ''
  until fasta.eof
    line = fasta.readline.chomp
    if line[0] == '>'
      # A new header closes the previous record, if any.
      load_seq_in_hash(seq_name, seq, isoform_hash) unless seq_name.nil?
      seq_name = line
      seq = ''
    else
      seq << line
    end
  end
  load_seq_in_hash(seq_name, seq, isoform_hash) unless seq_name.nil?
  isoform_hash
end
load_seq_in_hash(seq_name, seq, isoform_hash)
click to toggle source
# File lib/full_lengther_next/handle_db.rb, line 25
# Adds one FASTA record to +isoform_hash+.
#
# seq_name is split at the first space into an identifier and a description.
# The identifier is matched against "word|word-digits|": the whole match is
# kept as the new header prefix and the word before "-digits" is the hash key.
# Records sharing a key are concatenated, separated by a newline.
# When the identifier does not match, the key and the prefix are both nil.
def load_seq_in_hash(seq_name, seq, isoform_hash)
  name, desc = seq_name.split(' ', 2)
  found = /(\w+\|(\w+)\-\d+\|)/.match(name)
  prefix = found && found[1]
  accession = found && found[2]
  record = ">#{prefix}#{desc}\n#{seq}"
  previous = isoform_hash[accession]
  isoform_hash[accession] = previous.nil? ? record : previous + "\n" + record
end
misassembled_detection(query)
click to toggle source
DETECTION FUNCTIONS
# File lib/full_lengther_next/blast_functions.rb, line 192
# DETECTION FUNCTIONS
# Tells whether +query+ looks misassembled.
#
# HSPs are grouped with cluster_hsps. A group with more than one HSP is
# counted as misassembled when it mixes negative and non-negative query
# frames. When more than half of the groups are misassembled the method
# returns true; otherwise the HSPs of the misassembled accessions are removed
# from query.hits (in place) so they do not disturb later analysis, and false
# is returned.
#
# true/false replace the TRUE/FALSE constants (deprecated since Ruby 2.4 and
# not defined by current Ruby releases); reject! replaces deleting from the
# array while iterating over it.
def misassembled_detection(query)
  hits = cluster_hsps(query.hits)
  misassembled_hits = []
  hits.each do |hit|
    next unless hit.length > 1

    negative_count = hit.count { |hsp| hsp.q_frame < 0 }
    misassembled_hits << hit.first.acc if negative_count > 0 && negative_count != hit.length
  end

  return true if misassembled_hits.length * 1.0 / hits.length > 0.5

  # Remove misassembled hits to avoid a broken analysis.
  query.hits.reject! { |hsp| misassembled_hits.include?(hsp.acc) }
  false
end
multiple_hsps(query, num)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 216
# Returns true when at least +num+ HSPs of +query+ share the accession of the
# first HSP, false otherwise.
#
# Returns the comparison result directly instead of going through the
# TRUE/FALSE constants, which are deprecated since Ruby 2.4 and not defined by
# current Ruby releases.
def multiple_hsps(query, num)
  first_hsp = query.hits.first
  # first_hsp is only dereferenced inside the block, so an empty hit list
  # simply counts zero matches.
  query.hits.count { |hsp| hsp.acc == first_hsp.acc } >= num
end
overlapping_hsps_on_subject(query)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 225
# Walks the HSPs of +query+ grouped by consecutive accession, cleans every
# group with clean_subject_overlapping_hsps and replaces query.hits with the
# concatenation of the cleaned groups.
#
# Returns [query, overlapping].
#
# NOTE(review): +overlapping+ is overwritten for every group, so the returned
# flag only reflects the last group processed -- confirm whether it should be
# accumulated over all groups instead.
#
# false replaces the FALSE constant, which is deprecated since Ruby 2.4 and
# not defined by current Ruby releases.
def overlapping_hsps_on_subject(query)
  overlapping = false
  current_hit = query.hits.first.acc
  complete_hit = []
  cleaned_hits = []
  query.hits.each do |hit|
    if hit.acc != current_hit
      # The accession changed: the group collected so far is complete.
      complete_hit, overlapping = clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
      complete_hit = []
    end
    complete_hit << hit
    current_hit = hit.acc
  end
  # Flush the last group.
  complete_hit, overlapping = clean_subject_overlapping_hsps(complete_hit, cleaned_hits)
  query.hits = cleaned_hits
  return query, overlapping
end
reduce_pool_sequences(putative_seqs, main_path, cpu)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 88
# Reduces redundancy of +putative_seqs+ with cd-hit and returns only the
# sequences whose names appear in the cd-hit output.
#
# putative_seqs:: sequences written to <main_path>/temp.fasta
# main_path::     working directory for the temporary and log files
# cpu::           value passed to cd-hit's -T option
#
# cd-hit runs only when <main_path>/temp_cln.fasta does not exist yet.
# NOTE(review): an existing temp_cln.fasta (e.g. from a previous call) is
# reused as is -- confirm this is intended.
#
# Changes over the original: File.exist? replaces File.exists? (removed in
# Ruby 3.2); cd-hit is started from an argument list with its output
# redirected via the :out option, so paths with spaces no longer break the
# command line.
def reduce_pool_sequences(putative_seqs, main_path, cpu)
  temp_fasta = File.join(main_path, 'temp.fasta')
  temp_fasta_clean = File.join(main_path, 'temp_cln.fasta')
  log_file = File.join(main_path, 'log_cd_hit_Cod_Unk')

  write_fasta(putative_seqs, temp_fasta, 'w')

  $LOG.info "Start cd-hit with coding and unknow sequences"
  unless File.exist?(temp_fasta_clean)
    system('cd-hit', '-i', temp_fasta, '-o', temp_fasta_clean,
           '-c', '0.95', '-M', '0', '-T', cpu.to_s, out: log_file)
  end
  $LOG.info "Ended cd-hit with coding and unknow sequences"

  cd_hit_names_putative_seqs = load_cd_hit_sequences_names(temp_fasta_clean)
  select_seqs_with_name(putative_seqs, cd_hit_names_putative_seqs)
end
report_clustering(cluster_file_path, clusters_seqs_annot_DB, representative_seqs_annot_DB)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 79
# Writes the cluster report: one line per representative sequence with its
# name, a tab and the ';'-joined names of the sequences of its cluster.
#
# cluster_file_path::            output path (overwritten)
# clusters_seqs_annot_DB::       clusters, index-aligned with the
#                                representatives
# representative_seqs_annot_DB:: one representative per cluster
#
# Returns nil. The block form of File.open closes the file even when an
# exception is raised while writing.
def report_clustering(cluster_file_path, clusters_seqs_annot_DB, representative_seqs_annot_DB)
  File.open(cluster_file_path, 'w') do |cluster_file|
    representative_seqs_annot_DB.each_with_index do |rep_seq, i|
      cluster_seqs = clusters_seqs_annot_DB[i].map { |seq| seq.seq_name }.join(';')
      cluster_file.puts "#{rep_seq.seq_name}\t#{cluster_seqs}"
    end
  end
  nil
end
reptrans(seqs_annotation_prot, seqs_some_coding ,seqs_unknown, options)
click to toggle source
MAIN FUNCTION
# File lib/full_lengther_next/reptrans.rb, line 9
# MAIN FUNCTION
# Builds the representative transcriptome under <current dir>/fln_results.
#
# seqs_annotation_prot:: sequences annotated against protein databases
# seqs_some_coding::     putative coding sequences
# seqs_unknown::         unknown sequences (only used with an EST database)
# options::              option hash; :est_db, :high_clustering and whatever
#                        count_cpu / do_blast_with_EST read
#
# Returns the result of write_reptrans_stats.
#
# Changes over the original: File.exist? replaces File.exists? (removed in
# Ruby 3.2); makeblastdb is started from an argument list with its output
# redirected via the :out option, so paths with spaces no longer break the
# command line; the unused +count+ local was dropped.
def reptrans(seqs_annotation_prot, seqs_some_coding ,seqs_unknown, options)
  cpus = count_cpu(options)
  stats_hash = initialize_stats_hash_reptrans

  # Paths
  #---------------------------------------------
  main_path = File.join(Dir.pwd, 'fln_results')
  reptrans_fasta = File.join(main_path, 'Representative_transcriptome.fasta')
  blast_path = File.join(main_path, 'ESTdb')
  cluster_prot_annotated_path = File.join(main_path, 'Prot_clusters')
  cluster_EST_annotated_path = File.join(main_path, 'EST_clusters')
  html_file = File.join(main_path, 'Representative_transcriptome_stats.html')
  txt_file = File.join(main_path, 'Representative_transcriptome_stats.txt')

  # Prot annotations sequence analysis
  #---------------------------------------------
  analysis_over_DB_annotated_seqs(seqs_annotation_prot, reptrans_fasta, cluster_prot_annotated_path, stats_hash, 'prot_annotated', options[:high_clustering])
  seqs_annotation_prot = nil # drop the local reference once processed

  # NOT Prot annotations sequence analysis
  #---------------------------------------------
  putative_seqs = seqs_some_coding
  unless options[:est_db].nil? # WITH EST DATABASE
    putative_seqs += seqs_unknown # Coding & unknown
    putative_seqs = reduce_pool_sequences(putative_seqs, main_path, cpus)
    unless File.exist?(blast_path + '.nsq')
      $LOG.info "Start makeblastdb over EST DB"
      system('makeblastdb', '-in', options[:est_db].to_s, '-out', blast_path,
             '-dbtype', 'nucl', '-parse_seqids',
             out: File.join(main_path, 'log_makeblast_db'))
      $LOG.info "Ended makeblastdb over EST DB"
    end
    putative_seqs = do_blast_with_EST(putative_seqs, options, reptrans_fasta, blast_path, cluster_EST_annotated_path, stats_hash)
  end

  # Coding sequence analysis
  #---------------------------------------------
  if !putative_seqs.nil? && !putative_seqs.empty?
    putative_seqs = select_seqs_more_500pb(putative_seqs)
    # Without an EST database the pool has not been reduced yet.
    putative_seqs = reduce_pool_sequences(putative_seqs, main_path, cpus) if options[:est_db].nil?
    # Order by testcode (first) and sequence length (last), both descending.
    putative_seqs.sort! do |s1, s2|
      if s2.t_code == s1.t_code
        s2.fasta_length <=> s1.fasta_length
      else
        s2.t_code <=> s1.t_code
      end
    end
    putative_seqs.each do |coding_seq|
      coding_stats_reptrans(coding_seq, stats_hash)
    end
    write_fasta(putative_seqs, reptrans_fasta, 'a')
  end
  write_reptrans_stats(stats_hash, html_file, txt_file)
end
same_query_hsp(hit, second_hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 117
# Tells whether +second_hit+ lies inside +hit+ on the query.
#
# True when both HSPs have the same accession, hit starts at or before
# second_hit, hit ends at or after second_hit, and the distance between the
# start of second_hit and the end of hit is greater than 1.
#
# Fixes over the original: the end check compared hit.q_end with itself
# (always true), so any second_hit starting after hit.q_beg qualified even
# when it lay completely outside hit; TRUE/FALSE (deprecated since Ruby 2.4,
# not defined by current Ruby releases) are replaced by the boolean result.
def same_query_hsp(hit, second_hit)
  return false unless hit.acc == second_hit.acc

  hit.q_beg <= second_hit.q_beg &&
    hit.q_end >= second_hit.q_end &&
    (second_hit.q_beg - hit.q_end).abs > 1
end
same_sense?(hit, second_hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 127
# Returns true when the query frames of both HSPs have the same sign
# (both negative, both zero or both positive), false otherwise.
#
# Returns the comparison directly instead of going through the TRUE/FALSE
# constants, which are deprecated since Ruby 2.4 and not defined by current
# Ruby releases.
def same_sense?(hit, second_hit)
  (hit.q_frame <=> 0) == (second_hit.q_frame <=> 0)
end
same_subject_hsp(hit, second_hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 107
# Tells whether +second_hit+ lies inside +hit+ on the subject.
#
# True when both HSPs have the same accession, hit starts at or before
# second_hit, hit ends at or after second_hit, and the distance between the
# start of second_hit and the end of hit is greater than 1.
#
# Fixes over the original: the end check compared hit.s_end with itself
# (always true), so any second_hit starting after hit.s_beg qualified even
# when it lay completely outside hit; TRUE/FALSE (deprecated since Ruby 2.4,
# not defined by current Ruby releases) are replaced by the boolean result.
def same_subject_hsp(hit, second_hit)
  return false unless hit.acc == second_hit.acc

  hit.s_beg <= second_hit.s_beg &&
    hit.s_end >= second_hit.s_end &&
    (second_hit.s_beg - hit.s_end).abs > 1
end
select_hits_by_coverage_subject(hits)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 34
# Keeps the hits (arrays of HSPs) having at least one HSP whose subject
# coverage reaches the threshold without exceeding 100.
# The threshold is the coverage of the first HSP of the first hit, capped
# at 100.
def select_hits_by_coverage_subject(hits)
  reference = get_coverage_subject(hits.first.first)
  threshold = reference > 100 ? 100 : reference
  hits.select do |hit|
    hit.any? do |hsp|
      coverage = get_coverage_subject(hsp)
      # HSPs covering more than 100% are never accepted.
      coverage <= 100 && coverage >= threshold
    end
  end
end
select_hits_by_evalue(hits, evalue)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 68
# Keeps the hits (arrays of HSPs) that have at least one HSP whose e-value is
# lower than or equal to +evalue+.
#
# Each hit is returned once. The original appended the hit once per
# qualifying HSP (it lacked the break used by select_hits_by_identity_query
# and select_hits_by_coverage_subject), producing duplicated hits.
def select_hits_by_evalue(hits, evalue)
  hits.select { |hit| hit.any? { |hsp| hsp.e_val <= evalue } }
end
select_hits_by_identity_query(hits)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 54
# Keeps the hits (arrays of HSPs) that have at least one HSP whose identity is
# greater than or equal to the identity of the first HSP of the first hit.
def select_hits_by_identity_query(hits)
  reference_identity = hits.first.first.ident
  hits.select do |hit|
    hit.any? { |hsp| hsp.ident >= reference_identity }
  end
end
select_hsps_by_id(hits, selected_ids)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 80
# Returns the HSPs of +hits+ whose accession is included in +selected_ids+,
# in their original order.
def select_hsps_by_id(hits, selected_ids)
  hits.select { |hsp| selected_ids.include?(hsp.acc) }
end
select_representative(clusters_seqs_annot_prot)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 188
# Picks one representative sequence per cluster and returns them in cluster
# order.
#
# For each cluster:
# 1. When the first sequence has coverage data, the cluster is filtered in
#    place to the sequences whose coverage_analysis[3] is at most 0.05 below
#    the cluster maximum (relaxed limit).
# 2. The longest sequence of type COMPLETE is taken.
# 3. Without COMPLETE sequences, the cluster is sorted in place by decreasing
#    get_pident and the longest sequence among those with the best identity
#    is taken.
#
# Fix over the original: "longest" was decided by comparing the seq_fasta
# strings themselves (lexicographic order); their lengths are compared now.
def select_representative(clusters_seqs_annot_prot)
  longest = lambda { |candidates| candidates.max_by { |s| s.seq_fasta.length } }

  clusters_seqs_annot_prot.map do |cluster|
    unless cluster.first.coverage_analysis.empty? # filtering by mapping coverage
      min_coverage = cluster.map { |seq| seq.coverage_analysis[3] }.max - 0.05
      cluster.select! { |seq| seq.coverage_analysis[3] >= min_coverage }
    end

    representative = longest.call(cluster.select { |s| s.type == COMPLETE })
    if representative.nil?
      cluster.sort! { |cl1, cl2| cl2.get_pident <=> cl1.get_pident }
      best_pident = cluster.first.get_pident
      representative = longest.call(cluster.select { |s| s.get_pident == best_pident })
    end
    representative
  end
end
select_seqs_more_500pb(seqs_array)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 132
# Returns the sequences of +seqs_array+ whose fasta_length is greater
# than 500.
def select_seqs_more_500pb(seqs_array)
  seqs_array.select do |seq|
    seq.fasta_length > 500
  end
end
select_seqs_with_name(array_seqs, array_names)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 137
# Returns the sequences of +array_seqs+ whose seq_name appears in
# +array_names+, in their original order.
#
# The names are indexed in a Hash first: the original Array#include? test made
# the selection cost proportional to (sequences x names).
def select_seqs_with_name(array_seqs, array_names)
  wanted = {}
  array_names.each { |name| wanted[name] = true }
  array_seqs.select { |seq| wanted.key?(seq.seq_name) }
end
set_thresold_evalue(hits)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 90
# Derives an e-value threshold from +hits+.
#
# The smallest non-zero e-value below 100 is looked up. When there is none
# the threshold is 0. Otherwise, with exp being the integer part of the
# absolute base-10 logarithm of that e-value, the threshold is
# 10 ** -(exp - ceil(exp / 10)).
def set_thresold_evalue(hits)
  lowest = hits.inject(100) do |best, hit|
    hit.e_val != 0 && hit.e_val < best ? hit.e_val : best
  end
  return 0 if lowest == 100

  exponent = Math.log10(lowest).abs.to_i
  relaxation = (exponent / 10.0).ceil
  10.0**-(exponent - relaxation)
end
subject_overlapping_hsps(hit)
click to toggle source
# File lib/full_lengther_next/blast_functions.rb, line 254
# Checks whether the HSPs of +hit+ overlap on the subject.
#
# When hsps_relationship_subject finds related HSPs, the HSPs with identity
# below 55 are removed (clean_hsp_by_identity) and the check is repeated on
# what is left. The hit is reported as overlapping when nothing is left or
# when related HSPs remain.
#
# Returns [hit, overlapping] with overlapping being true or false. The
# literals replace the TRUE/FALSE constants, which are deprecated since
# Ruby 2.4 and not defined by current Ruby releases.
def subject_overlapping_hsps(hit)
  return hit, false if hsps_relationship_subject(hit).empty?

  hit = clean_hsp_by_identity(hit, 55)
  overlapping = hit.empty? || !hsps_relationship_subject(hit).empty?
  return hit, overlapping
end
write_fasta(seqs_array, file_name, mode)
click to toggle source
# File lib/full_lengther_next/reptrans.rb, line 142
# Writes sequences in FASTA format: a ">name" line followed by the sequence.
#
# seqs_array:: objects responding to seq_name and seq_fasta
# file_name::  output path
# mode::       File.open mode ('w' to overwrite, 'a' to append)
#
# Returns nil. The block form of File.open closes the file even when an
# exception is raised while writing.
def write_fasta(seqs_array, file_name, mode)
  File.open(file_name, mode) do |file|
    seqs_array.each do |seq|
      file.puts ">#{seq.seq_name}\n#{seq.seq_fasta}"
    end
  end
  nil
end