module Bio::GFFbrowser::Digest::Parser
Both in-memory and no-cache fully digest parsers share this Parser
module.
Public Instance Methods
each_CDS() { |id, recs, component| ... }
click to toggle source
Yield the id, recs, and containing component
# File lib/bio/db/gff/digest/gffparser.rb, line 90
#
# Walk the CDS list, calling +parse+ first when @cdslist has not been set.
# Yields the id, the records and the containing component of every item
# that +each_item+ produces.
def each_CDS
  parse unless @cdslist
  each_item(@cdslist) do |cds_id, records, container|
    yield cds_id, records, container
  end
end
each_CDS_seq() { |description(id,component,reclist), seq| ... }
click to toggle source
Yield a unique description and the sequence
# File lib/bio/db/gff/digest/gffparser.rb, line 132
#
# For every CDS that has a containing component, look up the component's
# sequence in @sequencelist, assemble the CDS sequence with the :codonize
# option switched on, and yield a description together with that sequence.
# Items without a component are skipped silently; items whose component has
# no stored sequence only produce a warning.
def each_CDS_seq
  each_CDS do |cds_id, records, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    if sequence
      seq = assemble(sequence, container.start, records, @options.merge(:codonize=>true))
      unless (seq.size % 3).zero?
        # A CDS whose length is not a whole number of codons is reported,
        # not rejected: the records are dumped and a warning is issued.
        p records # leave this in
        warn "CDS size is not a multiple of 3", cds_id
      end
      yield description(cds_id, container, records), seq
    else
      warn "No sequence information for", cds_id
    end
  end
end
each_exon() { |id, recs, component| ... }
click to toggle source
Yield the id, recs, and containing component
# File lib/bio/db/gff/digest/gffparser.rb, line 96
#
# Walk the exon list, calling +parse+ first when @exonlist has not been set.
# Yields the id, the records and the containing component of every item
# that +each_item+ produces.
def each_exon
  parse unless @exonlist
  each_item(@exonlist) do |exon_id, records, container|
    yield exon_id, records, container
  end
end
each_exon_seq() { |description(id,component,reclist), seq| ... }
click to toggle source
Yield a unique description and the sequence
# File lib/bio/db/gff/digest/gffparser.rb, line 154
#
# For every exon that has a containing component, look up the component's
# sequence in @sequencelist, assemble the exon sequence and yield a
# description together with that sequence. Items without a component are
# skipped silently; a missing sequence only produces a warning.
def each_exon_seq
  each_exon do |exon_id, records, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    if sequence
      seq = assemble(sequence, container.start, records)
      yield description(exon_id, container, records), seq
    else
      warn "No sequence information for", exon_id
    end
  end
end
each_gene() { |id, recs, component| ... }
click to toggle source
Yield the id, recs, and containing component of genes
# File lib/bio/db/gff/digest/gffparser.rb, line 78
#
# Walk the gene list (kept in @orflist), calling +parse+ first when that
# list has not been set. Yields the id, the records and the containing
# component of every item that +each_item+ produces.
def each_gene
  parse unless @orflist
  each_item(@orflist) do |gene_id, records, container|
    yield gene_id, records, container
  end
end
each_gene_seq() { |description(id,component,reclist), assemble(sequence,start,reclist)| ... }
click to toggle source
Yield a unique description and the sequence
# File lib/bio/db/gff/digest/gffparser.rb, line 102
#
# For every gene that has a containing component, look up the component's
# sequence in @sequencelist and yield a description together with the
# assembled gene sequence. Items without a component are skipped silently;
# a missing sequence only produces a warning.
def each_gene_seq
  each_gene do |gene_id, records, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    if sequence
      yield description(gene_id, container, records), assemble(sequence, container.start, records)
    else
      warn "No sequence information for", gene_id
    end
  end
end
each_mRNA() { |id, recs, component| ... }
click to toggle source
Yield the id, recs, and containing component of mRNAs
# File lib/bio/db/gff/digest/gffparser.rb, line 84
#
# Walk the mRNA list, calling +parse+ first when @mrnalist has not been set.
# Yields the id, the records and the containing component of every item
# that +each_item+ produces.
def each_mRNA
  parse unless @mrnalist
  each_item(@mrnalist) do |mrna_id, records, container|
    yield mrna_id, records, container
  end
end
each_mRNA_seq() { |description(id,component,reclist), assemble(sequence,start,reclist)| ... }
click to toggle source
Yield a unique description and the sequence
# File lib/bio/db/gff/digest/gffparser.rb, line 117
#
# For every mRNA that has a containing component, look up the component's
# sequence in @sequencelist and yield a description together with the
# assembled mRNA sequence. Items without a component are skipped silently;
# a missing sequence only produces a warning.
def each_mRNA_seq
  each_mRNA do |mrna_id, records, container|
    next unless container

    sequence = @sequencelist[container.seqname]
    if sequence
      yield description(mrna_id, container, records), assemble(sequence, container.start, records)
    else
      warn "No sequence information for", mrna_id
    end
  end
end
read_fasta()
click to toggle source
# File lib/bio/db/gff/digest/gffparser.rb, line 64
#
# When the :fasta_filename option is set, open that file and store every
# record produced by Bio::GFF::FastaReader in @sequencelist under its id.
# Does nothing (and returns nil) when the option is absent.
def read_fasta
  fasta_path = @options[:fasta_filename]
  return unless fasta_path

  File.open(fasta_path) do |io|
    Bio::GFF::FastaReader.new(io).each do |id, fastarec|
      @sequencelist[id] = fastarec
    end
  end
end
show_unrecognized_features()
click to toggle source
# File lib/bio/db/gff/digest/gffparser.rb, line 58
#
# Emit one warning for every non-nil key collected in
# @unrecognized_features.
def show_unrecognized_features
  @unrecognized_features.keys.each do |feature_type|
    next unless feature_type

    warn "Unknown feature is ignored", feature_type
  end
end
store_record(rec)
click to toggle source
Takes a parsed record rec
and stores items in the relevant lists/tables
# File lib/bio/db/gff/digest/gffparser.rb, line 28
#
# Takes a parsed record +rec+ and stores it in the relevant lists/tables.
#
# * Records for which +rec.comment+ is set are skipped entirely.
# * The formatted id and the sequence name are always counted.
# * When the upcased feature type is listed in COMPONENT_TYPES the record
#   is also registered in @componentlist under its formatted id.
# * gene / mRNA / CDS / exon records, given either by name or by the
#   accession paired with that name below, are added to the matching list.
# * Any other feature type that is neither a component nor listed in
#   IGNORE_FEATURES is remembered in @unrecognized_features.
def store_record(rec)
  return if rec.comment # skip GFF comments

  id = Helpers::Record::formatID(rec)
  @count_ids.add(id)
  @count_seqnames.add(rec.seqname)

  is_component = COMPONENT_TYPES.include?(rec.feature_type.upcase)
  if is_component
    # check for container ID
    warn("Container <#{rec.feature_type}> has no ID, so using sequence name instead",id) if rec.id.nil?
    @componentlist[id] = rec
    info "Added feature <#{rec.feature_type}> with component ID",id
  end

  # Alternatives in a `when` clause must be comma separated. Writing
  # `'gene' || 'SO:0000704'` evaluates to just 'gene', so the accession
  # form would never match and would be reported as unrecognized.
  case rec.feature_type
  when 'gene', 'SO:0000704'
    @orflist.add(id,rec)
  when 'mRNA', 'SO:0000234'
    @mrnalist.add(id,rec)
  when 'CDS', 'SO:0000316'
    @cdslist.add(id,rec)
  when 'exon', 'SO:0000147'
    @exonlist.add(id,rec)
  else
    unless is_component || IGNORE_FEATURES.include?(rec.feature_type)
      @unrecognized_features[rec.feature_type] = true
    end
  end
end