module Bio::GFFbrowser::Digest::Parser

The in-memory and no-cache full-digest parsers both share this Parser module.

Public Instance Methods

each_CDS() { |id, recs, component| ... } click to toggle source

Yield the id, recs, and containing component

# File lib/bio/db/gff/digest/gffparser.rb, line 90
# Iterate over the entries of @cdslist. When @cdslist has not been set
# yet, +parse+ is invoked first.
#
# Yields the id, the records, and the containing component as handed
# over by +each_item+.
def each_CDS
  parse unless @cdslist
  each_item(@cdslist) do |id, recs, component|
    yield id, recs, component
  end
end
each_CDS_seq() { |description(id,component,reclist), seq| ... } click to toggle source

Yield a unique description and the sequence

# File lib/bio/db/gff/digest/gffparser.rb, line 132
# Walk all CDS entries (via +each_CDS+) and yield a description together
# with the assembled sequence.
#
# Entries without a component are skipped silently. When @sequencelist
# has no entry for the component's seqname a warning is emitted and
# nothing is yielded. The sequence is assembled with @options plus
# :codonize => true; if its size is not a multiple of 3 the records are
# printed and a warning is issued, but the sequence is still yielded.
def each_CDS_seq
  each_CDS do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    unless sequence
      warn "No sequence information for",id
      next
    end

    assemble_options = @options.merge(:codonize=>true)
    seq = assemble(sequence,component.start,reclist,assemble_options)
    unless (seq.size % 3).zero?
      p reclist # leave this in
      warn "CDS size is not a multiple of 3",id
    end
    yield description(id,component,reclist), seq
  end
end
each_exon() { |id, recs, component| ... } click to toggle source

Yield the id, recs, and containing component

# File lib/bio/db/gff/digest/gffparser.rb, line 96
# Iterate over the entries of @exonlist. When @exonlist has not been set
# yet, +parse+ is invoked first.
#
# Yields the id, the records, and the containing component as handed
# over by +each_item+.
def each_exon
  parse unless @exonlist
  each_item(@exonlist) do |id, recs, component|
    yield id, recs, component
  end
end
each_exon_seq() { |description(id,component,reclist), seq| ... } click to toggle source

Yield a unique description and the sequence

# File lib/bio/db/gff/digest/gffparser.rb, line 154
# Walk all exon entries (via +each_exon+) and yield a description
# together with the assembled sequence.
#
# Entries without a component are skipped silently. When @sequencelist
# has no entry for the component's seqname a warning is emitted and
# nothing is yielded for that entry.
def each_exon_seq
  each_exon do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    unless sequence
      warn "No sequence information for",id
      next
    end

    seq = assemble(sequence,component.start,reclist)
    yield description(id,component,reclist), seq
  end
end
each_gene() { |id, recs, component| ... } click to toggle source

Yield the id, recs, and containing component of each gene

# File lib/bio/db/gff/digest/gffparser.rb, line 78
# Iterate over the entries of @orflist. When @orflist has not been set
# yet, +parse+ is invoked first.
#
# Yields the id, the records, and the containing component as handed
# over by +each_item+.
def each_gene
  parse unless @orflist
  each_item(@orflist) do |id, recs, component|
    yield id, recs, component
  end
end
each_gene_seq() { |description(id,component,reclist), assemble(sequence,start,reclist)| ... } click to toggle source

Yield a unique description and the sequence

# File lib/bio/db/gff/digest/gffparser.rb, line 102
# Walk all gene entries (via +each_gene+) and yield a description
# together with the assembled sequence.
#
# Entries without a component are skipped silently. When @sequencelist
# has no entry for the component's seqname a warning is emitted and
# nothing is yielded for that entry.
def each_gene_seq
  each_gene do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    if sequence
      # The description is computed before the sequence is assembled.
      label = description(id,component,reclist)
      yield label, assemble(sequence,component.start,reclist)
    else
      warn "No sequence information for",id
    end
  end
end
each_mRNA() { |id, recs, component| ... } click to toggle source

Yield the id, recs, and containing component of each mRNA

# File lib/bio/db/gff/digest/gffparser.rb, line 84
# Iterate over the entries of @mrnalist. When @mrnalist has not been set
# yet, +parse+ is invoked first.
#
# Yields the id, the records, and the containing component as handed
# over by +each_item+.
def each_mRNA
  parse unless @mrnalist
  each_item(@mrnalist) do |id, recs, component|
    yield id, recs, component
  end
end
each_mRNA_seq() { |description(id,component,reclist), assemble(sequence,start,reclist)| ... } click to toggle source

Yield a unique description and the sequence

# File lib/bio/db/gff/digest/gffparser.rb, line 117
# Walk all mRNA entries (via +each_mRNA+) and yield a description
# together with the assembled sequence.
#
# Entries without a component are skipped silently. When @sequencelist
# has no entry for the component's seqname a warning is emitted and
# nothing is yielded for that entry.
def each_mRNA_seq
  each_mRNA do |id, reclist, component|
    next unless component

    sequence = @sequencelist[component.seqname]
    unless sequence
      warn "No sequence information for",id
      next
    end

    yield description(id,component,reclist), assemble(sequence,component.start,reclist)
  end
end
read_fasta() click to toggle source
# File lib/bio/db/gff/digest/gffparser.rb, line 64
# Load sequences into @sequencelist from the file named by
# @options[:fasta_filename]. Does nothing (and returns nil) when that
# option is not set.
#
# Each (id, record) pair produced by Bio::GFF::FastaReader is stored as
# @sequencelist[id] = record.
def read_fasta
  fasta_path = @options[:fasta_filename]
  return unless fasta_path

  File.open(fasta_path) do |io|
    Bio::GFF::FastaReader.new(io).each do |id, fastarec|
      @sequencelist[id] = fastarec
    end
  end
end
show_unrecognized_features() click to toggle source
# File lib/bio/db/gff/digest/gffparser.rb, line 58
# Emit one warning per key stored in @unrecognized_features, skipping
# keys that are nil or false.
def show_unrecognized_features
  @unrecognized_features.keys.each do |feature|
    next unless feature

    warn "Unknown feature is ignored",feature
  end
end
store_record(rec) click to toggle source

Takes a parsed record rec and stores items in the relevant lists/tables

# File lib/bio/db/gff/digest/gffparser.rb, line 28
# Takes a parsed record +rec+ and stores it in the relevant lists/tables.
#
# Records flagged as comments are ignored. For every other record the
# formatted ID and the seqname are added to @count_ids / @count_seqnames.
# A record whose upcased feature type is listed in COMPONENT_TYPES is
# also stored in @componentlist under its formatted ID (with a warning
# when the record itself carries no ID).
#
# The record is then filed by feature type; each plain name is matched
# together with the SO: accession it is paired with:
#   gene / SO:0000704 -> @orflist
#   mRNA / SO:0000234 -> @mrnalist
#   CDS  / SO:0000316 -> @cdslist
#   exon / SO:0000147 -> @exonlist
# Any other type that is neither a component nor listed in
# IGNORE_FEATURES is remembered in @unrecognized_features.
def store_record(rec)
  return if rec.comment # skip GFF comments

  id = Helpers::Record::formatID(rec)
  @count_ids.add(id)
  @count_seqnames.add(rec.seqname)

  is_component = COMPONENT_TYPES.include?(rec.feature_type.upcase)
  if is_component
    # check for container ID
    warn("Container <#{rec.feature_type}> has no ID, so using sequence name instead",id) if rec.id.nil?
    @componentlist[id] = rec
    info "Added feature <#{rec.feature_type}> with component ID",id
  end

  # NOTE: alternatives in a `when` must be comma-separated. The former
  # `when 'gene' || 'SO:0000704'` evaluated the `||` first and therefore
  # only ever compared against 'gene', so the SO: accessions never matched.
  case rec.feature_type
  when 'gene', 'SO:0000704'
    @orflist.add(id,rec)
  when 'mRNA', 'SO:0000234'
    @mrnalist.add(id,rec)
  when 'CDS', 'SO:0000316'
    @cdslist.add(id,rec)
  when 'exon', 'SO:0000147'
    @exonlist.add(id,rec)
  else
    unless is_component || IGNORE_FEATURES.include?(rec.feature_type)
      @unrecognized_features[rec.feature_type] = true
    end
  end
end