class Bio::GFFbrowser::Block::GffBlockParser

The block parser simplifies parsing by assuming the GFF3 file is organised into blocks, i.e. runs of consecutive records that share the same seqid. All relevant information is resolved one block at a time.

Public Class Methods

new(filename, options) click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 13
# Set up a block parser for the GFF3 file +filename+.
#
# filename :: path handed to Bio::GFF::GFF3::FileIterator
# options  :: option hash kept for later use (parse reads :fasta_filename from it)
def initialize(filename, options)
  info "Starting block parser"
  @filename, @options = filename, options
  # The iterator is created once here and reused by parse
  @iter = Bio::GFF::GFF3::FileIterator.new(@filename)
end

Public Instance Methods

each_CDS_seq() { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 86
# Delegates to each_seq with the feature type 'cds' and hands every
# id, seq pair it produces on to the caller's block.
def each_CDS_seq
  each_seq('cds') do |seq_id, sequence|
    yield seq_id, sequence
  end
end
each_exon_seq() { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 82
# Delegates to each_seq with the feature type 'exon' and hands every
# id, seq pair it produces on to the caller's block.
def each_exon_seq
  each_seq('exon') do |seq_id, sequence|
    yield seq_id, sequence
  end
end
each_gene_seq() { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 74
# Delegates to each_seq with the feature type 'gene' and hands every
# id, seq pair it produces on to the caller's block.
def each_gene_seq
  each_seq('gene') do |seq_id, sequence|
    yield seq_id, sequence
  end
end
each_mRNA_seq() { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 78
# Delegates to each_seq with the feature type 'mrna' and hands every
# id, seq pair it produces on to the caller's block.
def each_mRNA_seq
  each_seq('mrna') do |seq_id, sequence|
    yield seq_id, sequence
  end
end
each_seq(gfftype) { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 70
# Runs parse for the feature type +gfftype+ and hands every id, seq pair
# it produces on to the caller's block.
def each_seq(gfftype)
  parse(gfftype) do |seq_id, sequence|
    yield seq_id, sequence
  end
end
parse(gfftype) { |id,seq| ... } click to toggle source
# File lib/bio/db/gff/block/gffblockparser.rb, line 20
# Read the GFF3 records one seqid block at a time and yield id, seq for the
# features of type +gfftype+ (the selection itself is done by parse_block).
#
# Sequences are taken from the file named by @options[:fasta_filename] when
# that option is set, otherwise from the FASTA embedded in the GFF3 input.
#
# Raises a RuntimeError when a seqid shows up again after its block has been
# closed, i.e. when the records are not grouped by seqid.
def parse(gfftype)
  @inseqidlist = {}
  @sequencelist = {}
  fasta_path = @options[:fasta_filename]
  if fasta_path
    # Separate FASTA file
    File.open(fasta_path) do |io|
      Bio::GFF::FastaReader.new(io).each do |id, fastarec|
        @sequencelist[id] = fastarec
      end
    end
  else
    # Embedded FASTA
    @iter.each_sequence { |id, bioseq| @sequencelist[id] = bioseq.to_s }
  end

  # One forwarding block shared by both parse_block calls below
  forward = proc { |id, seq| yield id, seq }
  current_seqid = nil
  pending = []
  @iter.each_rec do |_fpos, line|
    rec = FastLineRecord.new(parse_line_fast(line))
    rec_seqid = rec.seqid
    unless current_seqid == rec_seqid
      # A new block starts here; a seqid that was already seen means the
      # file is not grouped by seqid.
      if @inseqidlist[rec_seqid]
        # NOTE(review): the log message mentions a fallback to the line
        # parser, but this method simply raises.
        error "GFF3 file not sorted, falling back to line parser"
        raise "ERROR, bailing out"
      end
      # Flush the block collected so far (nothing to flush before the first record)
      parse_block(gfftype, pending, @sequencelist[current_seqid], &forward) if current_seqid
      pending = []
      current_seqid = rec_seqid
      @inseqidlist[current_seqid] = true
    end
    pending << rec
  end
  # Flush the final block
  parse_block(gfftype, pending, @sequencelist[current_seqid], &forward) if current_seqid
end
parse_block(gfftype, recs, sequence) { |id, sequence| ... } click to toggle source

Parse the records sharing the same seqid and yield each feature of type gfftype as an id, seq pair

# File lib/bio/db/gff/block/gffblockparser.rb, line 62
# Walk the records of one block and yield the id and sub-sequence of every
# record whose feature type matches +gfftype+.
#
# gfftype  :: feature type to select, as a String or Symbol; the comparison
#             is case-insensitive on both sides, so 'CDS', 'cds' and :cds
#             all select the same records
# recs     :: records responding to feature_type, id, start and end
# sequence :: sequence the records refer to; it is sliced with
#             start-1..end-1, so record coordinates are treated as 1-based
#             and inclusive
#
# Yields id, subsequence for each matching record.
# Raises NoMethodError when +sequence+ is nil and a record matches.
def parse_block(gfftype, recs, sequence)
  # Normalise once, outside the loop; the record side is downcased per record
  wanted = gfftype.to_s.downcase
  recs.each do |rec|
    next unless rec.feature_type.downcase == wanted
    yield rec.id, sequence[rec.start-1..rec.end-1]
  end
end