class Gff_parser

Attributes

dataset[RW]

Public Class Methods

new(file)
# File lib/gene_assembler/gff_parser.rb, line 4
# Reads the tab-separated GFF file at +file+ and loads its records into a new
# Gff_dataset, which is stored in @dataset.
#
# Lines that are empty or start with '#' are skipped. Reading stops at the
# first line starting with '>' (the beginning of an embedded sequence section).
# A record whose attribute column has no 'Parent' key is registered with
# add_master_feature (including the first column); any other record is
# registered with add_feature, which does not receive the first column.
#
# file:: path of the GFF file to read.
def initialize(file)
  @dataset = Gff_dataset.new
  # File.foreach closes the file when the block finishes, even when we leave
  # early through +break+; a bare File.open(...).each leaves the handle open.
  File.foreach(file) do |line|
    line.chomp!
    next if line.empty? || line.start_with?('#') # Skip gff comments and blank lines
    break if line.start_with?('>') # Skip sequences

    fields = line.split("\t")
    # Missing columns destructure to nil, so both calls below always get the
    # same number of arguments regardless of how many columns the line has.
    seqid, source, type, start, stop, score, strand, phase = fields
    attribs = parse_attribs(fields.last)
    if attribs.key?('Parent')
      # Feature is a child, so it is attached to its parent and gets no seqid.
      @dataset.add_feature(source, type, start, stop, score, strand, phase, attribs)
    else
      @dataset.add_master_feature(seqid, source, type, start, stop, score, strand, phase, attribs)
    end
  end
  # No explicit return value: Class#new ignores whatever initialize returns.
end

Public Instance Methods

parse_attribs(attribs)
# File lib/gene_assembler/gff_parser.rb, line 26
# Converts the attribute column of a GFF record into a Hash.
#
# attribs:: String holding the last column of a gff line, i.e. ';'-separated
#           'key=value' pairs.
#
# Returns a Hash mapping each key to its value String. An attribute with no
# value ('key' or 'key=') maps to nil.
#
# Only the first '=' of a pair separates key from value, so values that
# themselves contain '=' are kept whole. Whitespace around each pair is
# dropped and empty pairs (from ';;' or a trailing ';') are ignored.
def parse_attribs(attribs)
  attribs.split(';').each_with_object({}) do |raw_pair, attribs_hash|
    pair = raw_pair.strip
    next if pair.empty?

    # partition splits on the first '=' only; value is '' when there is none.
    key, _separator, value = pair.partition('=')
    attribs_hash[key] = value.empty? ? nil : value
  end
end