module Bio::BGZF

Constants

CM
FLG
ID1
ID2
MAX_BYTES
MTIME
OS
SI1
SI2
SLEN
XFL
XLEN

Public Class Methods

decompress_block(f) click to toggle source
# File lib/bio-bgzf/block.rb, line 42
# Reads the next BGZF block from +f+, inflates it and checks the result
# against the size and CRC32 stored in the block trailer.
#
# f - IO-like object handed to read_bgzf_block.
#
# Returns the decompressed data as a String, or nil when read_bgzf_block
# returns nil (end of input).
# Raises FormatError when the inflated size or the CRC32 does not match the
# values stored in the block.
def decompress_block(f)
  # Remember where the block starts so that a size mismatch can be reported
  # with an offset. Not every IO-like object responds to #pos.
  block_pos = f.respond_to?(:pos) ? f.pos : nil

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    location = block_pos.nil? ? 'unknown offset' : block_pos
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{location}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end
pack(str, level=Zlib::BEST_COMPRESSION) click to toggle source

Packs str into a BGZF block using the given compression level.

# File lib/bio-bgzf/pack.rb, line 7
# Packs +str+ into a single BGZF block.
#
# str   - String holding the uncompressed payload.
# level - Zlib compression level (defaults to Zlib::BEST_COMPRESSION).
#
# Returns a binary String made of the header fields, the raw deflate stream,
# and a trailer holding the CRC32 and the byte size of +str+.
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Negative window bits produce a raw deflate stream (no zlib header/trailer).
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    # Release the native zlib stream even if deflate raises.
    zs.close
  end

  crc32 = Zlib.crc32(str, 0)
  # The trailer stores a byte count; String#length counts characters and
  # would be too small for multibyte input.
  isize = str.bytesize

  # Same arithmetic read_bgzf_block inverts:
  # compressed length = bsize - XLEN - 19.
  bsize = cdata.bytesize + 19 + XLEN

  fields = [
    ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN, # fixed-size header
    SI1, SI2, SLEN, bsize,                   # extra subfield with the block size
    cdata,                                   # compressed payload
    crc32, isize                             # trailer
  ]

  fields.pack('CCCCVCCvCCvva*VV')
end
read_bgzf_block(f) click to toggle source
# File lib/bio-bgzf/block.rb, line 9
# Reads one raw BGZF block from +f+ without decompressing it.
#
# f - IO-like object supporting #read and #seek, positioned at a block start.
#
# Returns nil when the first read hits end of input, otherwise the array
# [compressed_data, input_size, crc32] taken from the block.
# Raises NotBGZFError when the magic number is wrong or no BC subfield with
# the block size is present.
# Raises FormatError when the extra field is malformed or the input ends in
# the middle of a block.
def read_bgzf_block(f)
  # Reads exactly +count+ bytes or fails; a short or nil read means the block
  # is truncated.
  read_exactly = lambda do |count, what|
    chunk = f.read(count)
    if chunk.nil? || chunk.bytesize < count
      raise FormatError, "unexpected end of file while reading #{what}"
    end
    chunk
  end

  hstart = f.read(12)
  return nil if hstart.nil? # EOF
  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF header: got #{hstart.bytesize} of 12 bytes"
  end

  # First four bytes are checked as one little-endian magic number; the next
  # six are skipped; the last two hold the length of the extra field.
  magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
  raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

  len = 0
  bsize = nil
  while len < gzip_extra_length
    si1, si2, slen = read_exactly.call(4, 'extra subfield header').unpack('CCv')
    if si1 == 66 && si2 == 67 # 'B', 'C': the subfield carrying the block size
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?
      bsize = read_exactly.call(2, 'BC subfield').unpack('v')[0]
    else
      # Unknown subfield: skip its payload.
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  cdata_length = bsize - gzip_extra_length - 19
  if cdata_length < 0
    raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
  end

  compressed_data = read_exactly.call(cdata_length, 'compressed data')
  crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')

  return compressed_data, input_size, crc32
end
unpack(str) click to toggle source

Unpacks compressed data, NOT a BGZF block.

# File lib/bio-bgzf/unpack.rb, line 5
# Inflates a raw deflate stream (not a whole BGZF block).
#
# str - String with the compressed bytes.
#
# Returns the decompressed String. Errors raised by Zlib propagate.
def unpack(str)
  # Negative window bits: the input has no zlib header or trailer.
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # Free the native zlib stream instead of waiting for garbage collection.
    zs.close
  end
end
vo_block_offset(vo) click to toggle source
# File lib/bio-bgzf/vo.rb, line 2
# Returns +vo+ with its low 16 bits shifted out, i.e. the part of the value
# above the 16 bits that vo_data_offset extracts.
def vo_block_offset(vo)
  data_offset_bits = 16
  vo >> data_offset_bits
end
vo_data_offset(vo) click to toggle source
# File lib/bio-bgzf/vo.rb, line 7
# Returns the low 16 bits of +vo+.
def vo_data_offset(vo)
  low_16_bits = (1 << 16) - 1
  vo & low_16_bits
end

Private Instance Methods

decompress_block(f) click to toggle source
# File lib/bio-bgzf/block.rb, line 42
# Reads the next BGZF block from +f+, inflates it and checks the result
# against the size and CRC32 stored in the block trailer.
#
# f - IO-like object handed to read_bgzf_block.
#
# Returns the decompressed data as a String, or nil when read_bgzf_block
# returns nil (end of input).
# Raises FormatError when the inflated size or the CRC32 does not match the
# values stored in the block.
def decompress_block(f)
  # Remember where the block starts so that a size mismatch can be reported
  # with an offset. Not every IO-like object responds to #pos.
  block_pos = f.respond_to?(:pos) ? f.pos : nil

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    location = block_pos.nil? ? 'unknown offset' : block_pos
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{location}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end
pack(str, level=Zlib::BEST_COMPRESSION) click to toggle source

Packs str into a BGZF block using the given compression level.

# File lib/bio-bgzf/pack.rb, line 7
# Packs +str+ into a single BGZF block.
#
# str   - String holding the uncompressed payload.
# level - Zlib compression level (defaults to Zlib::BEST_COMPRESSION).
#
# Returns a binary String made of the header fields, the raw deflate stream,
# and a trailer holding the CRC32 and the byte size of +str+.
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Negative window bits produce a raw deflate stream (no zlib header/trailer).
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    # Release the native zlib stream even if deflate raises.
    zs.close
  end

  crc32 = Zlib.crc32(str, 0)
  # The trailer stores a byte count; String#length counts characters and
  # would be too small for multibyte input.
  isize = str.bytesize

  # Same arithmetic read_bgzf_block inverts:
  # compressed length = bsize - XLEN - 19.
  bsize = cdata.bytesize + 19 + XLEN

  fields = [
    ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN, # fixed-size header
    SI1, SI2, SLEN, bsize,                   # extra subfield with the block size
    cdata,                                   # compressed payload
    crc32, isize                             # trailer
  ]

  fields.pack('CCCCVCCvCCvva*VV')
end
read_bgzf_block(f) click to toggle source
# File lib/bio-bgzf/block.rb, line 9
# Reads one raw BGZF block from +f+ without decompressing it.
#
# f - IO-like object supporting #read and #seek, positioned at a block start.
#
# Returns nil when the first read hits end of input, otherwise the array
# [compressed_data, input_size, crc32] taken from the block.
# Raises NotBGZFError when the magic number is wrong or no BC subfield with
# the block size is present.
# Raises FormatError when the extra field is malformed or the input ends in
# the middle of a block.
def read_bgzf_block(f)
  # Reads exactly +count+ bytes or fails; a short or nil read means the block
  # is truncated.
  read_exactly = lambda do |count, what|
    chunk = f.read(count)
    if chunk.nil? || chunk.bytesize < count
      raise FormatError, "unexpected end of file while reading #{what}"
    end
    chunk
  end

  hstart = f.read(12)
  return nil if hstart.nil? # EOF
  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF header: got #{hstart.bytesize} of 12 bytes"
  end

  # First four bytes are checked as one little-endian magic number; the next
  # six are skipped; the last two hold the length of the extra field.
  magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
  raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

  len = 0
  bsize = nil
  while len < gzip_extra_length
    si1, si2, slen = read_exactly.call(4, 'extra subfield header').unpack('CCv')
    if si1 == 66 && si2 == 67 # 'B', 'C': the subfield carrying the block size
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?
      bsize = read_exactly.call(2, 'BC subfield').unpack('v')[0]
    else
      # Unknown subfield: skip its payload.
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  cdata_length = bsize - gzip_extra_length - 19
  if cdata_length < 0
    raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
  end

  compressed_data = read_exactly.call(cdata_length, 'compressed data')
  crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')

  return compressed_data, input_size, crc32
end
unpack(str) click to toggle source

Unpacks compressed data, NOT a BGZF block.

# File lib/bio-bgzf/unpack.rb, line 5
# Inflates a raw deflate stream (not a whole BGZF block).
#
# str - String with the compressed bytes.
#
# Returns the decompressed String. Errors raised by Zlib propagate.
def unpack(str)
  # Negative window bits: the input has no zlib header or trailer.
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # Free the native zlib stream instead of waiting for garbage collection.
    zs.close
  end
end
vo_block_offset(vo) click to toggle source
# File lib/bio-bgzf/vo.rb, line 2
# Returns +vo+ with its low 16 bits shifted out, i.e. the part of the value
# above the 16 bits that vo_data_offset extracts.
def vo_block_offset(vo)
  data_offset_bits = 16
  vo >> data_offset_bits
end
vo_data_offset(vo) click to toggle source
# File lib/bio-bgzf/vo.rb, line 7
# Returns the low 16 bits of +vo+.
def vo_data_offset(vo)
  low_16_bits = (1 << 16) - 1
  vo & low_16_bits
end