module Bio::BGZF
Constants
- CM
- FLG
- ID1
- ID2
- MAX_BYTES
- MTIME
- OS
- SI1
- SI2
- SLEN
- XFL
- XLEN
Public Class Methods
decompress_block(f)
click to toggle source
# File lib/bio-bgzf/block.rb, line 42
#
# Reads one BGZF block from +f+ via read_bgzf_block, inflates it with
# unpack, and verifies the result against the size and CRC32 stored in
# the block trailer.
#
# f - IO-like object handed to read_bgzf_block.
#
# Returns the decompressed data as a String, or nil when
# read_bgzf_block returns nil (which it does at end of input).
#
# Raises FormatError when the decompressed size or the CRC32 does not
# match the values recorded in the block.
def decompress_block(f)
  # Remember where the block starts so the error message can point at it.
  # (The original interpolated a bare `pos`, which is not defined here and
  # turned the intended FormatError into a NameError.)
  block_start = f.pos if f.respond_to?(:pos)

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{block_start}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end
pack(str, level=Zlib::BEST_COMPRESSION)
click to toggle source
Packs str into a BGZF block using the given compression level.
# File lib/bio-bgzf/pack.rb, line 7
#
# Packs +str+ into a single BGZF block using the given compression level.
#
# str   - String holding the uncompressed payload.
# level - Zlib compression level (defaults to Zlib::BEST_COMPRESSION).
#
# Returns a binary String laid out as: the header fields built from the
# module constants (ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN, SI1, SI2,
# SLEN), the 16-bit block size, the deflated payload, the CRC32 of +str+
# and the uncompressed size of +str+ in bytes.
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Window bits of -15 are passed straight to zlib; see the Zlib::Deflate
  # documentation for the meaning of negative values.
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    # Release the zlib stream even if deflate raises.
    zs.close
  end

  crc32 = Zlib.crc32(str, 0)
  # The trailer stores a byte count; String#length would count characters
  # and under-report multibyte input.
  isize = str.bytesize
  bsize = cdata.bytesize + 19 + XLEN

  fields = [
    ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN,
    SI1, SI2, SLEN, bsize,
    cdata, crc32, isize
  ]
  fields.pack('CCCCVCCvCCvva*VV')
end
read_bgzf_block(f)
click to toggle source
# File lib/bio-bgzf/block.rb, line 9
#
# Reads the raw pieces of one BGZF block from +f+ without decompressing.
#
# f - IO-like object responding to read and seek.
#
# Returns [compressed_data, input_size, crc32], or nil when the very
# first read returns nil (end of input).
#
# Raises NotBGZFError when the magic number is wrong or no block-size
# subfield is present.
# Raises FormatError when the extra-field layout is inconsistent or the
# input ends in the middle of a block.
def read_bgzf_block(f)
  hstart = f.read(12)
  return nil if hstart.nil? # EOF

  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF block: expected 12 bytes of header, got #{hstart.bytesize}"
  end

  # Reads exactly +count+ bytes or raises FormatError, so a truncated file
  # surfaces as a format problem instead of a NoMethodError on nil.
  read_exactly = lambda do |count, what|
    chunk = f.read(count)
    got = chunk.nil? ? 0 : chunk.bytesize
    if got < count
      raise FormatError, "truncated BGZF block: expected #{count} bytes of #{what}, got #{got}"
    end
    chunk
  end

  magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
  raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

  len = 0
  bsize = nil
  # Walk the extra subfields looking for the one tagged 66, 67 ("BC"),
  # whose 2-byte payload is the block size.
  while len < gzip_extra_length
    si1, si2, slen = read_exactly.call(4, 'subfield header').unpack('CCv')
    if si1 == 66 && si2 == 67
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?
      # slen is exactly 2 here, so nothing remains to skip after this read.
      bsize = read_exactly.call(2, 'block size').unpack('v')[0]
    else
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  cdata_length = bsize - gzip_extra_length - 19
  if cdata_length < 0
    raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
  end

  compressed_data = read_exactly.call(cdata_length, 'compressed data')
  crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')
  [compressed_data, input_size, crc32]
end
unpack(str)
click to toggle source
Unpacks compressed data, NOT a BGZF block.
# File lib/bio-bgzf/unpack.rb, line 5
#
# Inflates +str+, which must be the compressed payload only, NOT a whole
# BGZF block.
#
# str - String of compressed data, produced with the same -15 window-bits
#       setting that pack uses.
#
# Returns the decompressed data as a String.
def unpack(str)
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # The original never closed the stream; release it deterministically,
    # including when inflate raises on corrupt input.
    zs.close
  end
end
vo_block_offset(vo)
click to toggle source
# File lib/bio-bgzf/vo.rb, line 2
#
# Returns +vo+ with its low 16 bits shifted away; going by the method
# name, this is the block-offset part of a virtual offset.
def vo_block_offset(vo)
  data_offset_bits = 16
  vo >> data_offset_bits
end
vo_data_offset(vo)
click to toggle source
# File lib/bio-bgzf/vo.rb, line 7
#
# Returns the low 16 bits of +vo+; going by the method name, this is the
# data-offset part of a virtual offset.
def vo_data_offset(vo)
  low_bits_mask = (1 << 16) - 1
  vo & low_bits_mask
end
Private Instance Methods
decompress_block(f)
click to toggle source
# File lib/bio-bgzf/block.rb, line 42
#
# Reads one BGZF block from +f+ via read_bgzf_block, inflates it with
# unpack, and verifies the result against the size and CRC32 stored in
# the block trailer.
#
# f - IO-like object handed to read_bgzf_block.
#
# Returns the decompressed data as a String, or nil when
# read_bgzf_block returns nil (which it does at end of input).
#
# Raises FormatError when the decompressed size or the CRC32 does not
# match the values recorded in the block.
def decompress_block(f)
  # Remember where the block starts so the error message can point at it.
  # (The original interpolated a bare `pos`, which is not defined here and
  # turned the intended FormatError into a NameError.)
  block_start = f.pos if f.respond_to?(:pos)

  cdata, in_size, expected_crc = read_bgzf_block(f)
  return nil if cdata.nil?

  data = unpack(cdata)
  if data.bytesize != in_size
    raise FormatError, "Expected #{in_size} bytes from BGZF block at #{block_start}, but got #{data.bytesize} bytes!"
  end

  crc = Zlib.crc32(data, 0)
  if crc != expected_crc
    raise FormatError, "CRC error: expected #{expected_crc.to_s(16)}, got #{crc.to_s(16)}"
  end

  data
end
pack(str, level=Zlib::BEST_COMPRESSION)
click to toggle source
Packs str into a BGZF block using the given compression level.
# File lib/bio-bgzf/pack.rb, line 7
#
# Packs +str+ into a single BGZF block using the given compression level.
#
# str   - String holding the uncompressed payload.
# level - Zlib compression level (defaults to Zlib::BEST_COMPRESSION).
#
# Returns a binary String laid out as: the header fields built from the
# module constants (ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN, SI1, SI2,
# SLEN), the 16-bit block size, the deflated payload, the CRC32 of +str+
# and the uncompressed size of +str+ in bytes.
def pack(str, level=Zlib::BEST_COMPRESSION)
  # Window bits of -15 are passed straight to zlib; see the Zlib::Deflate
  # documentation for the meaning of negative values.
  zs = Zlib::Deflate.new(level, -15)
  begin
    cdata = zs.deflate(str, Zlib::FINISH)
  ensure
    # Release the zlib stream even if deflate raises.
    zs.close
  end

  crc32 = Zlib.crc32(str, 0)
  # The trailer stores a byte count; String#length would count characters
  # and under-report multibyte input.
  isize = str.bytesize
  bsize = cdata.bytesize + 19 + XLEN

  fields = [
    ID1, ID2, CM, FLG, MTIME, XFL, OS, XLEN,
    SI1, SI2, SLEN, bsize,
    cdata, crc32, isize
  ]
  fields.pack('CCCCVCCvCCvva*VV')
end
read_bgzf_block(f)
click to toggle source
# File lib/bio-bgzf/block.rb, line 9
#
# Reads the raw pieces of one BGZF block from +f+ without decompressing.
#
# f - IO-like object responding to read and seek.
#
# Returns [compressed_data, input_size, crc32], or nil when the very
# first read returns nil (end of input).
#
# Raises NotBGZFError when the magic number is wrong or no block-size
# subfield is present.
# Raises FormatError when the extra-field layout is inconsistent or the
# input ends in the middle of a block.
def read_bgzf_block(f)
  hstart = f.read(12)
  return nil if hstart.nil? # EOF

  if hstart.bytesize < 12
    raise FormatError, "truncated BGZF block: expected 12 bytes of header, got #{hstart.bytesize}"
  end

  # Reads exactly +count+ bytes or raises FormatError, so a truncated file
  # surfaces as a format problem instead of a NoMethodError on nil.
  read_exactly = lambda do |count, what|
    chunk = f.read(count)
    got = chunk.nil? ? 0 : chunk.bytesize
    if got < count
      raise FormatError, "truncated BGZF block: expected #{count} bytes of #{what}, got #{got}"
    end
    chunk
  end

  magic, gzip_extra_length = hstart.unpack('Vxxxxxxv')
  raise NotBGZFError, "wrong BGZF magic: #{sprintf('%08x', magic)}" unless magic == 0x04088B1F

  len = 0
  bsize = nil
  # Walk the extra subfields looking for the one tagged 66, 67 ("BC"),
  # whose 2-byte payload is the block size.
  while len < gzip_extra_length
    si1, si2, slen = read_exactly.call(4, 'subfield header').unpack('CCv')
    if si1 == 66 && si2 == 67
      raise FormatError, "BC subfield length is #{slen} but must be 2" if slen != 2
      raise FormatError, 'duplicate field with block size' unless bsize.nil?
      # slen is exactly 2 here, so nothing remains to skip after this read.
      bsize = read_exactly.call(2, 'block size').unpack('v')[0]
    else
      f.seek(slen, IO::SEEK_CUR)
    end
    len += 4 + slen
  end

  if len != gzip_extra_length
    raise FormatError, "total length of subfields is #{len} bytes but must be #{gzip_extra_length}"
  end
  raise NotBGZFError, 'block size was not found in any subfield' if bsize.nil?

  cdata_length = bsize - gzip_extra_length - 19
  if cdata_length < 0
    raise FormatError, "block size #{bsize} is too small for an extra field of #{gzip_extra_length} bytes"
  end

  compressed_data = read_exactly.call(cdata_length, 'compressed data')
  crc32, input_size = read_exactly.call(8, 'block trailer').unpack('VV')
  [compressed_data, input_size, crc32]
end
unpack(str)
click to toggle source
Unpacks compressed data, NOT a BGZF block.
# File lib/bio-bgzf/unpack.rb, line 5
#
# Inflates +str+, which must be the compressed payload only, NOT a whole
# BGZF block.
#
# str - String of compressed data, produced with the same -15 window-bits
#       setting that pack uses.
#
# Returns the decompressed data as a String.
def unpack(str)
  zs = Zlib::Inflate.new(-15)
  begin
    zs.inflate(str)
  ensure
    # The original never closed the stream; release it deterministically,
    # including when inflate raises on corrupt input.
    zs.close
  end
end
vo_block_offset(vo)
click to toggle source
# File lib/bio-bgzf/vo.rb, line 2
#
# Returns +vo+ with its low 16 bits shifted away; going by the method
# name, this is the block-offset part of a virtual offset.
def vo_block_offset(vo)
  data_offset_bits = 16
  vo >> data_offset_bits
end
vo_data_offset(vo)
click to toggle source
# File lib/bio-bgzf/vo.rb, line 7
#
# Returns the low 16 bits of +vo+; going by the method name, this is the
# data-offset part of a virtual offset.
def vo_data_offset(vo)
  low_bits_mask = (1 << 16) - 1
  vo & low_bits_mask
end