class Warc::Stream::Gzip

Public Class Methods

new(fh,options={},&block) click to toggle source
# File lib/warc/stream.rb, line 28
# Builds a stream on top of an existing handle or a base file name.
#
# fh      - one of:
#           * a ::File, used as the handle directly; @name becomes its basename;
#           * a String, stored as @name; the handle is whatever
#             next_file_handle returns;
#           * any other object (for example an IO-like object that is not a
#             ::File), kept as the handle unchanged; @name is left unset.
# options - Hash merged over DEFAULT_OPTS.
# block   - optional naming proc, called with (name, index). It is only stored
#           when fh is a String; the default yields "<name>.<index>" with the
#           index zero-padded to six digits.
def initialize(fh,options={},&block)
  @options = DEFAULT_OPTS.merge(options)
  @index = 0
  @file_handle = case fh
  when ::File
    @name = ::File.basename(fh)
    fh
  when String
    @name = fh
    @naming_proc = block || lambda { |name, index| "#{name}.#{format('%06d', index)}" }
    next_file_handle
  else
    # Without this branch the case expression evaluated to nil, so handles
    # such as StringIO or Tempfile were silently discarded.
    fh
  end
  @parser = ::Warc::Parser.new
end
new(fh,options={},&block) click to toggle source
Calls superclass method Warc::Stream::new
# File lib/warc/stream/gzip.rb, line 6
# Creates a gzip-backed stream.
#
# Records the ".warc.gz" extension in @ext and then delegates to the
# superclass initializer with the very same fh, options and block.
def initialize(fh, options = {}, &block)
  # Assigned before delegating so it is already in place while the superclass
  # initializer runs.
  @ext = '.warc.gz'
  # Bare super forwards fh, options and the block unchanged.
  super
end

Public Instance Methods

read_record() click to toggle source
# File lib/warc/stream/gzip.rb, line 11
# Reads the next gzip member from @file_handle and parses it into a record.
#
# A new GzipReader is layered over @file_handle, handed to parser.parse, and
# then drained line by line until it raises EOFError, so the whole member
# (including its footer) is consumed.
#
# Returns whatever parser.parse returned once the member has been drained
# (nil if EOFError was raised before parsing completed), or nil when a
# ::Zlib::Error is raised, e.g. because no gzip data is left to read.
def read_record
  gz = ::Zlib::GzipReader.new(@file_handle)
  rec = parser.parse(gz)
  loop { gz.readline } # Drain the member; EOFError marks its end
rescue EOFError # End of gzipped record
  # The reader buffers ahead and may have consumed bytes that belong to the
  # next member; move the cursor back so the next call starts on its header.
  leftover = gz && gz.unused
  @file_handle.pos -= leftover.length unless leftover.nil?
  rec
rescue ::Zlib::Error # Raised when there's no more gzipped data to read
  nil
ensure
  # finish releases the zlib stream but, unlike close, leaves @file_handle
  # open for subsequent reads.
  gz.finish if gz && !gz.closed?
end
write_record(record) click to toggle source
Calls superclass method Warc::Stream#write_record
# File lib/warc/stream/gzip.rb, line 26
# Writes +record+ to @file_handle as a gzip member of its own.
#
# The superclass write_record is invoked first with the same argument. The
# record is then dumped through a fresh GzipWriter wrapped around
# @file_handle, and the writer is finished so the gzip footer gets written.
#
# Returns the value of GzipWriter#finish.
def write_record(record)
  super

  # finish (rather than close) emits the footer without closing @file_handle.
  ::Zlib::GzipWriter.new(@file_handle).tap { |member| record.dump_to(member) }.finish
end