class Warc::Record
Constants
- VERSION
Attributes
content[RW]
header[R]
offset[RW]
Public Class Methods
new(h={},content=nil)
click to toggle source
# File lib/warc/record.rb, line 7
#
# Builds a record from either a Hash of header fields or a
# WEBrick::HTTPResponse.
#
# h::       a Hash (handed to Header.new together with this record) or a
#           WEBrick::HTTPResponse (converted into a "response" record).
# content:: initial value for the record block; replaced by the serialized
#           HTTP message when +h+ is a WEBrick::HTTPResponse.
#
# For any other kind of +h+ no header is assigned.
def initialize(h={}, content=nil)
  @content = content

  case h
  when Hash
    @header = Header.new(self, h)
  when WEBrick::HTTPResponse
    @header = Header.new(self)
    @header["WARC-Type"] = "response"
    @header["WARC-Target-URI"] = h.request_uri.to_s
    @header["Content-Type"] = "application/http;msgtype=response"
    # "WARC-IP-Address" is deliberately left unset here.

    # Re-serialize the response: status line, header lines, blank line, body.
    line_end = "\r\n"
    payload = String.new
    payload << h.status_line
    h.header.each { |name, value| payload << "#{name}: #{value}#{line_end}" }
    payload << (line_end + h.body)

    self.content = payload
    header.block_digest
    @header["WARC-Payload-Digest"] = header.compute_digest(h.body)
  end
end
Public Instance Methods
dump_to(out)
click to toggle source
# File lib/warc/record.rb, line 40
#
# Writes this record to +out+ in the following layout:
#
#   warc-file   = 1*warc-record
#   warc-record = header CRLF
#                 block CRLF CRLF
#   header      = version CRLF
#                 warc-fields
#   version     = "WARC/0.16" CRLF
#   warc-fields = *named-field CRLF
#   block       = *OCTET
#
# out:: any object responding to +write+.
#
# Returns whatever the final +out.write+ call returns.
def dump_to(out)
  crlf = "\r\n"
  out.write(VERSION + crlf)
  out.write(header.to_s)
  out.write(crlf)
  # content defaults to nil in the constructor; to_s turns that into an
  # empty block instead of raising NoMethodError on nil + String.
  out.write(content.to_s + crlf * 2)
end
to_http()
click to toggle source
# File lib/warc/record.rb, line 30
#
# Parses the record block as an HTTP response message.
#
# Returns a Net::HTTPResponse whose body has already been read, or +nil+
# when the record's "Content-Type" header is not
# "application/http;msgtype=response".
def to_http
  return unless @header["Content-Type"] == "application/http;msgtype=response"

  # Net::BufferedIO reads from an IO-like object, while the constructor
  # stores the block as a plain String; wrap Strings so they can be read.
  # Anything else is passed through untouched.
  require 'stringio' # no-op when already loaded
  source = content.is_a?(String) ? StringIO.new(content) : content

  socket = Net::BufferedIO.new(source)
  response = Net::HTTPResponse.read_new(socket)
  # The empty block only serves to make reading_body consume the body.
  response.reading_body(socket, true) {}
  response
end