class SpiderBot::Http::Response

Constants

CONTENT_TYPE
PARSERS

Attributes

response[R]

Public Class Methods

new(response) click to toggle source
# File lib/spider_bot/http/response.rb, line 20
# Wraps a raw HTTP response object.
#
# response - the underlying response. The other methods of this class call
#            +headers+, +status+ and +body+ on it.
def initialize(response)
  @response = response
end

Public Instance Methods

body(options = {}) click to toggle source
# File lib/spider_bot/http/response.rb, line 28
# Returns the response body after it has been passed through +decode+.
#
# options - Hash forwarded to +decode+; +nil+ is treated as an empty Hash.
def body(options = {})
  decode(response.body, options || {})
end
charset_covert(charset) click to toggle source
# File lib/spider_bot/http/response.rb, line 73
# Normalises a charset label before it is handed to String#encode.
#
# Any spelling of "gb2312" or "gbk" (charset labels are case-insensitive, so
# "Gb2312" and "Gbk" count too) is mapped to "gbk". Every other value,
# including +nil+, is returned unchanged.
#
# charset - the charset label, e.g. as captured from a page's charset
#           declaration.
#
# Returns "gbk" for the GB2312/GBK family, otherwise +charset+ itself.
def charset_covert(charset)
  case charset
  when /\A(?:gb2312|gbk)\z/i
    "gbk"
  else
    charset
  end
end
content_type() click to toggle source

Attempts to determine the content type of the response.

# File lib/spider_bot/http/response.rb, line 54
# Extracts the media type from the response's content-type header.
#
# The header is looked up under the keys 'content-type' and 'Content-Type'
# (in that order). Anything from the first ';' onwards (e.g. a charset
# parameter) is dropped and surrounding whitespace is stripped.
#
# Returns the media type String, or '' when the header is absent or empty.
def content_type
  header_value = response.headers.values_at('content-type', 'Content-Type').compact.first
  header_value ||= ''
  media_type = header_value.split(';').first
  media_type ||= ''
  media_type.strip
end
decode(body, options = {}) click to toggle source
# File lib/spider_bot/http/response.rb, line 33
# Converts +body+ to UTF-8 based on the charset declared inside the body.
#
# body    - the raw body String (may be +nil+).
# options - Hash; +:encode+ names the source encoding to use instead of the
#           one declared in the body. +nil+ is treated as an empty Hash.
#
# Behaviour, in order:
# * a +nil+ body yields ''
# * a response that +json?+ reports as JSON is returned untouched
# * a body declaring charset utf-8 (any case) is returned untouched
# * otherwise the body is transcoded from +options[:encode]+ or, failing
#   that, from the declared charset (normalised by +charset_covert+);
#   invalid byte sequences are replaced
# * when no source encoding is known, or transcoding fails, the body is
#   returned untouched
#
# The argument is never modified: a new String is returned, so decoding the
# same body twice gives the same result.
#
# Returns a String.
def decode(body, options = {})
  return '' unless body
  return body if json?

  options ||= {}
  declaration = body.match(/charset\s*=[\s|\W]*([\w-]+)/)
  # A body without any charset declaration produces no match at all.
  declared = declaration && declaration[1]
  return body if declared && declared.downcase == "utf-8"

  source = options[:encode] || (declared && charset_covert(declared))
  return body unless source

  begin
    # The options must be passed as keywords: a braced Hash is a third
    # positional argument on Ruby 3 and raises ArgumentError.
    body.encode("utf-8", source, :invalid => :replace)
  rescue StandardError
    # Best effort: unknown encodings or unconvertible data leave the body as is.
    body
  end
end
headers() click to toggle source
# File lib/spider_bot/http/response.rb, line 24
# Returns the headers of the wrapped response, exactly as the underlying
# response object reports them.
def headers
  response.headers
end
json?() click to toggle source
# File lib/spider_bot/http/response.rb, line 58
# Reports whether the response should be treated as JSON.
#
# True when the content type maps to :json in CONTENT_TYPE, or when the body
# contains no "<html" marker. The marker check is case-insensitive so that
# pages written with an upper-case <HTML> tag are still recognised as HTML.
# A +nil+ body is treated like an empty one (no marker, hence true).
#
# Returns true or false.
def json?
  return true if CONTENT_TYPE[content_type] == :json

  response.body.to_s !~ /<html/i
end
parsed() click to toggle source
# File lib/spider_bot/http/response.rb, line 69
# Returns the body run through the parser selected by +parser+.
#
# The callable is looked up in PARSERS and invoked with +body+. A truthy
# result is cached in @parsed and reused on later calls; a +nil+ or +false+
# result is not cached, so the parser runs again next time.
def parsed
  return @parsed if @parsed

  @parsed = PARSERS[parser].call(body)
end
parser() click to toggle source
# File lib/spider_bot/http/response.rb, line 62
# Chooses the key into PARSERS for this response.
#
# The choice starts from CONTENT_TYPE[content_type]. A response labelled as
# HTML whose non-empty body contains no "<...html" marker (matched
# case-insensitively, so <HTML> and <!DOCTYPE HTML ...> count) is treated as
# JSON instead. An unknown content type falls back to :html.
#
# The previous pattern ended in an empty alternative ("|"), which matches
# every string, so the JSON fallback could never trigger. A +nil+ or empty
# body carries no evidence either way and keeps the declared type.
#
# Returns a Symbol (or whatever CONTENT_TYPE maps the content type to).
def parser
  type = CONTENT_TYPE[content_type]
  if type == :html
    raw = response.body.to_s
    type = :json if !raw.empty? && raw !~ /<.*html/i
  end
  type.nil? ? :html : type
end
status() click to toggle source
# File lib/spider_bot/http/response.rb, line 49
# Returns the status of the wrapped response, exactly as the underlying
# response object reports it.
def status
  response.status
end