class Xapian::Indexer::Loaders::HTTP

Constants

UserAgent

Public Class Methods

new(options = {}) click to toggle source
# File lib/xapian/indexer/loaders/http.rb, line 30
# Build a loader from a hash of settings.
#
# options - Hash kept verbatim in @options. The only key read here is
#           :logger; when it is missing (or nil/false) a Logger writing
#           to $stderr is created and used instead.
def initialize(options = {})
        @options = options

        supplied_logger = options[:logger]
        @logger = if supplied_logger
                supplied_logger
        else
                Logger.new($stderr)
        end
end

Public Instance Methods

call(name) { |code.to_i, header, body| ... } click to toggle source

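Load the resource named by an absolute URI over HTTP. A HEAD request is made first; the block receives the numeric status code, the response headers, and a lambda that fetches the body with a GET request when called. Returns true when the URI was requested, and false (without calling the block) when this loader does not handle the name.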

# File lib/xapian/indexer/loaders/http.rb, line 37
# Load the resource named by an absolute http/https URI.
#
# A HEAD request is sent first and its outcome is yielded to the block as
# three values: the status code as an Integer, the HEAD response object
# (which gives access to the headers), and a lambda. Calling the lambda
# performs a GET for the same target and returns the response body, so the
# download only happens if the caller asks for it. The lambda reuses the
# connection opened for this call, so it must be invoked from inside the
# block, before the connection is closed.
#
# name - String URI of the resource.
#
# Returns true when the URI was requested, or false (without calling the
# block) when +name+ is not an http/https URI with a host, e.g. a relative
# path or a URI with another scheme.
# Raises URI::InvalidURIError if +name+ cannot be parsed as a URI.
def call(name, &block)
        uri = URI.parse(name)

        # URI::HTTPS is a subclass of URI::HTTP, so this accepts both schemes
        # and rejects relative references and schemes Net::HTTP cannot serve.
        return false unless uri.is_a?(URI::HTTP) && uri.host

        # request_uri keeps the query string and is "/" when the path is
        # empty; uri.path alone would drop the query and could be "".
        target = uri.request_uri
        headers = { 'User-Agent' => UserAgent }

        # Without use_ssl an https URI would be spoken to in plain HTTP.
        Net::HTTP.start(uri.host, uri.port, use_ssl: uri.scheme == 'https') do |http|
                head = http.request_head(target, headers)

                # Deferred GET: only runs if the caller invokes the lambda.
                body = lambda do
                        http.request_get(target, headers).body
                end

                @logger.info "Loading external URI: #{name.inspect}"

                # The response itself is passed as the header argument; the
                # obsolete Net::HTTPResponse#header accessor returned the
                # same object.
                yield head.code.to_i, head, body
        end

        true
end