class Pluto::SiteFetcher
Public Class Methods
new()
click to toggle source
# File lib/pluto/update/site_fetcher.rb, line 11
#
# Builds a new site fetcher that owns a single Fetcher::Worker instance
# (kept in @worker) used for all HTTP requests made by this object.
def initialize
  @worker = Fetcher::Worker.new
end
Public Instance Methods
debug?()
click to toggle source
# File lib/pluto/update/site_fetcher.rb, line 15
#
# Returns whatever Pluto.config.debug? answers, i.e. whether the
# global Pluto configuration has debug mode switched on.
def debug?
  Pluto.config.debug?
end
fetch( site_rec )
click to toggle source
# File lib/pluto/update/site_fetcher.rb, line 18
#
# Fetches the site config text for +site_rec+ via the worker, handing the
# worker the etag / last-modified values stored on the record
# ("smart http update"), and writes the outcome back to the record.
#
# site_rec - record responding to #url, #key, #http_etag,
#            #http_last_modified and #update!
#
# Returns the response body as a UTF-8 tagged String on HTTP 200.
# Returns nil on HTTP 304 (not modified; record is left untouched) and on
# any other status (record is updated with the error code and an Activity
# entry is created).
# Exceptions raised by the worker propagate to the caller.
def fetch( site_rec )
  site_url = site_rec.url
  site_key = site_rec.key

  ### todo/fix: normalize/unify feed_url
  ##   - same in fetcher - use shared utility method or similar

  @worker.use_cache = true
  begin
    @worker.cache[ site_url ] = {
      'etag'          => site_rec.http_etag,
      'last-modified' => site_rec.http_last_modified
    }
    response = @worker.get( site_url )
  ensure
    # always switch the cache off again, even if the request raised
    # fix/todo: restore old use_cache setting instead of false
    @worker.use_cache = false
  end

  site_fetched = Time.now

  # note: Net::HTTP response.code is a string in ruby
  http_code = response.code
  status    = "HTTP status #{http_code} #{response.message}"

  if http_code == '304'    # not modified (conditional GET - e.g. using etag/last-modified)
    puts "OK - fetching site '#{site_key}' - #{status}"
    puts "no change; request returns not modified (304); skipping parsing site config"
    return nil             # no updates available; nothing to do
  end

  server = response.header[ 'server' ]

  unless http_code == '200'
    error_text = "*** error: fetching site '#{site_key}' - #{status}"
    puts error_text

    site_rec.update!(
      http_code:          http_code.to_i,
      http_server:        server,
      http_etag:          nil,
      http_last_modified: nil,
      fetched:            site_fetched
    )

    ## add log error activity -- in future add to error log - better - why? why not?
    Activity.create!( text: error_text )
    return nil             # sorry; no feed for parsing available
  end

  puts "OK - fetching site '#{site_key}' - #{status}"

  etag          = response.header[ 'etag' ]
  last_modified = response.header[ 'last-modified' ]

  ## if debug?
  puts "http header - server: #{server} - #{server.class.name}"
  puts "http header - etag: #{etag} - #{etag.class.name}"
  puts "http header - last-modified: #{last_modified} - #{last_modified.class.name}"
  ## end

  site_rec.update!(
    http_code:          http_code.to_i,
    http_server:        server,
    http_etag:          etag,
    http_last_modified: last_modified,   ## note: last_modified header gets stored as plain text (not datetime)
    fetched:            site_fetched
  )

  ###
  # Note: Net::HTTP will NOT set encoding UTF-8 etc.
  #   will be set to ASCII-8BIT == BINARY == Encoding Unknown; Raw Bytes Here
  #   thus, set/force encoding to utf-8
  #
  # The body is only touched here, on the 200 path. Unary plus yields an
  # unfrozen string (a copy only when needed), because force_encoding
  # raises FrozenError on frozen strings such as nil.to_s on Ruby >= 2.7.
  (+response.body.to_s).force_encoding( Encoding::UTF_8 )
end