class Pluto::FeedFetcherBasic

Public Class Methods

new() click to toggle source
# File lib/pluto/feedfetcher/basic.rb, line 10
# Creates the fetcher together with the Fetcher::Worker instance
# that the fetch helpers use to issue their requests.
def initialize
  @worker = Fetcher::Worker.new
end

Public Instance Methods

debug?() click to toggle source
# File lib/pluto/feedfetcher/basic.rb, line 14
# Reports whether debugging is switched on in the global Pluto configuration
# (delegates to Pluto.config.debug?).
def debug?
  Pluto.config.debug?
end
fetch( feed_rec ) click to toggle source
# File lib/pluto/feedfetcher/basic.rb, line 17
  # Simple/basic feed fetcher; use for debugging (only/mostly).
  #  -- Note: will NOT change db records in any way
  #
  # Downloads the feed referenced by the record, logs a short preview and,
  # when debug? is on, also saves a copy to ./<key>.xml in the current
  # working directory.
  #
  # feed_rec - record responding to #feed_url (address to download) and
  #            #key (used for log messages and the debug copy's file name)
  #
  # Returns the feed text exactly as handed back by fix_me_fetch_utf8
  # (raw, unparsed).
  def fetch( feed_rec )
    feed_url = feed_rec.feed_url
    feed_key = feed_rec.key

    feed_xml = fix_me_fetch_utf8( feed_url )

    logger.debug "feed_xml:"
    # start/length slice => at most 500 chars
    #   (the inclusive range 0..500 would yield 501)
    logger.debug feed_xml[ 0, 500 ]

    ####  todo/fix: make it generic - move out of this method (re)use - for all fetcher??
    #  if opts.verbose?  # also write a copy to disk
    if debug?
      dump_path = "./#{feed_key}.xml"
      logger.debug "saving feed to >#{dump_path}<..."
      File.write( dump_path, feed_xml )
    end

    ### todo/fix:
    ###  return feed_xml !!! - move FeedUtils::Parser.parse to update or something !!!

    logger.info "Before parsing feed >#{feed_key}<..."

    feed_xml

    ## fix/todo: check for feed.nil?   -> error parsing!!!
    #    or throw exception
    # feed = FeedUtils::Parser.parse( feed_xml )
    # feed
  end

Private Instance Methods

fix_me_fetch_utf8( url ) click to toggle source

TODO/fix: use a “standard” fetch method instead, e.g. Fetcher.read_utf8!(), and clean up or remove this duplicate implementation.

# File lib/pluto/feedfetcher/basic.rb, line 59
# Fetches url through @worker and returns the response body tagged as UTF-8.
#
# The body is only re-labelled, never transcoded: force_encoding changes the
# assumed encoding of the (same) string object without touching its bytes.
#
# url - address to request via @worker.get
#
# Returns the response body with its encoding set to Encoding::UTF_8.
def fix_me_fetch_utf8( url )
  response = @worker.get( url )

  ## debug output (not guarded by debug? for now)
  logger.debug "http status #{response.code} #{response.message}"

  %w[server etag last-modified].each do |name|
    logger.debug "http header - #{name}: #{response.header[name]} - #{response.header[name].class.name}"
  end

  body = response.body

  # Note: Net::HTTP will NOT set the encoding to UTF-8 etc.;
  #   ASCII-8BIT == BINARY == encoding unknown / raw bytes
  logger.debug "xml.encoding.name (before): #{body.encoding.name}"

  # for now "hardcoded" to utf8 - what else can we do?
  body = body.force_encoding( Encoding::UTF_8 )
  logger.debug "xml.encoding.name (after): #{body.encoding.name}"

  body
end