class RSScache
Attributes
dx[R]
err_report[R]
Public Class Methods
new(rsslist=nil, filepath: '.', debug: false)
click to toggle source
# File lib/rsscache.rb, line 17
# Builds a cache around a list of RSS feeds.
#
# rsslist  - source handed to open_dynarex; when nil it defaults to
#            'rsscache.xml' inside +filepath+.
# filepath - directory holding the feed list; the cached RSS files go into
#            its 'rsscache' sub-directory, which is created here.
# debug    - when true, the methods of this class print trace output.
def initialize(rsslist=nil, filepath: '.', debug: false)

  # Set the debug flag first: open_dynarex consults @debug, so assigning it
  # last (as before) silently disabled that method's trace output here.
  @debug = debug
  @filepath = filepath

  rsslist ||= File.join(filepath, 'rsscache.xml')
  @dx = open_dynarex(rsslist)

  @cache_filepath = File.join(filepath, 'rsscache')
  FileX.mkdir_p @cache_filepath

  @err_report = []

end
Public Instance Methods
import(raw_s)
click to toggle source
Import a list of URLs into the Dynarex document. URLs which already exist are ignored.
# File lib/rsscache.rb, line 33
# Imports a list of URLs (one per line) into the Dynarex document and saves
# the document afterwards.
#
# raw_s - passed to RXFReader.read; the first element of its result is
#         treated as the newline-separated URL list.
#
# Blank lines are skipped and surrounding whitespace (including a trailing
# CR) is removed so that no empty or padded URL record is created. URLs
# repeated within the input, or already found via @dx.find_by_url, are
# ignored.
def import(raw_s)

  s, _ = RXFReader.read(raw_s)

  urls = s.lines.map(&:strip).reject(&:empty?).uniq

  urls.each do |url|

    puts 'url : ' + url.inspect if @debug

    if @dx.find_by_url url
      puts 'exists' if @debug
    else
      puts 'new URL found' if @debug
      @dx.create url: url
    end

  end

  save()

end
refresh()
click to toggle source
Refreshes each RSS feed.
# File lib/rsscache.rb, line 57
# Walks every feed in the Dynarex document, re-fetches those that are due
# and reschedules them, then saves the document.
#
# A feed is due when its next_refresh is empty or lies in the past. For a
# due feed:
#   * an empty refresh_rate becomes 10;
#   * the rate grows by 10 when the feed is overdue and updates? reported
#     nothing new;
#   * otherwise the current rate is kept (previously nil was assigned here).
# next_refresh is then set to now + refresh_rate * 60 seconds, i.e. the rate
# is applied as minutes when scheduling.
#
# A feed that is not yet due has its rate lowered by 10 while it is above 10.
#
# @err_report is reset at the start of every run.
def refresh

  @err_report = []

  puts '@dx.to_xml' + @dx.to_xml(pretty: true) if @debug

  @dx.all.each do |feed|

    puts 'feed:' + feed.inspect if @debug

    # An empty next_refresh means "never scheduled"; parse it only when
    # present, because Time.parse('') raises ArgumentError.
    due_at = feed.next_refresh.empty? ? nil : Time.parse(feed.next_refresh)
    rate = feed.refresh_rate.to_i

    if due_at.nil? || Time.now >= due_at

      any_new_items = updates? feed

      # NOTE(review): the overdue test adds the rate as seconds while the
      # scheduling below multiplies it by 60 -- kept as in the original,
      # confirm which unit was intended.
      overdue = !due_at.nil? && Time.now > due_at + rate

      feed.refresh_rate =
        if feed.refresh_rate.empty?
          10
        elsif overdue && !any_new_items
          rate + 10
        else
          rate
        end

      feed.next_refresh = Time.now + feed.refresh_rate.to_i * 60

    elsif rate > 10

      feed.refresh_rate = rate - 10

    end

  end

  puts '@dx: ' + @dx.to_xml(pretty: true) if @debug

  save()

end
Also aliased as: update
save()
click to toggle source
# File lib/rsscache.rb, line 103
# Writes the feed list to disk in two forms inside @filepath:
# 'rsscache.xml' via @dx.save and 'rsscache.txt' holding @dx.to_s.
def save()

  xml_path = File.join(@filepath, 'rsscache.xml')
  txt_path = File.join(@filepath, 'rsscache.txt')

  @dx.save xml_path
  FileX.write txt_path, @dx.to_s

end
Private Instance Methods
fetch(url, timeout: 2)
click to toggle source
# File lib/rsscache.rb, line 129
# Downloads +url+ and returns a two-element array [body_or_message, code].
#
# url     - anything URI.open accepts.
# timeout - seconds allowed for the whole request (default 2).
#
# Returns [body, 200] on success, with the body tagged as UTF-8;
# ['connection timed out', 408] when the timeout elapses; and
# ['400 bad request', 400] for any OpenURI::HTTPError. Other exceptions are
# not rescued here and propagate to the caller.
def fetch(url, timeout: 2)

  puts 'inside fetch: url: ' + url.inspect if @debug

  Timeout.timeout(timeout) do

    # Block form so the IO handed out by URI.open is closed as soon as the
    # body has been read instead of being left open.
    buffer = URI.open(url) { |io| io.read.force_encoding("utf-8") }
    [buffer, 200]

  end

rescue Timeout::Error
  ['connection timed out', 408]
rescue OpenURI::HTTPError
  ['400 bad request', 400]
end
open_dynarex(raw_s)
click to toggle source
# File lib/rsscache.rb, line 147
# Turns +raw_s+ into a Dynarex document.
#
# raw_s is read through RXFReader.read and the resulting text is dispatched
# on how one of its lines begins:
#   * '<?dynarex' -> imported as raw Dynarex text;
#   * '<'         -> passed straight to Dynarex.new;
#   * otherwise   -> treated as a plain URL list, wrapped by raw_doc and
#                    imported.
def open_dynarex(raw_s)

  s, _ = RXFReader.read(raw_s)
  puts 'inside open_dynarex s: ' + s.inspect if @debug

  case s
  # The '?' must be escaped: unescaped it made the '<' optional, so a real
  # '<?dynarex' declaration never matched this branch while a line starting
  # with a bare 'dynarex' did.
  when /^<\?dynarex/
    Dynarex.new.import s
  when /^</
    Dynarex.new s
  else
    Dynarex.new.import raw_doc(s)
  end

end
raw_doc(s)
click to toggle source
# File lib/rsscache.rb, line 113
# Wraps a newline-separated list of URLs in raw Dynarex text: the schema
# declaration, a title line, the '--+' separator line, then one 'url: ...'
# line per URL, ending with a newline.
#
# s - String with one URL per line. Blank lines are dropped and surrounding
#     whitespace (including a trailing CR) is stripped, so no empty or
#     padded 'url:' entry is emitted.
#
# Returns the assembled String.
def raw_doc(s)

  heading = '<?dynarex schema="rsscache[title]/feed(uid, title, ' +
      'url, refresh_rate, next_refresh, filename)"?>'

  entries = s.lines.map(&:strip).reject(&:empty?).map {|x| 'url: ' + x }

  [
    heading,
    'title: RSS Feeds to be cached',
    '--+',
    entries.join("\n")
  ].join("\n") + "\n"

end
updates?(feed)
click to toggle source
Checks for any updates and saves the latest RSS file to the cache if there are updates.
# File lib/rsscache.rb, line 166
# Fetches +feed+ and reports whether it has anything new, writing the
# latest RSS source to the cache when it does.
#
# feed - a record responding to url, title and filename (plus the matching
#        setters for title and filename).
#
# Returns true when the feed was cached for the first time or contains items
# that are missing from the cached copy, false otherwise. Every failure path
# now returns false (two of them used to return nil):
#   * fetch raised               -> warning printed;
#   * response code was not 200  -> [url, code] appended to @err_report;
#   * the fetched feed would not parse -> warning printed;
#   * the cached copy would not parse  -> warning printed, cache file removed.
def updates?(feed)

  if @debug then
    puts 'inside updates?'
    puts 'feed: ' + feed.inspect
  end

  # fetch the feed from the web
  begin
    buffer, code = fetch(feed.url)
  rescue StandardError => e
    puts 'RSScache::updates?: fetch() warning for feed ' + feed.url \
        + ' ' + e.inspect
    return false
  end

  unless code == 200
    @err_report << [feed.url, code]
    return false
  end

  begin
    rss = SimpleRSS.parse(buffer)
  rescue StandardError => e
    puts 'RSScache::updates?: err: 100 SimpleRSS warning for feed ' \
        + feed.url + ' ' + e.inspect
    return false
  end

  # Derive a cache file name from the URL the first time the feed is seen:
  # drop the first 6 characters, remove non-word characters, keep the last
  # 40 characters, lower-case them.
  if feed.filename.empty? then

    filename = feed.url[6..-1].gsub(/\W+/,'').\
        reverse.slice(0,40).reverse.downcase + '.xml'
    feed.filename = filename

  end

  rssfile = File.join(@cache_filepath, feed.filename)

  # Nothing cached yet: store the feed and adopt its title if none is set.
  unless FileX.exists? rssfile
    FileX.write rssfile, rss.source
    feed.title = rss.title if feed.title.empty?
    return true
  end

  begin
    rss_cache = SimpleRSS.parse FileX.read(rssfile)
  rescue StandardError => e
    puts 'RSScache::updates?: err: 200 SimpleRSS warning for feed ' \
        + feed.url + ' ' + e.inspect
    FileX.rm rssfile
    return false
  end

  new_rss_items = rss.items - rss_cache.items
  return false unless new_rss_items.any?

  FileX.write rssfile, rss.source
  true

end