class Parser::News
Public Class Methods
new(source, options = {})
click to toggle source
Calls superclass method
# File lib/fly_parser/sources/news.rb, line 3

# Builds a news parser by handing both arguments, unchanged, to the
# superclass initializer; this class adds no setup of its own.
#
# @param source  forwarded as-is to the superclass
# @param options [Hash] forwarded as-is to the superclass (defaults to {})
def initialize(source, options = {})
  super(source, options)
end
Public Instance Methods
parse_all()
click to toggle source
# File lib/fly_parser/sources/news.rb, line 7

# Turns every feed item published after the cutoff into an article hash by
# downloading the linked page and scraping it.
#
# An item is skipped (never raised on) when its pubDate is missing or cannot
# be parsed, when its page cannot be fetched, or when the page has no
# illustration image with a src attribute.
#
# @return [Array<Hash>] one hash per article with the keys
#   :title, :content, :poster_image and :tags
def parse_all
  last_date = Time.now - 2.years # for dev 2 years

  # select!/reject! do not exist on Nokogiri's NodeSet, so filter into a plain Array.
  recent_items = @source.search('//item').select do |item|
    item_published_after?(item, last_date)
  end

  # build_article returns nil for items that must be skipped.
  recent_items.map { |item| build_article(item) }.compact
end

# True when the item's pubDate parses to a moment later than +cutoff+.
# The raw pubDate is a String, so it has to be parsed before it can be
# compared with a Time.
def item_published_after?(item, cutoff)
  require 'time' # Time.parse is provided by the stdlib 'time' extension

  date_node = item.xpath('pubDate').first
  return false if date_node.nil?

  Time.parse(date_node.content) > cutoff
rescue ArgumentError => e
  # Time.parse raises ArgumentError on text that holds no date.
  puts e.message
  false
end
private :item_published_after?

# Downloads and parses the article page; returns nil (after printing the
# error message) when the link is invalid or the download fails.
def fetch_article_page(link)
  # URI#open (from open-uri) only speaks to URL schemes, unlike Kernel#open,
  # which would also open local paths or "|command" pipes. The block form
  # closes the downloaded stream once it has been parsed.
  URI.parse(link.strip).open { |io| Nokogiri::HTML(io) }
rescue StandardError => e
  # StandardError, not Exception: Interrupt/SystemExit must still propagate.
  puts e.message
  nil
end
private :fetch_article_page

# Scrapes one feed item into an article hash, or returns nil when the item
# has to be skipped.
def build_article(item)
  title = item.xpath('title/text()').text
  link = item.xpath('link/text()').text

  page = fetch_article_page(link)
  return if page.nil?

  illustration = page.search('.article_illustration img').first
  poster_image = illustration && illustration['src']
  return if poster_image.nil?

  lead = page.search('.article_lead').first
  short_desc = lead ? lead.content : ''

  # Strip embedded widgets and links from the body before serialising it.
  full_desc = page.search('.article_full_text')
  full_desc.search('.article_illustration').remove
  full_desc.search('.inject-data').remove
  full_desc.search('.inject_poll').remove
  full_desc.search('a').remove

  tags = @enable_tags ? parse_tags(page) : nil
  copyright = "<p>Источник: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
  content = "<p>#{short_desc}</p>#{full_desc.inner_html}#{copyright}"

  { title: title, content: content, poster_image: poster_image, tags: tags }
end
private :build_article