class Parser::NewsAZ

Public Class Methods

new(source, options = {})
Calls superclass method
# File lib/fly_parser/sources/news-az.rb, line 3
# Builds a NewsAZ parser.
#
# Adds no setup of its own: the bare `super` forwards both arguments,
# unchanged, to the superclass initializer.
#
# @param source  the feed document; parse_all later calls `search` on @source,
#   presumably assigned from this argument by the superclass — TODO confirm
# @param options [Hash] optional settings; their meaning is defined by the
#   superclass, which is not visible here
def initialize(source, options = {})
  # Zero-argument super passes `source` and `options` through as received.
  super
end

Public Instance Methods

parse_all()
# File lib/fly_parser/sources/news-az.rb, line 7
# Builds one article hash per feed <item> by fetching and scraping the page
# each item links to.
#
# For every item found via `@source.search('//item')` the method:
# 1. reads the title and the link text that follows the <link> element;
# 2. downloads the linked page and parses it with Nokogiri;
# 3. picks a poster image (single article image first, gallery image second);
# 4. strips links, author/contact lines and the first paragraph from the
#    article body, then appends a source attribution built from @copyright.
#
# Items are skipped (not raised on) when the link is missing or is not an
# HTTP(S) URL, when the download fails, or when no poster image is found.
#
# TODO: date filtering (keeping only items newer than some cut-off) is not
# implemented; the original author noted that Nokogiri's NodeSet offers no
# select!/reject! for doing it in place.
#
# @return [Array<Hash>] hashes with :title, :content (HTML string) and
#   :poster_image (image URL), one per successfully scraped item
def parse_all
  items = @source.search('//item')
  items.map do |item|
    title = item.xpath('title/text()').text

    link_node = item.xpath('link/following-sibling::text()[1]').first
    url = link_node.nil? ? '' : link_node.content.to_s.strip
    # The link comes from a remote feed. Only hand real HTTP(S) URLs to
    # open-uri: anything else would fall through to Kernel#open, which treats
    # the string as a local path (or as a command when it starts with "|").
    unless url =~ %r{\Ahttps?://}i
      puts "Skipping item without an HTTP(S) link: #{url.inspect}"
      next
    end

    begin
      # Block form closes the downloaded IO as soon as it has been parsed.
      page = URI.open(url) { |io| Nokogiri::HTML(io) }
    rescue StandardError => e
      # Best effort: one unreachable article must not abort the whole feed,
      # but Interrupt/SystemExit are deliberately left alone.
      puts e.message
      next
    end

    poster_node = page.search('.content-block .visual img').first ||
                  page.search('.gallery-photo img').first
    next if poster_node.nil?

    poster_image = poster_node['src']
    next if poster_image.nil?

    full_desc = page.search('.content-block .text-block .text')
    full_desc.search('a').remove
    # Relative (".//") paths keep the clean-up inside the article body instead
    # of touching matching nodes anywhere in the fetched document.
    full_desc.xpath(".//text()[contains(., 'Автор')]").remove
    full_desc.xpath(".//text()[contains(., 'Связаться')]").remove
    full_desc.xpath('.//p[1]').remove

    body = full_desc.inner_html
                    .gsub(/<!--noindex-->.*/, '')
                    .gsub(%r{<!--/noindex-->.*}, '')
                    .gsub(/\r\n/, '')
                    .strip
    copyright = "<p>Mənbə: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"

    { title: title, content: body + copyright, poster_image: poster_image }
  end.compact
end