class Parser::NewsKZ

Public Class Methods

new(source, options = {})
Calls superclass method Parser::Base::new
# File lib/fly_parser/sources/news-kz.rb, line 3
def initialize(source, options = {})
  @delay = 2
  super
end

Public Instance Methods

parse_all()
# File lib/fly_parser/sources/news-kz.rb, line 8
def parse_all
  links = @source.search("li.c__news_item a:first")
  
  links.map do |link|
    page = click(link)
    begin
    title = page.search(".c__article_caption").text()

    content_wrapper = page.search('.c__article_text')
    
    image_wrapper = content_wrapper.search('.wp-caption img').first
    next unless image_wrapper
    poster_image = image_wrapper.attributes['src'].value
    
    content_wrapper.search('.wp-caption').remove()
    content_wrapper.search('.c__article_mistake').remove()
    content_wrapper.search('p[style="display:none"]').remove()
    content_wrapper.search("a").remove()
    content_wrapper.search("span:contains(Копирование)").remove()
    
    full_desc = content_wrapper.to_html
    full_desc.gsub!(/<iframe.*><\/iframe>/, '')
    copyright = "<p>Source: <a href='#{@copyright[:url]}'>#{@copyright[:title]}</a></p>"
    content = full_desc + copyright
    
    {title: title, content: content, poster_image: poster_image}

    rescue Exception => e
      puts e.message
      next
    end 
  end.compact
end