module General

Public Instance Methods

crl_general( sym, cmd, channel, response, data, obj, custom={} ) click to toggle source
# File lib/modules/general.rb, line 2
# Entry point of the module: routes the action named by +sym+ to the matching
# crl_general_* helper.
#
# sym      - action selector; one of :download, :pre_titles, :mining_rss_one,
#            :mining_rss_two, :format_url_s3, :format_html_remove.
# cmd      - hash; cmd[:url] is read for :download and both mining actions.
# channel  - hash; channel[:options][:html] is read for :format_url_s3.
# response - raw feed body handed to the mining actions.
# data     - feed hash handed to :pre_titles.
# obj      - configuration hash forwarded to the helpers.
# custom   - optional hash; :query (format_url_s3) and :html (format_html_remove).
#
# Returns two values: the helper's result (nil for an unknown +sym+) and an
# array of messages.
def crl_general( sym, cmd, channel, response, data, obj, custom={} )
  outcome = nil
  notes = []

  case sym
  when :download
    # Helper returns a pair, so the messages are replaced as well.
    outcome, notes = crl_general_download( cmd[:url], obj )
  when :pre_titles
    outcome, notes = crl_general_pre_titles( cmd, channel, data, obj )
  when :mining_rss_one
    outcome = crl_general_mining_rss_one( cmd[:url], response, obj )
  when :mining_rss_two
    outcome = crl_general_mining_rss_two( cmd[:url], response, obj )
  when :format_url_s3
    outcome = crl_general_format_url_s3( obj, channel[:options][:html], custom[:query] )
  when :format_html_remove
    outcome = crl_general_format_html_remove( custom[:html] )
  else
    notes.push( "General: #{sym} not found." )
  end

  return outcome, notes
end

Private Instance Methods

crl_general_channels() click to toggle source
# File lib/modules/general.rb, line 29
# Returns a new, empty array on every call.
def crl_general_channels()
  Array.new
end
crl_general_download( url, obj ) click to toggle source
# File lib/modules/general.rb, line 34
# Fetches +url+ with a single GET request that carries browser-like headers.
#
# url - address to fetch.
# obj - configuration hash; obj[:format][:download][:agent] is the User-Agent
#       template whose '{{version}}' marker is replaced by a random number.
#
# Returns two values: the response body and a one-element message array
# holding the HTTP status code.
def crl_general_download( url, obj )
  # Random version number, rounded to two decimals, for the User-Agent.
  major = rand( 89.0..91.0 )
  minor = rand( 530.0..540.0 ) / 1000
  version = ( major + minor ).round( 2 )

  user_agent = obj[:format][:download][:agent].gsub( '{{version}}', version.to_s )
  target = URI( url )

  headers = {
    'User-Agent' => user_agent,
    'Accept' => 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
    'Accept-Language' => 'en-US,en;q=0.5',
    'Connection' => 'keep-alive',
    'Upgrade-Insecure-Requests' => '1',
    'Sec-Fetch-Dest' => 'document',
    'Sec-Fetch-Mode' => 'navigate',
    'Sec-Fetch-Site' => 'none',
    'Sec-Fetch-User' => '?1',
    'Pragma' => 'no-cache',
    'Cache-Control' => 'no-cache'
  }

  reply = Net::HTTP.get_response( target, headers )
  return reply.body, [ "Download: Status #{reply.code}" ]
end
crl_general_format_html_remove( html ) click to toggle source
# File lib/modules/general.rb, line 157
# Strips markup from +html+ and capitalizes every word of the remaining text.
#
# html - value converted with to_s, HTML-unescaped and then parsed as HTML.
#
# Returns the text of all text nodes, whitespace-collapsed to single spaces,
# with each word passed through String#capitalize.
def crl_general_format_html_remove( html )
  collected = ''
  decoded = CGI.unescapeHTML( html.to_s )

  # Only text nodes contribute; element nodes are skipped.
  Nokogiri::HTML( decoded ).traverse do | node |
    collected << node.text if node.text?
  end

  words = collected.strip.split( ' ' )
  return words.map( &:capitalize ).join( ' ' )
end
crl_general_format_url_s3( obj, file, query ) click to toggle source
# File lib/modules/general.rb, line 140
# Assembles the S3 URL of +file+ followed by a '?' and the encoded +query+.
#
# obj   - configuration hash; reads :bucket_name, :region, :bucket_sub_folder
#         and :bucket_folder from obj[:options][:s3].
# file  - file name appended after the folder segments.
# query - enumerable of key/value pairs passed to URI.encode_www_form.
#
# Returns the URL as a string. The '?' is always appended, even when the
# encoded query is empty.
def crl_general_format_url_s3( obj, file, query )
  s3 = obj[:options][:s3]

  segments = [
    'https://',
    s3[:bucket_name],
    '.s3.',
    s3[:region],
    '.amazonaws.com/',
    s3[:bucket_sub_folder],
    s3[:bucket_folder],
    file,
    '?',
    URI.encode_www_form( query )
  ]

  # Append piece by piece onto a mutable empty string.
  return segments.each_with_object( ''.dup ) { | segment, url | url << segment }
end
crl_general_mining_rss_one( url, response, obj ) click to toggle source
# File lib/modules/general.rb, line 58
# Parses +response+ as XML and extracts feed metadata plus one entry per
# <item> element.
#
# url      - stored unchanged as feed[:meta][:url].
# response - XML document text.
# obj      - accepted but not read by this method.
#
# Returns a hash { meta: { title:, url: }, items: [ ... ] }. Each item holds
# :title, :time ({ stamp:, utc: }), :title_viewer and :url.
def crl_general_mining_rss_one( url, response, obj )
  xml = Nokogiri::XML( response )

  feed = {
    meta: {
      # Double quotes in the feed title are swapped for single quotes.
      title: xml.at( 'title' ).text.gsub( '"', "'" ),
      url: url
    },
    items: []
  }

  xml.css( 'item' ).each do | node |
    raw_title = node.at( 'title' ).text
    # Clean the title through the dispatcher; [ 0 ] picks the result and
    # drops the messages.
    heading = crl_general( :format_html_remove, nil, nil, nil, nil, nil, { html: raw_title } )[ 0 ]

    record = {
      title: heading,
      time: {
        # NOTE(review): the node itself (not its text) is handed to
        # Time.parse - presumably relies on the node converting to a
        # string implicitly; confirm.
        stamp: Time.parse( node.at( 'pubDate' ) ).to_i,
        utc: node.at( 'pubDate' ).text
      },
      title_viewer: heading,
      url: node.at( 'link' ).text
    }

    feed[:items].push( record )
  end

  return feed
end
crl_general_mining_rss_two( url, response, obj ) click to toggle source
# File lib/modules/general.rb, line 99
# Parses +response+ as XML and extracts feed metadata plus one entry per
# <entry> element.
#
# url      - stored unchanged as feed[:meta][:url].
# response - XML document text.
# obj      - accepted but not read by this method.
#
# Returns a hash { meta: { title:, url: }, items: [ ... ] }. Each item holds
# :title, :time ({ stamp:, utc: }), :title_viewer and :url; the URL is taken
# from the href attribute of the entry's <link>.
def crl_general_mining_rss_two( url, response, obj )
  xml = Nokogiri::XML( response )

  feed = {
    meta: {
      # Double quotes in the feed title are swapped for single quotes.
      title: xml.at( 'title' ).text.gsub( '"', "'" ),
      url: url
    },
    items: []
  }

  xml.css( 'entry' ).each do | node |
    raw_title = node.at( 'title' ).text
    # Clean the title through the dispatcher; [ 0 ] picks the result and
    # drops the messages.
    heading = crl_general( :format_html_remove, nil, nil, nil, nil, nil, { html: raw_title } )[ 0 ]

    record = {
      title: heading,
      time: {
        # NOTE(review): the node itself (not its text) is handed to
        # Time.parse - presumably relies on the node converting to a
        # string implicitly; confirm.
        stamp: Time.parse( node.at( 'updated' ) ).to_i,
        utc: node.at( 'updated' ).text
      },
      title_viewer: heading,
      url: node.at( 'link' ).attribute('href').value
    }

    feed[:items].push( record )
  end

  return feed
end
crl_general_pre_title( cmd, channel, data, d_index, obj ) click to toggle source
# File lib/modules/general.rb, line 187
# Renders the title template obj[:format][:title][:str] for one feed item.
#
# Placeholders have the form {{name}} or {{name__fmt1_fmt2}}. Known names:
# cmd_name, channel_name, sym, separator, title_channel, title_item,
# title_meta. Known formats: upcase, titleize.
#
# cmd     - hash; cmd[:name] feeds {{cmd_name}}.
# channel - hash; channel[:name] and channel[:sym] are read.
# data    - feed hash; data[:items][ d_index ][:title] and data[:meta][:title].
# d_index - index of the item inside data[:items].
# obj     - configuration hash; reads :str, :symbol, :separator, :length and
#           :more from obj[:format][:title].
#
# Returns two values: the rendered (and possibly truncated) title and an
# array of messages naming every unknown placeholder or format. An unknown
# placeholder is replaced by an empty string instead of raising.
def crl_general_pre_title( cmd, channel, data, d_index, obj )
  messages = []
  title = obj[:format][:title]

  # Single pass with the block form of gsub: the inserted text is used
  # literally (backslashes are not treated as back-references) and is never
  # scanned again for placeholders.
  str = title[:str].gsub( /\{{[a-z,_,:]+\}}/ ) do | match |
    key = match.gsub( /[{:}]/, '' )
    text = key.to_sym
    formats = []

    if key.include?( '__' )
      tmp = key.split( '__' )
      formats = tmp.last.to_s.split( '_' ).map( &:to_sym )
      text = tmp.first.to_s.to_sym
    end

    insert =
      case text
      when :cmd_name
        cmd[:name]
      when :channel_name
        channel[:name].to_s.gsub( '_', ' ' )
      when :sym
        title[:symbol][ channel[:sym] ]
      when :separator
        title[:separator]
      when :title_channel
        channel[:name]
      when :title_item
        data[:items][ d_index ][:title]
      when :title_meta
        data[:meta][:title]
      else
        messages.push( "Set Title (insert): #{text} not found." )
        nil
      end

    # nil (unknown placeholder or missing value) becomes ''. Only
    # non-mutating calls follow, so the source hashes stay untouched.
    insert = insert.to_s

    formats.each do | f |
      case f
      when :upcase
        insert = insert.upcase
      when :titleize
        insert = insert.split( ' ' ).map( &:capitalize ).join( ' ' )
      else
        # Report the format name itself, not the placeholder name.
        messages.push( "Set Title (format): #{f} not found." )
      end
    end

    insert
  end

  limit = title[:length]
  str = str[ 0, limit ] + title[:more] if str.length > limit

  return str, messages
end
crl_general_pre_titles( cmd, channel, data, obj ) click to toggle source
# File lib/modules/general.rb, line 174
# Replaces the :title of every entry in data[:items] with the value produced
# by crl_general_pre_title for that entry's index.
#
# cmd, channel, obj - forwarded unchanged to crl_general_pre_title.
# data              - feed hash; its items are modified in place.
#
# Returns two values: the same +data+ object and the concatenated messages of
# all per-item calls.
def crl_general_pre_titles( cmd, channel, data, obj )
  collected = []

  data[:items].each_with_index do | entry, position |
    formatted, notes = crl_general_pre_title( cmd, channel, data, position, obj )
    collected.concat( notes )
    entry[:title] = formatted
  end

  return data, collected
end