class Jetel::Modules::Alexa

Public Class Methods

sources()
# File lib/jetel/modules/alexa/alexa.rb, line 32
# Lists the data sources handled by this module.
#
# @return [Array<Hash>] a newly built one-element array; its single hash has
#   the keys :name, :filename_extracted, :filename_transformed and :url.
def sources
  alexa = {
    name: 'alexa',
    # filename_downloaded: 'top-1m.csv.zip',  (key intentionally left disabled)
    filename_extracted: 'top-1m.csv',
    filename_transformed: 'top-1m.csv',
    url: 'http://s3.amazonaws.com/alexa-static/top-1m.csv.zip'
  }

  [alexa]
end

Public Instance Methods

download(global_options, options, args)
# File lib/jetel/modules/alexa/alexa.rb, line 45
# Calls download_source once for every entry of self.class.sources.
#
# Each call receives the source entry plus global_options.merge(options),
# computed afresh for that entry.
#
# @param global_options [#merge] base options (presumably a Hash — confirm
#   against the caller)
# @param options [Object] options merged on top of global_options
# @param args [Object] accepted but not used by this method
# @return the result of pmap over the sources, i.e. the collected
#   download_source return values
def download(global_options, options, args)
  catalog = self.class.sources
  catalog.pmap { |entry| download_source(entry, global_options.merge(options)) }
end
extract(global_options, options, args)
# File lib/jetel/modules/alexa/alexa.rb, line 51
# Calls unzip once for every entry of self.class.sources.
#
# Each call receives the source entry plus global_options.merge(options),
# computed afresh for that entry.
#
# @param global_options [#merge] base options (presumably a Hash — confirm
#   against the caller)
# @param options [Object] options merged on top of global_options
# @param args [Object] accepted but not used by this method
# @return the result of pmap over the sources, i.e. the collected unzip
#   return values
def extract(global_options, options, args)
  catalog = self.class.sources
  catalog.pmap { |entry| unzip(entry, global_options.merge(options)) }
end
transform(global_options, options, args)
# File lib/jetel/modules/alexa/alexa.rb, line 57
# Writes, for every entry of self.class.sources, a copy of the extracted file
# with a 'rank,url' header line prepended.
#
# For each source the input path, output path and destination directory are
# obtained from the extracted_file, transformed_file and transform_dir
# helpers. The destination directory is created if missing, then the input is
# streamed into the output in 4096-byte reads so the whole file is never held
# in memory.
#
# @param global_options [#merge] base options (presumably a Hash — confirm
#   against the caller)
# @param options [Object] options merged on top of global_options
# @param args [Object] accepted but not used by this method
# @return the result of pmap over the sources (each block evaluates to nil,
#   the value of the finished copy loop)
def transform(global_options, options, args)
  self.class.sources.pmap do |source|
    # Merge once per source; all three helpers take the same effective options.
    opts = global_options.merge(options)

    # Locals are named differently from the helpers so they do not shadow them.
    input_path = extracted_file(source, opts)
    output_path = transformed_file(source, opts)
    dest_dir = transform_dir(source, opts)

    puts "Transforming #{input_path}"

    FileUtils.mkdir_p(dest_dir)
    File.open(input_path, 'rt') do |fin|
      File.open(output_path, 'wt') do |fout|
        # The input is copied verbatim after this header line.
        fout.puts('rank,url')

        # IO#read(length) returns nil at end of file, which ends the loop.
        while (chunk = fin.read(4096))
          fout.write(chunk)
        end
      end
    end
  end
end