class Sites::JobsRails42

Constants

HOST

@NOTE: I had to rename this class because a class name is not allowed to begin with a number (42JobsRails won't work). File paths follow the same convention.

JOB_ITEM_SELECTOR
PATH
STORE_DIR

Public Class Methods

new() click to toggle source
# File lib/sites/jobs_rails42.rb, line 13
# Sets up the scraper state: the listing URL built from the class's HOST and
# PATH constants, the creation time with its compact timestamp string, and
# the page/row/job counters.
def initialize
  site = self.class
  @url = "#{site::HOST}#{site::PATH}"

  now = Time.now
  @current_time = now
  @timestamp = now.strftime("%Y%m%d%H%M%S")

  @doc = nil
  # @total_pages has to be assigned before get_jobs_count, which reads it.
  @total_pages = 4
  @rows_count = 0
  @jobs_count = get_jobs_count
end

Public Instance Methods

collect_jobs(limit: nil) click to toggle source
# File lib/sites/jobs_rails42.rb, line 23
# Ensures STORE_DIR exists, then processes every listing page from 1 up to
# @total_pages in order.
#
# limit - passed through unchanged to process_page for each page
#         (defaults to nil).
#
# Returns the range of page numbers that was iterated.
def collect_jobs(limit: nil)
  FileUtils.mkdir_p(STORE_DIR)

  page_numbers = 1..@total_pages
  page_numbers.each { |page_number| process_page(page: page_number, limit: limit) }
end

Private Instance Methods

get_jobs_count() click to toggle source
# File lib/sites/jobs_rails42.rb, line 65
# Computes 25 * @total_pages (presumably 25 listings per page -- confirm
# against the site), prints it as an info line, and returns it.
def get_jobs_count
  (25 * @total_pages).tap do |estimated_total|
    puts "[Info] There are #{estimated_total} remote jobs on [42JobsRails]."
  end
end
get_row(job_url) click to toggle source
# File lib/sites/jobs_rails42.rb, line 54
# Fetches one job offer page and builds the CSV row for it.
#
# job_url - URL of the offer page; it is handed to open_page and also
#           becomes the first column of the row.
#
# Returns [job_url, location, keywords, company]. Location and keywords are
# produced by Support::OfferParser from the stringified
# '.job-offer__description' nodes; company is the text of the
# '.job-offer__summary a' nodes.
def get_row(job_url)
  offer_page = Nokogiri::HTML(open_page(job_url))
  description_markup = offer_page.css('.job-offer__description').to_s

  [
    job_url,
    Support::OfferParser.get_location(description_markup),
    Support::OfferParser.get_keywords(description_markup),
    offer_page.css('.job-offer__summary a').text
  ]
end
process_page(page:, limit:) click to toggle source
# File lib/sites/jobs_rails42.rb, line 33
# Scrapes one listing page and appends one CSV row per job link found on it.
#
# page  - page number, appended to the listing URL as "?page=N".
# limit - total number of rows (across all pages) at which collection stops,
#         or nil for no limit.
#
# When the limit has already been reached the page is not fetched at all.
# On the last page (page == @total_pages) a summary line is printed that
# reports the number of rows actually written.
#
# Returns nil.
def process_page(page:, limit:)
  unless limit == @rows_count
    current_page = "#{@url}?page=#{page}"
    doc = Nokogiri::HTML(open_page(current_page))
    puts "[Info] Getting the data from #{current_page}"

    CSV.open(filepath, 'ab') do |csv|
      doc.css(JOB_ITEM_SELECTOR).each do |link|
        # break rather than return: the summary below must still run when
        # the limit is hit on (or before) the last page.
        break if limit == @rows_count

        job_url = "#{HOST}#{link["href"]}"
        puts "[Info] Parsing #{job_url}..."

        csv << get_row(job_url)

        @rows_count += 1
      end
    end
  end

  return unless page == @total_pages

  # Report @rows_count (rows written), not the up-front @jobs_count figure,
  # and read @url directly, as the page URL above does.
  puts "[Done] Collected #{@rows_count} job offers from #{@url}. Data stored in: #{filepath}."
end