class Sites::JobsRails42
Constants
- HOST
@NOTE: I had to rename this class because a class name is not allowed to start with a number (42JobsRails won't work). File paths follow this convention.
- JOB_ITEM_SELECTOR
- PATH
- STORE_DIR
Public Class Methods
new()
click to toggle source
# File lib/sites/jobs_rails42.rb, line 13
# Prepares the scraper state: the listing URL assembled from the class's HOST
# and PATH constants, the current time plus its compact string form, and the
# page/row counters.
def initialize
  site = self.class
  @url = "#{site::HOST}#{site::PATH}"

  @current_time = Time.now
  @timestamp = @current_time.strftime("%Y%m%d%H%M%S")

  @doc = nil
  @rows_count = 0
  @total_pages = 4
  # Has to come after @total_pages is assigned: get_jobs_count reads it.
  @jobs_count = get_jobs_count
end
Public Instance Methods
collect_jobs(limit: nil)
click to toggle source
# File lib/sites/jobs_rails42.rb, line 23
# Makes sure STORE_DIR exists, then hands every listing page (1 through
# @total_pages) to process_page until the row limit, if any, has been reached.
#
# @param limit [Integer, nil] number of rows at which collection stops;
#   nil never equals the row counter, so every page is processed
# @return [Range] the page range that was iterated
def collect_jobs(limit: nil)
  FileUtils.mkdir_p STORE_DIR

  (1..@total_pages).each do |page|
    # Once the requested number of rows has been written the remaining pages
    # have nothing to add, so skip them instead of fetching each one only for
    # process_page to return on its first item. `next` rather than `break`
    # keeps the value returned by `each` the same as before.
    next if limit == @rows_count

    process_page(page: page, limit: limit)
  end
end
Private Instance Methods
get_jobs_count()
click to toggle source
# File lib/sites/jobs_rails42.rb, line 65
# Computes the job count as per_page * @total_pages (nothing is fetched or
# counted here), prints it to stdout and returns it.
#
# @param per_page [Integer] number of offers assumed per listing page
# @return [Integer] per_page multiplied by @total_pages
def get_jobs_count(per_page: 25)
  jobs_count = per_page * @total_pages
  puts "[Info] There are #{jobs_count} remote jobs on [42JobsRails]."
  jobs_count
end
get_row(job_url)
click to toggle source
# File lib/sites/jobs_rails42.rb, line 54
# Loads a single job page and builds the row stored for it.
#
# @param job_url [String] address of the job offer page
# @return [Array] job_url, then the location and keywords that
#   Support::OfferParser extracts from the '.job-offer__description' markup,
#   then the text of the '.job-offer__summary a' nodes
def get_row(job_url)
  page = Nokogiri::HTML(open_page(job_url))
  description_html = page.css('.job-offer__description').to_s

  [
    job_url,
    Support::OfferParser.get_location(description_html),
    Support::OfferParser.get_keywords(description_html),
    page.css('.job-offer__summary a').text
  ]
end
process_page(page:, limit:)
click to toggle source
# File lib/sites/jobs_rails42.rb, line 33
# Fetches one listing page and appends a CSV row (built by get_row) for every
# node matching JOB_ITEM_SELECTOR, stopping as soon as the row limit is hit.
#
# @param page [Integer] listing page number, sent as the `page` query parameter
# @param limit [Integer, nil] row count at which to stop; nil never equals
#   the counter, so nothing stops early
# @return [nil]
def process_page(page:, limit:)
  page_url = "#{@url}?page=#{page}"
  listing = Nokogiri::HTML(open_page(page_url))
  puts "[Info] Getting the data from #{page_url}"

  CSV.open(filepath, 'ab') do |csv|
    listing.css(JOB_ITEM_SELECTOR).each do |anchor|
      # Leaves the whole method, so the summary line below is skipped as well.
      return if limit == @rows_count

      offer_url = "#{HOST}#{anchor["href"]}"
      puts "[Info] Parsing #{offer_url}..."
      csv << get_row(offer_url)
      @rows_count += 1
    end
  end

  # Only the last page prints the summary.
  return unless page == @total_pages

  # NOTE(review): this reports @jobs_count (computed in the constructor as
  # 25 * @total_pages), not @rows_count, the number of rows actually written
  # -- confirm which figure is intended. `url` is not defined in this view.
  puts "[Done] Collected #{@jobs_count} job offers from #{url}. Data stored in: #{filepath}."
end