class Sites::ElixirCompanies
Constants
- HOST
- JOB_ITEM_SELECTOR
- PATH
- STORE_DIR
Public Class Methods
new()
click to toggle source
# File lib/sites/elixir_companies.rb, line 11
#
# Prepares the scraper state: the listing URL built from the class-level
# HOST and PATH constants, the creation time and its compact timestamp,
# the page/row counters, and the estimated jobs count.
def initialize
  site = self.class

  @url = "#{site::HOST}#{site::PATH}"
  @current_time = Time.now
  @timestamp = @current_time.strftime("%Y%m%d%H%M%S")
  @doc = nil

  @rows_count = 0
  # Number of listing pages to walk; hard-coded here.
  @total_pages = 26
  # Must run after @total_pages is set: the estimate is derived from it.
  @jobs_count = get_jobs_count
end
Public Instance Methods
collect_companies(limit: nil)
click to toggle source
# File lib/sites/elixir_companies.rb, line 21
#
# Walks the listing pages from 1 to @total_pages and hands each one to
# process_page, after making sure STORE_DIR exists.
#
# @param limit [Integer, nil] maximum number of rows to collect in total;
#   nil means no limit.
def collect_companies(limit: nil)
  FileUtils.mkdir_p STORE_DIR

  (1..@total_pages).each do |page|
    process_page(page: page, limit: limit)

    # process_page stops writing rows once @rows_count equals the limit, but
    # it only checks after downloading the page. Without this break every
    # remaining page would still be fetched just to be discarded.
    # Uses the same equality test as process_page so both agree on when the
    # limit has been reached.
    break if limit == @rows_count
  end
end
companies_count()
click to toggle source
# File lib/sites/elixir_companies.rb, line 29
#
# Number of company rows written so far (the @rows_count counter that
# process_page increments for every row it appends).
def companies_count
  @rows_count
end
Private Instance Methods
get_jobs_count()
click to toggle source
# File lib/sites/elixir_companies.rb, line 81
#
# Estimates the number of listed jobs from the page count, prints the
# estimate, and returns it.
def get_jobs_count
  # roughly - first page has 14 items
  (16 * @total_pages).tap do |estimate|
    puts "[Info] There are #{estimate} remote jobs on [ElixirCompanies]."
  end
end
get_row(company_box)
click to toggle source
# File lib/sites/elixir_companies.rb, line 51
#
# Builds one CSV row from a company box element.
#
# @param company_box an element responding to `css(selector)` (whose result
#   responds to `text`) and to `[]` for attribute lookup.
# @return [Array] `[title, industry, website, blog, location, hiring]`, where
#   `blog` is nil when no blog entry is detected and `hiring` is "Hiring!"
#   or nil.
def get_row(company_box)
  company_title = company_box.css('div.content p.title').text
  info_text = company_box.css('div.content.company-info p').text

  # The values sit between span elements, so they are recovered from the
  # flattened text: keep lines that contain at least one letter, trim them,
  # and drop the link labels.
  fields = info_text.split("\n")
                    .select { |line| line.match?(/[a-zA-Z]/) }
                    .map(&:strip)
                    .reject { |line| line == "GitHub" || line == "Add a job" }

  industry, company_website, third = fields

  # The third entry is treated as a blog link when it looks like a URL or
  # mentions "blog"; the location is then read from index 4.
  # NOTE(review): index 3 is skipped in that case, presumably a label line;
  # verify against the page markup.
  blog_present = third && (third.include?("/") || third.include?("blog"))
  blog = blog_present ? third : nil
  location = blog_present ? fields[4] : third

  # to_s guards against elements without a class attribute (lookup gives nil).
  hiring = company_box["class"].to_s.include?("has-ribbon") ? "Hiring!" : nil

  [company_title, industry, company_website, blog, location, hiring]
end
process_page(page:, limit:)
click to toggle source
# File lib/sites/elixir_companies.rb, line 35
#
# Downloads one listing page and appends a row per company box to the CSV
# file at `filepath`, incrementing @rows_count for every row written.
#
# @param page [Integer] page number, sent as the `page` query parameter.
# @param limit [Integer, nil] stop (and return immediately) once @rows_count
#   equals this value; nil never matches, so nothing is cut off.
def process_page(page:, limit:)
  current_page = "#{@url}?page=#{page}"
  doc = Nokogiri::HTML(open_page(current_page))
  puts "[Info] Getting the data from #{current_page}"

  # 'ab' appends, so rows from successive pages accumulate in the same file.
  CSV.open(filepath, 'ab') do |csv|
    doc.css(JOB_ITEM_SELECTOR).each do |company_box|
      # Leaves the whole method, so no summary is printed for a cut-off run.
      return if limit == @rows_count

      csv << get_row(company_box)
      @rows_count += 1
    end
  end

  return unless page == @total_pages

  # Report the number of rows actually written (@rows_count) rather than the
  # rough up-front estimate in @jobs_count, and use @url as the rest of this
  # method does.
  puts "[Done] Collected #{@rows_count} job offers from #{@url}. Data stored in: #{filepath}."
end