class BerkleeValencia::SCRAPER

Public Class Methods

get_article_extended_info(url) click to toggle source
# File lib/berklee_valencia/scraper.rb, line 19
# Loads the article page at +url+ and gathers its extended details.
#
# The page is fetched with +open+ (passing <tt>:allow_redirections => :all</tt>)
# and parsed with Nokogiri::HTML.
#
# Returns a Hash with the keys:
# [:author]        text of the page's "span.author" element(s)
# [:related_links] Array handed to sort_content to be filled in
# [:body]          Array handed to sort_content to be filled in
def self.get_article_extended_info(url)
  page = Nokogiri::HTML(open(url, :allow_redirections => :all))
  details = { author: page.css("span.author").text, related_links: [], body: [] }
  # tap yields the hash to sort_content for in-place filling, then returns it.
  details.tap { |info| sort_content(page, info) }
end
get_program_extended_info(url) click to toggle source
# File lib/berklee_valencia/scraper.rb, line 46
# Loads the program page at +url+ and gathers its extended details.
#
# The page is fetched with +open+ (passing <tt>:allow_redirections => :all</tt>)
# and parsed with Nokogiri::HTML. The result starts as
# <tt>{ introduction: "", highlights: [] }</tt> and is then handed, together
# with the parsed page, to scrape_program_intro and scrape_program_highlights
# (in that order).
#
# Returns the resulting Hash.
def self.get_program_extended_info(url)
  page = Nokogiri::HTML(open(url, :allow_redirections => :all))
  { introduction: "", highlights: [] }.tap do |details|
    scrape_program_intro(page, details)
    scrape_program_highlights(page, details)
  end
end
make_articles() click to toggle source
# File lib/berklee_valencia/scraper.rb, line 5
# Scrapes the news listing at @@bv_news and hands each entry to
# BerkleeValencia::ARTICLE.new_from_scraper.
#
# Every "div#news_container div.content" node yields one attributes Hash with
# the keys :title, :category, :date, :excerpt and :url (the headline link's
# "href" value).
#
# Returns whatever +each+ returns for the matched node collection.
def self.make_articles
  listing = Nokogiri::HTML(open(@@bv_news, :allow_redirections => :all))
  entries = listing.css("div#news_container div.content")
  entries.each do |entry|
    # The headline anchor supplies both the title text and the article URL.
    headline = entry.css("div.news_excerpt h3 a")
    attrs = {
      title: headline.text,
      category: entry.css("span.category_name").text,
      date: entry.css("div.news_excerpt span.date").text,
      excerpt: entry.css("div.news_excerpt p").text,
      url: headline.attribute("href").value
    }
    BerkleeValencia::ARTICLE.new_from_scraper(attrs)
  end
end
make_programs() click to toggle source
# File lib/berklee_valencia/scraper.rb, line 30
# Scrapes the programs listing at @@bv_programs and hands each program link
# to BerkleeValencia::PROGRAM.new_from_scraper.
#
# Each "div.col-3-5" node is treated as one group; every "ul a" link inside
# it yields an attributes Hash with the keys :title (link text), :subtitle
# (text of the link's "span"), :url (the link's "href" value) and :type
# (text of the group's "h4").
#
# Returns whatever +each+ returns for the matched group collection.
def self.make_programs
  page = Nokogiri::HTML(open(@@bv_programs, :allow_redirections => :all))
  groups = page.css("div.col-3-5")
  groups.each do |group|
    group.css("ul a").each do |link|
      attrs = {
        title: link.text,
        subtitle: link.css("span").text,
        url: link.attribute("href").value,
        type: group.css("h4").text
      }
      BerkleeValencia::PROGRAM.new_from_scraper(attrs)
    end
  end
end
scrape_program_highlights(program, extended_info) click to toggle source
# File lib/berklee_valencia/scraper.rb, line 82
# Appends one highlight Hash to <tt>extended_info[:highlights]</tt> for every
# "div#tab_intro div.block_content" node found in +program+.
#
# program::       object responding to +css+ (the parsed program page)
# extended_info:: Hash whose :highlights entry supports <tt><<</tt>
#
# Each appended Hash has the keys :hl_title (text of the block's
# "p.block_content_item_title") and :hl_body (text of its "div.bk_txt").
def self.scrape_program_highlights(program, extended_info)
  blocks = program.css("div#tab_intro div.block_content")
  blocks.each do |block|
    extended_info[:highlights] << {
      hl_title: block.css("p.block_content_item_title").text,
      hl_body: block.css("div.bk_txt").text
    }
  end
end
scrape_program_intro(program, extended_info) click to toggle source
# File lib/berklee_valencia/scraper.rb, line 71
# Picks the introduction text for a program page and stores it in
# <tt>extended_info[:introduction]</tt>.
#
# program::       object responding to +css+ (the parsed program page)
# extended_info:: Hash receiving the :introduction entry
#
# Candidates are tried in this order:
# 1. the first "div#tab_intro p", when its text is non-empty;
# 2. the first "div#tab_intro h4";
# 3. the first "div#tab_intro p" whose text is longer than 150 characters.
#
# When none of them exists, <tt>extended_info[:introduction]</tt> is left
# untouched instead of raising NoMethodError on nil.
#
# Returns the stored text, or nil when nothing was stored.
def self.scrape_program_intro(program, extended_info)
  paragraphs = program.css("div#tab_intro p")
  lead = paragraphs.first # nil when the page has no paragraphs at all

  intro =
    if lead && !lead.text.empty?
      lead
    else
      program.css("div#tab_intro h4").first ||
        paragraphs.detect { |para| para.text.length > 150 }
    end

  extended_info[:introduction] = intro.text if intro
end
sort_content(article, extended_info) click to toggle source
# File lib/berklee_valencia/scraper.rb, line 57
# Walks every "div#tab_intro p" node of +article+ and sorts its content into
# +extended_info+.
#
# article::       object responding to +css+ (the parsed article page)
# extended_info:: Hash whose :body and :related_links entries support <tt><<</tt>
#
# For each paragraph:
# * if it contains non-empty "strong" text, its text is appended to :body
#   wrapped as " --- text ---";
# * otherwise its plain text is appended to :body, and when it contains an
#   "iframe" the iframe's "src" value is appended to :related_links.
#
# An iframe without a "src" attribute contributes no link; previously this
# case raised NoMethodError on nil.
#
# Returns +extended_info+.
def self.sort_content(article, extended_info)
  article.css("div#tab_intro p").each do |para|
    if para.css("strong").text != ""
      extended_info[:body] << " --- #{para.text} ---"
      next
    end

    extended_info[:body] << para.text

    iframes = para.css("iframe")
    next if iframes.length == 0

    # attribute returns nil when the first iframe carries no "src".
    src = iframes.attribute("src")
    extended_info[:related_links] << src.value if src
  end
  extended_info
end