class Leonardita::Dcb::ScheduleScrapper

Constants

WEEKDAYS

Public Instance Methods

data() click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 8
# Returns the scraped data, computing it through build_data the first time
# and handing back the cached value on later calls (as long as that value
# is truthy).
def data
  return @data if @data

  @data = build_data
end

Private Instance Methods

build_data() click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 14
# Walks the direct children of the "#program" element and builds a hash
# keyed by the text of each <h3> heading. A heading is kept only when the
# node two siblings after it is a <table>; its value is that table run
# through scrape_table.
def build_data
  document.css("#program").children.each_with_object({}) do |heading, schedules|
    next unless heading.name == "h3"

    # The table is looked up two siblings ahead of the heading, not one.
    table = heading.next&.next
    next unless table&.name == "table"

    schedules[heading.text] = scrape_table(table)
  end
end
row_values(row) click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 42
# Returns one cleaned string per <td> in the row: the runs of characters
# accepted by safe_row_regex, joined with single spaces. A cell with no
# accepted characters becomes "".
def row_values(row)
  row.css("td").map do |cell|
    accepted_runs = cell.text.scan(safe_row_regex)
    accepted_runs.join(" ")
  end
end
safe_row_regex() click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 46
# Memoized pattern matching a run of characters kept from cell text:
# alphanumerics plus "/", ".", "-", "?" and ":".
def safe_row_regex
  return @safe_row_regex if @safe_row_regex

  @safe_row_regex = %r{[[:alnum:]/.\-?:]+}
end
scrape_table(node) click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 22
# Returns an array with one scrape_table_row result per "tbody tr" row
# found under the given node, in document order.
def scrape_table(node)
  node.css("tbody tr").map { |row| scrape_table_row(row) }
end
scrape_table_row(row) click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 28
# Converts one table row into a hash, reading cells by fixed position
# from row_values:
#
#   0 name, 1 start_at, 2 end_at, 3..7 day cells, 8 classroom, 10 access_data
#
# days holds the WEEKDAYS entries whose positionally matching day cell is
# non-empty. form_url is the href of the first link found in any cell of
# the row, and url is the first url_regex match inside access_data.
#
# Rows that are shorter than expected, or that carry no link, yield nil
# for the missing fields (and an empty days list) instead of raising
# NoMethodError.
def scrape_table_row(row)
  values = row_values(row)
  access_data = values[10]
  # Array#[] with a range returns nil when the row has fewer than 3 cells.
  day_cells = Array(values[3..7])

  {
    name: values[0],
    start_at: values[1],
    end_at: values[2],
    days: day_cells.zip(WEEKDAYS).reject { |cell, _day| cell.empty? }.map(&:last),
    classroom: values[8],
    # attribute returns nil when the row has no link or the link has no href.
    form_url: row.css("td a").attribute("href")&.value,
    access_data: access_data,
    url: access_data&.slice(url_regex)
  }
end
url_regex() click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 50
# Memoized pattern matching an http:// or https:// prefix followed by a
# run of alphanumerics, ".", "/", "?" and "-".
def url_regex
  @url_regex || (@url_regex = %r{https?://[[:alnum:]./?\-]+})
end