class Leonardita::Dcb::ScheduleScrapper
Constants
- WEEKDAYS
Public Instance Methods
data()
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 8
# Returns the scraped schedule data.
#
# The result of build_data is cached in @data, so the scrape only runs
# the first time this is called (as long as the cached value is truthy).
def data
  return @data if @data

  @data = build_data
end
Private Instance Methods
build_data()
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 14
# Builds a hash keyed by the text of each "h3" found among the children of
# the "#program" element(s) of the document. The value for each heading is
# the result of scrape_table on the table that accompanies it.
#
# Headings that are not paired with a table are skipped.
def build_data
  schedule = {}

  document.css("#program").children.each do |heading|
    next unless heading.name == "h3"

    # The table is looked up two siblings after the heading; presumably the
    # sibling in between is a whitespace text node -- TODO confirm.
    table = heading.next&.next
    next unless table&.name == "table"

    schedule[heading.text] = scrape_table(table)
  end

  schedule
end
row_values(row)
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 42
# Returns one cleaned string per "td" cell of the given row.
#
# Each cell's text is reduced to the runs of characters accepted by
# safe_row_regex, and those runs are joined with single spaces.
def row_values(row)
  row.css("td").map do |cell|
    tokens = cell.text.scan(safe_row_regex)
    tokens.join(" ")
  end
end
safe_row_regex()
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 46
# Pattern matching a run of characters that are kept from a cell's text:
# alphanumerics plus "/", ".", "-", "?" and ":".
#
# The Regexp is cached in @safe_row_regex after the first call.
def safe_row_regex
  return @safe_row_regex if @safe_row_regex

  @safe_row_regex = %r{[[:alnum:]/.\-?:]+}
end
scrape_table(node)
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 22
# Returns an Array with the result of scrape_table_row for every
# "tbody tr" row found under the given node, in document order.
def scrape_table(node)
  rows = node.css("tbody tr")
  rows.map { |row| scrape_table_row(row) }
end
scrape_table_row(row)
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 28
# Converts one table row into a Hash describing a single schedule entry.
#
# Cell positions, as returned by row_values(row):
#   0      name
#   1, 2   start / end
#   3..7   one cell per WEEKDAYS entry; a non-empty cell selects that day
#   8      classroom
#   10     access data (also searched for the first match of url_regex)
#
# form_url is the "href" of the first "td a" link in the row.
#
# Rows with missing cells or without a link do not raise: the affected
# keys are nil (and :days is an empty Array), so one malformed row cannot
# abort the scrape of a whole table.
#
# Returns a Hash with the keys :name, :start_at, :end_at, :days,
# :classroom, :form_url, :access_data and :url.
def scrape_table_row(row)
  values = row_values(row)
  # A range slice starting past the end of the Array returns nil.
  day_cells = values[3..7] || []
  access_data = values[10]

  {
    name: values[0],
    start_at: values[1],
    end_at: values[2],
    days: day_cells.zip(WEEKDAYS).reject { |cell, _day| cell.empty? }.map(&:last),
    classroom: values[8],
    # attribute returns nil when the row has no link or the link has no href.
    form_url: row.css("td a").attribute("href")&.value,
    access_data: access_data,
    url: access_data&.scan(url_regex)&.first
  }
end
url_regex()
click to toggle source
# File lib/leonardita/dcb/schedule_scrapper.rb, line 50
# Pattern matching an "http://" or "https://" prefix followed by a run of
# alphanumerics, ".", "/", "?" and "-".
#
# The Regexp is cached in @url_regex after the first call.
def url_regex
  return @url_regex if @url_regex

  @url_regex = %r{https?://[[:alnum:]./?\-]+}
end