class HolidayScraper::Parser
Constants
- DATE_PATTERN
- Event
Attributes
page[R]
Public Instance Methods
parse(page)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 8
#
# Walks every table header yielded by each_table_header_on(page) and builds
# the list of events found there.
#
# A header recognised by year_header? updates the year applied to the
# headers that follow it; a header recognised by event_header? is converted
# with as_event using the most recently seen year (nil if none was seen yet).
# Headers matching neither predicate are ignored.
#
# page - the document object handed to each_table_header_on.
#
# Returns an Array of the values produced by as_event, in encounter order.
def parse(page)
  year = nil
  collected = []

  each_table_header_on(page) do |header|
    # Year headers only set context for subsequent event headers.
    if year_header?(header)
      year = header.text
      next
    end

    collected << as_event(year, header) if event_header?(header)
  end

  collected
end
Private Instance Methods
as_event(current_year, table_header)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 33
#
# Builds an Event from an event table header.
#
# The date text is fetched with date_string_from and converted by
# parse_dates using current_year; the header's own text becomes the first
# argument to Event.new. The new event is passed through sanitize before
# being returned.
#
# current_year - the year value forwarded to parse_dates.
# table_header - the header node describing the event.
#
# Returns whatever sanitize returns for the newly built Event.
def as_event(current_year, table_header)
  date_text = date_string_from(table_header)
  # parse_dates may return a single date, in which case end_date is nil.
  start_date, end_date = parse_dates(date_text, current_year)
  event = Event.new(table_header.text, start_date, end_date)
  sanitize(event)
end
date_string_from(table_header)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 38
#
# Returns the text of the node located two next_sibling steps after
# table_header.
#
# NOTE(review): the intermediate sibling is presumably a whitespace text node
# between the header and the date cell - confirm against the scraped markup.
def date_string_from(table_header)
  adjacent_node = table_header.next_sibling
  date_node = adjacent_node.next_sibling
  date_node.text
end
each_table_header_on(page, &block)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 29
#
# Iterates over every 'th' element found inside holiday_table(page),
# forwarding the given block to the collection's each.
#
# page  - the document object handed to holiday_table.
# block - invoked once per header element.
#
# Returns whatever the collection's each returns.
def each_table_header_on(page, &block)
  header_cells = holiday_table(page).css('th')
  header_cells.each(&block)
end
event_header?(table_header)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 42
#
# Reports whether table_header is an event header, which this parser
# recognises by the presence of a 'valign' attribute.
#
# Returns the result of table_header.has_attribute?.
def event_header?(table_header)
  marker_attribute = 'valign'
  table_header.has_attribute?(marker_attribute)
end
holiday_table(page)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 25
#
# Selects the holiday table(s) from page using the CSS selector
# '#contentboxsub table'.
#
# page - any object responding to css (presumably a parsed HTML document -
#        confirm against the caller).
#
# Returns whatever page.css returns for that selector.
def holiday_table(page)
  selector = '#contentboxsub table'
  page.css(selector)
end
parse_dates(str, year)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 50
#
# Extracts one or two dates from str using DATE_PATTERN and resolves each of
# them against year via Date.parse_international.
#
# str  - the date text to match against DATE_PATTERN.
# year - the year appended to every captured date fragment.
#
# Returns an Array holding the date built from capture group 1 and, when
# capture group 2 is present, the date built from it as a second element.
# Raises ArgumentError when str does not match DATE_PATTERN at all
# (previously this surfaced as a NoMethodError on nil).
def parse_dates(str, year)
  match = DATE_PATTERN.match(str)
  raise ArgumentError, "unrecognised date string: #{str.inspect}" unless match

  dates = [Date.parse_international("#{match[1]} #{year}")]
  # The second capture is optional: single-day events carry no end date.
  dates << Date.parse_international("#{match[2]} #{year}") if match[2]
  dates
end
sanitize(event)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 60
#
# Repairs an event whose end date lies before its start date by moving the
# end date one calendar year forward. Events without an end date, or whose
# end date is not earlier than the start date, are left untouched.
#
# event - an object exposing start_date, end_date and end_date=; it is
#         modified in place.
#         NOTE(review): assumes the dates are Date instances (they respond
#         to next_year) - confirm Date.parse_international's return type.
#
# Returns the same event object.
def sanitize(event)
  end_date = event.end_date
  return event unless end_date && end_date < event.start_date

  # Advance by a calendar year rather than a fixed 365 days: adding 365 days
  # lands one day short whenever the span crosses a 29 February.
  event.end_date = end_date.next_year
  event
end
year_header?(table_header)
click to toggle source
# File lib/holiday_scraper/parser.rb, line 46
#
# Reports whether table_header is a year header, which this parser
# recognises by the presence of a 'colspan' attribute.
#
# Returns the result of table_header.has_attribute?.
def year_header?(table_header)
  marker_attribute = 'colspan'
  table_header.has_attribute?(marker_attribute)
end