class HolidayScraper::Parser

Constants

DATE_PATTERN
Event

Attributes

page[R]

Public Instance Methods

parse(page) click to toggle source
# File lib/holiday_scraper/parser.rb, line 8
# Walks every table header found on +page+ and collects one event per
# event header, returning them as an Array in document order.
#
# A year header updates the "current year", which is then handed to
# as_event for every event header that follows until the next year header.
# Headers that are neither year nor event headers are ignored. An event
# header seen before any year header is converted with a nil year.
def parse(page)
  year = nil
  collected = []

  each_table_header_on(page) do |header|
    if year_header?(header)
      # Remember the year text; it applies to the event rows that follow.
      year = header.text
      next
    end

    collected << as_event(year, header) if event_header?(header)
  end

  collected
end

Private Instance Methods

as_event(current_year, table_header) click to toggle source
# File lib/holiday_scraper/parser.rb, line 33
# Builds an Event from an event header.
#
# The date text is looked up via date_string_from, turned into a start and
# (optional) end date by parse_dates using +current_year+, and the resulting
# Event (named after the header's text) is passed through sanitize before
# being returned.
def as_event(current_year, table_header)
  date_text = date_string_from(table_header)
  first_day, last_day = parse_dates(date_text, current_year)
  event = Event.new(table_header.text, first_day, last_day)
  sanitize(event)
end
date_string_from(table_header) click to toggle source
# File lib/holiday_scraper/parser.rb, line 38
# Returns the text of the node that sits two siblings after +table_header+.
#
# NOTE(review): presumably the immediate sibling is a whitespace/text node
# and the one after it is the cell holding the dates -- confirm against the
# scraped markup.
def date_string_from(table_header)
  adjacent_node = table_header.next_sibling
  date_node = adjacent_node.next_sibling
  date_node.text
end
each_table_header_on(page, &block) click to toggle source
# File lib/holiday_scraper/parser.rb, line 29
# Passes every +th+ element found inside the holiday table of +page+ to
# +block+, in the order returned by the CSS query. Returns whatever the
# underlying +each+ call returns.
def each_table_header_on(page, &block)
  header_cells = holiday_table(page).css('th')
  header_cells.each(&block)
end
event_header?(table_header) click to toggle source
# File lib/holiday_scraper/parser.rb, line 42
# True when +table_header+ carries a +valign+ attribute, which is how this
# parser recognises a header that introduces an event row.
def event_header?(table_header)
  marker_attribute = 'valign'
  table_header.has_attribute?(marker_attribute)
end
holiday_table(page) click to toggle source
# File lib/holiday_scraper/parser.rb, line 25
# Runs the CSS query for +table+ elements nested under the element with id
# +contentboxsub+ on +page+ and returns the query result.
def holiday_table(page)
  selector = '#contentboxsub table'
  page.css(selector)
end
parse_dates(str, year) click to toggle source
# File lib/holiday_scraper/parser.rb, line 50
# Extracts one or two dates from +str+ and returns them as an Array.
#
# +str+ is matched against DATE_PATTERN. Capture group 1 is always used;
# capture group 2 is used only when it took part in the match. Each captured
# fragment is joined with +year+ ("<fragment> <year>") and handed to
# Date.parse_international, so the result has one element (a single date)
# or two (start and end).
#
# NOTE(review): DATE_PATTERN and Date.parse_international are defined
# outside this method; their exact format expectations are not visible here.
#
# Raises ArgumentError when +str+ (including nil) does not match
# DATE_PATTERN, instead of failing later with a NoMethodError on nil.
def parse_dates(str, year)
  match = DATE_PATTERN.match(str)
  raise ArgumentError, "unrecognised date string: #{str.inspect}" unless match

  fragments = [match[1]]
  fragments << match[2] if match[2] # the second date is optional
  fragments.map { |fragment| Date.parse_international("#{fragment} #{year}") }
end
sanitize(event) click to toggle source
# File lib/holiday_scraper/parser.rb, line 60
# Repairs an event whose end date lies before its start date and returns
# the (mutated) event.
#
# parse_dates stamps both dates with the same year, so an end date that
# precedes the start date is moved one calendar year forward. A calendar
# year (Date#next_year) is used rather than a fixed 365 days, because
# adding 365 days lands one day early whenever a Feb 29 falls in between.
#
# Events without an end date, or whose dates are already ordered, are
# returned unchanged.
#
# NOTE(review): assumes start_date/end_date are Date objects -- confirm
# against Date.parse_international.
def sanitize(event)
  finish = event.end_date
  event.end_date = finish.next_year if finish && finish < event.start_date
  event
end
year_header?(table_header) click to toggle source
# File lib/holiday_scraper/parser.rb, line 46
# True when +table_header+ carries a +colspan+ attribute, which is how this
# parser recognises a header that announces a new year.
def year_header?(table_header)
  marker_attribute = 'colspan'
  table_header.has_attribute?(marker_attribute)
end