class Object
Public Instance Methods
export_to_file(hash_data, dir)
click to toggle source
# File lib/history_scraper.rb, line 63
# Serializes +hash_data+ to JSON, writes it to the file at +dir+ and prints a
# confirmation line to standard output.
#
# @param hash_data [#to_json] data to store (anything responding to +to_json+)
# @param dir [String] path of the file to write; despite the name it is used
#   as a file path, and any existing content is overwritten
# @return [nil] the result of the final +puts+
def export_to_file(hash_data, dir)
  # Serialize first so a failing to_json never leaves a truncated file behind.
  serialized = hash_data.to_json
  File.open(dir, 'w') { |file| file.write(serialized) }
  puts "Results stored in #{dir}"
end
extract_from(day, month)
click to toggle source
# File lib/history_scraper.rb, line 36
# Downloads the English Wikipedia page for the given calendar day and pulls
# out its summary paragraph plus the "Events", "Births" and "Deaths" lists.
#
# @param day [Integer, String] day of the month as it appears in the page
#   title (e.g. 5)
# @param month [String] English month name as it appears in the page title
#   (e.g. "March")
# @return [Array] four elements: the first paragraph of the page content whose
#   text contains "<month> <day>" (nil if none does), followed by the parsed
#   and compacted events, births and deaths lists as returned by +parse_ul+
# @raise [NoMethodError] when one of the three section anchors is missing from
#   the page (+css(...)[0]+ is nil); +scrap_year+ rescues this to skip the date
def extract_from(day, month)
  url = "https://en.wikipedia.org/wiki/#{month}_#{day}"
  # URI.open instead of Kernel#open: Kernel#open stopped handling URLs in
  # Ruby 3.0 (and could be abused to spawn processes). The block form closes
  # the downloaded stream once Nokogiri has finished parsing it.
  html = URI.open(url) { |page| Nokogiri::HTML(page) }

  needle = "#{month} #{day}"
  description = html.css('#mw-content-text p')
                    .map(&:text)
                    .find { |text| text.include?(needle) }

  # Each section's <ul> is the element following the parent of the node that
  # carries the section id. parse_ul yields nil for entries it cannot split,
  # hence the compact.
  events, births, deaths = %w[Events Births Deaths].map do |section_id|
    parse_ul(html.css("##{section_id}")[0].parent.next_element).compact
  end

  [description, events, births, deaths]
end
form_date(day_index, month_index)
click to toggle source
# File lib/history_scraper.rb, line 31
# Converts numeric day and month indexes into the day number and the English
# month name.
#
# The pair is parsed with +Date._strptime+, which only range-checks each field
# on its own; it does not verify that the day exists in that month (31/2 is
# accepted).
#
# @param day_index [Integer] day of the month
# @param month_index [Integer] month number, 1 for January
# @return [Array(Integer, String)] day of the month and month name
# @raise [NoMethodError] when the input cannot be parsed, because
#   +Date._strptime+ then returns nil
def form_date(day_index, month_index)
  parsed = Date._strptime([day_index, month_index].join('/'), '%d/%m')
  day_of_month = parsed[:mday]
  month_name = Date::MONTHNAMES[parsed[:mon]]
  [day_of_month, month_name]
end
parse_keywords(li)
click to toggle source
# File lib/history_scraper.rb, line 59
# Collects the links found inside a list item.
#
# @param li [#css] node to search; presumably a Nokogiri element
# @return [Array<Hash>] one hash per +a+ element, with the element's +title+
#   and +href+ attribute values under the +:title+ and +:href+ keys
def parse_keywords(li)
  li.css('a').map do |anchor|
    %w[title href].each_with_object({}) do |attribute, keyword|
      keyword[attribute.to_sym] = anchor[attribute]
    end
  end
end
parse_ul(ul)
click to toggle source
# File lib/history_scraper.rb, line 50
# Turns every +li+ under +ul+ into a year/description record.
#
# An item's text is split on " – " (en dash surrounded by spaces). The first
# part becomes the year and the remaining parts, re-joined with the same
# separator, become the description.
#
# @param ul [#css] list node to read; presumably a Nokogiri element
# @return [Array<Hash, nil>] one entry per item, in order: a hash with +:year+,
#   +:data+ and +:kw+ (the result of +parse_keywords+), or nil when the text
#   does not split into at least two parts. Callers are expected to compact.
def parse_ul(ul)
  separator = ' – '
  ul.css('li').map do |item|
    parts = item.text.split(separator)
    if parts.size < 2
      # No year/description pair in this item; leave a nil placeholder.
      nil
    else
      { year: parts.first, data: parts.drop(1).join(separator), kw: parse_keywords(item) }
    end
  end
end
scrap_year(output_dir)
click to toggle source
# File lib/history_scraper.rb, line 6
# Scrapes every day of the year and stores the collected data via
# +export_to_file+.
#
# All combinations of month 1..12 and day 1..31 are tried, month by month.
# A combination for which +form_date+ or +extract_from+ raises NoMethodError
# is reported on standard output and left out of the result.
#
# @param output_dir [String] destination handed unchanged to +export_to_file+
# @return [Object] whatever +export_to_file+ returns
def scrap_year(output_dir)
  result = {}
  month_day_pairs = (1..12).to_a.product((1..31).to_a)

  month_day_pairs.each do |month_index, day_index|
    begin
      day, month = form_date(day_index, month_index)
      puts "Scraping #{month} #{day}..."
      description, events, births, deaths = extract_from(day, month)
      entry = { description: description, events: events, births: births, deaths: deaths }
      result[:"#{month}-#{day}"] = entry
    rescue NoMethodError
      # NoMethodError is the signal used for "nothing to scrape for this date".
      puts 'It seems this date does not have any episodes.'
    end
  end

  export_to_file(result, output_dir)
end