class Whatsa::Article

Attributes

contents[R]
sections[RW]
title[R]

Public Class Methods

new(noko_doc) click to toggle source
# File lib/whatsa/article.rb, line 11
# Builds an article from a parsed page document.
#
# The title is the text of the document's <h1>; @contents holds the child
# nodes of the page's content container; @sections is produced by
# build_sections and then pruned by remove_empty_sections.
#
# noko_doc - parsed document responding to #css (presumably a Nokogiri
#            document, going by the parameter name — confirm with callers).
def initialize(noko_doc)
  @title = noko_doc.css('h1').text

  body = noko_doc.css('#mw-content-text')
  # When a .mw-parser-output wrapper is found inside the body, its children
  # are the page content; otherwise the body's own children are used.
  parser_output = body.css('.mw-parser-output')
  container = parser_output.empty? ? body : parser_output
  @contents = container.children

  @sections = build_sections(noko_doc)

  # Drop the call below to keep sections that have no <p>/<ul> content; they
  # will then display as "[no displayable information]".
  remove_empty_sections
end

Public Instance Methods

choose_section(choice) click to toggle source
# File lib/whatsa/article.rb, line 40
# Picks a section either by position or by title.
#
# choice - when choice.to_i is positive it is treated as a 1-based position
#          in sections; anything else is handed to get_section_by_title.
#
# Returns the matching section, or nil when the position is past the end of
# sections (title lookups return whatever get_section_by_title returns).
def choose_section(choice)
  position = choice.to_i
  return sections[position - 1] if position > 0

  get_section_by_title(choice)
end
full_text() click to toggle source
# File lib/whatsa/article.rb, line 28
# Returns the full text of the article's first section.
#
# Despite the name this is not the text of the whole article: it is the full
# text of the leading (summary) section. The method is called #full_text so
# that an article can be used wherever an object with #full_text is expected.
#
# Returns the no_text_found placeholder when there are no sections.
def full_text
  if sections.empty?
    no_text_found(for_subject: 'full summary')
  else
    sections.first.full_text
  end
end
section_titles() click to toggle source
# File lib/whatsa/article.rb, line 36
# Lists the title of every section, in the same order as sections.
#
# Returns an Array with one title per section.
def section_titles
  sections.map(&:title)
end
summary() click to toggle source
# File lib/whatsa/article.rb, line 23
# Returns the summary of the article's first section.
#
# Returns the no_text_found placeholder when there are no sections.
def summary
  unless sections.empty?
    return sections.first.summary
  end

  no_text_found(for_subject: 'summary')
end

Private Instance Methods

build_section_nodes(header_level, start_node, siblings = []) click to toggle source
# File lib/whatsa/article.rb, line 77
# Collects the content nodes that follow start_node within its section.
#
# Walks the next_sibling chain beginning after start_node. The walk ends when
# the chain runs out, or — provided header_level itself looks like a header
# tag ("h" plus a digit) — at the first header sibling whose tag name compares
# less than or equal to header_level (so "h2" stops at "h1"/"h2" but carries
# on through "h3"). Only p/ul/ol/h1-h6 siblings with non-blank text are kept.
#
# This is a loop rather than one recursive call per sibling, so a long run of
# siblings cannot exhaust the call stack, and the result is appended to
# instead of being re-copied for every kept node.
#
# header_level - tag name of the header that opens the section (e.g. "h2").
# start_node   - node whose following siblings are scanned; it must respond
#                to #next_sibling, and each sibling to #name and #text.
# siblings     - nodes to seed the result with; the array is not modified.
#
# Returns a new Array: the seed nodes followed by the kept sibling nodes.
def build_section_nodes(header_level, start_node, siblings = [])
  collected = siblings.dup
  node = start_node.next_sibling

  until node.nil?
    # A header of the same or a higher rank marks the start of the next section.
    break if node.name =~ /^h\d$/i && header_level =~ /^h\d$/i && node.name <= header_level

    displayable = %w(p ul ol h1 h2 h3 h4 h5 h6).include?(node.name) && !node.text.strip.empty?
    collected << node if displayable
    node = node.next_sibling
  end

  collected
end
build_sections(noko_doc) click to toggle source
# File lib/whatsa/article.rb, line 67
# Builds the article's list of sections from the parsed document.
#
# One section is created for the parent element of every .mw-headline node:
# its title comes from heading_to_title applied to the header text, and its
# content is the stripped text of each node returned by build_section_nodes.
# Each of these sections gets this article assigned as its #article.
# An introduction section, titled "<title> - Introduction" and filled from
# intro_pars, is placed in front of them.
#
# NOTE(review): the introduction section is not given an article reference,
# unlike the header-based sections — confirm that this is intended.
#
# noko_doc - parsed document responding to #css.
#
# Returns an Array of Whatsa::Section with the introduction first.
def build_sections(noko_doc)
  body_sections = noko_doc.css('.mw-headline').map do |headline|
    header = headline.parent
    section_title = heading_to_title(header.text)
    paragraphs = build_section_nodes(header.name, header).map { |node| node.text.strip }

    section = Whatsa::Section.new(section_title, paragraphs)
    section.article = self
    section
  end

  introduction = Whatsa::Section.new("#{title} - Introduction", intro_pars)
  [introduction] + body_sections
end
get_section_by_title(section_title) click to toggle source
# File lib/whatsa/article.rb, line 54
# Finds the first section whose title matches section_title, ignoring case.
#
# The query is converted with #to_s before comparing, so a nil or numeric
# argument (choose_section forwards any choice whose to_i is not positive)
# yields "no match" instead of raising NoMethodError on #downcase. The
# lower-cased query is computed once rather than once per section.
#
# section_title - title to look for; compared via its #to_s form.
#
# Returns the matching section, or nil when none matches.
def get_section_by_title(section_title)
  wanted = section_title.to_s.downcase
  sections.find { |section| section.title.downcase == wanted }
end
intro_pars() click to toggle source
# File lib/whatsa/article.rb, line 58
# Extracts the introduction paragraphs: the non-empty <p> texts that appear
# before the first <h2> among the content nodes.
#
# When there is no <h2> at all (an article without sections), every content
# node belongs to the introduction. The cut-off therefore falls back to the
# full length of the node set; a fallback of -1 combined with the exclusive
# range below would silently drop the last node.
#
# Returns an Array of paragraph strings with empty strings removed.
def intro_pars
  # If a .mw-parser-output wrapper is found among the contents it is used as
  # the node set to scan; otherwise the contents themselves are scanned.
  parser_output = contents.css('.mw-parser-output')
  content_nodes = parser_output.empty? ? contents : parser_output

  first_h2 = content_nodes.to_a.index { |element| element.name == 'h2' }
  intro_length = first_h2 || content_nodes.length

  content_nodes[0...intro_length].css('p').map(&:text).reject(&:empty?)
end
no_text_found(for_subject: title) click to toggle source
# File lib/whatsa/article.rb, line 50
# Builds the placeholder message shown when no text is available.
#
# for_subject - what the text was missing for; defaults to the article title.
#
# Returns a String of the form "[no text found for <subject>]".
def no_text_found(for_subject: title)
  subject = for_subject
  "[no text found for #{subject}]"
end
remove_empty_sections() click to toggle source
# File lib/whatsa/article.rb, line 86
# Removes, in place, every section whose summary is the
# "[no displayable information]" placeholder.
#
# Returns what Array#reject! returns: the sections array when something was
# removed, nil when nothing was.
def remove_empty_sections
  placeholder = "[no displayable information]"
  sections.reject! { |section| section.summary == placeholder }
end