class OpenWebslides::Converter::Pressbooks

Attributes

html[RW]
result[RW]

Public Instance Methods

from_xhtml(html) click to toggle source

Convert an XHTML document to Open Webslides

@param [String] html XHTML document string
@return [Result] Result object containing all data

# File lib/openwebslides/converter/pressbooks.rb, line 20
# Convert an XHTML document to Open Webslides
#
# Stores the parsed document in @html and a fresh Result in @result, then
# builds a root item whose children are, in order: the front matter, every
# part, and the back matter.
#
# @param [String] html XHTML document string
# @return [Result] Result object containing all data
def from_xhtml(html)
  @html = Nokogiri::HTML(html)
  @result = Result.new

  document_root = Content::Root.new

  parse_metadata

  # Front matter first, then the parts, then back matter
  document_root.child_item_ids << parse_matter('front').id
  document_root.child_item_ids.concat(parse_parts.map { |part| part.id })
  document_root.child_item_ids << parse_matter('back').id

  # The root is registered last, after all of its descendants
  result.content_items << document_root

  result
end

Protected Instance Methods

parse_chapter(html) click to toggle source

Parse chapter XHTML object into content item

# File lib/openwebslides/converter/pressbooks.rb, line 107
# Parse chapter XHTML object into content item
#
# Creates a heading from the `.chapter-title` node, then walks the direct
# children of the `.chapter-ugc` node:
# - `p`  becomes a paragraph under the current heading,
# - `h4` becomes a new heading under the chapter and the current heading
#   for whatever follows,
# - `ul`/`ol` contribute one paragraph per `li` to the current heading.
# Any other child is ignored.
#
# @param html chapter node responding to `at`
#   (NOTE(review): presumably a Nokogiri node — confirm against caller)
# @return the chapter heading content item
def parse_chapter(html)
  # Create the chapter heading and register it in the result
  chapter = Content::Heading.new
  chapter.text = html.at('.chapter-title').content
  result.content_items << chapter

  # Start with chapter heading as current heading
  heading = chapter

  html.at('.chapter-ugc').children.each do |child|
    case child.name
    when 'p'
      paragraph = parse_paragraph child

      # parse_paragraph returns nil for paragraphs without text
      heading.sub_item_ids << paragraph.id if paragraph
    when 'h4'
      h = Content::Heading.new
      h.text = child.content
      result.content_items << h

      # Sub-headings always hang off the chapter itself, never off each other
      chapter.sub_item_ids << h.id

      # Following paragraphs belong to this sub-heading
      heading = h
    when 'ul', 'ol'
      # Empty list items yield nil from parse_paragraph; drop them so that
      # collecting ids below cannot fail on nil
      paragraphs = child.search('li').map { |l| parse_paragraph l }.compact

      heading.sub_item_ids.concat paragraphs.map(&:id)
    end
  end

  chapter
end
parse_matter(position) click to toggle source

Find and parse matter

@param [String] position 'front' or 'back'

# File lib/openwebslides/converter/pressbooks.rb, line 53
# Find and parse matter
#
# Builds a heading from the `.<position>-matter-title` node, attaches the
# paragraphs found under `.<position>-matter-ugc`, and registers the heading
# in the result.
#
# @param [String] position 'front' or 'back'
# @return the heading content item for the requested matter
def parse_matter(position)
  heading = Content::Heading.new
  heading.text = html.at(".#{position}-matter-title").content

  # Paragraphs without text come back as nil from parse_paragraph; drop
  # them so that collecting ids below cannot fail on nil
  paragraphs = parse_paragraphs(html.at(".#{position}-matter-ugc")).compact

  # Attach the paragraphs to the matter heading
  heading.sub_item_ids = paragraphs.map(&:id)

  # Register the matter heading in the result
  result.content_items << heading

  heading
end
parse_metadata() click to toggle source

Find and parse metadata

# File lib/openwebslides/converter/pressbooks.rb, line 42
# Find and parse metadata
#
# Copies the `content` attribute of the pb-title, pb-authors and
# pb-book-license meta tags onto the result's title, author and license.
def parse_metadata
  # Look up one meta tag by selector and read its content attribute
  meta_content = ->(selector) { html.at(selector)['content'] }

  result.title = meta_content.call('meta[name="pb-title"]')
  result.author = meta_content.call('meta[name="pb-authors"]')
  result.license = meta_content.call('meta[name="pb-book-license"]')
end
parse_paragraph(html) click to toggle source

Parse paragraph XHTML object into content item

# File lib/openwebslides/converter/pressbooks.rb, line 165
# Parse paragraph XHTML object into content item
#
# The node's XHTML is converted with ReverseMarkdown and passed through
# `sanitize`. A paragraph whose resulting text is empty is discarded.
#
# @param html node responding to `to_xhtml`
# @return the paragraph content item, or nil when its text is empty
def parse_paragraph(html)
  item = Content::Paragraph.new

  markdown = ReverseMarkdown.convert(html.to_xhtml)
  item.text = sanitize(markdown)

  # Empty paragraphs are neither registered nor returned
  return if item.text.empty?

  result.content_items << item

  item
end
parse_paragraphs(html) click to toggle source

Parse XHTML object into paragraph content items

# File lib/openwebslides/converter/pressbooks.rb, line 158
# Parse XHTML object into paragraph content items
#
# Every `p` found under the given node is passed to parse_paragraph.
# parse_paragraph returns nil for paragraphs without text; those are
# dropped so callers can safely collect ids from the returned list.
#
# @param html node responding to `search`
# @return [Array] paragraph content items, without nil entries
def parse_paragraphs(html)
  html.search('p').map { |node| parse_paragraph node }.compact
end
parse_parts() click to toggle source

Find and parse parts

# File lib/openwebslides/converter/pressbooks.rb, line 75
# Find and parse parts
#
# Walks all `div.part` and `div.chapter` nodes in the order the search
# returns them. A part div starts a new part heading; a chapter div is
# parsed and attached to the most recently started part.
#
# NOTE(review): a chapter div that appears before any part div fails on
# `parts.last` being nil — confirm whether such input can occur.
#
# @return [Array] the part heading content items
def parse_parts
  parts = []

  html.search('div.part, div.chapter').each do |div|
    if div.classes.include? 'part'
      part = Content::Heading.new
      part.text = div.at('.part-title').content

      # Part intro (paragraphs under part header). Paragraphs without text
      # come back as nil from parse_paragraph; drop them so that collecting
      # ids cannot fail on nil
      intro = parse_paragraphs(div).compact
      part.sub_item_ids = intro.map(&:id)

      parts << part
    elsif div.classes.include? 'chapter'
      chapter = parse_chapter div

      # Add chapter heading to the current (latest) part
      parts.last.sub_item_ids << chapter.id
    end
  end

  # Parts are registered after the chapters they contain
  result.content_items.concat parts

  parts
end