class ArticleJSON::Import::GoogleDoc::HTML::Parser
Public Class Methods
new(html)
click to toggle source
@param [String] html
# File lib/article_json/import/google_doc/html/parser.rb, line 7
# Set up the parser state from a raw HTML string.
#
# The HTML is parsed with Nokogiri. The children of the last `<body>` element
# are exposed through an enumerator, and the text of the last `<style>`
# element inside `<head>` (nil when there is none) is handed to CSSAnalyzer.
#
# @param [String] html
def initialize(html)
  document = Nokogiri::HTML(html)

  body = document.xpath('//body').last
  @body_enumerator = body.children.to_enum

  style_node = document.xpath('//head/style').last
  stylesheet = style_node&.inner_text
  @css_analyzer = CSSAnalyzer.new(stylesheet)
end
Public Instance Methods
parsed_content()
click to toggle source
Parse the body of the document and return the result @return [Array]
# File lib/article_json/import/google_doc/html/parser.rb, line 17
# Return the parsed body, running `parse_body` on the first call and reusing
# the stored `@parsed_content` on later calls.
#
# @return [Array]
def parsed_content
  return @parsed_content if @parsed_content

  @parsed_content = parse_body
end
Private Instance Methods
body_has_more_nodes?()
click to toggle source
@return [Boolean]
# File lib/article_json/import/google_doc/html/parser.rb, line 128
# Check whether the body enumerator can still yield a node.
#
# Uses `peek`, so the enumerator position is not advanced.
#
# @return [Boolean]
def body_has_more_nodes?
  begin
    @body_enumerator.peek
  rescue StopIteration
    # `peek` raises StopIteration once the enumerator is exhausted.
    return false
  end

  true
end
next_node()
click to toggle source
Return the next node if available, and advance the enumerator; returns nil when no nodes remain @return [Nokogiri::XML::Node|nil]
# File lib/article_json/import/google_doc/html/parser.rb, line 123
# Take the next node off the body enumerator, advancing it.
#
# @return [Object, nil] the next body node, or nil when
#   `body_has_more_nodes?` reports that none are left
def next_node
  return nil unless body_has_more_nodes?

  @body_enumerator.next
end
nodes_until_hr()
click to toggle source
Collect all nodes until a horizontal line, advancing the enumerator @return [Array]
# File lib/article_json/import/google_doc/html/parser.rb, line 112
# Gather consecutive body nodes up to, but not including, the next node that
# NodeAnalyzer reports as a horizontal rule (`hr?`).
#
# The enumerator is advanced past every collected node; the horizontal rule
# itself is only peeked at and stays in the enumerator.
#
# @return [Array]
def nodes_until_hr
  collected = []

  while body_has_more_nodes?
    break if NodeAnalyzer.new(@body_enumerator.peek).hr?

    collected << @body_enumerator.next
  end

  collected
end
parse_body()
click to toggle source
Loop over all body nodes and parse them @return [Array]
# File lib/article_json/import/google_doc/html/parser.rb, line 25
# Walk over all remaining body nodes and parse each one.
#
# Every node is wrapped in a NodeAnalyzer and stored in `@current_node`
# before `parse_current_node` runs. Nodes for which it returns a falsy value
# are left out of the result.
#
# @return [Array]
def parse_body
  @parsed_content = []

  while body_has_more_nodes?
    @current_node = NodeAnalyzer.new(@body_enumerator.next)
    element = parse_current_node
    @parsed_content << element if element
  end

  @parsed_content
end
parse_current_node()
click to toggle source
Parse the current node and return an element, if available (nil for horizontal rules, empty nodes and unknown nodes) @return [ArticleJSON::Elements::Base|nil]
# File lib/article_json/import/google_doc/html/parser.rb, line 38
# Dispatch on the type of `@current_node` to the matching `parse_*` method.
#
# @return [Object, nil] whatever the selected `parse_*` method returns; nil
#   for `:hr`, `:empty`, `:unknown` and any type without a handler
def parse_current_node
  handlers = {
    heading: :parse_heading,
    paragraph: :parse_paragraph,
    list: :parse_list,
    image: :parse_image,
    text_box: :parse_text_box,
    quote: :parse_quote,
    embed: :parse_embed
  }

  handler = handlers[@current_node.type]
  # Types without an entry (:hr, :empty, :unknown, ...) produce no element.
  handler ? send(handler) : nil
end
parse_embed()
click to toggle source
@return [ArticleJSON::Elements::Embed]
# File lib/article_json/import/google_doc/html/parser.rb, line 102
# Build an element for the current node through `EmbeddedParser.build`.
#
# The node following the current one is taken from the enumerator via
# `next_node` and passed along as the caption node (nil if none is left).
#
# @return [ArticleJSON::Elements::Embed]
def parse_embed
  embed_node = @current_node.node
  caption = next_node

  EmbeddedParser.build(
    node: embed_node,
    caption_node: caption,
    css_analyzer: @css_analyzer
  )
end
parse_heading()
click to toggle source
@return [ArticleJSON::Elements::Heading]
# File lib/article_json/import/google_doc/html/parser.rb, line 52
# Turn the current node into an element using HeadingParser.
#
# @return [ArticleJSON::Elements::Heading]
def parse_heading
  heading_parser = HeadingParser.new(node: @current_node.node)
  heading_parser.element
end
parse_image()
click to toggle source
@return [ArticleJSON::Elements::Image]
# File lib/article_json/import/google_doc/html/parser.rb, line 73
# Turn the current node into an element using ImageParser.
#
# The node following the current one is taken from the enumerator via
# `next_node` and passed along as the caption node (nil if none is left).
#
# @return [ArticleJSON::Elements::Image]
def parse_image
  image_parser = ImageParser.new(
    node: @current_node.node,
    caption_node: next_node,
    css_analyzer: @css_analyzer
  )
  image_parser.element
end
parse_list()
click to toggle source
@return [ArticleJSON::Elements::List]
# File lib/article_json/import/google_doc/html/parser.rb, line 66
# Turn the current node into an element using ListParser.
#
# @return [ArticleJSON::Elements::List]
def parse_list
  list_parser = ListParser.new(
    node: @current_node.node,
    css_analyzer: @css_analyzer
  )
  list_parser.element
end
parse_paragraph()
click to toggle source
@return [ArticleJSON::Elements::Paragraph|nil]
# File lib/article_json/import/google_doc/html/parser.rb, line 57
# Turn the current node into an element using ParagraphParser, dropping it
# when the resulting element reports itself as `blank?`.
#
# @return [ArticleJSON::Elements::Paragraph|nil]
def parse_paragraph
  paragraph_parser = ParagraphParser.new(
    node: @current_node.node,
    css_analyzer: @css_analyzer
  )
  element = paragraph_parser.element
  return nil if element.blank?

  element
end
parse_quote()
click to toggle source
@return [ArticleJSON::Elements::Quote]
# File lib/article_json/import/google_doc/html/parser.rb, line 95
# Build an element with QuoteParser from all nodes up to the next horizontal
# rule; those nodes are consumed from the enumerator by `nodes_until_hr`.
#
# @return [ArticleJSON::Elements::Quote]
def parse_quote
  quote_nodes = nodes_until_hr
  quote_parser = QuoteParser.new(
    nodes: quote_nodes,
    css_analyzer: @css_analyzer
  )
  quote_parser.element
end
parse_text_box()
click to toggle source
@return [ArticleJSON::Elements::TextBox]
# File lib/article_json/import/google_doc/html/parser.rb, line 84
# Build an element with TextBoxParser.
#
# The current node is passed as the type node; the content is every node up
# to the next horizontal rule, consumed from the enumerator by
# `nodes_until_hr`.
#
# @return [ArticleJSON::Elements::TextBox]
def parse_text_box
  type_node = @current_node.node
  content_nodes = nodes_until_hr

  text_box_parser = TextBoxParser.new(
    type_node: type_node,
    nodes: content_nodes,
    css_analyzer: @css_analyzer
  )
  text_box_parser.element
end