class ArticleJSON::Import::GoogleDoc::HTML::ImageParser
Public Class Methods
@param [Nokogiri::HTML::Node] node @param [Nokogiri::HTML::Node] caption_node @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
# File lib/article_json/import/google_doc/html/image_parser.rb, line 12
# Set up a parser for one image node and its (possibly missing) caption node.
#
# @param [Nokogiri::HTML::Node] node
# @param [Nokogiri::HTML::Node] caption_node
# @param [ArticleJSON::Import::GoogleDoc::HTML::CSSAnalyzer] css_analyzer
def initialize(node:, caption_node:, css_analyzer:)
  @node = node
  # The main node is the one that indicates the floating behavior
  @float_node = node
  @css_analyzer = css_analyzer
  # `href` reads the caption node (and strips the link tag from it), so the
  # caption has to be stored before the link is extracted.
  @caption_node = caption_node
  @href = href
end
Public Instance Methods
The value of the image's `alt` attribute @return [String]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 24
# Alternative text of the image, read from the `alt` attribute of the image
# node. Falls back to an empty string when the attribute (or its value) is
# missing.
#
# @return [String]
def alt
  alt_attribute = image_node.attribute('alt')
  return '' if alt_attribute.nil?

  alt_attribute.value || ''
end
@return [ArticleJSON::Elements::Image]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 58
# Build the image element from everything this parser extracted.
#
# NOTE(review): `caption` is not defined in this part of the file; it is
# presumably provided by a shared mixin - confirm.
#
# @return [ArticleJSON::Elements::Image]
def element
  image_class = ArticleJSON::Elements::Image
  image_attributes = {
    source_url: source_url,
    float: float,
    caption: caption,
    href: @href, # extracted once, when the parser was initialized
    alt: alt
  }
  image_class.new(**image_attributes)
end
Check if the image is floating (left, right or not at all) @return [Symbol]
ArticleJSON::Import::GoogleDoc::HTML::Shared::Float#float
# File lib/article_json/import/google_doc/html/image_parser.rb, line 42
# Floating behavior of the image. The inherited implementation is only
# consulted when the image is small enough to float (see `floatable_size?`);
# otherwise the image never floats and `nil` is returned.
#
# @return [Symbol, nil]
def float
  return unless floatable_size?

  super
end
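Extracts an href from the tag `[image-link-to: url]` if it is present in the caption node, and removes the tag from the caption. Returns `nil` when there is no caption node or no tag. @return [String, nil]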
# File lib/article_json/import/google_doc/html/image_parser.rb, line 49
# Look for an `[image-link-to: url]` tag in the caption text. When one is
# found, the tag is stripped from the caption node and its URL is returned.
# Without a caption node, or without a tag in it, the result is `nil`.
#
# @return [String, nil]
def href
  return nil if @caption_node.nil?

  caption_text = @caption_node.content.strip
  link_tag = href_regexp.match(caption_text)
  return nil if link_tag.nil?

  # The tag is markup for the importer only, it must not end up in the caption
  remove_image_link_tag
  link_tag[:url]
end
The node of the actual image @return [Nokogiri::HTML::Node]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 36
# The first `<img>` node found anywhere below the main node.
#
# @return [Nokogiri::HTML::Node]
def image_node
  images = @node.xpath('.//img')
  images.first
end
The value of the image's `src` attribute @return [String]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 30
# URL of the image, read from the `src` attribute of the image node.
#
# @return [String]
def source_url
  src_attribute = image_node.attribute('src')
  src_attribute.value
end
Private Instance Methods
Check if the image's width can be determined and is less than 500px. This is about 3/4 of the Google document width… @return [Boolean]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 86
# Whether the image is narrow enough to float: its width has to be known and
# has to be below 500px. The result is `nil` (falsy) when the width cannot be
# determined.
#
# @return [Boolean, nil]
def floatable_size?
  width = image_width
  width && width < 500
end
Regular expression to check if there's a [image-link-to: url] tag @return [Regexp]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 80
# Regular expression matching an `[image-link-to: url]` tag. The URL is
# exposed through the named capture `url`.
#
# Whitespace around the URL is matched with `[[:space:]]` instead of `\s`, so
# that non-breaking spaces are accepted as well, and it is kept out of the
# capture so that `url` never carries leading or trailing blanks. The
# whitespace after the colon is optional.
#
# @return [Regexp]
def href_regexp
  %r{\[image-link-to:[[:space:]]*(?<url>.*?)[[:space:]]*\]}
end
Get the specified width of the image, if available. The width can be specified either in a `width` attribute or via the `style` attribute. If neither provides a width, `nil` is returned. @return [Integer, nil]
# File lib/article_json/import/google_doc/html/image_parser.rb, line 94
# Width of the image in pixels, if it is specified on the image node.
#
# A `width` attribute takes precedence; otherwise a `width: <n>px` declaration
# in the `style` attribute is used. Fractional values are truncated by `to_i`.
# The result is memoized once a width was found.
#
# @return [Integer, nil] `nil` when no width is specified
def image_width
  @image_width ||=
    if image_node.has_attribute?('width')
      image_node.attribute('width').value.to_i
    elsif image_node.has_attribute?('style')
      style = image_node.attribute('style').value
      # The lookbehind keeps `max-width:`, `min-width:`, `border-width:` etc.
      # from being mistaken for the `width` declaration.
      declaration = style.match(/(?<![\w-])width:\s*(?<px>\d+(?:\.\d+)?)px/)
      declaration[:px].to_i if declaration
    end
end
Removes the [image-link-to: url] tag from the caption node
# File lib/article_json/import/google_doc/html/image_parser.rb, line 71
# Removes the `[image-link-to: url]` tag from the caption node.
#
# The tag is stripped from the full text of the caption and the result is
# written into the first child node. Since that child then holds the complete
# caption text, all following children are removed; leaving them in place
# would duplicate their text and could keep (parts of) the tag in the caption.
#
# @return [String] the caption text without the tag
def remove_image_link_tag
  # Has to be computed before any child gets removed
  cleaned_text = @caption_node.content.sub(href_regexp, '').strip
  first_child, *other_children = @caption_node.children.to_a
  other_children.each(&:remove)
  first_child.content = cleaned_text
end