class HtmlSlicer::Mappers::Slicing
Attributes
map[R]
options[R]
slice_number[R]
Public Class Methods
new(document, options)
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 20
# Builds the slicing map for +document+.
#
# document:: must be a ::HTML::Document, otherwise TypeError is raised.
# options::  must be a HtmlSlicer::Options, otherwise TypeError is raised.
#
# Starts with an empty Map and slice number 1, then walks document.root:
# node-based slicing when options.unit is a Hash, text-based slicing
# for any other unit.
def initialize(document, options)
  unless document.is_a?(::HTML::Document)
    raise(TypeError, "::HTML::Document expected, '#{document.class}' passed")
  end
  unless options.is_a?(HtmlSlicer::Options)
    raise(TypeError, "HtmlSlicer::Options expected, '#{options.class}' passed")
  end

  @options = options
  @map = Map.new
  @slice_number = 1

  if @options.unit.is_a?(Hash)
    process_by_node!(document.root)
  else
    process_by_text!(document.root)
  end
end
Private Instance Methods
limited?()
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 85
# Truthy once a limit is configured and the current slice number has
# reached it. Yields a falsy value (nil or false) when @options.limit is
# unset, or false while the slice number is still below the limit.
def limited?
  limit = @options.limit
  limit && @slice_number >= limit
end
process_by_node!(root)
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 71
# Node-based slicing: every node yielded by parse(root) is committed to
# the map under the current slice number. Nodes that match @options.unit
# and are sliceable are counted; each time the count reaches
# @options.maximum it is reset and the slice number advances, or the walk
# stops when the slice limit has been reached.
def process_by_node!(root)
  matched_units = 0
  parse(root) do |node|
    @map.commit(node, @slice_number, true)
    next unless node.match(@options.unit) && sliceable?(node)

    matched_units += 1
    next unless matched_units == @options.maximum

    # A full slice has been collected: start counting the next one.
    matched_units = 0
    break if limited?

    @slice_number += 1
  end
end
process_by_text!(root)
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 34
# Text-based slicing over the nodes yielded by parse(root).
#
# For each sliceable ::HTML::Text node the content is sanitized and scanned
# with @options.unit. Every @options.maximum counted matches close a slice:
# the character range [last_index, index - 1] of that node is committed to
# the map under the current slice number. When @options.complete is set, the
# slice boundary is moved forward to the start of the first match of that
# regexp at or after the end of the closing unit, and unit matches beginning
# before the moved boundary are not counted. Units left over at the end of
# a node commit the open-ended range [last_index, -1]; the unit counter
# carries over to the next node.
#
# Any other node is committed whole (true) under the current slice number.
#
# The walk stops as soon as a slice is closed while limited? is truthy.
# Errors raised by collaborators propagate to the caller; they are no longer
# swallowed by a blanket +rescue Exception+ used as a loop exit.
def process_by_text!(root)
  units_count = 0
  parse(root) do |node|
    if node.is_a?(::HTML::Text) && sliceable?(node)
      sanitize_content!(node)
      content = node.to_s
      start_index = 0
      last_index = 0
      limit_reached = false
      content.scan(@options.unit) do
        # Capture the match now: the content.match call below overwrites $~.
        unit_match = Regexp.last_match
        next if unit_match.begin(0) < start_index

        units_count += 1
        next unless units_count == @options.maximum

        units_count = 0
        index = unit_match.end(0)
        if (complete_regexp = @options.complete)
          index = content.match(complete_regexp, index).try(:begin, 0) || index
          start_index = index
        end
        @map.commit(node, @slice_number, [last_index, index - 1])
        last_index = index
        if limited?
          # break only leaves the scan; the flag lets the outer walk stop too.
          limit_reached = true
          break
        end
        @slice_number += 1
      end
      break if limit_reached

      @map.commit(node, @slice_number, [last_index, -1]) if units_count > 0
    else
      @map.commit(node, @slice_number, true)
    end
  end
end
sanitize_content!(node)
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 89
# Overwrites the node's @content instance variable with the result of
# passing node.to_s through ::HTML::FullSanitizer (presumably stripping
# markup; verify against the sanitizer's docs). Returns the new content.
def sanitize_content!(node)
  sanitizer = ::HTML::FullSanitizer.new
  node.instance_variable_set(:@content, sanitizer.sanitize(node.to_s))
end
sliceable?(node)
click to toggle source
# File lib/html_slicer/mappers/slicing.rb, line 94
# Whether +node+ may take part in slicing under the current options.
# Pure delegation to able_to?, which is defined outside this section.
def sliceable?(node)
  able_to?(node, @options)
end