class UnderOs::Parser::HTML

Public Instance Methods

close_tag() click to toggle source
# File lib/under_os/parser/html.rb, line 44
# Consumes a closing tag (e.g. `</div>`, `</h1>`) at the start of @chunk.
#
# Every node popped from @stack before the matching one is treated as an
# element that was never closed: its children are handed over to its parent
# and its own :children / :text entries are removed.
#
# A closing tag that matches no currently open element is ignored (the stack
# is left untouched), so a stray `</span>` cannot wipe out the open elements.
#
# Tag names may contain digits after the first letter and are matched
# case-insensitively against the (lower-case) names stored on the stack.
#
# Returns the number of characters consumed, or nil when @chunk does not
# start with a closing tag.
def close_tag
  m = @chunk.match(/\A<\/([a-z][a-z0-9]*)>/i)
  return unless m

  name = m[1].downcase

  if @stack.any? { |open| open[:tag] == name }
    # the matching element sits deeper in the stack, so a parent always
    # exists for every non-matching node popped here
    while (node = @stack.pop) && node[:tag] != name
      parent = @stack.last
      parent[:children] = (parent[:children] || []) + (node[:children] || [])
      node.delete(:children)
      node.delete(:text)
    end
  end

  @node = @stack.last

  m[0].size
end
merge_data_attrs(hash) click to toggle source
# File lib/under_os/parser/html.rb, line 83
# Folds every `data-*` entry of +hash+ into a nested hash stored under :data.
#
# For each key whose string form starts with 'data-', the entry is removed
# from +hash+ and re-added to hash[:data] under the remainder of the key,
# passed through `camelize` and converted to a symbol.
#
# The hash is modified in place and also returned.
def merge_data_attrs(hash)
  prefixed = hash.keys.select { |name| name.to_s.starts_with?('data-') }

  prefixed.each do |name|
    bucket  = (hash[:data] ||= {})
    payload = hash.delete(name)
    short   = name.to_s.gsub(/^data\-/, '').camelize.to_sym

    bucket[short] = payload
  end

  hash
end
open_tag() click to toggle source
# File lib/under_os/parser/html.rb, line 26
# Consumes an opening tag (e.g. `<div class="x">`, `<h1>`) at the start of
# @chunk and turns it into a node hash `{tag: ..., attrs: ...}`.
#
# The tag name is a letter followed by letters or digits, matched
# case-insensitively and stored lower-cased, so `<h1>` yields the tag 'h1'
# instead of tag 'h' with a stray "1" in the attribute string.
#
# The new node becomes a child of the innermost open node, or a top-level
# node when nothing is open, and is pushed onto @stack as the current node.
#
# Returns the number of characters consumed, or nil when @chunk does not
# start with an opening tag.
def open_tag
  m = @chunk.match(/\A<([a-z][a-z0-9]*)([^>]*)>/i)
  return unless m

  @node = {tag: m[1].downcase, attrs: parse_attrs_in(m[2])}

  if parent = @stack.last
    parent[:children] ||= []
    parent[:children] << @node
    parent.delete(:text) # it can have either text or children
  else
    @top << @node
  end

  @stack << @node

  m[0].size
end
parse(html) click to toggle source
# File lib/under_os/parser/html.rb, line 2
# Parses an HTML string into an array of top-level node hashes (the nodes
# are built by open_tag and filled in by plain_text / close_tag).
#
# Before scanning, the input is stripped, HTML comments are removed and
# whitespace directly after '>' or directly before '<' is dropped.
#
# The scanner then walks the string, letting open_tag, close_tag and
# plain_text (in that order) consume the head of the remaining input. When
# none of them recognises it (e.g. a literal '<' that starts no tag), a
# single character is skipped so the scan always advances instead of failing
# on a nil increment.
#
# Returns the array of top-level nodes.
def parse(html)
  html = html.strip.gsub(/<\!--[\s\S]*?-->/, '').gsub(/>\s+/, '>').gsub(/\s+</, '<')

  [].tap do |top|
    @top   = top
    @stack = []
    @node  = nil
    i      = 0

    while i < html.size
      @chunk = html.slice(i, html.size)

      # fall back to skipping one character when nothing matches
      i += open_tag || close_tag || plain_text || 1
    end

    # closing all the missing tags
    # NOTE(review): children of a node left open at end of input are dropped
    # here unless they are still on the stack themselves - confirm intended.
    while node = @stack.shift
      node.delete(:children)
      node.delete(:text)

      # identity check: two distinct nodes with equal content (`<br><br>`)
      # must both be kept
      @top << node unless @top.any? { |known| known.equal?(node) }
    end
  end
end
parse_attrs_in(string) click to toggle source
# File lib/under_os/parser/html.rb, line 72
# Extracts `name="value"` / `name='value'` pairs from the attribute part of
# an opening tag and returns them as a hash keyed by symbol, after passing
# the hash through merge_data_attrs.
#
# Value conversion:
#   * a value equal to the attribute name (`disabled="disabled"`) -> true
#   * 'true'  -> true
#   * 'false' -> false
#   * anything else is kept as a string
#
# Empty values (`title=""`) are accepted and stored as ''; the value pattern
# allows zero characters so an empty value cannot swallow the attributes
# that follow it.
def parse_attrs_in(string)
  attrs = {}

  string.scan(/([a-z][a-z_\-\d]+)=('|")(.*?)\2/) do |name, _quote, raw|
    attrs[name.to_sym] =
      case raw
      when name, 'true' then true
      when 'false'      then false
      else raw
      end
  end

  merge_data_attrs(attrs)
end
plain_text() click to toggle source
# File lib/under_os/parser/html.rb, line 64
# Consumes a run of text (everything up to the next '<') at the start of
# @chunk and stores it as the :text of the innermost open node, if any.
# Text found while no node is open is consumed but not stored.
#
# Returns the number of characters consumed, or nil when @chunk starts
# with '<'.
def plain_text
  text = @chunk[/\A[^<]+/]
  return unless text

  current = @stack.last
  current[:text] = text if current

  text.size
end