class SeoReport::Extractions::Microdata::MicrodataExtractor

Attributes

document[R]

Public Class Methods

new(document)
# File lib/seo_report/extractions/microdata.rb, line 10
# Creates an extractor bound to one document.
#
# document - the parsed document to read microdata from. It is stored
#            unchanged in @document; the elements method of this class calls
#            at_xpath on it (presumably a Nokogiri document - confirm
#            against the caller).
def initialize(document)
  @document = document
end

Public Instance Methods

elements()
# File lib/seo_report/extractions/microdata.rb, line 24
# Returns the nodes the extraction starts from: the children of the
# document's /html/body element, or an empty Array when the document has no
# such element.
def elements
  body_node = document.at_xpath('/html/body')
  return [] unless body_node

  body_node.children
end
extract!()
# File lib/seo_report/extractions/microdata.rb, line 14
# Runs the extraction: feeds every start node from elements through process
# and then returns the collected data as
#   { microdata: { elements: result_set, errors: errors } }
def extract!
  elements.each do |node|
    process(node)
  end

  microdata_report = { elements: result_set, errors: errors }
  { microdata: microdata_report }
end

Protected Instance Methods

current_scope()
# File lib/seo_report/extractions/microdata.rb, line 93
# Returns the scope on top of the scopes stack, i.e. the one most recently
# pushed and not yet popped, or nil when the stack is empty.
def current_scope
  scopes.last
end
errors()
# File lib/seo_report/extractions/microdata.rb, line 105
# Lazily created Hash that collects problems found during extraction.
# The same Hash instance is returned on every call.
def errors
  return @errors if @errors

  @errors = {}
end
microdata_element?(element)
# File lib/seo_report/extractions/microdata.rb, line 109
# Tells whether the element carries at least one of the microdata attributes
# itemscope, itemtype or itemprop (checked in that order through
# element.has_attribute?).
def microdata_element?(element)
  microdata_attributes = %w(itemscope itemtype itemprop)
  microdata_attributes.any? do |attribute_name|
    element.has_attribute?(attribute_name)
  end
end
microdata_itemprop_value(element)
# File lib/seo_report/extractions/microdata.rb, line 122
# Returns the value an itemprop element contributes, chosen by tag name:
#
#   meta                                          -> "content" attribute
#   audio, embed, iframe, img, source, track, video -> "src" attribute
#   a, area, link                                 -> "href" attribute
#   object                                        -> "data" attribute
#   data, meter                                   -> "value" attribute
#   time                                          -> "datetime" attribute,
#                                                    or the text without it
#   anything else                                 -> element.text
#
# element - node responding to name, [] (attribute lookup) and text.
#
# Attribute-based branches return nil when the attribute is absent, except
# for time, which falls back to the element text.
def microdata_itemprop_value(element)
  case element.name
  when "meta"
    element["content"]
  when "audio", "embed", "iframe", "img", "source", "track", "video"
    element["src"]
  when "a", "area", "link"
    element["href"]
  when "object"
    element["data"]
  when "data", "meter"
    element["value"]
  when "time"
    # The machine-readable datetime attribute takes precedence over the
    # human-readable text of a <time> element.
    element["datetime"] || element.text
  else
    element.text
  end
end
microdata_prop?(element)
# File lib/seo_report/extractions/microdata.rb, line 118
# Tells whether the element carries an itemprop attribute (checked through
# element.has_attribute?), i.e. whether it declares a property.
def microdata_prop?(element)
  element.has_attribute?("itemprop")
end
microdata_scope?(element)
# File lib/seo_report/extractions/microdata.rb, line 114
# Tells whether the element carries an itemscope attribute (checked through
# element.has_attribute?), i.e. whether it opens a new scope.
def microdata_scope?(element)
  element.has_attribute?("itemscope")
end
process(child, level = 1)
# File lib/seo_report/extractions/microdata.rb, line 34
# Walks the tree rooted at child. A node carrying a microdata attribute is
# handed to process_microdata; any other node is skipped and its children
# are visited recursively at the same level.
#
# child - node to inspect.
# level - nesting depth passed through to process_microdata (default 1).
def process(child, level = 1)
  return process_microdata(child, level) if microdata_element?(child)

  child.children.each { |descendant| process(descendant, level) }
end
process_microdata(child, level)
# File lib/seo_report/extractions/microdata.rb, line 42
# Handles one node that carries a microdata attribute.
#
# Without itemscope the node is treated as a plain property: its value is
# recorded via provide_data_for_itemprop and its children are processed at
# the same level.
#
# With itemscope a new scope is pushed, the children are processed one level
# deeper, and the scope is popped again. The finished scope is then passed
# to store_as_dangling_if_necessary! and, at level 1 only, appended to
# result_set.
#
# child - node to handle.
# level - nesting depth of child; 1 means no enclosing scope.
def process_microdata(child, level)
  unless microdata_scope?(child)
    provide_data_for_itemprop(child)
    return child.children.each { |node| process(node, level) }
  end

  push_scope(child)
  nested_level = level + 1
  child.children.each { |node| process(node, nested_level) }
  # The pop must happen before the dangling check so that current_scope
  # refers to the parent again.
  finished_scope = wrap_scope!
  store_as_dangling_if_necessary!(child, finished_scope, level)
  result_set << finished_scope if level == 1
end
provide_data_for_itemprop(element)
# File lib/seo_report/extractions/microdata.rb, line 64
# Records the value of a property element.
#
# When a scope is open, the value is stored in it under the element's
# itemprop attribute (an existing entry with the same key is overwritten).
# When no scope is open, a description of the element (:tag, :itemprop,
# :value and, if the element has one, :id) is appended to
# errors[:scopeless_elements].
#
# element - node responding to name and [] (attribute lookup).
def provide_data_for_itemprop(element)
  value = microdata_itemprop_value(element)
  property = element["itemprop"]

  scope = current_scope
  return scope[property] = value if scope

  report = { tag: element.name, itemprop: property, value: value }
  element_id = element["id"]
  report[:id] = element_id if element_id

  errors[:scopeless_elements] ||= []
  errors[:scopeless_elements] << report
end
push_scope(element)
# File lib/seo_report/extractions/microdata.rb, line 79
# Opens a new scope for an itemscope element and makes it the current one.
#
# The new scope is a Hash of the form { type: <itemtype attribute> }. When
# the element also carries itemprop and a parent scope is open, the new
# scope is stored in that parent under the itemprop name.
#
# An element with itemprop but no open parent scope is still pushed; the
# itemprop link is skipped because there is nothing to attach it to
# (previously this case raised NoMethodError on nil).
#
# element - node responding to [] and has_attribute?.
#
# Returns the scopes stack after the push.
def push_scope(element)
  new_scope = { type: element["itemtype"] }

  parent_scope = current_scope
  if microdata_prop?(element) && parent_scope
    parent_scope[element["itemprop"]] = new_scope
  end

  scopes.push(new_scope)
end
result_set()
# File lib/seo_report/extractions/microdata.rb, line 101
# Lazily created Array that collects the extracted top-level scopes.
# The same Array instance is returned on every call.
def result_set
  return @result_set if @result_set

  @result_set = []
end
scopes()
# File lib/seo_report/extractions/microdata.rb, line 97
# Lazily created Array used as the stack of currently open scopes.
# The same Array instance is returned on every call.
def scopes
  return @scopes if @scopes

  @scopes = []
end
store_as_dangling_if_necessary!(element, scope, level)
# File lib/seo_report/extractions/microdata.rb, line 55
# Keeps a nested scope reachable when it is not linked to its parent through
# an itemprop: such a scope is appended to the current scope's
# :@dangling_children list. Nothing happens for level 1 scopes or for
# elements whose itemprop attribute is present and non-empty.
#
# element - the itemscope node that produced scope.
# scope   - the finished scope Hash.
# level   - nesting depth of element; 1 means top level.
def store_as_dangling_if_necessary!(element, scope, level)
  return if level == 1

  property_name = element["itemprop"]
  return unless property_name.nil? || property_name.empty?

  current_scope[:@dangling_children] ||= []
  current_scope[:@dangling_children] << scope
end
wrap_scope!()
# File lib/seo_report/extractions/microdata.rb, line 89
# Closes the scope currently being filled: removes it from the top of the
# scopes stack and returns it (nil when the stack is empty).
def wrap_scope!
  scopes.pop
end