class SemanticCrawler::Websites::MicroData
Extracts microdata from a website and makes it available as JSON.
Attributes
microdata[RW]
url[RW]
Public Class Methods
new(url)
click to toggle source
# File lib/semantic_crawler/websites/micro_data.rb, line 12
# Fetches the document at +url+, extracts its microdata items and stores the
# converted result in +microdata+. The given +url+ is kept in the +url+
# attribute.
#
# url - location handed to +open+.
#
# Raises whatever +open+, the parsers or +extract_microdata+ raise.
#
# NOTE(review): +open+ is Kernel#open here; on a caller-supplied string that
# starts with "|" it spawns a subprocess, and open-uri stopped hooking
# Kernel#open in Ruby 3.0. Consider URI.open once the minimum supported Ruby
# version allows it -- confirm against the gemspec.
def initialize(url)
  self.url = url
  # Block form closes the underlying handle as soon as parsing is done.
  doc = open(url) { |io| Nokogiri::HTML(io) }
  document = Microdata::Document.new(doc.to_s)
  self.microdata = extract_microdata(document.extract_items)
end
Public Instance Methods
to_json()
click to toggle source
# File lib/semantic_crawler/websites/micro_data.rb, line 19
# Serializes the extracted microdata to a JSON string.
#
# args - optional arguments (e.g. a generator state) forwarded unchanged to
#        +microdata.to_json+, so the method can also be called by code that
#        passes a state, as the JSON library does for nested objects.
#
# Returns the result of +microdata.to_json+.
def to_json(*args)
  microdata.to_json(*args)
end
to_s()
click to toggle source
# File lib/semantic_crawler/websites/micro_data.rb, line 23
# Returns the stored +microdata+ object itself, without converting it.
#
# NOTE(review): the value built by +extract_microdata+ is a Hash or Array, so
# this does not return a String as +to_s+ conventionally does -- confirm
# whether callers rely on getting the raw structure before changing it.
def to_s
  self.microdata
end
Private Instance Methods
extract_microdata(items)
click to toggle source
# File lib/semantic_crawler/websites/micro_data.rb, line 28
# Recursively converts a list of microdata items into plain Ruby structures.
#
# items - an Array. When its first element is a String the array is treated
#         as a list of literal values and returned unchanged. Otherwise each
#         element must respond to +type+ (whose first entry becomes the
#         grouping key) and +properties+ (enumerable of key/value pairs whose
#         values are converted recursively).
#
# Returns the array itself for literal values, otherwise a Hash that maps each
# item type to an Array of property hashes (property name as String =>
# converted value). An empty array yields an empty Hash.
#
# Raises RuntimeError ("Not implemented!") when +items+ is not an Array or
# its first element is nil/false.
#
# Only the first element decides how the whole array is handled; arrays that
# mix strings and items are not supported.
def extract_microdata(items)
  raise "Not implemented!" unless items.kind_of?(Array)
  # Nothing to convert (e.g. no items were found) is not an error.
  return {} if items.empty?

  first = items.first
  raise "Not implemented!" unless first
  return items if first.kind_of?(String)

  items.each_with_object({}) do |item, result|
    properties = {}
    item.properties.each do |key, value|
      properties[key.to_s] = extract_microdata(value)
    end
    # Create the per-type list here rather than inside the property loop so
    # that items without any properties are still recorded.
    (result[item.type.first] ||= []) << properties
  end
end