class Tychus::Parsers::SchemaOrgParser
Attributes
review_doc[R]
root_doc[R]
video_object_doc[R]
Public Class Methods
new(uri)
click to toggle source
Calls superclass method
Tychus::Parsers::Base::new
# File lib/tychus/parsers/schema_org_parser.rb, line 8
#
# Builds a parser for the page at +uri+.
#
# The three CSS selectors are assigned before control passes to the
# superclass initializer; the review and video-object microformats are
# stripped once it returns.
# NOTE(review): the VideoObject selector uses the "www." host while the
# other two do not -- presumably intentional, confirm against target sites.
def initialize(uri)
  @root_doc         = '[itemtype="http://schema.org/Recipe"]'
  @review_doc       = '[itemtype="http://schema.org/Review"]'
  @video_object_doc = '[itemtype="http://www.schema.org/VideoObject"]'

  # Bare `super` forwards +uri+ unchanged to the superclass initializer.
  super

  strip_review_microformat
  strip_video_object_microformat
end
Public Instance Methods
itemprop_node_for(property)
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 17
#
# Looks up the first node inside +recipe_doc+ whose +itemprop+ attribute
# equals +property+. Falls back to a fresh NullObject when nothing matches.
def itemprop_node_for(property)
  first_match = recipe_doc.css("[itemprop='#{property}']").first
  first_match || NullObject.new
end
parse_cook_time()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 30
#
# Reads the duration value carried by the +cookTime+ itemprop node.
def parse_cook_time
  # leverage iso8601
  cook_time_node = itemprop_node_for(:cookTime)
  parse_duration(cook_time_node)
end
parse_description()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 25
#
# Returns the content of the first +description+ itemprop node.
def parse_description
  # is it always first?
  description_node = itemprop_node_for(:description)
  description_node.content
end
parse_duration(node)
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 35
#
# Extracts the raw duration attribute from +node+, choosing the attribute
# by the node's element name:
#
# * "meta" / "span" -> the +content+ attribute
# * "time"          -> the +datetime+ attribute
# * anything else   -> a new NullObject
def parse_duration(node)
  # Allrecipes - 'time' element
  # Foodnetwork - 'meta' element (std according to
  # Schema.org/Recipe)
  tag_name = node.name

  case tag_name
  when "meta", "span" then node.attr('content')
  when "time"         then node.attr('datetime')
  else NullObject.new
  end
end
parse_image()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 49
#
# Returns the +src+ attribute of the first +image+ itemprop node.
def parse_image
  image_node = itemprop_node_for(:image)
  image_node.attr('src')
end
parse_ingredients()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 53
#
# Collects the text of every +ingredients+ itemprop node in +recipe_doc+
# into one flat list. For each node, runs of spaces are collapsed, the
# outer whitespace is trimmed, and the text is split on "\r\n".
def parse_ingredients
  # NOT FIRST
  ingredient_nodes = recipe_doc.css('[itemprop="ingredients"]')

  ingredient_nodes.flat_map do |ingredient|
    text = ingredient.content.squeeze(" ")
    text.rstrip.lstrip.split("\r\n")
  end
end
parse_name()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 66
#
# Returns the content of the first +name+ itemprop node.
def parse_name
  name_node = itemprop_node_for(:name)
  name_node.content
end
parse_prep_time()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 70
#
# Reads the duration value carried by the +prepTime+ itemprop node.
def parse_prep_time
  prep_time_node = itemprop_node_for(:prepTime)
  parse_duration(prep_time_node)
end
parse_recipe_instructions()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 74
#
# Builds the list of instruction steps from the element children of the
# +recipeInstructions+ itemprop node, skipping heading-like and div
# children and discarding blank entries.
def parse_recipe_instructions
  # strip empty strings, drop trailing whitespace, clean carriage returns (\r\n)
  #
  # Allrecipes: <li><span>lorem ipsum</span></li>
  # FoodNetwork: <p>lorem ipsum</p>

  # reject headers such as "Directions" and divs such as .categories for Foodnetwork recipes
  reject_regex = /^(h.|div)$/

  step_nodes = itemprop_node_for(:recipeInstructions)
               .element_children
               .reject { |child| child.name =~ reject_regex }

  step_groups = step_nodes.map do |child|
    collapsed = child.content.squeeze(" ").rstrip
    # In a double-quoted string "\s" is a literal space, so this splits on
    # CRLF + space + CRLF + space.
    collapsed.split("\r\n\s\r\n\s")
  end

  step_groups.flatten.reject(&:blank?)
end
parse_recipe_yield()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 93
#
# Returns the content of the first +recipeYield+ itemprop node.
def parse_recipe_yield
  yield_node = itemprop_node_for(:recipeYield)
  yield_node.content
end
parse_total_time()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 97
#
# Reads the duration value carried by the +totalTime+ itemprop node.
def parse_total_time
  # leverage iso8601
  total_time_node = itemprop_node_for(:totalTime)
  parse_duration(total_time_node)
end
strip_review_microformat()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 102
#
# Removes every node matching the +review_doc+ selector from +recipe_doc+.
def strip_review_microformat
  review_nodes = recipe_doc.css(review_doc)
  review_nodes.remove
end
strip_video_object_microformat()
click to toggle source
# File lib/tychus/parsers/schema_org_parser.rb, line 106
#
# Removes every node matching the +video_object_doc+ selector from
# +recipe_doc+.
def strip_video_object_microformat
  video_nodes = recipe_doc.css(video_object_doc)
  video_nodes.remove
end