class RecipeScraper::Recipe

Represents a recipe fetched from a URL.

Constants

CUISINEAZ_HOST
G750_HOST
MARMITON_HOST

Attributes

cooktime[R]
image[R]
ingredients[R]
nb_of_persons[R]
preptime[R]
steps[R]
title[R]

Public Class Methods

new(url) click to toggle source

Instantiate a Recipe object with data crawled from a URL.

@param url [String] representing a URL from the Marmiton, 750g or Cuisine AZ website

# File lib/recipe_scraper.rb, line 19
# Build the recipe by handing +url+ to the scraper whose host check accepts it.
#
# @param url [String] address of the recipe page
# @raise [ArgumentError] when none of the host checks recognises +url+
def initialize(url)
  return fetch_from_marmiton(url) if marmiton_host?(url)
  return fetch_from_g750(url) if g750_host?(url)
  return fetch_from_cuisineaz(url) if cuisineaz_host?(url)

  raise ArgumentError, 'Instantiation cancelled (Host not supported).'
end

Public Instance Methods

to_hash() click to toggle source

export object properties to hash

@return [Hash] as object's properties

# File lib/recipe_scraper.rb, line 37
# Collect every instance variable into a hash keyed by the variable name
# without its leading "@".
#
# @return [Hash{Symbol => Object}] instance variable names mapped to their values
def to_hash
  instance_variables.each_with_object({}) do |ivar, properties|
    key = ivar.to_s.gsub(/^@/, '').to_sym
    properties[key] = instance_variable_get(ivar)
  end
end
to_json() click to toggle source

convert object properties to json

@return [String] data formatted as JSON

# File lib/recipe_scraper.rb, line 49
# Serialize the object's properties as a JSON document.
#
# The JSON library calls +to_json+ with a generator state argument when the
# object is nested inside another structure (e.g. +[recipe].to_json+), so the
# optional arguments are accepted and forwarded to Hash#to_json.
#
# @param args [Array] optional generator state/options supplied by the JSON library
# @return [String] the properties formatted as JSON
def to_json(*args)
  to_hash.to_json(*args)
end

Private Instance Methods

cuisineaz_host?(url) click to toggle source

test if url is from a valid cuisineaz.com host

@param url [String] representing a URL

@return [Boolean] true if the URL contains the cuisineaz.com desktop host

# File lib/recipe_scraper.rb, line 86
# Tell whether +url+ contains the desktop host registered in CUISINEAZ_HOST.
#
# @param url [String] address to inspect
# @return [Boolean] true when the host string occurs in +url+
def cuisineaz_host?(url)
  desktop_host = CUISINEAZ_HOST[:desktop]
  url.include?(desktop_host)
end
fetch_from_cuisineaz(url) click to toggle source

Fill object properties from a Cuisine AZ URL.

@param url [String] representing an url

# File lib/recipe_scraper.rb, line 175
# Fill the object's properties from a cuisineaz.com recipe page.
#
# Sets @title, @nb_of_persons, @preptime, @cooktime, @steps, @ingredients and,
# when the lookup succeeds, @image.
#
# @param url [String] address of the recipe page
# @raise [ArgumentError] when +url+ is not recognised by cuisineaz_host?
def fetch_from_cuisineaz(url)
  unless cuisineaz_host?(url)
    # name the cuisineaz host in the message (it used to interpolate G750_HOST)
    raise ArgumentError, "Instantiation cancelled (ulr not from #{CUISINEAZ_HOST})."
  end

  # Go through URI#open (open-uri) instead of Kernel#open: Kernel#open stopped
  # accepting URLs in Ruby 3.0 and would also run shell pipes or read local
  # files. The block form closes the stream once it has been read.
  html = URI.parse(url).open(&:read)
  page = Nokogiri::HTML(html)
  @title = page.css('h1').text

  # get persons
  @nb_of_persons = page.css('#ContentPlaceHolder_LblRecetteNombre').text.to_i
  # get times
  @preptime = page.css('#ContentPlaceHolder_LblRecetteTempsPrepa').text.to_i
  @cooktime = page.css('#ContentPlaceHolder_LblRecetteTempsCuisson').text.to_i

  @steps = page.css('#preparation p').map { |step_node| sanitize(step_node.text) }
  @ingredients = page.css('section.recipe_ingredients li').map { |ing_node| sanitize(ing_node.text) }

  begin
    @image = page.css('#ContentPlaceHolder_recipeImg').attr('data-src').to_s
  rescue NoMethodError
    # best effort: @image is left unassigned when the attribute lookup raises
  end
end
fetch_from_g750(url) click to toggle source

fill object properties from a 750g url

@param url [String] representing an url

# File lib/recipe_scraper.rb, line 134
# Fill the object's properties from a 750g.com recipe page.
#
# Sets @title, @preptime, @cooktime, @steps, @ingredients, plus @nb_of_persons
# when a number is found and @image when the lookup succeeds.
#
# @param url [String] address of the recipe page
# @raise [ArgumentError] when +url+ is not recognised by g750_host?
def fetch_from_g750(url)
  unless g750_host?(url)
    raise ArgumentError, "Instantiation cancelled (ulr not from #{G750_HOST})."
  end

  # Go through URI#open (open-uri) instead of Kernel#open: Kernel#open stopped
  # accepting URLs in Ruby 3.0 and would also run shell pipes or read local
  # files. The block form closes the stream once it has been read.
  html = URI.parse(url).open(&:read)
  page = Nokogiri::HTML(html)
  @title = page.css('h1.c-article__title').text

  # get persons: first run of digits in the section title, left unset when absent
  persons = page.css('h2.u-title-section').text[/\d{1,5}/]
  @nb_of_persons = persons.to_i if persons

  # get times
  @preptime = sanitize(page.css('ul.c-recipe-summary > li.c-recipe-summary__rating[title="Temps de préparation"]').text).to_i
  @cooktime = sanitize(page.css('ul.c-recipe-summary > li.c-recipe-summary__rating[title="Temps de cuisson"]').text).to_i

  # One step per instruction paragraph. Splitting the joined text on the
  # character class /[( ),(<br>)]/ cut it at every space, comma, parenthesis
  # and at each "b" or "r", which shredded the steps into fragments.
  @steps = page.css('div[itemprop=recipeInstructions] p').map { |step_node| sanitize(step_node.text) }

  @ingredients = page.css('ul.c-recipe-ingredients__list li').map { |ing_node| sanitize(ing_node.text) }

  # get image
  begin
    @image = page.css('div.swiper-wrapper img.photo').attr('src').to_s
  rescue NoMethodError
    # best effort: @image is left unassigned when the attribute lookup raises
  end
end
fetch_from_marmiton(url) click to toggle source

fill object properties from a Marmiton url

@param url [String] representing an url

# File lib/recipe_scraper.rb, line 93
# Fill the object's properties from a Marmiton recipe page.
#
# Sets @title, @nb_of_persons, @preptime, @cooktime, @ingredients, @steps and
# @image (nil when the image lookup fails).
#
# @param url [String] address of the recipe page (mobile or desktop host)
# @raise [ArgumentError] when +url+ is not recognised by marmiton_host?
def fetch_from_marmiton(url)
  unless marmiton_host?(url)
    raise ArgumentError, "Instantiation cancelled (ulr not from #{MARMITON_HOST})."
  end

  # Swap the mobile host for the desktop one on a copy: the caller's string
  # may be frozen and must not be altered behind its back.
  desktop_url = url.gsub(MARMITON_HOST[:mobile], MARMITON_HOST[:desktop])

  # Go through URI#open (open-uri) instead of Kernel#open: Kernel#open stopped
  # accepting URLs in Ruby 3.0 and would also run shell pipes or read local
  # files. The block form closes the stream once it has been read.
  html = URI.parse(desktop_url).open(&:read)
  page = Nokogiri::HTML(html)
  @title = page.css('h1').text

  # get persons
  @nb_of_persons = page.css('div.recipe-infos__quantity > span.recipe-infos__quantity__value').text.to_i

  # get times
  @preptime = page.css('div.recipe-infos__timmings__preparation > span.recipe-infos__timmings__value').text.to_i
  @cooktime = page.css('div.recipe-infos__timmings__cooking > span.recipe-infos__timmings__value').text.to_i

  # get ingredients
  @ingredients = page.css('ul.recipe-ingredients__list li.recipe-ingredients__list__item').map do |ingredient_tag|
    sanitize(ingredient_tag.text)
  end

  # get steps
  @steps = page.css('ol.recipe-preparation__list').map { |step_tag| sanitize(step_tag.text) }

  # get image: best effort, nil when the lookup fails (the rescue branch used
  # to store the NoMethodError class itself in @image)
  @image = begin
    page.css('#af-diapo-desktop-0_img').attr('src').to_s
  rescue StandardError
    nil
  end
end
g750_host?(url) click to toggle source

test if url is from a valid 750g.com host

@param url [String] representing a URL

@return [Boolean] true if the URL contains the 750g.com desktop host

# File lib/recipe_scraper.rb, line 78
# Tell whether +url+ contains the desktop host registered in G750_HOST.
#
# @param url [String] address to inspect
# @return [Boolean] true when the host string occurs in +url+
def g750_host?(url)
  desktop_host = G750_HOST[:desktop]
  url.include?(desktop_host)
end
marmiton_host?(url) click to toggle source

test if url is from a valid marmiton.org host

@param url [String] representing a URL

@return [Boolean] true if the URL contains the marmiton.org desktop or mobile host

# File lib/recipe_scraper.rb, line 70
# Tell whether +url+ contains the desktop or the mobile host registered in
# MARMITON_HOST (desktop is tried first).
#
# @param url [String] address to inspect
# @return [Boolean] true when one of the two host strings occurs in +url+
def marmiton_host?(url)
  %i[desktop mobile].any? { |variant| url.include?(MARMITON_HOST[variant]) }
end
sanitize(text) click to toggle source

Remove `\r\n` and unwanted spaces.

@param text [String] a text to sanitize

@return [String] the text, correctly formatted

# File lib/recipe_scraper.rb, line 59
# Strip line breaks, tabs, stray spaces and site-specific labels from +text+.
#
# Each entry of the list is removed in turn, in the order given. The argument
# itself is never modified (so frozen strings are accepted): every removal
# works on a copy.
#
# @param text [String] raw text taken from a page
# @return [String] a new string with the unwanted fragments removed
def sanitize(text)
  unwanted = [
    '  ', '\r\n', "\r\n", "\n", "\r", "\t", / ^/, / $+/, /^  /, /^ /,
    /Etape [0-9]/, 'Icone horloge', 'Icone casserole '
  ]
  unwanted.reduce(text) { |cleaned, fragment| cleaned.gsub(fragment, '') }
end