class BudgetBytesCli::Scraper

Public Class Methods

create_categories() click to toggle source
# File lib/budget_bytes_cli/scraper.rb, line 12
 # Creates a BudgetBytesCli::Category for each element returned by
 # locate_categories, using the href of the element's link and the text of
 # the link's first child node.
 #
 # The created objects are not collected here; the return value is simply
 # that of the `each` call.
 def self.create_categories
     locate_categories.each do |category_node|
         link = category_node.css("a")
         BudgetBytesCli::Category.new(
             link.attribute("href").value,
             link.children[0].text
         )
     end
 end
create_page_url(num, url) click to toggle source
# File lib/budget_bytes_cli/scraper.rb, line 45
# Returns the URL of page +num+ of a paginated listing rooted at +url+.
#
# Page 1 is +url+ itself; any other page is +url+ with "page/<num>/"
# appended (so +url+ is expected to already end with a slash).
def self.create_page_url(num, url)
    return url if num == 1

    url + "page/#{num}/"
end
get_recipes(url) click to toggle source

Functions for scraping the recipes within a category.

# File lib/budget_bytes_cli/scraper.rb, line 21
# Gathers the recipes from every page of the listing at +url+.
#
# The first page is fetched to read its ".page-numbers" elements; the
# largest number found there is used as the page count (1 when there are
# none). Each page URL is then built with create_page_url and scraped with
# get_recipes_from, and the per-page results are flattened into one array.
def self.get_recipes(url)
    listing = Nokogiri::HTML(open(url))

    page_labels = listing.css(".page-numbers")
    # Non-numeric labels become 0 via to_i, so they never win the max.
    last_page = page_labels.empty? ? 1 : page_labels.map { |label| label.text.to_i }.max

    per_page = (1..last_page).map do |page_number|
        get_recipes_from(create_page_url(page_number, url))
    end
    per_page.flatten
end
get_recipes_from(page_url) click to toggle source
# File lib/budget_bytes_cli/scraper.rb, line 34
# Fetches +page_url+ and returns one BudgetBytesCli::Recipe per link found
# under ".archive-post", built from the link's href and title attributes.
def self.get_recipes_from(page_url)
    document = Nokogiri::HTML(open(page_url))

    document.css(".archive-post a").map do |anchor|
        title = anchor.attribute("title").value
        href = anchor.attribute("href").value
        BudgetBytesCli::Recipe.new(href, title)
    end
end
locate_categories() click to toggle source
# File lib/budget_bytes_cli/scraper.rb, line 8
# Returns the elements matching ".cat-item" on the page loaded by open_page.
def self.locate_categories
    recipes_page = open_page
    recipes_page.css(".cat-item")
end
open_page() click to toggle source

Functions for getting the recipe categories.

# File lib/budget_bytes_cli/scraper.rb, line 4
# Fetches the Budget Bytes recipes index and returns it parsed by Nokogiri.
#
# NOTE(review): calling `open` with a URL depends on open-uri's Kernel#open
# hook, which was deprecated in Ruby 2.7 and removed in 3.0 (URI.open is the
# replacement) - confirm which Ruby versions this file must support.
def self.open_page
    recipes_index = open("https://www.budgetbytes.com/recipes/")
    Nokogiri::HTML(recipes_index)
end
scrape_recipe(url) click to toggle source

Scrapes the ingredients and instructions from a recipe page.

# File lib/budget_bytes_cli/scraper.rb, line 54
# Scrapes the ingredients and instructions from the recipe page at +url+.
#
# Two page layouts are handled in the same pass, and whatever each one
# yields is appended to the result:
# * newer layout: ".wprm-recipe-ingredient-*" spans and
#   ".wprm-recipe-instruction-text" elements;
# * older layout: an ingredients table (<tr> rows) and paragraphs whose
#   first word is "STEP".
#
# Returns a two-element array [ingredients, instructions], each a single
# string with one entry per line.
#
# NOTE(review): calling `open` with a URL depends on open-uri's Kernel#open
# hook, removed in Ruby 3.0 (URI.open is the replacement) - confirm the
# supported Ruby versions.
def self.scrape_recipe(url)
    page = Nokogiri::HTML(open(url))

    amounts = page.css('.wprm-recipe-ingredient-amount').map(&:text)
    units = page.css('.wprm-recipe-ingredient-unit').map(&:text)
    names = page.css('.wprm-recipe-ingredient-name').map(&:text)

    # Newer layout: amount/unit/name are paired by position. Missing or empty
    # parts are dropped before joining so they do not leave doubled spaces.
    # NOTE(review): positional pairing presumably misaligns if some
    # ingredient lacks a unit element - verify against real pages.
    ingredient_array = amounts.zip(units, names).map do |parts|
        parts.compact.reject(&:empty?).join(' ').strip
    end

    # Older layout: every <tr> except the first and last
    # (header and total cost) is an ingredient row.
    rows = page.css("tr")
    rows.shift
    rows.pop

    rows.each do |row|
        cells = row.text.split("\n")

        # Drop the leading blank item and the trailing cost item.
        cells.shift
        cells.pop

        ingredient_array << cells.join(" ")
    end

    recipe_steps = []

    # Older layout: instructions are paragraphs starting with "STEP".
    page.css("p").each do |paragraph|
        words = paragraph.text.split(" ")
        next unless words.first == "STEP"

        # Skip "STEP" and the following token (presumably the step number).
        # drop(2) yields [] for a bare "STEP" paragraph, where slicing past
        # the end of the array would return nil and crash on join.
        recipe_steps << words.drop(2).join(" ")
    end

    # Newer layout: instructions live in their own class.
    page.css(".wprm-recipe-instruction-text").each { |node| recipe_steps << node.text }

    [ingredient_array.join("\n"), recipe_steps.join("\n")]
end