module InstaScrape
Constants
- VERSION
Public Class Methods
hashtag(hashtag, include_meta_data: false)
click to toggle source
get the posts for a hashtag
# File lib/insta_scrape.rb, line 7
#
# Scrape the posts listed on a hashtag's explore page.
#
# Opens the tag page, resets the collected-post list, then delegates the
# actual scraping to scrape_posts.
#
# @param hashtag [String] tag name; interpolated into the URL unescaped
# @param include_meta_data [Boolean] forwarded unchanged to scrape_posts
# @return whatever scrape_posts returns
def self.hashtag(hashtag, include_meta_data: false)
  visit("https://www.instagram.com/explore/tags/#{hashtag}/")
  @posts = []
  scrape_posts(include_meta_data: include_meta_data)
end
long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false)
click to toggle source
long scrape a hashtag
# File lib/insta_scrape.rb, line 14
#
# Time-boxed scrape of a hashtag's explore page.
#
# Opens the tag page, resets the collected-post list, then delegates to
# long_scrape_posts, which keeps scrolling for roughly scrape_length seconds.
#
# @param hashtag [String] tag name; interpolated into the URL unescaped
# @param scrape_length [Numeric] scroll budget, passed through as seconds
# @param include_meta_data [Boolean] forwarded unchanged to long_scrape_posts
# @return whatever long_scrape_posts returns
def self.long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false)
  visit("https://www.instagram.com/explore/tags/#{hashtag}/")
  @posts = []
  long_scrape_posts(scrape_length, include_meta_data: include_meta_data)
end
long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false)
click to toggle source
long scrape a user's info and posts
# File lib/insta_scrape.rb, line 27
#
# Collect a user's profile fields and a time-boxed scrape of their posts.
#
# scrape_user_info fills the profile instance variables,
# long_scrape_user_posts_method fills @posts, and both are bundled into an
# InstagramUserWithPosts that is also remembered in @user.
#
# @param username [String] account name
# @param scrape_length [Numeric] scroll budget, passed through as seconds
# @param include_meta_data [Boolean] forwarded to the post scraper
# @return [InstaScrape::InstagramUserWithPosts]
def self.long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false)
  scrape_user_info(username)
  long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)

  profile_fields = [username, @image, @post_count, @follower_count, @following_count, @description, @posts]
  @user = InstaScrape::InstagramUserWithPosts.new(*profile_fields)
end
long_scrape_user_posts(username, scrape_length, include_meta_data: false)
click to toggle source
long scrape a user's posts
# File lib/insta_scrape.rb, line 21
#
# Time-boxed scrape of a single user's posts.
#
# Resets the collected-post list and delegates to
# long_scrape_user_posts_method.
#
# @param username [String] account name
# @param scrape_length [Numeric] scroll budget, passed through as seconds
# @param include_meta_data [Boolean] forwarded to the post scraper
# @return whatever long_scrape_user_posts_method returns
def self.long_scrape_user_posts(username, scrape_length, include_meta_data: false)
  @posts = []
  long_scrape_user_posts_method(
    username,
    scrape_length,
    include_meta_data: include_meta_data
  )
end
user_description(username)
click to toggle source
get user description
# File lib/insta_scrape.rb, line 70
#
# Fetch a user's profile description.
#
# Runs a full scrape_user_info pass (which visits the profile page) and
# returns the description it stored.
#
# @param username [String] account name
# @return the value scrape_user_info left in @description
def self.user_description(username)
  scrape_user_info(username)
  @description
end
user_follower_count(username)
click to toggle source
get user follower count
# File lib/insta_scrape.rb, line 52
#
# Fetch a user's follower count.
#
# Runs a full scrape_user_info pass (which visits the profile page) and
# returns the follower count it stored.
#
# @param username [String] account name
# @return the value scrape_user_info left in @follower_count
def self.user_follower_count(username)
  scrape_user_info(username)
  @follower_count
end
user_following_count(username)
click to toggle source
get user following count
# File lib/insta_scrape.rb, line 58
#
# Fetch how many accounts a user follows.
#
# Runs a full scrape_user_info pass (which visits the profile page) and
# returns the following count it stored.
#
# @param username [String] account name
# @return the value scrape_user_info left in @following_count
def self.user_following_count(username)
  scrape_user_info(username)
  @following_count
end
user_info(username)
click to toggle source
get user info
# File lib/insta_scrape.rb, line 34
#
# Build an InstagramUser from a user's profile page.
#
# scrape_user_info fills the profile instance variables; they are then
# wrapped in an InstagramUser, which is also remembered in @user.
#
# @param username [String] account name
# @return [InstaScrape::InstagramUser]
def self.user_info(username)
  scrape_user_info(username)

  profile_fields = [username, @image, @post_count, @follower_count, @following_count, @description]
  @user = InstaScrape::InstagramUser.new(*profile_fields)
end
user_info_and_posts(username, include_meta_data: false)
click to toggle source
get user info and posts
# File lib/insta_scrape.rb, line 40
#
# Collect a user's profile fields together with their posts.
#
# scrape_user_info fills the profile instance variables, scrape_user_posts
# fills @posts, and both are bundled into an InstagramUserWithPosts that is
# also remembered in @user.
#
# @param username [String] account name
# @param include_meta_data [Boolean] forwarded to scrape_user_posts
# @return [InstaScrape::InstagramUserWithPosts]
def self.user_info_and_posts(username, include_meta_data: false)
  scrape_user_info(username)
  # Forward the caller's flag; a hard-coded `false` here would silently
  # ignore include_meta_data: true.
  scrape_user_posts(username, include_meta_data: include_meta_data)
  @user = InstaScrape::InstagramUserWithPosts.new(
    username, @image, @post_count, @follower_count, @following_count, @description, @posts
  )
end
user_post_count(username)
click to toggle source
get user post count
# File lib/insta_scrape.rb, line 64
#
# Fetch a user's post count.
#
# Runs a full scrape_user_info pass (which visits the profile page) and
# returns the post count it stored.
#
# @param username [String] account name
# @return the value scrape_user_info left in @post_count
def self.user_post_count(username)
  scrape_user_info(username)
  @post_count
end
user_posts(username, include_meta_data: false)
click to toggle source
get user posts only
# File lib/insta_scrape.rb, line 47
#
# Scrape a user's posts without the profile fields.
#
# Thin public wrapper around scrape_user_posts.
#
# @param username [String] account name
# @param include_meta_data [Boolean] forwarded unchanged
# @return whatever scrape_user_posts returns
def self.user_posts(username, include_meta_data: false)
  scrape_user_posts(
    username,
    include_meta_data: include_meta_data
  )
end
Private Class Methods
get_span_value(element)
click to toggle source
split away span tags from user info numbers
# File lib/insta_scrape.rb, line 204
#
# Extract the text between the first `">` and the next `</span>` of an HTML
# fragment (used to pull the number out of the profile counters).
#
# The pattern is a plain regex literal rather than being rebuilt from
# interpolated strings on every call. The /m flag lets the captured text
# span line breaks.
#
# @param element [String, nil] HTML fragment, e.g. an innerHTML value
# @return [String, nil] the captured text, or nil when element is nil or
#   contains no `">…</span>` sequence
def self.get_span_value(element)
  return if element.nil?

  element[/">(.*?)<\/span>/m, 1]
end
iterate_through_posts(include_meta_data:)
click to toggle source
post iteration method
# File lib/insta_scrape.rb, line 78
#
# Turn the post thumbnails on the current page into InstagramPost objects
# and append them to @posts.
#
# With include_meta_data each post's own page is visited to read its date,
# author, full-size image and like count; otherwise only the thumbnail's
# link, image and alt text are recorded.
#
# @param include_meta_data [Boolean] whether to visit every post page
# @return [Array] @posts, after the new entries were appended
def self.iterate_through_posts(include_meta_data:)
  # Copy plain values out of the thumbnails up front: the loop below may
  # navigate away from this page via visit.
  thumbnails = all("article div div div a").map do |anchor|
    img = anchor.find("img")
    { link: anchor["href"], image: img["src"], text: img["alt"] }
  end

  thumbnails.each do |thumb|
    details =
      if include_meta_data
        visit(thumb[:link])
        {
          date: page.find('time')["datetime"],
          text: thumb[:text],
          username: page.first("article header div a")["title"],
          hi_res_image: page.all("img").last["src"],
          likes: page.find("div section span span")["innerHTML"]
        }
      else
        # Use the caption captured from the thumbnail; a bare `text` is not a
        # local variable in this method.
        { text: thumb[:text] }
      end

    @posts << InstaScrape::InstagramPost.new(thumb[:link], thumb[:image], details)
  end

  # log
  puts "POST COUNT: #{@posts.length}"
  # No explicit receiver, so this works whether or not log_posts is private.
  log_posts

  @posts
end
log_posts()
click to toggle source
post logger
# File lib/insta_scrape.rb, line 187
#
# Print one randomly chosen post from @posts to stdout as a sanity check.
#
# The date, username, hi-res image and likes lines are printed only when
# the sampled post has a date. Prints nothing when there are no posts.
#
# @return [nil]
def self.log_posts
  # Array#sample returns nil for an empty array, and Array(nil) is [], so an
  # empty or unset @posts ends up here as nil instead of raising below.
  post = Array(@posts).sample
  return if post.nil?

  puts "* Printing Sample Post *"
  puts "\n"
  puts "Image: #{post.image}\n"
  puts "Link: #{post.link}\n"
  puts "Text: #{post.text}\n"
  if post.date
    puts "Date: #{post.date}\n"
    puts "Username: #{post.username}\n"
    puts "Hi Res Image: #{post.hi_res_image}\n"
    puts "Likes: #{post.likes}\n"
  end
  puts "\n"
end
long_scrape_posts(scrape_length_in_seconds, include_meta_data:)
click to toggle source
# File lib/insta_scrape.rb, line 149
#
# Time-boxed scrape of the currently open listing page.
#
# Clicks "Load more", then scrolls the page up and down in passes of three
# 0.1 s sleeps each (hence the division by 0.3) before handing off to
# iterate_through_posts. When the page has no "Load more" link, whatever is
# already rendered is scraped immediately.
#
# Only the "Load more" lookup is rescued. An ElementNotFound raised while
# iterating posts propagates to the caller; rescuing it here would re-run
# the iteration on whatever page is current and append duplicates to @posts.
#
# @param scrape_length_in_seconds [Numeric] approximate scrolling budget
# @param include_meta_data [Boolean] forwarded to iterate_through_posts
# @return whatever iterate_through_posts returns
def self.long_scrape_posts(scrape_length_in_seconds, include_meta_data:)
  begin
    page.find('a', :text => "Load more", exact: true).click
  rescue Capybara::ElementNotFound
    return iterate_through_posts(include_meta_data: include_meta_data)
  end

  # Same pass count as `while iteration < budget / 0.3` counting from zero.
  passes = (scrape_length_in_seconds / 0.3).ceil
  @loader = "."
  passes.times do
    puts "InstaScrape is working. Please wait.#{@loader}"
    sleep 0.1
    page.execute_script "window.scrollTo(0,document.body.scrollHeight);"
    sleep 0.1
    page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
    sleep 0.1
    @loader << "."
    system "clear"
  end

  iterate_through_posts(include_meta_data: include_meta_data)
end
long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:)
click to toggle source
# File lib/insta_scrape.rb, line 174
#
# Shared worker for the time-boxed user-post scrapes.
#
# Resets the collected-post list, opens the user's profile page and
# delegates to long_scrape_posts.
#
# @param username [String] account name; interpolated into the URL unescaped
# @param scrape_length_in_seconds [Numeric] approximate scrolling budget
# @param include_meta_data [Boolean] forwarded unchanged
# @return whatever long_scrape_posts returns
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:)
  @posts = []
  visit("https://www.instagram.com/#{username}/")
  long_scrape_posts(
    scrape_length_in_seconds,
    include_meta_data: include_meta_data
  )
end
scrape_posts(include_meta_data:)
click to toggle source
scrape posts
# File lib/insta_scrape.rb, line 129
#
# Scrape the currently open listing page with a short, fixed scroll.
#
# Clicks "Load more", scrolls the page up and down ten times (0.1 s pause
# after each scroll), then hands off to iterate_through_posts. When the
# page has no "Load more" link, whatever is already rendered is scraped
# immediately.
#
# Only the "Load more" lookup is rescued. An ElementNotFound raised while
# iterating posts propagates to the caller; rescuing it here would re-run
# the iteration on whatever page is current and append duplicates to @posts.
#
# @param include_meta_data [Boolean] forwarded to iterate_through_posts
# @return whatever iterate_through_posts returns
def self.scrape_posts(include_meta_data:)
  begin
    page.find('a', :text => "Load more", exact: true).click
  rescue Capybara::ElementNotFound
    return iterate_through_posts(include_meta_data: include_meta_data)
  end

  10.times do
    page.execute_script "window.scrollTo(0,document.body.scrollHeight);"
    sleep 0.1
    page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
    sleep 0.1
  end

  iterate_through_posts(include_meta_data: include_meta_data)
end
scrape_user_info(username)
click to toggle source
user info scraper method
# File lib/insta_scrape.rb, line 113
#
# Visit a user's profile page and cache its header fields in instance
# variables: @image, @post_count, @follower_count, @following_count and
# @description.
#
# The three counters are read from the innerHTML of the header <span>
# matched by the text "posts", "followers" or "following", then reduced to
# their value with get_span_value. The description is the text content of
# the first node matched by the XPath ".//.." relative to the header's <h2>
# (presumably the heading's parent element — confirm against the live markup).
#
# @param username [String] account name; interpolated into the URL unescaped
def self.scrape_user_info(username)
  visit("https://www.instagram.com/#{username}/")
  @image = page.find('article header div img')["src"]

  within("header") do
    # Raw innerHTML of the counter span labelled with the given text.
    counter_html = ->(label) { page.find('span', :text => label, exact: true)['innerHTML'] }

    @post_count = get_span_value(counter_html.call("posts"))
    @follower_count = get_span_value(counter_html.call("followers"))
    @following_count = get_span_value(counter_html.call("following"))

    bio_html = page.find('h2').first(:xpath, ".//..")['innerHTML']
    @description = Nokogiri::HTML(bio_html).text
  end
end
scrape_user_posts(username, include_meta_data:)
click to toggle source
# File lib/insta_scrape.rb, line 180
#
# Shared worker for the short user-post scrapes.
#
# Resets the collected-post list, opens the user's profile page and
# delegates to scrape_posts.
#
# @param username [String] account name; interpolated into the URL unescaped
# @param include_meta_data [Boolean] forwarded unchanged
# @return whatever scrape_posts returns
def self.scrape_user_posts(username, include_meta_data:)
  @posts = []
  visit("https://www.instagram.com/#{username}/")
  scrape_posts(
    include_meta_data: include_meta_data
  )
end