module InstaScrape

Constants

VERSION

Public Class Methods

hashtag(hashtag, include_meta_data: false) click to toggle source

get a hashtag

# File lib/insta_scrape.rb, line 7
# Scrape the posts listed on a hashtag's explore page.
#
# hashtag           - tag name interpolated into the explore URL.
# include_meta_data - forwarded unchanged to scrape_posts.
#
# Resets @posts to an empty array after visiting the page and returns
# whatever scrape_posts returns.
def self.hashtag(hashtag, include_meta_data: false)
  tag_url = "https://www.instagram.com/explore/tags/#{hashtag}/"
  visit(tag_url)
  @posts = []
  scrape_posts(include_meta_data: include_meta_data)
end
long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false) click to toggle source

long scrape a hashtag

# File lib/insta_scrape.rb, line 14
# Scrape a hashtag's explore page using the timed scrolling scraper.
#
# hashtag           - tag name interpolated into the explore URL.
# scrape_length     - passed through to long_scrape_posts as its duration.
# include_meta_data - forwarded unchanged to long_scrape_posts.
#
# Resets @posts to an empty array after visiting the page and returns
# whatever long_scrape_posts returns.
def self.long_scrape_hashtag(hashtag, scrape_length, include_meta_data: false)
  tag_url = "https://www.instagram.com/explore/tags/#{hashtag}/"
  visit(tag_url)
  @posts = []
  long_scrape_posts(scrape_length, include_meta_data: include_meta_data)
end
long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false) click to toggle source

long scrape user info and posts

# File lib/insta_scrape.rb, line 27
# Scrape a user's profile fields, then long-scrape their posts, and wrap
# everything in an InstagramUserWithPosts.
#
# username          - profile name handed to both scraping helpers.
# scrape_length     - passed through to long_scrape_user_posts_method.
# include_meta_data - forwarded unchanged to long_scrape_user_posts_method.
#
# Stores the built object in @user and returns it.
def self.long_scrape_user_info_and_posts(username, scrape_length, include_meta_data: false)
  scrape_user_info(username)
  long_scrape_user_posts_method(username, scrape_length, include_meta_data: include_meta_data)
  # Instance variables below are read after both helpers ran; the helpers
  # are what populate them.
  profile_fields = [@image, @post_count, @follower_count, @following_count, @description, @posts]
  @user = InstaScrape::InstagramUserWithPosts.new(username, *profile_fields)
end
long_scrape_user_posts(username, scrape_length, include_meta_data: false) click to toggle source

long scrape a user's posts

# File lib/insta_scrape.rb, line 21
# Long-scrape only the posts of a user.
#
# username          - profile name handed to long_scrape_user_posts_method.
# scrape_length     - passed through as the scrape duration.
# include_meta_data - forwarded unchanged.
#
# Resets @posts first and returns whatever long_scrape_user_posts_method
# returns.
def self.long_scrape_user_posts(username, scrape_length, include_meta_data: false)
  @posts = []
  options = { include_meta_data: include_meta_data }
  long_scrape_user_posts_method(username, scrape_length, **options)
end
user_description(username) click to toggle source

get user description

# File lib/insta_scrape.rb, line 70
# Scrape the profile of +username+ and return the description that
# scrape_user_info left in @description.
def self.user_description(username)
  scrape_user_info(username)
  @description
end
user_follower_count(username) click to toggle source

get user follower count

# File lib/insta_scrape.rb, line 52
# Scrape the profile of +username+ and return the value that
# scrape_user_info left in @follower_count.
def self.user_follower_count(username)
  scrape_user_info(username)
  @follower_count
end
user_following_count(username) click to toggle source

get user following count

# File lib/insta_scrape.rb, line 58
# Scrape the profile of +username+ and return the value that
# scrape_user_info left in @following_count.
def self.user_following_count(username)
  scrape_user_info(username)
  @following_count
end
user_info(username) click to toggle source

get user info

# File lib/insta_scrape.rb, line 34
# Scrape the profile of +username+ and wrap the collected fields in an
# InstagramUser.
#
# Stores the built object in @user and returns it.
def self.user_info(username)
  scrape_user_info(username)
  profile_fields = [@image, @post_count, @follower_count, @following_count, @description]
  @user = InstaScrape::InstagramUser.new(username, *profile_fields)
end
user_info_and_posts(username, include_meta_data: false) click to toggle source

get user info and posts

# File lib/insta_scrape.rb, line 40
# Scrape a user's profile fields and their posts, and wrap everything in an
# InstagramUserWithPosts.
#
# username          - profile name handed to both scraping helpers.
# include_meta_data - forwarded to scrape_user_posts. It used to be
#                     hard-coded to false there, so callers asking for
#                     metadata never got it.
#
# Stores the built object in @user and returns it.
def self.user_info_and_posts(username, include_meta_data: false)
  scrape_user_info(username)
  scrape_user_posts(username, include_meta_data: include_meta_data)
  @user = InstaScrape::InstagramUserWithPosts.new(username, @image, @post_count, @follower_count, @following_count, @description, @posts)
end
user_post_count(username) click to toggle source

get user post count

# File lib/insta_scrape.rb, line 64
# Scrape the profile of +username+ and return the value that
# scrape_user_info left in @post_count.
def self.user_post_count(username)
  scrape_user_info(username)
  @post_count
end
user_posts(username, include_meta_data: false) click to toggle source

get user posts only

# File lib/insta_scrape.rb, line 47
# Scrape only the posts of +username+.
#
# Thin public wrapper: hands both arguments to scrape_user_posts and returns
# its result.
def self.user_posts(username, include_meta_data: false)
  options = { include_meta_data: include_meta_data }
  scrape_user_posts(username, **options)
end

Private Class Methods

get_span_value(element) click to toggle source

split away span tags from user info numbers

# File lib/insta_scrape.rb, line 204
# Pull the text that sits between the first `">` and the following
# `</span>` in an HTML fragment.
#
# element - String of markup.
#
# Returns the captured text (it may span lines, the pattern is multiline),
# or nil when the fragment does not contain that shape.
def self.get_span_value(element)
  span_body = %r{">(.*?)</span>}m
  element[span_body, 1]
end
iterate_through_posts(include_meta_data:) click to toggle source

post iteration method

# File lib/insta_scrape.rb, line 78
# Collect the post thumbnails on the current page and append one
# InstagramPost per thumbnail to @posts.
#
# include_meta_data - when truthy, each post's own page is visited to read
#                     date, username, hi-res image and likes; otherwise only
#                     the thumbnail's alt text is attached.
#
# Prints the running post count, logs a sample post when there is one, and
# returns @posts.
def self.iterate_through_posts(include_meta_data:)
  # Snapshot link/image/alt first: the metadata branch navigates away, so
  # the thumbnail nodes cannot be read lazily inside the second loop.
  thumbnails = all("article div div div a").map do |anchor|
    img = anchor.find("img")
    { link: anchor["href"], image: img["src"], text: img["alt"] }
  end

  thumbnails.each do |post|
    # The plain branch previously passed a bare `text`, which is not a local
    # here; the caption captured above lives in post[:text].
    details = { text: post[:text] }
    if include_meta_data
      visit(post[:link])
      details = {
        date: page.find('time')["datetime"],
        text: post[:text],
        username: page.first("article header div a")["title"],
        hi_res_image: page.all("img").last["src"],
        likes: page.find("div section span span")["innerHTML"]
      }
    end
    @posts << InstaScrape::InstagramPost.new(post[:link], post[:image], details)
  end

  puts "POST COUNT: #{@posts.length}"
  # Sampling an empty list yields nil, so skip the sample log in that case.
  log_posts unless @posts.empty?
  @posts
end
log_posts() click to toggle source

post logger

# File lib/insta_scrape.rb, line 187
# Print one randomly chosen post from @posts as a sanity check.
#
# Image, link and text are always printed; date, username, hi-res image and
# likes are printed only when the post has a date. Prints nothing when
# @posts is empty, because sampling an empty array yields nil and there is
# no post to describe.
def self.log_posts
  post = @posts.sample
  return if post.nil?

  puts "* Printing Sample Post *"
  puts "\n"
  puts "Image: #{post.image}\n"
  puts "Link: #{post.link}\n"
  puts "Text: #{post.text}\n"
  if post.date
    puts "Date: #{post.date}\n"
    puts "Username: #{post.username}\n"
    puts "Hi Res Image: #{post.hi_res_image}\n"
    puts "Likes: #{post.likes}\n"
  end
  puts "\n"
end
long_scrape_posts(scrape_length_in_seconds, include_meta_data:) click to toggle source
# File lib/insta_scrape.rb, line 149
# Click "Load more", keep scrolling the page for roughly the requested
# duration, then collect the posts.
#
# scrape_length_in_seconds - numeric duration; each scroll cycle sleeps for
#                            0.3s in total, so the cycle count is this value
#                            divided by 0.3, rounded up.
# include_meta_data        - forwarded unchanged to iterate_through_posts.
#
# Returns whatever iterate_through_posts returns.
def self.long_scrape_posts(scrape_length_in_seconds, include_meta_data:)
  begin
    page.find('a', :text => "Load more", exact: true).click
  rescue Capybara::ElementNotFound
    # No "Load more" link: scrape what is on the page as-is. The rescue only
    # covers the click. It used to wrap the iteration as well, so a lookup
    # failure in the middle of iterating re-ran it on top of a partly filled
    # @posts.
    return iterate_through_posts(include_meta_data: include_meta_data)
  end

  @loader = "."
  (scrape_length_in_seconds / 0.3).ceil.times do
    puts "InstaScrape is working. Please wait.#{@loader}"
    sleep 0.1
    page.execute_script "window.scrollTo(0,document.body.scrollHeight);"
    sleep 0.1
    # Scrolling back up a little before the next jump to the bottom.
    # NOTE(review): presumably this re-triggers lazy loading; confirm.
    page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
    sleep 0.1
    @loader += "."
    system "clear"
  end
  iterate_through_posts(include_meta_data: include_meta_data)
end
long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:) click to toggle source
# File lib/insta_scrape.rb, line 174
# Reset @posts, open the profile page of +username+ and run the timed
# scrolling scraper on it.
#
# scrape_length_in_seconds and include_meta_data are passed straight to
# long_scrape_posts, whose result is returned.
def self.long_scrape_user_posts_method(username, scrape_length_in_seconds, include_meta_data:)
  @posts = []
  profile_url = "https://www.instagram.com/#{username}/"
  visit(profile_url)
  long_scrape_posts(scrape_length_in_seconds, include_meta_data: include_meta_data)
end
scrape_posts(include_meta_data:) click to toggle source

scrape posts

# File lib/insta_scrape.rb, line 129
# Click "Load more", scroll the page ten times so more posts get loaded,
# then collect the posts.
#
# include_meta_data - forwarded unchanged to iterate_through_posts.
#
# Returns whatever iterate_through_posts returns.
def self.scrape_posts(include_meta_data:)
  begin
    page.find('a', :text => "Load more", exact: true).click
  rescue Capybara::ElementNotFound
    # No "Load more" link: scrape what is on the page as-is. The rescue only
    # covers the click. It used to wrap the iteration as well, so a lookup
    # failure in the middle of iterating re-ran it on top of a partly filled
    # @posts.
    return iterate_through_posts(include_meta_data: include_meta_data)
  end

  10.times do
    page.execute_script "window.scrollTo(0,document.body.scrollHeight);"
    sleep 0.1
    page.execute_script "window.scrollTo(0,(document.body.scrollHeight - 5000));"
    sleep 0.1
  end
  iterate_through_posts(include_meta_data: include_meta_data)
end
scrape_user_info(username) click to toggle source

user info scraper method

# File lib/insta_scrape.rb, line 113
# Open the profile page of +username+ and store its header fields in
# instance variables: @image, @post_count, @follower_count,
# @following_count and @description.
#
# Returns the value of the within("header") block, whose last expression is
# the @description assignment.
def self.scrape_user_info(username)
  visit "https://www.instagram.com/#{username}/"
  avatar = page.find('article header div img')
  @image = avatar["src"]

  within("header") do
    # Each counter is a span matched by its label text; the number is cut out
    # of that span's inner HTML by get_span_value.
    counter = lambda do |label|
      markup = page.find('span', :text => label, exact: true)['innerHTML']
      get_span_value(markup)
    end
    @post_count = counter.call("posts")
    @follower_count = counter.call("followers")
    @following_count = counter.call("following")

    # The description is taken from the h2's parent node (xpath ".//.."),
    # reduced to plain text by Nokogiri.
    bio_container = page.find('h2').first(:xpath, ".//..")
    @description = Nokogiri::HTML(bio_container['innerHTML']).text
  end
end
scrape_user_posts(username, include_meta_data:) click to toggle source
# File lib/insta_scrape.rb, line 180
# Reset @posts, open the profile page of +username+ and run the standard
# scraper on it.
#
# include_meta_data is passed straight to scrape_posts, whose result is
# returned.
def self.scrape_user_posts(username, include_meta_data:)
  @posts = []
  profile_url = "https://www.instagram.com/#{username}/"
  visit(profile_url)
  scrape_posts(include_meta_data: include_meta_data)
end