class InstagramCrawler::Parser::Json

Attributes

page_info[R]
user_id[R]

Public Class Methods

new(page_info, user_id)
# File lib/instagram_crawler/parser/json.rb, line 6
# Stores the pagination state and the target account for later requests.
#
# @param page_info [Hash] read by #parsing via the "end_cursor" and
#   "has_next_page" keys
# @param user_id the account id interpolated into the query URL by #next_url
def initialize(page_info, user_id)
  @page_info, @user_id = page_info, user_id
end

Public Instance Methods

parsing()
# File lib/instagram_crawler/parser/json.rb, line 11
# Fetches timeline pages one after another and hands each page's edges to
# #loop_edges. At least one page is always requested; further pages are
# requested for as long as the most recently received page_info reports
# "has_next_page".
#
# Side effect: @page_info is replaced with the page_info of every response.
#
# @return [nil]
def parsing
  more_pages = true

  while more_pages
    # The last two characters of the cursor are dropped here; #next_url
    # appends "%3D%3D" in their place.
    # NOTE(review): this assumes the cursor always ends in "==" - confirm.
    cursor   = page_info["end_cursor"][0..-3]
    payload  = JSON.parse(get_json(next_url(cursor, user_id)))
    timeline = payload["data"]["user"]["edge_owner_to_timeline_media"]

    @page_info = timeline["page_info"]
    loop_edges(timeline["edges"])

    more_pages = page_info["has_next_page"]
  end
end

Private Instance Methods

get_json(url)
# File lib/instagram_crawler/parser/json.rb, line 50
# Performs a GET request for +url+, sending the value of ENV["sessionid"]
# as the "sessionid" cookie. When Config.proxyname is set, the request goes
# through that proxy on Config.port.
#
# @param url the address to request
# @return [String] the response converted with #to_s
# @raise [Errors::HttpError] when the response code is not 200; the message
#   is "<code> <reason>"
def get_json(url)
  client = HTTP.cookies(sessionid: ENV["sessionid"])

  response =
    if Config.proxyname
      client.via(Config.proxyname, Config.port).get(url)
    else
      client.get(url)
    end

  unless response.code == 200
    raise Errors::HttpError, "#{response.code} #{response.reason}"
  end

  response.to_s
end
loop_edges(edges)
# File lib/instagram_crawler/parser/json.rb, line 26
# Walks the given edges and processes each edge's "node" according to its
# kind: video, multi-item post ("edge_sidecar_to_children" present), or
# single photo.
#
# Nodes whose "taken_at_timestamp" is greater than Config.parse_before_date
# are skipped when Config.before_date is set. Every remaining timestamp is
# passed to check_after_time before the node is handled.
# NOTE(review): check_after_time, parse_to_date, output and parse_post are
# defined outside this view - their exact effects should be confirmed there.
#
# @param edges [Array<Hash>] entries each holding a "node" hash
# @return the +edges+ collection (result of #each)
def loop_edges(edges)
  edges.each do |edge|
    node     = edge["node"]
    taken_at = node["taken_at_timestamp"]

    next if Config.before_date && Config.parse_before_date < taken_at

    check_after_time(taken_at)
    time    = parse_to_date(taken_at)
    sidecar = node["edge_sidecar_to_children"]

    if node["is_video"]
      Logger.info "========VIDEO========".light_yellow
      video_url = node["video_url"]
      output(time, video_url)
      File.download(video_url, 'video', time)
    elsif sidecar.nil?
      Logger.info "========PHOTO========".light_green
      photo_url = node["display_url"]
      output(time, photo_url)
      File.download(photo_url, 'photo', time)
    else
      Logger.info "========POST========".light_magenta
      parse_post(sidecar["edges"], time)
    end
  end
end
next_url(end_cursor, user_id)
# File lib/instagram_crawler/parser/json.rb, line 58
# Builds the GraphQL query URL for the page that follows +end_cursor+.
#
# The percent-encoded "variables" parameter decodes to
#   {"id":"<user_id>","first":12,"after":"<end_cursor>=="}
# i.e. "==" is always appended to the cursor that is passed in.
#
# @param end_cursor the cursor text placed before the appended "=="
# @param user_id the value placed in the "id" field
# @return [String] the complete request URL
def next_url(end_cursor, user_id)
  variables = "%7B%22id%22%3A%22#{user_id}%22%2C%22first%22%3A12" \
              "%2C%22after%22%3A%22#{end_cursor}%3D%3D%22%7D"

  "https://www.instagram.com/graphql/query/" \
    "?query_hash=f412a8bfd8332a76950fefc1da5785ef&variables=#{variables}"
end