class Twitterscraper::Tweet

Constants

KEYS

Public Class Methods

from_html(text) click to toggle source

.js-stream-item

.js-stream-tweet{data: {screen-name:, tweet-id:}}
  .stream-item-header
  .js-tweet-text-container
  .stream-item-footer
# File lib/twitterscraper/tweet.rb, line 60
def from_html(text)
  html = Nokogiri::HTML(text)
  from_tweets_html(html.xpath("//li[@class[contains(., 'js-stream-item')]]/div[@class[contains(., 'js-stream-tweet')]]"))
end
from_json(text) click to toggle source
# File lib/twitterscraper/tweet.rb, line 47
def from_json(text)
  json = JSON.parse(text)
  json.map do |tweet|
    tweet['created_at'] = Time.parse(tweet['created_at'])
    new(tweet)
  end
end
from_tweet_html(html) click to toggle source
# File lib/twitterscraper/tweet.rb, line 71
def from_tweet_html(html)
  screen_name = html.attr('data-screen-name')
  tweet_id = html.attr('data-tweet-id')&.to_i

  unless html.to_s.include?('js-tweet-text-container')
    Twitterscraper.logger.warn "html doesn't include div.js-tweet-text-container url=https://twitter.com/#{screen_name}/status/#{tweet_id}"
    return nil
  end

  inner_html = Nokogiri::HTML(html.inner_html)

  profile_image_url = inner_html.xpath("//img[@class[contains(., 'js-action-profile-avatar')]]").first.attr('src').gsub(/_bigger/, '')
  text = inner_html.xpath("//div[@class[contains(., 'js-tweet-text-container')]]/p[@class[contains(., 'js-tweet-text')]]").first.text
  links = inner_html.xpath("//a[@class[contains(., 'twitter-timeline-link')]]").map { |elem| elem.attr('data-expanded-url') }.select { |link| link && !link.include?('pic.twitter') }
  image_urls = inner_html.xpath("//div[@class[contains(., 'AdaptiveMedia-photoContainer')]]").map { |elem| elem.attr('data-image-url') }
  video_url = inner_html.xpath("//div[@class[contains(., 'PlayableMedia-container')]]/a").map { |elem| elem.attr('href') }[0]
  has_media = !image_urls.empty? || (video_url && !video_url.empty?)

  actions = inner_html.xpath("//div[@class[contains(., 'ProfileTweet-actionCountList')]]")
  likes = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--favorite')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
  retweets = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--retweet')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
  replies = actions.xpath("//span[@class[contains(., 'ProfileTweet-action--reply u-hiddenVisually')]]/span[@class[contains(., 'ProfileTweet-actionCount')]]").first.attr('data-tweet-stat-count').to_i || 0
  is_replied = replies != 0

  parent_tweet_id = inner_html.xpath('//*[@data-conversation-id]').first.attr('data-conversation-id').to_i
  if tweet_id == parent_tweet_id
    is_reply_to = false
    parent_tweet_id = nil
    reply_to_users = []
  else
    is_reply_to = true
    reply_to_users = inner_html.xpath("//div[@class[contains(., 'ReplyingToContextBelowAuthor')]]/a").map { |user| {screen_name: user.text.delete_prefix('@'), user_id: user.attr('data-user-id')} }
  end

  timestamp = inner_html.xpath("//span[@class[contains(., 'js-short-timestamp')]]").first.attr('data-time').to_i
  new(
      screen_name: screen_name,
      name: html.attr('data-name'),
      user_id: html.attr('data-user-id').to_i,
      profile_image_url: profile_image_url,
      tweet_id: tweet_id,
      text: text,
      links: links,
      hashtags: text.scan(/#\w+/).map { |tag| tag.delete_prefix('#') },
      image_urls: image_urls,
      video_url: video_url,
      has_media: has_media,
      likes: likes,
      retweets: retweets,
      replies: replies,
      is_replied: is_replied,
      is_reply_to: is_reply_to,
      parent_tweet_id: parent_tweet_id,
      reply_to_users: reply_to_users,
      tweet_url: 'https://twitter.com' + html.attr('data-permalink-path'),
      timestamp: timestamp,
      created_at: Time.at(timestamp, in: '+00:00'),
  )
end
from_tweets_html(html) click to toggle source
# File lib/twitterscraper/tweet.rb, line 65
def from_tweets_html(html)
  html.map do |tweet|
    from_tweet_html(tweet)
  end.compact
end
new(attrs) click to toggle source
# File lib/twitterscraper/tweet.rb, line 30
def initialize(attrs)
  attrs.each do |key, value|
    instance_variable_set("@#{key}", value)
  end
end

Public Instance Methods

attrs() click to toggle source
# File lib/twitterscraper/tweet.rb, line 36
def attrs
  KEYS.map do |key|
    [key, send(key)]
  end.to_h
end
to_json(options = {}) click to toggle source
# File lib/twitterscraper/tweet.rb, line 42
def to_json(options = {})
  attrs.to_json
end