class Driftwood::Bigquery

Public Class Methods

new(config) click to toggle source
# File lib/driftwood/bigquery.rb, line 4
def initialize(config)
  $logger.info "Starting Bigquery connection"

  config[:keyfile] = File.expand_path(config[:keyfile])

  @bigquery = Google::Cloud::Bigquery.new(
    :project_id  => config[:project],
    :credentials => Google::Cloud::Bigquery::Credentials.new(config[:keyfile]),
  )
  @dataset = @bigquery.dataset(config[:dataset])
  raise "\nThere is a problem with the gCloud configuration: \n #{JSON.pretty_generate(config)}" if @dataset.nil?

  if config[:noinit]
    $logger.warn "Skipping reinitialization as instructed"
    @teams     = @dataset.table('slack_teams')
    @active    = @dataset.table('slack_billing_active')
    @creation  = @dataset.table('slack_user_creation')
    @messages  = @dataset.table('slack_messages')
    @channels  = @dataset.table('slack_channels')
    @users     = @dataset.table('slack_users')
    return
  end

  @teams = @dataset.table('slack_teams') || @dataset.create_table('slack_teams') do |table|
                                                    table.name        = 'Slack Teams'
                                                    table.description = 'A list of all team names & ID, plus some metadata'
                                                    table.schema do |s|
                                                      s.string "team_id",           mode: :required
                                                      s.string "name",              mode: :required
                                                      s.string "user_access_token", mode: :required
                                                      s.string "bot_user_id",       mode: :required
                                                      s.string "bot_access_token",  mode: :required
                                                      s.string "application_id",    mode: :required
                                                    end
                                                  end

  @creation = @dataset.table('slack_user_creation') || @dataset.create_table('slack_user_creation') do |table|
                                                    table.name        = 'Slack User Creation'
                                                    table.description = 'This keeps track of when users are created. The Slack API does not provide this info.'
                                                    table.schema do |s|
                                                      s.string    "team_id",   mode: :required
                                                      s.string    "user_id",   mode: :required
                                                      s.timestamp "creation",  mode: :required
                                                    end
                                                  end

  @messages = @dataset.table('slack_messages') || @dataset.create_table('slack_messages') do |table|
                                                    table.name        = 'Slack Message Log'
                                                    table.description = 'All messages are logged here permanently.'
                                                    table.schema do |s|
                                                      s.string "team_id",    mode: :required
                                                      s.string "channel_id", mode: :required
                                                      s.string "user_id",    mode: :required
                                                      s.string "ts",         mode: :required
                                                      s.string "text",       mode: :required
                                                    end
                                                  end

  # these we delete and rebuild because it's about 17 million times faster. Only slightly exaggerated.
  begin
    @dataset.table('slack_channels').delete
    @channels = @dataset.create_table('slack_channels') do |table|
      table.name        = 'Slack Channels'
      table.description = 'A list of all channels and metadata'
      table.schema do |s|
        s.string  "team_id",     mode: :required
        s.string  "channel_id",  mode: :required
        s.string  "name",        mode: :required
        s.integer "created",     mode: :required
        s.boolean "is_private",  mode: :required
        s.string  "topic",       mode: :required
        s.string  "purpose",     mode: :required
        s.integer "num_members", mode: :required
      end
    end
  rescue => e
    $logger.error e.message
    $logger.debug e.backtrace.join("\n")
    @channels = @dataset.table('slack_channels')
  end

  begin
    @dataset.table('slack_users').delete
    @users = @dataset.create_table('slack_users') do |table|
      table.name        = 'Slack Users'
      table.description = 'A list of all users and metadata. Ironically, we only store the email so we can delete if a GDPR request is made. We cannot do anything else with it.'
      table.schema do |s|
        s.string  "team_id",       mode: :required
        s.string  "user_id",       mode: :required
        s.string  "name",          mode: :required
        s.string  "real_name",     mode: :required
        s.string  "display_name",  mode: :required
        s.boolean "is_owner",      mode: :required
        s.boolean "is_admin",      mode: :required
        s.string  "title"
        s.string  "phone"
        s.string  "skype"
        s.string  "email"
        s.string  "tz"
        s.string  "tz_offset"
        s.string  "status_text",   mode: :required
        s.string  "status_emoji",  mode: :required
        s.string  "image_72",      mode: :required
        s.string  "image_192",     mode: :required
        s.integer "updated",       mode: :required
        s.boolean "deleted",       mode: :required
      end
    end
  rescue => e
    $logger.error e.message
    $logger.debug e.backtrace.join("\n")
    @users = @dataset.table('slack_users')
  end
  $logger.info "Bigquery initialization complete"
end

Public Instance Methods

deduplicate_table(table) click to toggle source
# File lib/driftwood/bigquery.rb, line 181
def deduplicate_table(table)

end
get_auth_tokens(application_id) click to toggle source
# File lib/driftwood/bigquery.rb, line 120
def get_auth_tokens(application_id)
  @dataset.query("SELECT * FROM slack_teams WHERE application_id = '#{application_id}'").to_a rescue []
end
insert_channel(record) click to toggle source
# File lib/driftwood/bigquery.rb, line 133
def insert_channel(record)
  @channels.insert(record).success?
end
insert_message(record) click to toggle source
# File lib/driftwood/bigquery.rb, line 146
def insert_message(record)
  @messages.insert(record).success?
end
insert_team(auth_token, application_id) click to toggle source
# File lib/driftwood/bigquery.rb, line 124
def insert_team(auth_token, application_id)
  auth_token[:application_id] = application_id

  # We can take the 2.5s delete-before-insert here because it only happens when
  # authorizing a new team integration.
  @dataset.query("DELETE FROM slack_teams WHERE team_id = '#{auth_token[:team_id]}' AND application_id = '#{application_id}'")
  @teams.insert(auth_token).success?
end
insert_user(record) click to toggle source
# File lib/driftwood/bigquery.rb, line 137
def insert_user(record)
  if @users.insert(record).success?
    record_user_creation(record['team_id'], record['user_id'])
  else
    false
  end

end
newest_message_timestamp() click to toggle source
# File lib/driftwood/bigquery.rb, line 150
def newest_message_timestamp
  @dataset.query("SELECT ts FROM slack_messages ORDER BY ts DESC LIMIT 1").first[:ts] rescue 0
end
purge_deleted_users() click to toggle source
# File lib/driftwood/bigquery.rb, line 160
def purge_deleted_users
  @dataset.query('DELETE FROM slack_users WHERE deleted = true')
end
reconcile_user_creations() click to toggle source

Ensure that all users have creation dates. If they don't have one yet, assume now This will catch any users who've been created during downtime if we restart the service

# File lib/driftwood/bigquery.rb, line 166
def reconcile_user_creations()
  @dataset.query("INSERT INTO
    slack_user_creation (team_id, user_id, creation)
    (
      SELECT team_id, user_id, CURRENT_TIMESTAMP()
      FROM
        slack_users
      WHERE
        user_id NOT IN
        (
          SELECT user_id from slack_user_creation
        )
    )")
end
record_user_creation(team_id, user_id) click to toggle source
# File lib/driftwood/bigquery.rb, line 154
def record_user_creation(team_id, user_id)
  @creation.insert(:team_id  => team_id,
                   :user_id  => user_id,
                   :creation => Time.now.strftime('%Y-%m-%d %H:%M:%S%z')).success?
end
shell() click to toggle source
# File lib/driftwood/bigquery.rb, line 185
def shell
  require 'pry'
  binding.pry
end