class GoodData::LCM2::SynchronizeUsers

Constants

DESCRIPTION
MODES
PARAMS

Public Class Methods

call(params) click to toggle source
# File lib/gooddata/lcm/actions/synchronize_users.rb, line 142
# Synchronizes users described by a CSV input source into a domain and/or
# one or more projects, depending on `params.sync_mode`.
#
# Values read from `params`: gdc_gd_client, organization / domain,
# gdc_project / gdc_project_id, input_source, data_product, sync_mode,
# whitelists, regexp_whitelists, ignore_failures, remove_users_from_project,
# do_not_touch_users_that_are_not_mentioned, create_non_existing_user_groups,
# segments and gdc_logger (plus everything `load_data` reads).
#
# @param params the action parameters (see the list above)
# @return [nil] when none of the collected results is of type :error/:failed
# @raise [RuntimeError] when a mandatory parameter is missing, the sync mode
#   is not listed in MODES, a referenced project/client cannot be resolved,
#   or at least one result is of type :error or :failed
def call(params)
  client = params.gdc_gd_client
  domain_name = params.organization || params.domain
  fail "Either organisation or domain has to be specified in params" unless domain_name
  project = client.projects(params.gdc_project) || client.projects(params.gdc_project_id)
  fail "Either project or project_id has to be specified in params" unless project
  data_source = GoodData::Helpers::DataSource.new(params.input_source)
  data_product = params.data_product
  mode = params.sync_mode
  unless MODES.include?(mode)
    fail "The parameter \"sync_mode\" has to have one of the values #{MODES.map(&:to_s).join(', ')} or has to be empty."
  end

  # The login of the user running the sync is always whitelisted.
  whitelists = Set.new(params.whitelists || []) + Set.new((params.regexp_whitelists || []).map { |r| /#{r}/ }) + Set.new([client.user.login])

  # Report a missing value by name; concatenating onto the nil value itself
  # would raise NoMethodError instead of the intended message.
  { 'domain_name' => domain_name, 'data_source' => data_source }.each do |name, value|
    fail "#{name} is required in the block parameters." unless value
  end

  domain = client.domain(domain_name)

  ignore_failures = GoodData::Helpers.to_boolean(params.ignore_failures)
  remove_users_from_project = GoodData::Helpers.to_boolean(params.remove_users_from_project)
  do_not_touch_users_that_are_not_mentioned = GoodData::Helpers.to_boolean(params.do_not_touch_users_that_are_not_mentioned)

  # Defaults to true only when the parameter is absent. `value || true`
  # would also override an explicit boolean false.
  create_groups_param = params.create_non_existing_user_groups
  create_groups_param = true if create_groups_param.nil?
  create_non_existing_user_groups = GoodData::Helpers.to_boolean(create_groups_param)

  new_users = load_data(params, data_source).compact

  # There are several scenarios we want to provide with this brick
  # 1) Sync only domain
  # 2) Sync both domain and project
  # 3) Sync multiple projects. Sync them by using one file. The file has to
  #     contain additional column that contains the PID of the project so the
  #     process can partition the users correctly. The column is configurable
  # 4) Sync one project the users are filtered based on a column in the data
  #     that should contain pid of the project
  # 5) Sync one project. The users are filtered from a given file based on the
  #     value in the file. The value is compared against the value
  #     GOODOT_CUSTOM_PROJECT_ID that is saved in project metadata. This is
  #     aiming at solving the problem that the customer cannot give us the
  #     value of a project id in the data since he does not know it upfront
  #     and we cannot influence its value.
  common_params = {
    domain: domain,
    whitelists: whitelists,
    ignore_failures: ignore_failures,
    remove_users_from_project: remove_users_from_project,
    do_not_touch_users_that_are_not_mentioned: do_not_touch_users_that_are_not_mentioned,
    create_non_existing_user_groups: create_non_existing_user_groups,
    user_groups_cache: nil
  }
  GoodData.gd_logger.info("Synchronizing in mode=#{mode}, data_rows=#{new_users.size} ,")

  GoodData.logger.info("Synchronizing in mode \"#{mode}\"")
  results = case mode
            when 'add_to_organization'
              domain.create_users(new_users.uniq { |u| u[:login] || u[:email] })
            when 'remove_from_organization'
              user_ids = new_users.uniq { |u| u[:login] || u[:email] }.map { |u| u[:login] || u[:email] }
              users = user_ids.map { |u| domain.users(u, client: client) }.reject(&:nil?)
              params.gdc_logger.info "#{user_ids.count - users.count} users were not found (or were deleted) in domain #{domain_name}" if user_ids.count > users.count
              params.gdc_logger.warn "Deleting #{users.count} users from domain #{domain_name}"

              GoodData.gd_logger.info("Synchronizing in mode=#{mode}, domain=#{domain_name}, data_rows=#{users.count} ,")
              users.map(&:delete)
            when 'sync_project'
              project.import_users(new_users, common_params)
            when 'sync_multiple_projects_based_on_pid'
              new_users.group_by { |u| u[:pid] }.flat_map do |project_id, users|
                begin
                  project = client.projects(project_id)

                  GoodData.gd_logger.info("Synchronizing in mode=#{mode}, project_id=#{project_id}, data_rows=#{users.count} ,")
                  project.import_users(users, common_params)
                rescue RestClient::ResourceNotFound
                  fail "Project \"#{project_id}\" was not found. Please check your project ids in the source file"
                rescue RestClient::Gone
                  fail "Seems like you (user executing the script - #{client.user.login}) do not have access to project \"#{project_id}\""
                rescue RestClient::Forbidden
                  fail "User #{client.user.login} is not enabled within project \"#{project_id}\""
                end
              end
            when 'sync_one_project_based_on_pid'
              filtered_users = new_users.select { |u| u[:pid] == project.pid }

              GoodData.gd_logger.info("Synchronizing in mode=#{mode}, data_rows=#{filtered_users.count} ,")
              project.import_users(filtered_users, common_params)
            when 'sync_one_project_based_on_custom_id'
              filter_value = UserBricksHelper.resolve_client_id(domain, project, data_product)

              filtered_users = new_users.select do |u|
                fail "Column for determining the project assignement is empty for \"#{u[:login]}\"" if u[:pid].blank?
                client_id = u[:pid].to_s
                client_id == filter_value
              end

              if filtered_users.empty?
                params.gdc_logger.warn(
                  "Project \"#{project.pid}\" does not match " \
                  "any client ids in input source (both " \
                  "GOODOT_CUSTOM_PROJECT_ID and SEGMENT/CLIENT). " \
                  "We are unable to get the value to filter users."
                )
              end

              GoodData.logger.info("Project #{project.pid} will receive #{filtered_users.count} from #{new_users.count} users")
              GoodData.gd_logger.info("Synchronizing in mode=#{mode}, project_id=#{project.pid}, filtered_users=#{filtered_users.count}, data_rows=#{new_users.count} ,")
              project.import_users(filtered_users, common_params)
            when 'sync_multiple_projects_based_on_custom_id'
              all_clients = domain.clients(:all, data_product).to_a
              new_users.group_by { |u| u[:pid] }.flat_map do |client_id, users|
                fail "Client id cannot be empty" if client_id.blank?

                c = all_clients.detect { |specific_client| specific_client.id == client_id }
                fail "The client \"#{client_id}\" does not exist in data product \"#{data_product.data_product_id}\"" if c.nil?

                project = c.project
                fail "Client #{client_id} does not have project." unless project

                GoodData.logger.info("Project #{project.pid} of client #{client_id} will receive #{users.count} users")

                GoodData.gd_logger.info("Synchronizing in mode=#{mode}, project_id=#{project.pid}, data_rows=#{users.count} ,")
                project.import_users(users, common_params)
              end
            when 'sync_domain_client_workspaces'
              all_domain_clients = domain.clients(:all, data_product)
              domain_clients = all_domain_clients
              if params.segments
                segment_uris = params.segments.map(&:uri)
                domain_clients = domain_clients.select { |c| segment_uris.include?(c.segment_uri) }
              end
              working_client_ids = []
              res = []
              # Skipped clients (`next`) contribute nil entries here; they are
              # removed by the compact! after the case expression.
              res += new_users.group_by { |u| u[:pid] }.flat_map do |client_id, users|
                fail "Client id cannot be empty" if client_id.blank?

                c = domain_clients.detect { |specific_client| specific_client.id == client_id }
                if c.nil?
                  filtered_client = all_domain_clients.detect { |f_client| f_client.id == client_id }
                  fail "The client \"#{client_id}\" does not exist in data product \"#{data_product.data_product_id}\"" if filtered_client.nil?

                  GoodData.logger.info("Client \"#{client_id}\" is not belong to filtered segments")
                  next
                end

                if params.segments && !segment_uris.include?(c.segment_uri)
                  GoodData.logger.info("Client #{client_id} is outside segments_filter #{params.segments}")
                  next
                end
                project = c.project
                fail "Client #{client_id} does not have project." unless project

                working_client_ids << client_id.to_s
                GoodData.logger.info("Project #{project.pid} of client #{client_id} will receive #{users.count} users")

                GoodData.gd_logger.info("Synchronizing in mode=#{mode}, project_id=#{project.pid}, data_rows=#{users.count} ,")
                project.import_users(users, common_params)
              end

              params.gdc_logger.debug("Working client ids are: #{working_client_ids.join(', ')}")

              # Clients that had no rows in the input are synchronized with an
              # empty user list unless the caller opted out.
              unless do_not_touch_users_that_are_not_mentioned
                domain_clients.each do |c|
                  next if working_client_ids.include?(c.client_id.to_s)
                  begin
                    project = c.project
                  rescue => e
                    GoodData.logger.error("Error when accessing project of client #{c.client_id}. Error: #{e}")
                    next
                  end
                  unless project
                    GoodData.logger.info("Client #{c.client_id} has no project.")
                    next
                  end
                  if project.deleted?
                    GoodData.logger.info("Project #{project.pid} of client #{c.client_id} is deleted.")
                    next
                  end
                  GoodData.logger.info("Synchronizing all users in project #{project.pid} of client #{c.client_id}")

                  GoodData.gd_logger.info("Synchronizing all users in project_id=#{project.pid}, client_id=#{c.client_id} ,")
                  res += project.import_users([], common_params)
                end
              end

              res
            when 'sync_domain_and_project'
              GoodData.gd_logger.info("Create users in mode=#{mode}, data_rows=#{new_users.count} ,")
              domain.create_users(new_users, ignore_failures: ignore_failures)

              GoodData.gd_logger.info("Import users in mode=#{mode}, data_rows=#{new_users.count} ,")
              project.import_users(new_users, common_params)
            end

  results.compact!
  counts = results.group_by { |r| r[:type] }.map { |g, r| [g, r.count] }
  counts.each do |category, count|
    GoodData.logger.info("There were #{count} events of type #{category}")
  end
  errors = results.select { |r| r[:type] == :error || r[:type] == :failed }
  return if errors.empty?

  GoodData.logger.info('Printing 10 first errors')
  GoodData.logger.info('========================')
  GoodData.logger.info(errors.take(10).pretty_inspect)
  fail 'There was an error syncing users'
end
load_data(params, data_source) click to toggle source
# File lib/gooddata/lcm/actions/synchronize_users.rb, line 350
# Reads the user CSV referenced by `data_source` and converts every row into
# a hash of user attributes.
#
# Column names can be overridden through `params.<attribute>_column`; they
# are downcased before lookup and default to the attribute name. A non-nil
# `params.sso_provider` or non-empty `params.authentication_modes` takes
# precedence over the per-row values.
#
# @param params action parameters (column overrides, sso_provider,
#   authentication_modes, multiple_projects_column, gdc_logger)
# @param data_source object whose `realize(params)` returns the path of the
#   CSV file to read
# @return [Array<Hash>] one hash per CSV row with the keys :first_name,
#   :last_name, :login, :password, :email, :role, :sso_provider,
#   :authentication_modes, :user_group, :pid, :language, :company, :position,
#   :country, :phone and :ip_whitelist
# @raise [RuntimeError] when reading the CSV file fails
def load_data(params, data_source)
  first_name_column           = params.first_name_column&.downcase || 'first_name'
  last_name_column            = params.last_name_column&.downcase || 'last_name'
  login_column                = params.login_column&.downcase || 'login'
  password_column             = params.password_column&.downcase || 'password'
  email_column                = params.email_column&.downcase || 'email'
  role_column                 = params.role_column&.downcase || 'role'
  sso_provider_column         = params.sso_provider_column&.downcase || 'sso_provider'
  authentication_modes_column = params.authentication_modes_column&.downcase || 'authentication_modes'
  user_groups_column          = params.user_groups_column&.downcase || 'user_groups'
  language_column             = params.language_column&.downcase || 'language'
  company_column              = params.company_column&.downcase || 'company'
  position_column             = params.position_column&.downcase || 'position'
  country_column              = params.country_column&.downcase || 'country'
  phone_column                = params.phone_column&.downcase || 'phone'
  ip_whitelist_column         = params.ip_whitelist_column&.downcase || 'ip_whitelist'
  multiple_projects_column    = params.multiple_projects_column

  sso_provider = params.sso_provider
  authentication_modes = params.authentication_modes || []

  tmp = without_check(PARAMS, params) do
    File.open(data_source.realize(params), 'r:UTF-8')
  end

  begin
    data = read_csv_file(tmp)
  rescue Exception => e # rubocop:disable RescueException
    fail "There was an error during loading users from csv file. Message: #{e.message}. Error: #{e}"
  ensure
    # The rows are fully materialized by read_csv_file, so the handle opened
    # above can be released here instead of leaking until garbage collection.
    tmp.close if tmp.respond_to?(:closed?) && !tmp.closed?
  end

  data.map do |row|
    params.gdc_logger.debug("Processing row: #{row}")

    # Looks a column up by its string name first and by its symbol second.
    cell = ->(column) { row[column] || row[column.to_sym] }

    modes = if authentication_modes.empty?
              cell.call(authentication_modes_column) || []
            else
              authentication_modes
            end

    modes = modes.split(',').map(&:strip).map { |x| x.to_s.upcase } unless modes.is_a? Array

    user_group = cell.call(user_groups_column)
    user_group = user_group.split(',').map(&:strip) if user_group
    # A present but empty column yields [] while an absent column stays nil.
    user_group = [] if row.headers.include?(user_groups_column) && !user_group

    ip_whitelist = cell.call(ip_whitelist_column)
    ip_whitelist = ip_whitelist.split(',').map(&:strip) if ip_whitelist

    user_login = cell.call(login_column)
    user_login = user_login.strip unless user_login.nil?

    # The e-mail falls back to the login column when it is missing.
    user_email = row[email_column] || row[login_column] || row[email_column.to_sym] || row[login_column.to_sym]
    user_email = user_email.strip unless user_email.nil?

    # read_csv_file downcases the headers, so the configured column name is
    # also tried in its downcased form to match like every other column.
    pid = unless multiple_projects_column.nil?
            cell.call(multiple_projects_column) || row[multiple_projects_column.to_s.downcase]
          end

    {
      :first_name => cell.call(first_name_column),
      :last_name => cell.call(last_name_column),
      :login => user_login,
      :password => cell.call(password_column),
      :email => user_email,
      :role => cell.call(role_column),
      :sso_provider => sso_provider || cell.call(sso_provider_column),
      :authentication_modes => modes,
      :user_group => user_group,
      :pid => pid,
      :language => cell.call(language_column),
      :company => cell.call(company_column),
      :position => cell.call(position_column),
      :country => cell.call(country_column),
      :phone => cell.call(phone_column),
      :ip_whitelist => ip_whitelist
    }
  end
end
read_csv_file(path) { |row| ... } click to toggle source
# File lib/gooddata/lcm/actions/synchronize_users.rb, line 425
# Loads a CSV file with a header line into memory, downcasing the headers.
#
# When a block is given, each row is passed to it and the block's result is
# stored instead of the row; nil/false results are dropped.
#
# @param path the CSV source handed to CSV.foreach
# @yield [row] optional per-row transformation
# @return [Array] the collected rows (or block results)
def read_csv_file(path)
  logger = GoodData.logger
  logger.info('Start reading csv file')
  collected = []

  CSV.foreach(path, headers: true, header_converters: :downcase, encoding: 'utf-8') do |row|
    item = block_given? ? yield(row) : row
    collected << item if item

    # Progress is reported whenever the running total is a multiple of 50k.
    logger.info("Read #{collected.size} rows") if (collected.size % 50_000).zero?
  end

  logger.info("Done reading csv file, total #{collected.size} rows")
  collected
end
version() click to toggle source
# File lib/gooddata/lcm/actions/synchronize_users.rb, line 138
# Version identifier of this action.
#
# @return [String] the literal '0.0.1'
def version
  '0.0.1'
end