class Embulk::Input::GoogleSpreadsheets

Attributes

client[R]
typecaster[R]

Public Class Methods

configure(config) click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 85
# Reads every plugin option from `config` and assembles the task hash.
#
# Besides filling the hash, this method overwrites the class-level defaults
# on CustomColumns (format, timezone, typecast) with the configured values
# before the `columns` option itself is read, and derives `end_column` from
# `start_column` and the number of configured columns.
#
# @param config an object responding to `param(name, type, default: ...)`
# @return [Hash] the task hash, keyed by option name
def self.configure(config)
  task = {
    # auth_method:
    #   - service_account
    #   - authorized_user
    #   - compute_engine
    #   - application_default
    'auth_method' => config.param('auth_method', :string, default: 'authorized_user'),
    # json_keyfile: Fullpath of json key
    #   if `auth_method` is `authorized_user`, this plugin supposes the format
    #   is the below.
    #   {
    #     "client_id":"xxxxxxxxxxx.apps.googleusercontent.com",
    #     "client_secret":"xxxxxxxxxxx",
    #     "refresh_token":"xxxxxxxxxxx"
    #   }
    #
    #   if `auth_method` is `compute_engine` or `application_default`, this
    #   option is not required.
    'json_keyfile' => config.param('json_keyfile', LocalFile, default: nil),
    'spreadsheets_url' => config.param('spreadsheets_url', :string),
    'worksheet_title' => config.param('worksheet_title', :string),
    'start_column' => config.param('start_column', :integer, default: 1),
    'start_row' => config.param('start_row', :integer, default: 1),
    'end_row' => config.param('end_row', :integer, default: -1),
    'max_fetch_rows' => config.param('max_fetch_rows', :integer, default: 10_000),
    # FORMATTED_VALUE, UNFORMATTED_VALUE, FORMULA are available.
    # ref. https://developers.google.com/sheets/api/reference/rest/v4/ValueRenderOption
    'value_render_option' => config.param('value_render_option', :string, default: 'FORMATTED_VALUE'),
    'null_string' => config.param('null_string', :string, default: ''),
    'stop_on_invalid_record' => config.param('stop_on_invalid_record', :bool, default: true)
  }

  # The three defaults below are stored in the task AND pushed onto
  # CustomColumns, so they must be settled before `columns` is read.
  timestamp_format = config.param('default_timestamp_format', :string, default: CustomColumns.default_format)
  task['default_timestamp_format'] = timestamp_format
  CustomColumns.default_format = timestamp_format

  timezone = config.param('default_timezone', :string, default: CustomColumns.default_timezone)
  task['default_timezone'] = timezone
  CustomColumns.default_timezone = timezone

  typecast = config.param('default_typecast', :string, default: CustomColumns.default_typecast)
  task['default_typecast'] = typecast
  CustomColumns.default_typecast = typecast

  # columns: this option supposes an array of hash has the below structure.
  #   - name
  #   - type
  #   - format
  #   - timezone
  #   - typecast: default: strict
  task['columns'] = config.param('columns', CustomColumns)

  # Last column index, inclusive: one column per configured entry.
  task['end_column'] = task['start_column'] + task['columns'].length - 1

  logger.debug do
    # The key file entry is left out of the logged task.
    loggable = task.reject { |key, _value| key == 'json_keyfile' }
    "`embulk-input-google_spreadsheets`: configured task '#{loggable.to_json}'"
  end

  task
end
configure_columns(task) click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 133
# Turns the configured column definitions into Column objects.
#
# Each entry of task['columns'] is read by its 'name', 'type' and 'format'
# keys; the type is converted to a Symbol and the entry's zero-based position
# becomes the column index.
#
# @param task [Hash] task hash holding the 'columns' list
# @return [Array<Column>] one Column per configured entry, in order
def self.configure_columns(task)
  task['columns'].each_with_index.map do |definition, position|
    Column.new(position, definition['name'], definition['type'].to_sym, definition['format'])
  end
end
logger() click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 77
# Class-level logger accessor.
#
# @return whatever `::Embulk.logger` returns
def self.logger
  ::Embulk.logger
end
resume(task, columns, count) { |task, columns, count| ... } click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 145
# Hands the task, columns and task count to the given block and then
# returns an empty config diff.
#
# The block's result is not used here.
#
# @param task [Hash] task hash passed through to the block
# @param columns [Array] column definitions passed through to the block
# @param count [Integer] count passed through to the block
# @yield [task, columns, count]
# @return [Hash] always an empty hash
def self.resume(task, columns, count, &control)
  yield(task, columns, count)

  {}
end
transaction(config, &control) click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 139
# Entry point that configures the task, derives its columns and delegates to
# `resume` with a count of 1, forwarding the caller's block.
#
# @param config configuration object handed to `configure`
# @return whatever `resume` returns
def self.transaction(config, &control)
  task = configure(config)
  resume(task, configure_columns(task), 1, &control)
end

Public Instance Methods

init() click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 154
# Builds the collaborators this instance works with, all from `task`:
# a RecordTypecaster stored in @typecaster, and a SpreadsheetsClient
# (given an Auth and a Pager) stored in @client.
def init
  @typecaster = RecordTypecaster.new(task)

  auth  = Auth.new(task)
  pager = Pager.new(task)
  @client = SpreadsheetsClient.new(task, auth: auth, pager: pager)
end
logger() click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 81
# Instance-level logger accessor; forwards to the `logger` method of this
# object's class.
def logger
  self.class.logger
end
run() click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 163
# Reads every worksheet record from `client`, typecasts it and adds it to
# `page_builder`, then finishes the page builder.
#
# A failure on a single record is handled according to
# `stop_on_invalid_record?`: when truthy, ConfigError/DataError are re-raised
# as they are and any other error is wrapped in a DataError; otherwise a
# warning is logged and the record is skipped.
#
# A Google::Apis::Error escaping the loop is logged (class, message, status
# code and body, each only when present) and re-raised.
#
# @return [Hash] an empty task report
def run
  client.worksheet_each_record do |record|
    begin
      # `record` is reassigned so the warning below shows the latest form.
      record = typecaster.transform_by_columns(record)
      page_builder.add(record)
    rescue => error
      if stop_on_invalid_record?
        case error
        when ConfigError, DataError then raise error
        else raise DataError.new(error)
        end
      else
        logger.warn { "`embulk-input-google_spreadsheets`: Error '#{error}' occurred. Skip '#{record}'" }
      end
    end
  end

  page_builder.finish

  {}
rescue Google::Apis::Error => api_error
  logger.error do
    [
      "Error: #{api_error.class}",
      (", Message: #{api_error.message}"        if api_error.message),
      (", StatusCode: #{api_error.status_code}" if api_error.status_code),
      (", Body: #{api_error.body}"              if api_error.body)
    ].compact.join
  end
  raise api_error
end
stop_on_invalid_record?() click to toggle source
# File lib/embulk/input/google_spreadsheets.rb, line 159
# Reads the 'stop_on_invalid_record' entry of `task`.
#
# `run` consults this when a record fails: a truthy value makes it raise,
# a falsy one makes it log a warning and skip the record.
def stop_on_invalid_record?
  task['stop_on_invalid_record']
end