module GoodData::Model

Constants

ATTRIBUTE_FOLDER_PREFIX
ATTRIBUTE_PREFIX
DATE_ATTRIBUTE
DATE_ATTRIBUTE_DEFAULT_DISPLAY_FORM
DATE_COLUMN_PREFIX
DATE_FACT_PREFIX
DEFAULT_ATTRIBUTE_DATATYPE
DEFAULT_DATE_FORMAT
DEFAULT_FACT_DATATYPE
DEFAULT_TYPE
FACT_COLUMN_PREFIX
FACT_FOLDER_PREFIX
FACT_PREFIX
FIELD_PK

Model naming conventions

FK_SUFFIX
GD_DATA_TYPES
GD_TYPES

See confluence.intgdc.com/display/plat/Catalog+of+Attribute+Types

LABEL_COLUMN_PREFIX
LABEL_PREFIX
LDM_CTG

GoodData REST API categories

LDM_MANAGE_CTG
TIME_ATTRIBUTE_PREFIX
TIME_COLUMN_PREFIX
TIME_FACT_PREFIX

Public Class Methods

check_gd_data_type(value)
# File lib/gooddata/models/model.rb, line 116
def check_gd_data_type(value)
  GD_DATA_TYPES.any? do |v|
    case v
    when Regexp
      v =~ value
    when String
      v == (value && value.upcase)
    else
      fail 'Unknown predicate'
    end
  end
end
check_gd_type(value)
# File lib/gooddata/models/model.rb, line 112
def check_gd_type(value)
  GD_TYPES.any? { |v| v == value }
end
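
A quick sketch of the two predicates. The concrete contents of GD_TYPES and GD_DATA_TYPES are defined by the constants above; the specific values here are illustrative assumptions:

  # String entries are compared against value.upcase, so data types are
  # effectively case-insensitive.
  GoodData::Model.check_gd_data_type('int')          # => true, assuming 'INT' is among GD_DATA_TYPES
  GoodData::Model.check_gd_data_type('no-such-type') # => false

  # check_gd_type uses plain equality with no case normalization.
  GoodData::Model.check_gd_type('no-such-type')      # => false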
column_name(item)
# File lib/gooddata/models/model.rb, line 104
def column_name(item)
  item[:column_name] || item[:id]
end
description(item)
# File lib/gooddata/models/model.rb, line 108
def description(item)
  item[:description]
end
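
Both helpers read plain hash keys; column_name falls back to :id when :column_name is missing. A minimal sketch:

  item = { id: 'user_id', description: 'Primary key of users' }
  GoodData::Model.column_name(item) # => 'user_id'
  GoodData::Model.description(item) # => 'Primary key of users'
  GoodData::Model.description({})   # => nil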
merge_dataset_columns(a_schema_blueprint, b_schema_blueprint)
# File lib/gooddata/models/model.rb, line 265
def merge_dataset_columns(a_schema_blueprint, b_schema_blueprint)
  a_schema_blueprint = a_schema_blueprint.to_hash
  b_schema_blueprint = b_schema_blueprint.to_hash
  d = GoodData::Helpers.deep_dup(a_schema_blueprint)
  d[:columns] = d[:columns] + b_schema_blueprint[:columns]
  d[:columns].uniq!
  columns_that_failed_to_merge = d[:columns]
    .group_by { |x| [:reference, :date].include?(x[:type]) ? x[:dataset] : x[:id] }
    .map { |k, v| [k, v.count, v] }.select { |x| x[1] > 1 }
  unless columns_that_failed_to_merge.empty?
    columns_that_failed_to_merge.each do |error|
      message = "Columns #{error[0]} failed to merge. There are " \
                "#{error[1]} conflicting columns. When merging columns " \
                "with the same name they have to be identical."
      GoodData.logger.error message
      GoodData.logger.error error[2]
    end
    # All conflicts have been logged above; raise on the first one.
    key, count, columns = columns_that_failed_to_merge.first
    fail "Columns #{key} failed to merge. There are #{count} " \
         "conflicting columns: #{columns}. When merging columns with " \
         "the same name they have to be identical."
  end
  d
end
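
A minimal sketch with plain hashes (blueprint objects work too, since both arguments go through to_hash); the column definitions are illustrative:

  a = { columns: [{ type: :anchor, id: 'id' }, { type: :fact, id: 'amount' }] }
  b = { columns: [{ type: :fact, id: 'amount' }, { type: :fact, id: 'price' }] }

  merged = GoodData::Model.merge_dataset_columns(a, b)
  merged[:columns].map { |c| c[:id] } # => ['id', 'amount', 'price']

  # Two columns sharing an :id but differing in any other key survive
  # uniq!, land in the same group, and make the merge fail.
  b2 = { columns: [{ type: :attribute, id: 'amount' }] }
  GoodData::Model.merge_dataset_columns(a, b2) # raises RuntimeError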
normalize_gd_data_type(type)
# File lib/gooddata/models/model.rb, line 129
def normalize_gd_data_type(type)
  if type && type.upcase == 'INTEGER'
    'INT'
  else
    type
  end
end
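
The normalization only rewrites the INTEGER alias; everything else, including nil, passes through unchanged:

  GoodData::Model.normalize_gd_data_type('INTEGER')       # => 'INT'
  GoodData::Model.normalize_gd_data_type('integer')       # => 'INT'
  GoodData::Model.normalize_gd_data_type('DECIMAL(12,2)') # => 'DECIMAL(12,2)'
  GoodData::Model.normalize_gd_data_type(nil)             # => nil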
title(item)
# File lib/gooddata/models/model.rb, line 100
def title(item)
  item[:title] || GoodData::Helpers.titleize(item[:id])
end
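
An explicit :title wins; otherwise the :id is humanized. A minimal sketch (the exact output of GoodData::Helpers.titleize is an assumption here):

  GoodData::Model.title(id: 'created_date', title: 'Created At') # => 'Created At'
  GoodData::Model.title(id: 'created_date') # e.g. 'Created Date', via Helpers.titleize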
upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project })

Loads the given file into a dataset described by the given schema; a thin wrapper around upload_multiple_data.

# File lib/gooddata/models/model.rb, line 138
def upload_data(path, project_blueprint, dataset, options = { :client => GoodData.connection, :project => GoodData.project })
  data = [
    {
      data: path,
      dataset: dataset,
      options: options
    }
  ]
  GoodData::Model.upload_multiple_data(data, project_blueprint, options)
end
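
A usage sketch; blueprint, client and project are hypothetical objects you already hold, and 'dataset.users' stands in for a dataset identifier from your blueprint. Note that passing an options hash replaces the default one entirely, so supply :client and :project yourself:

  GoodData::Model.upload_data(
    'data/users.csv',   # CSV path; an IO or an array of rows appears to work too
    blueprint,          # ProjectBlueprint describing the model
    'dataset.users',
    :client => client, :project => project, :mode => 'INCREMENTAL'
  )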
upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project })

Uploads multiple datasets using the batch upload interface.

@param data [String|Array] Input data
@param project_blueprint [ProjectBlueprint] Project blueprint
@param options [Hash] Additional options
@return [Hash] Batch upload result

# File lib/gooddata/models/model.rb, line 154
def upload_multiple_data(data, project_blueprint, options = { :client => GoodData.connection, :project => GoodData.project })
  client, project = GoodData.get_client_and_project(options)
  project ||= GoodData.project

  manifest = {
    'dataSetSLIManifestList' => data.map do |d|
      mode = d[:options] && d[:options][:mode] ? d[:options][:mode] : options[:mode] || 'FULL'
      GoodData::Model::ToManifest.dataset_to_manifest(project_blueprint, d[:dataset], mode)
    end
  }

  csv_headers = []

  dir = Dir.mktmpdir
  begin
    Zip::File.open("#{dir}/upload.zip", Zip::File::CREATE) do |zip|
      # TODO: make sure schema columns match CSV column names
      zip.get_output_stream('upload_info.json') { |f| f.puts JSON.pretty_generate(manifest) }

      # Pair each input item with its generated manifest entry and write
      # the corresponding CSV into the zip.
      data.zip(manifest['dataSetSLIManifestList']).each do |item|
        column_mapping = item[0][:options] ? item[0][:options][:column_mapping] : nil
        path = item[0][:data]
        path = item[0][:data].path if item[0][:data].respond_to? :path
        inline_data = !path.is_a?(String)

        # Inline data is already an array of parsed rows; a String is a path
        # to a CSV file on disk.
        data_to_upload = inline_data ? path : File.open(path)

        filename = item[1]['dataSetSLIManifest']['file']

        zip.get_output_stream(filename) do |file|
          data_to_upload.each_with_index do |row, index|
            row = CSV.parse(row).first unless inline_data

            # Remap the header row to manifest column names and remember it
            # for error reporting later.
            if index.zero?
              row.map! { |h| column_mapping.key(h) || h } if column_mapping
              csv_headers << row
            end

            file.puts row.to_csv
          end
        end
      end
    end

    # upload it
    client.upload_to_user_webdav("#{dir}/upload.zip", :directory => File.basename(dir), :client => options[:client], :project => options[:project])
  ensure
    FileUtils.rm_rf dir
  end
  csv_headers.flatten!

  # kick the load
  pull = { 'pullIntegration' => File.basename(dir) }
  link = project.md.links('etl')['pull2']

  # TODO: List uploaded datasets
  task = client.post(link, pull, :info_message => 'Starting the data load from user storage to dataset.')

  res = client.poll_on_response(task['pull2Task']['links']['poll'], :info_message => 'Getting status of the dataload task.') do |body|
    body['wTaskStatus']['status'] == 'RUNNING' || body['wTaskStatus']['status'] == 'PREPARED'
  end

  if res['wTaskStatus']['status'] == 'ERROR'
    s = StringIO.new

    messages = res['wTaskStatus']['messages'] || []
    messages.each do |msg|
      GoodData.logger.error(JSON.pretty_generate(msg))
    end

    begin
      client.download_from_user_webdav(File.basename(dir) + '/upload_status.json', s, :client => client, :project => project)
    rescue => e
      raise "Unable to download upload_status.json from remote server, reason: #{e.message}"
    end

    js = MultiJson.load(s.string)
    manifests = manifest['dataSetSLIManifestList'].map do |m|
      m['dataSetSLIManifest']
    end

    parts = manifests.map do |m|
      m['parts']
    end

    manifest_cols = parts.flatten.map { |c| c['columnName'] }

    # extract some human readable error message from the webdav file
    csv_headers.map!(&:to_sym)
    manifest_cols.map!(&:to_sym)
    manifest_extra = manifest_cols - csv_headers
    csv_extra = csv_headers - manifest_cols

    error_message = begin
      js['error']['message'] % js['error']['parameters']
    rescue NoMethodError, ArgumentError
      ''
    end
    m = "Load failed with error '#{error_message}'.\n"
    m += "Columns that should be there (manifest) but aren't in uploaded csv: #{manifest_extra}\n" unless manifest_extra.empty?
    m += "Columns that are in csv but shouldn't be there (manifest): #{csv_extra}\n" unless csv_extra.empty?
    m += "Columns in the uploaded csv: #{csv_headers}\n"
    m += "Columns in the manifest: #{manifest_cols}\n"
    m += "Original message:\n#{JSON.pretty_generate(js)}\n"
    m += "Manifest used for uploading:\n#{JSON.pretty_generate(manifest)}"
    fail m
  end

  res
end
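
A usage sketch with hypothetical names. A per-item :mode takes precedence over the global one, which defaults to 'FULL':

  data = [
    { data: 'data/users.csv', dataset: 'dataset.users' },
    { data: 'data/sales.csv', dataset: 'dataset.sales',
      options: { mode: 'INCREMENTAL' } }
  ]
  GoodData::Model.upload_multiple_data(data, blueprint,
                                       :client => client, :project => project)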