class Google::Cloud::Bigquery::External::CsvSource

# CsvSource

{External::CsvSource} is a subclass of {External::DataSource} and represents a CSV external data source, such as a file in Google Cloud Storage or Google Drive, that can be queried directly even though the data is not stored in BigQuery. Instead of loading or streaming the data, this object references the external data source.

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.autodetect = true
  csv.skip_leading_rows = 1
end

data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }

# Iterate over the first page of results
data.each do |row|
  puts row[:name]
end
# Retrieve the next page of results
data = data.next if data.next?

Public Class Methods

from_gapi(gapi) click to toggle source

@private Google API Client object.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 471
def self.from_gapi gapi
  # Let the parent implementation build the object from +gapi+, then attach
  # a Schema built from gapi.schema as its @schema before returning it.
  super.tap do |source|
    source.instance_variable_set :@schema, Schema.from_gapi(gapi.schema)
  end
end
new() click to toggle source

@private Create an empty CsvSource object.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 55
def initialize
  super
  # Attach a blank CsvOptions to the underlying gapi; the CSV accessors in
  # this class read and write through @gapi.csv_options.
  # NOTE(review): @gapi is presumably created by the parent initializer -- confirm.
  blank_options = Google::Apis::BigqueryV2::CsvOptions.new
  @gapi.csv_options = blank_options
end

Public Instance Methods

delimiter() click to toggle source

The separator for fields in a CSV file.

@return [String]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 256
def delimiter
  # The separator is stored as field_delimiter on the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.field_delimiter
end
delimiter=(new_delimiter) click to toggle source

Set the separator for fields in a CSV file.

@param [String] new_delimiter New delimiter value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.delimiter = "|"
end

csv_table.delimiter #=> "|"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 277
def delimiter= new_delimiter
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.field_delimiter = new_delimiter
end
encoding() click to toggle source

The character encoding of the data.

@return [String]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 167
def encoding
  # Read the encoding straight from the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.encoding
end
encoding=(new_encoding) click to toggle source

Set the character encoding of the data.

@param [String] new_encoding New encoding value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 188
def encoding= new_encoding
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.encoding = new_encoding
end
fields() click to toggle source

The fields of the schema.

@return [Array<Schema::Field>] An array of field objects.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 439
def fields
  # Delegate to the schema object returned by #schema.
  current_schema = schema
  current_schema.fields
end
headers() click to toggle source

The names of the columns in the schema.

@return [Array<Symbol>] An array of column names.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 448
def headers
  # Delegate to the schema object returned by #schema.
  current_schema = schema
  current_schema.headers
end
iso8859_1?() click to toggle source

Checks if the character encoding of the data is “ISO-8859-1”.

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "ISO-8859-1"
end

csv_table.encoding #=> "ISO-8859-1"
csv_table.iso8859_1? #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 235
def iso8859_1?
  # True only when the configured encoding is exactly "ISO-8859-1".
  current_encoding = encoding
  current_encoding == "ISO-8859-1"
end
jagged_rows() click to toggle source

Indicates if BigQuery should accept rows that are missing trailing optional columns.

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 78
def jagged_rows
  # The flag is stored as allow_jagged_rows on the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.allow_jagged_rows
end
jagged_rows=(new_jagged_rows) click to toggle source

Set whether BigQuery should accept rows that are missing trailing optional columns.

@param [Boolean] new_jagged_rows New jagged_rows value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.jagged_rows = true
end

csv_table.jagged_rows #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 100
def jagged_rows= new_jagged_rows
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.allow_jagged_rows = new_jagged_rows
end
param_types() click to toggle source

The types of the fields in the data in the schema, using the same format as the optional query parameter types.

@return [Hash] A hash with field names as keys, and types as values.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 458
def param_types
  # Delegate to the schema object returned by #schema.
  current_schema = schema
  current_schema.param_types
end
quote() click to toggle source

The value that is used to quote data sections in a CSV file.

@return [String]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 299
def quote
  # Read the quote value straight from the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.quote
end
quote=(new_quote) click to toggle source

Set the value that is used to quote data sections in a CSV file.

@param [String] new_quote New quote value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quote = "'"
end

csv_table.quote #=> "'"
# File lib/google/cloud/bigquery/external/csv_source.rb, line 320
def quote= new_quote
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.quote = new_quote
end
quoted_newlines() click to toggle source

Indicates if BigQuery should allow quoted data sections that contain newline characters in a CSV file.

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 123
def quoted_newlines
  # The flag is stored as allow_quoted_newlines on the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.allow_quoted_newlines
end
quoted_newlines=(new_quoted_newlines) click to toggle source

Set whether BigQuery should allow quoted data sections that contain newline characters in a CSV file.

@param [Boolean] new_quoted_newlines New quoted_newlines value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.quoted_newlines = true
end

csv_table.quoted_newlines #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 145
def quoted_newlines= new_quoted_newlines
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.allow_quoted_newlines = new_quoted_newlines
end
schema(replace: false) { |schema| ... } click to toggle source

The schema for the data.

@param [Boolean] replace Whether to replace the existing schema with the new schema. If `true`, the fields will replace the existing schema. If `false`, the fields will be added to the existing schema. The default value is `false`.

@yield [schema] a block for setting the schema

@yieldparam [Schema] schema the object accepting the schema

@return [Google::Cloud::Bigquery::Schema]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.schema do |schema|
    schema.string "name", mode: :required
    schema.string "email", mode: :required
    schema.integer "age", mode: :required
    schema.boolean "active", mode: :required
  end
end
# File lib/google/cloud/bigquery/external/csv_source.rb, line 397
def schema replace: false
  # Build the schema from the gapi only if none has been stored yet.
  @schema = Schema.from_gapi(@gapi.schema) unless @schema
  if replace
    # Replacing is a write, so frozen_check! (defined outside this listing)
    # runs before the stored schema is swapped for a fresh one.
    frozen_check!
    @schema = Schema.from_gapi
  end
  # When this source is frozen, the schema handed out is frozen as well.
  @schema.freeze if frozen?
  return @schema unless block_given?
  yield @schema
  @schema
end
schema=(new_schema) click to toggle source

Set the schema for the data.

@param [Schema] new_schema The schema object.

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_schema = bigquery.schema do |schema|
  schema.string "name", mode: :required
  schema.string "email", mode: :required
  schema.integer "age", mode: :required
  schema.boolean "active", mode: :required
end

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url
csv_table.schema = csv_schema
# File lib/google/cloud/bigquery/external/csv_source.rb, line 429
def schema= new_schema
  # frozen_check! is defined outside this listing; it runs before the write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  # Store the given schema object as-is; no copy or conversion is made here.
  instance_variable_set :@schema, new_schema
end
skip_leading_rows() click to toggle source

The number of rows at the top of a CSV file that BigQuery will skip when reading the data.

@return [Integer]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1
# File lib/google/cloud/bigquery/external/csv_source.rb, line 343
def skip_leading_rows
  # Read the row count straight from the gapi CSV options.
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows
end
skip_leading_rows=(row_count) click to toggle source

Set the number of rows at the top of a CSV file that BigQuery will skip when reading the data.

@param [Integer] row_count New skip_leading_rows value

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.skip_leading_rows = 1
end

csv_table.skip_leading_rows #=> 1
# File lib/google/cloud/bigquery/external/csv_source.rb, line 365
def skip_leading_rows= row_count
  # frozen_check! is defined outside this listing; it runs before any write
  # (presumably rejecting changes to a frozen source -- confirm).
  frozen_check!
  csv_options = @gapi.csv_options
  csv_options.skip_leading_rows = row_count
end
to_gapi() click to toggle source

@private Google API Client object.

# File lib/google/cloud/bigquery/external/csv_source.rb, line 464
def to_gapi
  # Copy the schema onto the gapi (only when one is set) and return the gapi.
  @gapi.tap do |gapi|
    gapi.schema = @schema.to_gapi if @schema
  end
end
utf8?() click to toggle source

Checks if the character encoding of the data is “UTF-8”. This is the default.

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.encoding = "UTF-8"
end

csv_table.encoding #=> "UTF-8"
csv_table.utf8? #=> true
# File lib/google/cloud/bigquery/external/csv_source.rb, line 212
def utf8?
  # An unset (nil) encoding counts as UTF-8, as does the literal "UTF-8".
  encoding.nil? || encoding == "UTF-8"
end