module Google::Cloud::Bigquery::External

# External

Creates a new {External::DataSource} (or subclass) object that represents the external data source that can be queried from directly, even though the data is not stored in BigQuery. Instead of loading or streaming the data, this object references the external data source.

See {External::DataSource}, {External::CsvSource}, {External::JsonSource}, {External::SheetsSource}, {External::BigtableSource}

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

csv_url = "gs://bucket/path/to/data.csv"
csv_table = bigquery.external csv_url do |csv|
  csv.autodetect = true
  csv.skip_leading_rows = 1
end

data = bigquery.query "SELECT * FROM my_ext_table",
                      external: { my_ext_table: csv_table }

# Iterate over the first page of results
data.each do |row|
  puts row[:name]
end
# Retrieve the next page of results
data = data.next if data.next?

@example Hive partitioning options:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

gcs_uri = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/*"
source_uri_prefix = "gs://cloud-samples-data/bigquery/hive-partitioning-samples/autolayout/"
external_data = bigquery.external gcs_uri, format: :parquet do |ext|
  ext.hive_partitioning_mode = :auto
  ext.hive_partitioning_require_partition_filter = true
  ext.hive_partitioning_source_uri_prefix = source_uri_prefix
end

external_data.hive_partitioning? #=> true
external_data.hive_partitioning_mode #=> "AUTO"
external_data.hive_partitioning_require_partition_filter? #=> true
external_data.hive_partitioning_source_uri_prefix #=> source_uri_prefix

Public Class Methods

from_gapi(gapi) click to toggle source

@private Google API Client object.

# File lib/google/cloud/bigquery/external.rb, line 92
def self.from_gapi gapi
  external_format = source_format_for gapi.source_uris,
                                      gapi.source_format
  raise ArgumentError, "Unable to determine external table format" if external_format.nil?
  external_class = table_class_for external_format
  external_class.from_gapi gapi
end
from_urls(urls, format = nil) click to toggle source

@private New External from URLs and format

# File lib/google/cloud/bigquery/external.rb, line 80
def self.from_urls urls, format = nil
  external_format = source_format_for urls, format
  raise ArgumentError, "Unable to determine external table format" if external_format.nil?
  external_class = table_class_for external_format
  external_class.new.tap do |e|
    e.gapi.source_uris = Array(urls)
    e.gapi.source_format = external_format
  end
end
source_format_for(urls, format) click to toggle source

@private Determine source_format from inputs

# File lib/google/cloud/bigquery/external.rb, line 102
def self.source_format_for urls, format
  val = {
    "csv"                    => "CSV",
    "avro"                   => "AVRO",
    "json"                   => "NEWLINE_DELIMITED_JSON",
    "newline_delimited_json" => "NEWLINE_DELIMITED_JSON",
    "sheets"                 => "GOOGLE_SHEETS",
    "google_sheets"          => "GOOGLE_SHEETS",
    "datastore"              => "DATASTORE_BACKUP",
    "backup"                 => "DATASTORE_BACKUP",
    "datastore_backup"       => "DATASTORE_BACKUP",
    "bigtable"               => "BIGTABLE",
    "orc"                    => "ORC",
    "parquet"                => "PARQUET"
  }[format.to_s.downcase]
  return val unless val.nil?
  Array(urls).each do |url|
    return "CSV" if url.end_with? ".csv"
    return "NEWLINE_DELIMITED_JSON" if url.end_with? ".json"
    return "PARQUET" if url.end_with? ".parquet"
    return "AVRO" if url.end_with? ".avro"
    return "DATASTORE_BACKUP" if url.end_with? ".backup_info"
    return "GOOGLE_SHEETS" if url.start_with? "https://docs.google.com/spreadsheets/"
    return "BIGTABLE" if url.start_with? "https://googleapis.com/bigtable/projects/"
  end
  nil
end
table_class_for(format) click to toggle source

@private Determine table class from source_format

# File lib/google/cloud/bigquery/external.rb, line 132
def self.table_class_for format
  case format
  when "CSV"                    then External::CsvSource
  when "NEWLINE_DELIMITED_JSON" then External::JsonSource
  when "PARQUET"                then External::ParquetSource
  when "GOOGLE_SHEETS"          then External::SheetsSource
  when "BIGTABLE"               then External::BigtableSource
  else
    # AVRO, DATASTORE_BACKUP
    External::DataSource
  end
end