class Google::Cloud::Bigquery::QueryJob

# QueryJob

A {Job} subclass representing a query operation that may be performed on a {Table}. A QueryJob instance is created when you call {Project#query_job}, {Dataset#query_job}.

@see cloud.google.com/bigquery/querying-data Querying Data @see cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API

reference

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

job = bigquery.query_job "SELECT COUNT(word) as count FROM " \
                         "`bigquery-public-data.samples.shakespeare`"

job.wait_until_done!

if job.failed?
  puts job.error
else
  puts job.data.first
end

@example With multiple statements and child jobs:

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

multi_statement_sql = <<~SQL
  -- Declare a variable to hold names as an array.
  DECLARE top_names ARRAY<STRING>;
  -- Build an array of the top 100 names from the year 2017.
  SET top_names = (
  SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100)
  FROM `bigquery-public-data.usa_names.usa_1910_current`
  WHERE year = 2017
  );
  -- Which names appear as words in Shakespeare's plays?
  SELECT
  name AS shakespeare_name
  FROM UNNEST(top_names) AS name
  WHERE name IN (
  SELECT word
  FROM `bigquery-public-data.samples.shakespeare`
  );
SQL

job = bigquery.query_job multi_statement_sql

job.wait_until_done!

child_jobs = bigquery.jobs parent_job: job

child_jobs.each do |child_job|
  script_statistics = child_job.script_statistics
  puts script_statistics.evaluation_kind
  script_statistics.stack_frames.each do |stack_frame|
    puts stack_frame.text
  end
end

Public Instance Methods

batch?() click to toggle source

Checks if the priority for the query is `BATCH`.

@return [Boolean] `true` when the priority is `BATCH`, `false`

otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 96
def batch?
  @gapi.configuration.query.priority == "BATCH"
end
bytes_processed() click to toggle source

The number of bytes processed by the query.

@return [Integer, nil] Total bytes processed for the job.

# File lib/google/cloud/bigquery/query_job.rb, line 213
def bytes_processed
  Integer @gapi.statistics.query.total_bytes_processed
rescue StandardError
  nil
end
cache?() click to toggle source

Checks if the query job looks for an existing result in the query cache. For more information, see [Query Caching](cloud.google.com/bigquery/querying-data#querycaching).

@return [Boolean] `true` when the query cache will be used, `false`

otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 133
def cache?
  val = @gapi.configuration.query.use_query_cache
  return false if val.nil?
  val
end
cache_hit?() click to toggle source

Checks if the query results are from the query cache.

@return [Boolean] `true` when the job statistics indicate a cache hit,

`false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 203
def cache_hit?
  return false unless @gapi.statistics.query
  @gapi.statistics.query.cache_hit
end
clustering?() click to toggle source

Checks if the destination table will be clustered.

See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and {Table#clustering_fields=}.

@see cloud.google.com/bigquery/docs/clustered-tables

Introduction to clustered tables

@see cloud.google.com/bigquery/docs/creating-clustered-tables

Creating and using clustered tables

@return [Boolean] `true` when the table will be clustered,

or `false` otherwise.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 638
def clustering?
  !@gapi.configuration.query.clustering.nil?
end
clustering_fields() click to toggle source

One or more fields on which the destination table should be clustered. Must be specified with time-based partitioning, data in the table will be first partitioned and subsequently clustered. The order of the returned fields determines the sort order of the data.

BigQuery supports clustering for both partitioned and non-partitioned tables.

See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and {Table#clustering_fields=}.

@see cloud.google.com/bigquery/docs/clustered-tables

Introduction to clustered tables

@see cloud.google.com/bigquery/docs/creating-clustered-tables

Creating and using clustered tables

@return [Array<String>, nil] The clustering fields, or `nil` if the

destination table will not be clustered.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 664
def clustering_fields
  @gapi.configuration.query.clustering.fields if clustering?
end
data(token: nil, max: nil, start: nil) click to toggle source

Retrieves the query results for the job.

@param [String] token Page token, returned by a previous call,

identifying the result set.

@param [Integer] max Maximum number of results to return. @param [Integer] start Zero-based index of the starting row to read.

@return [Google::Cloud::Bigquery::Data] An object providing access to

data read from the destination table for the job.

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
job = bigquery.query_job sql

job.wait_until_done!
data = job.data

# Iterate over the first page of results
data.each do |row|
  puts row[:word]
end
# Retrieve the next page of results
data = data.next if data.next?
# File lib/google/cloud/bigquery/query_job.rb, line 726
def data token: nil, max: nil, start: nil
  return nil unless done?
  return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun?
  if ddl? || dml?
    data_hash = { totalRows: nil, rows: [] }
    return Data.from_gapi_json data_hash, nil, @gapi, service
  end
  ensure_schema!

  data_hash = service.list_tabledata destination_table_dataset_id,
                                     destination_table_table_id,
                                     token: token,
                                     max: max,
                                     start: start
  Data.from_gapi_json data_hash, destination_table_gapi, @gapi, service
end
Also aliased as: query_results
ddl?() click to toggle source

Whether the query is a DDL statement.

@see cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language

Using Data Definition Language Statements

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
query_job = bigquery.query_job "CREATE TABLE my_table (x INT64)"

query_job.statement_type #=> "CREATE_TABLE"
query_job.ddl? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 297
def ddl?
  ["CREATE_MODEL", "CREATE_TABLE", "CREATE_TABLE_AS_SELECT", "CREATE_VIEW", "\n", "DROP_MODEL", "DROP_TABLE",
   "DROP_VIEW"].include? statement_type
end
ddl_operation_performed() click to toggle source

The DDL operation performed, possibly dependent on the pre-existence of the DDL target. (See {#ddl_target_table}.) Possible values (new values might be added in the future):

  • “CREATE”: The query created the DDL target.

  • “SKIP”: No-op. Example cases: the query is `CREATE TABLE IF NOT EXISTS` while the table already exists, or the query is `DROP TABLE IF EXISTS` while the table does not exist.

  • “REPLACE”: The query replaced the DDL target. Example case: the query is `CREATE OR REPLACE TABLE`, and the table already exists.

  • “DROP”: The query deleted the DDL target.

@return [String, nil] The DDL operation performed.

# File lib/google/cloud/bigquery/query_job.rb, line 340
def ddl_operation_performed
  return nil unless @gapi.statistics.query
  @gapi.statistics.query.ddl_operation_performed
end
ddl_target_routine() click to toggle source

The DDL target routine, in reference state. (See {Routine#reference?}.) Present only for `CREATE/DROP FUNCTION/PROCEDURE` queries. (See {#statement_type}.)

@return [Google::Cloud::Bigquery::Routine, nil] The DDL target routine, in

reference state.
# File lib/google/cloud/bigquery/query_job.rb, line 353
def ddl_target_routine
  return nil unless @gapi.statistics.query
  ensure_service!
  routine = @gapi.statistics.query.ddl_target_routine
  return nil unless routine
  Google::Cloud::Bigquery::Routine.new_reference_from_gapi routine, service
end
ddl_target_table() click to toggle source

The DDL target table, in reference state. (See {Table#reference?}.) Present only for `CREATE/DROP TABLE/VIEW` queries. (See {#statement_type}.)

@return [Google::Cloud::Bigquery::Table, nil] The DDL target table, in

reference state.
# File lib/google/cloud/bigquery/query_job.rb, line 369
def ddl_target_table
  return nil unless @gapi.statistics.query
  ensure_service!
  table = @gapi.statistics.query.ddl_target_table
  return nil unless table
  Google::Cloud::Bigquery::Table.new_reference_from_gapi table, service
end
deleted_row_count() click to toggle source

The number of deleted rows. Present only for DML statements `DELETE`, `MERGE` and `TRUNCATE`. (See {#statement_type}.)

@return [Integer, nil] The number of deleted rows, or `nil` if not

applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 396
def deleted_row_count
  @gapi.statistics.query&.dml_stats&.deleted_row_count
end
destination() click to toggle source

The table in which the query results are stored.

@return [Table] A table instance.

# File lib/google/cloud/bigquery/query_job.rb, line 427
def destination
  table = @gapi.configuration.query.destination_table
  return nil unless table
  retrieve_table table.project_id,
                 table.dataset_id,
                 table.table_id
end
dml?() click to toggle source

Whether the query is a DML statement.

@see cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax

Data Manipulation Language Syntax

@return [Boolean]

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new
query_job = bigquery.query_job "UPDATE my_table " \
                               "SET x = x + 1 " \
                               "WHERE x IS NOT NULL"

query_job.statement_type #=> "UPDATE"
query_job.dml? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 321
def dml?
  ["INSERT", "UPDATE", "MERGE", "DELETE"].include? statement_type
end
dry_run()
Alias for: dryrun?
dry_run?()
Alias for: dryrun?
dryrun()
Alias for: dryrun?
dryrun?() click to toggle source

If set, don't actually run this job. A valid query will return a mostly empty response with some processing statistics, while an invalid query will return the same error it would if it wasn't a dry run.

@return [Boolean] `true` when the dry run flag is set for the query

job, `false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 148
def dryrun?
  @gapi.configuration.dry_run
end
Also aliased as: dryrun, dry_run, dry_run?
encryption() click to toggle source

The encryption configuration of the destination table.

@return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom

encryption configuration (e.g., Cloud KMS keys).

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 479
def encryption
  EncryptionConfiguration.from_gapi @gapi.configuration.query.destination_encryption_configuration
end
flatten?() click to toggle source

Checks if the query job flattens nested and repeated fields in the query results. The default is `true`. If the value is `false`, large_results? should return `true`.

@return [Boolean] `true` when the job flattens results, `false`

otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 163
def flatten?
  val = @gapi.configuration.query.flatten_results
  return true if val.nil?
  val
end
inserted_row_count() click to toggle source

The number of inserted rows. Present only for DML statements `INSERT` and `MERGE`. (See {#statement_type}.)

@return [Integer, nil] The number of inserted rows, or `nil` if not

applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 407
def inserted_row_count
  @gapi.statistics.query&.dml_stats&.inserted_row_count
end
interactive?() click to toggle source

Checks if the priority for the query is `INTERACTIVE`.

@return [Boolean] `true` when the priority is `INTERACTIVE`, `false`

otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 106
def interactive?
  val = @gapi.configuration.query.priority
  return true if val.nil?
  val == "INTERACTIVE"
end
large_results?() click to toggle source

Checks if the the query job allows arbitrarily large results at a slight cost to performance.

@return [Boolean] `true` when large results are allowed, `false`

otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 119
def large_results?
  val = @gapi.configuration.query.allow_large_results
  return false if val.nil?
  val
end
legacy_sql?() click to toggle source

Checks if the query job is using legacy sql.

@return [Boolean] `true` when legacy sql is used, `false` otherwise.

# File lib/google/cloud/bigquery/query_job.rb, line 440
def legacy_sql?
  val = @gapi.configuration.query.use_legacy_sql
  return true if val.nil?
  val
end
maximum_billing_tier() click to toggle source

Limits the billing tier for this job. Queries that have resource usage beyond this tier will raise (without incurring a charge). If unspecified, this will be set to your project default. For more information, see [High-Compute queries](cloud.google.com/bigquery/pricing#high-compute).

@return [Integer, nil] The tier number, or `nil` for the project

default.
# File lib/google/cloud/bigquery/query_job.rb, line 179
def maximum_billing_tier
  @gapi.configuration.query.maximum_billing_tier
end
maximum_bytes_billed() click to toggle source

Limits the bytes billed for this job. Queries that will have bytes billed beyond this limit will raise (without incurring a charge). If `nil`, this will be set to your project default.

@return [Integer, nil] The number of bytes, or `nil` for the project

default.
# File lib/google/cloud/bigquery/query_job.rb, line 191
def maximum_bytes_billed
  Integer @gapi.configuration.query.maximum_bytes_billed
rescue StandardError
  nil
end
num_dml_affected_rows() click to toggle source

The number of rows affected by a DML statement. Present only for DML statements `INSERT`, `UPDATE` or `DELETE`. (See {#statement_type}.)

@return [Integer, nil] The number of rows affected by a DML statement,

or `nil` if the query is not a DML statement.
# File lib/google/cloud/bigquery/query_job.rb, line 384
def num_dml_affected_rows
  return nil unless @gapi.statistics.query
  @gapi.statistics.query.num_dml_affected_rows
end
query_plan() click to toggle source

Describes the execution plan for the query.

@return [Array<Google::Cloud::Bigquery::QueryJob::Stage>, nil] An

array containing the stages of the execution plan.

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
job = bigquery.query_job sql

job.wait_until_done!

stages = job.query_plan
stages.each do |stage|
  puts stage.name
  stage.steps.each do |step|
    puts step.kind
    puts step.substeps.inspect
  end
end
# File lib/google/cloud/bigquery/query_job.rb, line 244
def query_plan
  return nil unless @gapi&.statistics&.query&.query_plan
  Array(@gapi.statistics.query.query_plan).map { |stage| Stage.from_gapi stage }
end
query_results(token: nil, max: nil, start: nil)
Alias for: data
range_partitioning?() click to toggle source

Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).

@return [Boolean] `true` when the table is range partitioned, or `false` otherwise.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 491
def range_partitioning?
  !@gapi.configuration.query.range_partitioning.nil?
end
range_partitioning_end() click to toggle source

The end of range partitioning, exclusive. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).

@return [Integer, nil] The end of range partitioning, exclusive, or `nil` if not range partitioned.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 542
def range_partitioning_end
  @gapi.configuration.query.range_partitioning.range.end if range_partitioning?
end
range_partitioning_field() click to toggle source

The field on which the destination table will be range partitioned, if any. The field must be a top-level `NULLABLE/REQUIRED` field. The only supported type is `INTEGER/INT64`. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).

@return [String, nil] The partition field, if a field was configured, or `nil` if not range partitioned.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 505
def range_partitioning_field
  @gapi.configuration.query.range_partitioning.field if range_partitioning?
end
range_partitioning_interval() click to toggle source

The width of each interval. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).

@return [Integer, nil] The width of each interval, for data in range partitions, or `nil` if not range

partitioned.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 530
def range_partitioning_interval
  @gapi.configuration.query.range_partitioning.range.interval if range_partitioning?
end
range_partitioning_start() click to toggle source

The start of range partitioning, inclusive. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).

@return [Integer, nil] The start of range partitioning, inclusive, or `nil` if not range partitioned.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 517
def range_partitioning_start
  @gapi.configuration.query.range_partitioning.range.start if range_partitioning?
end
standard_sql?() click to toggle source

Checks if the query job is using standard sql.

@return [Boolean] `true` when standard sql is used, `false` otherwise.

# File lib/google/cloud/bigquery/query_job.rb, line 451
def standard_sql?
  !legacy_sql?
end
statement_type() click to toggle source

The type of query statement, if valid. Possible values (new values might be added in the future):

@return [String, nil] The type of query statement.

# File lib/google/cloud/bigquery/query_job.rb, line 275
def statement_type
  return nil unless @gapi.statistics.query
  @gapi.statistics.query.statement_type
end
time_partitioning?() click to toggle source

Checks if the destination table will be time-partitioned. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).

@return [Boolean] `true` when the table will be time-partitioned,

or `false` otherwise.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 555
def time_partitioning?
  !@gapi.configuration.query.time_partitioning.nil?
end
time_partitioning_expiration() click to toggle source

The expiration for the destination table partitions, if any, in seconds. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).

@return [Integer, nil] The expiration time, in seconds, for data in

partitions, or `nil` if not present.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 600
def time_partitioning_expiration
  tp = @gapi.configuration.query.time_partitioning
  tp.expiration_ms / 1_000 if tp && !tp.expiration_ms.nil?
end
time_partitioning_field() click to toggle source

The field on which the destination table will be partitioned, if any. If not set, the destination table will be partitioned by pseudo column `_PARTITIONTIME`; if set, the table will be partitioned by this field. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).

@return [String, nil] The partition field, if a field was configured.

`nil` if not partitioned or not set (partitioned by pseudo column
'_PARTITIONTIME').

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 585
def time_partitioning_field
  return nil unless time_partitioning?
  @gapi.configuration.query.time_partitioning.field
end
time_partitioning_require_filter?() click to toggle source

If set to true, queries over the destination table will require a partition filter that can be used for partition elimination to be specified. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).

@return [Boolean] `true` when a partition filter will be required,

or `false` otherwise.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 616
def time_partitioning_require_filter?
  tp = @gapi.configuration.query.time_partitioning
  return false if tp.nil? || tp.require_partition_filter.nil?
  tp.require_partition_filter
end
time_partitioning_type() click to toggle source

The period for which the destination table will be partitioned, if any. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).

@return [String, nil] The partition type. The supported types are `DAY`,

`HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day,
hour, month, and year, respectively; or `nil` if not present.

@!group Attributes

# File lib/google/cloud/bigquery/query_job.rb, line 569
def time_partitioning_type
  @gapi.configuration.query.time_partitioning.type if time_partitioning?
end
udfs() click to toggle source

The user-defined function resources used in the query. May be either a code resource to load from a Google Cloud Storage URI (`gs://bucket/path`), or an inline resource that contains code for a user-defined function (UDF). Providing an inline code resource is equivalent to providing a URI for a file containing the same code. See [User-Defined Functions](cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).

@return [Array<String>] An array containing Google Cloud Storage URIs

and/or inline source code.
# File lib/google/cloud/bigquery/query_job.rb, line 466
def udfs
  udfs_gapi = @gapi.configuration.query.user_defined_function_resources
  return nil unless udfs_gapi
  Array(udfs_gapi).map { |udf| udf.inline_code || udf.resource_uri }
end
updated_row_count() click to toggle source

The number of updated rows. Present only for DML statements `UPDATE` and `MERGE`. (See {#statement_type}.)

@return [Integer, nil] The number of updated rows, or `nil` if not

applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 418
def updated_row_count
  @gapi.statistics.query&.dml_stats&.updated_row_count
end
wait_until_done!() click to toggle source

Refreshes the job until the job is `DONE`. The delay between refreshes will incrementally increase.

@example

require "google/cloud/bigquery"

bigquery = Google::Cloud::Bigquery.new

sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`"
job = bigquery.query_job sql

job.wait_until_done!
job.done? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 683
def wait_until_done!
  return if done?

  ensure_service!
  loop do
    query_results_gapi = service.job_query_results job_id, location: location, max: 0
    if query_results_gapi.job_complete
      @destination_schema_gapi = query_results_gapi.schema
      break
    end
  end
  reload!
end

Protected Instance Methods

destination_schema() click to toggle source
# File lib/google/cloud/bigquery/query_job.rb, line 1746
def destination_schema
  @destination_schema_gapi
end
destination_table_dataset_id() click to toggle source
# File lib/google/cloud/bigquery/query_job.rb, line 1750
def destination_table_dataset_id
  @gapi.configuration.query.destination_table.dataset_id
end
destination_table_gapi() click to toggle source
# File lib/google/cloud/bigquery/query_job.rb, line 1758
def destination_table_gapi
  Google::Apis::BigqueryV2::Table.new(
    table_reference: @gapi.configuration.query.destination_table,
    schema:          destination_schema
  )
end
destination_table_table_id() click to toggle source
# File lib/google/cloud/bigquery/query_job.rb, line 1754
def destination_table_table_id
  @gapi.configuration.query.destination_table.table_id
end
ensure_schema!() click to toggle source
# File lib/google/cloud/bigquery/query_job.rb, line 1738
def ensure_schema!
  return unless destination_schema.nil?

  query_results_gapi = service.job_query_results job_id, location: location, max: 0
  # raise "unable to retrieve schema" if query_results_gapi.schema.nil?
  @destination_schema_gapi = query_results_gapi.schema
end