class Google::Cloud::Bigquery::QueryJob
# QueryJob
A {Job} subclass representing a query operation that may be performed on a {Table}. A QueryJob
instance is created when you call {Project#query_job}, {Dataset#query_job}.
@see cloud.google.com/bigquery/querying-data Querying Data
@see cloud.google.com/bigquery/docs/reference/v2/jobs Jobs API
reference
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new job = bigquery.query_job "SELECT COUNT(word) as count FROM " \ "`bigquery-public-data.samples.shakespeare`" job.wait_until_done! if job.failed? puts job.error else puts job.data.first end
@example With multiple statements and child jobs:
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new multi_statement_sql = <<~SQL -- Declare a variable to hold names as an array. DECLARE top_names ARRAY<STRING>; -- Build an array of the top 100 names from the year 2017. SET top_names = ( SELECT ARRAY_AGG(name ORDER BY number DESC LIMIT 100) FROM `bigquery-public-data.usa_names.usa_1910_current` WHERE year = 2017 ); -- Which names appear as words in Shakespeare's plays? SELECT name AS shakespeare_name FROM UNNEST(top_names) AS name WHERE name IN ( SELECT word FROM `bigquery-public-data.samples.shakespeare` ); SQL job = bigquery.query_job multi_statement_sql job.wait_until_done! child_jobs = bigquery.jobs parent_job: job child_jobs.each do |child_job| script_statistics = child_job.script_statistics puts script_statistics.evaluation_kind script_statistics.stack_frames.each do |stack_frame| puts stack_frame.text end end
Public Instance Methods
Checks if the priority for the query is `BATCH`.
@return [Boolean] `true` when the priority is `BATCH`, `false`
otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 96 def batch? @gapi.configuration.query.priority == "BATCH" end
The number of bytes processed by the query.
@return [Integer, nil] Total bytes processed for the job.
# File lib/google/cloud/bigquery/query_job.rb, line 213 def bytes_processed Integer @gapi.statistics.query.total_bytes_processed rescue StandardError nil end
Checks if the query job looks for an existing result in the query cache. For more information, see [Query Caching](cloud.google.com/bigquery/querying-data#querycaching).
@return [Boolean] `true` when the query cache will be used, `false`
otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 133 def cache? val = @gapi.configuration.query.use_query_cache return false if val.nil? val end
Checks if the query results are from the query cache.
@return [Boolean] `true` when the job statistics indicate a cache hit,
`false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 203 def cache_hit? return false unless @gapi.statistics.query @gapi.statistics.query.cache_hit end
Checks if the destination table will be clustered.
See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and {Table#clustering_fields=}.
@see cloud.google.com/bigquery/docs/clustered-tables
Introduction to clustered tables
@see cloud.google.com/bigquery/docs/creating-clustered-tables
Creating and using clustered tables
@return [Boolean] `true` when the table will be clustered,
or `false` otherwise.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 638 def clustering? !@gapi.configuration.query.clustering.nil? end
One or more fields on which the destination table should be clustered. Must be specified with time-based partitioning, data in the table will be first partitioned and subsequently clustered. The order of the returned fields determines the sort order of the data.
BigQuery supports clustering for both partitioned and non-partitioned tables.
See {QueryJob::Updater#clustering_fields=}, {Table#clustering_fields} and {Table#clustering_fields=}.
@see cloud.google.com/bigquery/docs/clustered-tables
Introduction to clustered tables
@see cloud.google.com/bigquery/docs/creating-clustered-tables
Creating and using clustered tables
@return [Array<String>, nil] The clustering fields, or `nil` if the
destination table will not be clustered.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 664 def clustering_fields @gapi.configuration.query.clustering.fields if clustering? end
Retrieves the query results for the job.
@param [String] token Page token, returned by a previous call,
identifying the result set.
@param [Integer] max Maximum number of results to return. @param [Integer] start Zero-based index of the starting row to read.
@return [Google::Cloud::Bigquery::Data] An object providing access to
data read from the destination table for the job.
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`" job = bigquery.query_job sql job.wait_until_done! data = job.data # Iterate over the first page of results data.each do |row| puts row[:word] end # Retrieve the next page of results data = data.next if data.next?
# File lib/google/cloud/bigquery/query_job.rb, line 726 def data token: nil, max: nil, start: nil return nil unless done? return Data.from_gapi_json({ rows: [] }, nil, @gapi, service) if dryrun? if ddl? || dml? data_hash = { totalRows: nil, rows: [] } return Data.from_gapi_json data_hash, nil, @gapi, service end ensure_schema! data_hash = service.list_tabledata destination_table_dataset_id, destination_table_table_id, token: token, max: max, start: start Data.from_gapi_json data_hash, destination_table_gapi, @gapi, service end
Whether the query is a DDL statement.
@see cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language
Using Data Definition Language Statements
@return [Boolean]
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new query_job = bigquery.query_job "CREATE TABLE my_table (x INT64)" query_job.statement_type #=> "CREATE_TABLE" query_job.ddl? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 297 def ddl? ["CREATE_MODEL", "CREATE_TABLE", "CREATE_TABLE_AS_SELECT", "CREATE_VIEW", "\n", "DROP_MODEL", "DROP_TABLE", "DROP_VIEW"].include? statement_type end
The DDL operation performed, possibly dependent on the pre-existence of the DDL target. (See {#ddl_target_table}.) Possible values (new values might be added in the future):
-
“CREATE”: The query created the DDL target.
-
“SKIP”: No-op. Example cases: the query is `CREATE TABLE IF NOT EXISTS` while the table already exists, or the query is `DROP TABLE IF EXISTS` while the table does not exist.
-
“REPLACE”: The query replaced the DDL target. Example case: the query is `CREATE OR REPLACE TABLE`, and the table already exists.
-
“DROP”: The query deleted the DDL target.
@return [String, nil] The DDL operation performed.
# File lib/google/cloud/bigquery/query_job.rb, line 340 def ddl_operation_performed return nil unless @gapi.statistics.query @gapi.statistics.query.ddl_operation_performed end
The DDL target routine, in reference state. (See {Routine#reference?}.) Present only for `CREATE/DROP FUNCTION/PROCEDURE` queries. (See {#statement_type}.)
@return [Google::Cloud::Bigquery::Routine, nil] The DDL target routine, in
reference state.
# File lib/google/cloud/bigquery/query_job.rb, line 353 def ddl_target_routine return nil unless @gapi.statistics.query ensure_service! routine = @gapi.statistics.query.ddl_target_routine return nil unless routine Google::Cloud::Bigquery::Routine.new_reference_from_gapi routine, service end
The DDL target table, in reference state. (See {Table#reference?}.) Present only for `CREATE/DROP TABLE/VIEW` queries. (See {#statement_type}.)
@return [Google::Cloud::Bigquery::Table, nil] The DDL target table, in
reference state.
# File lib/google/cloud/bigquery/query_job.rb, line 369 def ddl_target_table return nil unless @gapi.statistics.query ensure_service! table = @gapi.statistics.query.ddl_target_table return nil unless table Google::Cloud::Bigquery::Table.new_reference_from_gapi table, service end
The number of deleted rows. Present only for DML statements `DELETE`, `MERGE` and `TRUNCATE`. (See {#statement_type}.)
@return [Integer, nil] The number of deleted rows, or `nil` if not
applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 396 def deleted_row_count @gapi.statistics.query&.dml_stats&.deleted_row_count end
The table in which the query results are stored.
@return [Table] A table instance.
# File lib/google/cloud/bigquery/query_job.rb, line 427 def destination table = @gapi.configuration.query.destination_table return nil unless table retrieve_table table.project_id, table.dataset_id, table.table_id end
Whether the query is a DML statement.
@see cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax
Data Manipulation Language Syntax
@return [Boolean]
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new query_job = bigquery.query_job "UPDATE my_table " \ "SET x = x + 1 " \ "WHERE x IS NOT NULL" query_job.statement_type #=> "UPDATE" query_job.dml? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 321 def dml? ["INSERT", "UPDATE", "MERGE", "DELETE"].include? statement_type end
If set, don't actually run this job. A valid query will return a mostly empty response with some processing statistics, while an invalid query will return the same error it would if it wasn't a dry run.
@return [Boolean] `true` when the dry run flag is set for the query
job, `false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 148 def dryrun? @gapi.configuration.dry_run end
The encryption configuration of the destination table.
@return [Google::Cloud::BigQuery::EncryptionConfiguration] Custom
encryption configuration (e.g., Cloud KMS keys).
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 479 def encryption EncryptionConfiguration.from_gapi @gapi.configuration.query.destination_encryption_configuration end
Checks if the query job flattens nested and repeated fields in the query results. The default is `true`. If the value is `false`, large_results?
should return `true`.
@return [Boolean] `true` when the job flattens results, `false`
otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 163 def flatten? val = @gapi.configuration.query.flatten_results return true if val.nil? val end
The number of inserted rows. Present only for DML statements `INSERT` and `MERGE`. (See {#statement_type}.)
@return [Integer, nil] The number of inserted rows, or `nil` if not
applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 407 def inserted_row_count @gapi.statistics.query&.dml_stats&.inserted_row_count end
Checks if the priority for the query is `INTERACTIVE`.
@return [Boolean] `true` when the priority is `INTERACTIVE`, `false`
otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 106 def interactive? val = @gapi.configuration.query.priority return true if val.nil? val == "INTERACTIVE" end
Checks if the the query job allows arbitrarily large results at a slight cost to performance.
@return [Boolean] `true` when large results are allowed, `false`
otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 119 def large_results? val = @gapi.configuration.query.allow_large_results return false if val.nil? val end
Checks if the query job is using legacy sql.
@return [Boolean] `true` when legacy sql is used, `false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 440 def legacy_sql? val = @gapi.configuration.query.use_legacy_sql return true if val.nil? val end
Limits the billing tier for this job. Queries that have resource usage beyond this tier will raise (without incurring a charge). If unspecified, this will be set to your project default. For more information, see [High-Compute queries](cloud.google.com/bigquery/pricing#high-compute).
@return [Integer, nil] The tier number, or `nil` for the project
default.
# File lib/google/cloud/bigquery/query_job.rb, line 179 def maximum_billing_tier @gapi.configuration.query.maximum_billing_tier end
Limits the bytes billed for this job. Queries that will have bytes billed beyond this limit will raise (without incurring a charge). If `nil`, this will be set to your project default.
@return [Integer, nil] The number of bytes, or `nil` for the project
default.
# File lib/google/cloud/bigquery/query_job.rb, line 191 def maximum_bytes_billed Integer @gapi.configuration.query.maximum_bytes_billed rescue StandardError nil end
The number of rows affected by a DML statement. Present only for DML statements `INSERT`, `UPDATE` or `DELETE`. (See {#statement_type}.)
@return [Integer, nil] The number of rows affected by a DML statement,
or `nil` if the query is not a DML statement.
# File lib/google/cloud/bigquery/query_job.rb, line 384 def num_dml_affected_rows return nil unless @gapi.statistics.query @gapi.statistics.query.num_dml_affected_rows end
Describes the execution plan for the query.
@return [Array<Google::Cloud::Bigquery::QueryJob::Stage>, nil] An
array containing the stages of the execution plan.
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`" job = bigquery.query_job sql job.wait_until_done! stages = job.query_plan stages.each do |stage| puts stage.name stage.steps.each do |step| puts step.kind puts step.substeps.inspect end end
# File lib/google/cloud/bigquery/query_job.rb, line 244 def query_plan return nil unless @gapi&.statistics&.query&.query_plan Array(@gapi.statistics.query.query_plan).map { |stage| Stage.from_gapi stage } end
Checks if the destination table will be range partitioned. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@return [Boolean] `true` when the table is range partitioned, or `false` otherwise.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 491 def range_partitioning? !@gapi.configuration.query.range_partitioning.nil? end
The end of range partitioning, exclusive. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@return [Integer, nil] The end of range partitioning, exclusive, or `nil` if not range partitioned.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 542 def range_partitioning_end @gapi.configuration.query.range_partitioning.range.end if range_partitioning? end
The field on which the destination table will be range partitioned, if any. The field must be a top-level `NULLABLE/REQUIRED` field. The only supported type is `INTEGER/INT64`. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@return [String, nil] The partition field, if a field was configured, or `nil` if not range partitioned.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 505 def range_partitioning_field @gapi.configuration.query.range_partitioning.field if range_partitioning? end
The width of each interval. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@return [Integer, nil] The width of each interval, for data in range partitions, or `nil` if not range
partitioned.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 530 def range_partitioning_interval @gapi.configuration.query.range_partitioning.range.interval if range_partitioning? end
The start of range partitioning, inclusive. See [Creating and using integer range partitioned tables](cloud.google.com/bigquery/docs/creating-integer-range-partitions).
@return [Integer, nil] The start of range partitioning, inclusive, or `nil` if not range partitioned.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 517 def range_partitioning_start @gapi.configuration.query.range_partitioning.range.start if range_partitioning? end
Checks if the query job is using standard sql.
@return [Boolean] `true` when standard sql is used, `false` otherwise.
# File lib/google/cloud/bigquery/query_job.rb, line 451 def standard_sql? !legacy_sql? end
The type of query statement, if valid. Possible values (new values might be added in the future):
-
“CREATE_MODEL”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“CREATE_TABLE”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“CREATE_TABLE_AS_SELECT”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“CREATE_VIEW”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“DELETE”: DML statement, see [Data Manipulation Language Syntax](cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
-
“DROP_MODEL”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“DROP_TABLE”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“DROP_VIEW”: DDL statement, see [Using
Data
Definition Language Statements](cloud.google.com/bigquery/docs/reference/standard-sql/data-definition-language) -
“INSERT”: DML statement, see [Data Manipulation Language Syntax](cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
-
“MERGE”: DML statement, see [Data Manipulation Language Syntax](cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
-
“SELECT”: SQL query, see [Standard SQL Query Syntax](cloud.google.com/bigquery/docs/reference/standard-sql/query-syntax)
-
“UPDATE”: DML statement, see [Data Manipulation Language Syntax](cloud.google.com/bigquery/docs/reference/standard-sql/dml-syntax)
@return [String, nil] The type of query statement.
# File lib/google/cloud/bigquery/query_job.rb, line 275 def statement_type return nil unless @gapi.statistics.query @gapi.statistics.query.statement_type end
Checks if the destination table will be time-partitioned. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).
@return [Boolean] `true` when the table will be time-partitioned,
or `false` otherwise.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 555 def time_partitioning? !@gapi.configuration.query.time_partitioning.nil? end
The expiration for the destination table partitions, if any, in seconds. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).
@return [Integer, nil] The expiration time, in seconds, for data in
partitions, or `nil` if not present.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 600 def time_partitioning_expiration tp = @gapi.configuration.query.time_partitioning tp.expiration_ms / 1_000 if tp && !tp.expiration_ms.nil? end
The field on which the destination table will be partitioned, if any. If not set, the destination table will be partitioned by pseudo column `_PARTITIONTIME`; if set, the table will be partitioned by this field. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).
@return [String, nil] The partition field, if a field was configured.
`nil` if not partitioned or not set (partitioned by pseudo column '_PARTITIONTIME').
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 585 def time_partitioning_field return nil unless time_partitioning? @gapi.configuration.query.time_partitioning.field end
If set to true, queries over the destination table will require a partition filter that can be used for partition elimination to be specified. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).
@return [Boolean] `true` when a partition filter will be required,
or `false` otherwise.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 616 def time_partitioning_require_filter? tp = @gapi.configuration.query.time_partitioning return false if tp.nil? || tp.require_partition_filter.nil? tp.require_partition_filter end
The period for which the destination table will be partitioned, if any. See [Partitioned Tables](cloud.google.com/bigquery/docs/partitioned-tables).
@return [String, nil] The partition type. The supported types are `DAY`,
`HOUR`, `MONTH`, and `YEAR`, which will generate one partition per day, hour, month, and year, respectively; or `nil` if not present.
@!group Attributes
# File lib/google/cloud/bigquery/query_job.rb, line 569 def time_partitioning_type @gapi.configuration.query.time_partitioning.type if time_partitioning? end
The user-defined function resources used in the query. May be either a code resource to load from a Google
Cloud
Storage URI (`gs://bucket/path`), or an inline resource that contains code for a user-defined function (UDF). Providing an inline code resource is equivalent to providing a URI for a file containing the same code. See [User-Defined Functions](cloud.google.com/bigquery/docs/reference/standard-sql/user-defined-functions).
@return [Array<String>] An array containing Google
Cloud
Storage URIs
and/or inline source code.
# File lib/google/cloud/bigquery/query_job.rb, line 466 def udfs udfs_gapi = @gapi.configuration.query.user_defined_function_resources return nil unless udfs_gapi Array(udfs_gapi).map { |udf| udf.inline_code || udf.resource_uri } end
The number of updated rows. Present only for DML statements `UPDATE` and `MERGE`. (See {#statement_type}.)
@return [Integer, nil] The number of updated rows, or `nil` if not
applicable.
# File lib/google/cloud/bigquery/query_job.rb, line 418 def updated_row_count @gapi.statistics.query&.dml_stats&.updated_row_count end
Refreshes the job until the job is `DONE`. The delay between refreshes will incrementally increase.
@example
require "google/cloud/bigquery" bigquery = Google::Cloud::Bigquery.new sql = "SELECT word FROM `bigquery-public-data.samples.shakespeare`" job = bigquery.query_job sql job.wait_until_done! job.done? #=> true
# File lib/google/cloud/bigquery/query_job.rb, line 683 def wait_until_done! return if done? ensure_service! loop do query_results_gapi = service.job_query_results job_id, location: location, max: 0 if query_results_gapi.job_complete @destination_schema_gapi = query_results_gapi.schema break end end reload! end
Protected Instance Methods
# File lib/google/cloud/bigquery/query_job.rb, line 1746 def destination_schema @destination_schema_gapi end
# File lib/google/cloud/bigquery/query_job.rb, line 1750 def destination_table_dataset_id @gapi.configuration.query.destination_table.dataset_id end
# File lib/google/cloud/bigquery/query_job.rb, line 1758 def destination_table_gapi Google::Apis::BigqueryV2::Table.new( table_reference: @gapi.configuration.query.destination_table, schema: destination_schema ) end
# File lib/google/cloud/bigquery/query_job.rb, line 1754 def destination_table_table_id @gapi.configuration.query.destination_table.table_id end
# File lib/google/cloud/bigquery/query_job.rb, line 1738 def ensure_schema! return unless destination_schema.nil? query_results_gapi = service.job_query_results job_id, location: location, max: 0 # raise "unable to retrieve schema" if query_results_gapi.schema.nil? @destination_schema_gapi = query_results_gapi.schema end