class SolveBio::Query

Attributes

dataset_id[R]
filters[RW]
limit[RW]
page_offset[R]
page_size[RW]
response[R]
slice[RW]

Public Class Methods

new(dataset_id, params={}) click to toggle source

Creates a new Query object.

Parameters:

- `dataset_id`: Unique ID of dataset to query.
- `genome_build`: The genome build to use for the query.
- `query` (optional): Query string.
- `fields` (optional): List of specific fields to retrieve.
- `filters` (optional): List of filter objects.
- `limit` (optional): Maximum number of query results to return.
- `page_size` (optional): Max number of results to fetch per query page.
# File lib/solvebio/query.rb, line 32
# Creates a new Query object.
#
# dataset_id - Integer, or String-like object (responds to :to_str), naming
#              the dataset to query.
# params     - Hash of options:
#              :data_url     - request path; defaults to
#                              "/v1/datasets/<dataset_id>/data"
#              :genome_build - stored as given
#              :fields       - stored as given
#              :query        - stored as given
#              :filters      - a SolveBio::Filter (its #filters are used) or
#                              a list of filters; defaults to []
#              :limit        - total number of results to return across all
#                              pages; defaults to INT_MAX
#              :page_size    - number of results requested per page;
#                              defaults to DEFAULT_PAGE_SIZE
#
# Raises TypeError when dataset_id has an unsupported type, when :limit is
# not an Integer >= 0, or when :page_size is not an Integer > 0.
def initialize(dataset_id, params={})
    # Integer rather than Fixnum: Fixnum was deprecated in Ruby 2.4 and removed
    # in Ruby 3.2, where referencing it raises NameError.
    unless dataset_id.is_a?(Integer) || dataset_id.respond_to?(:to_str)
        raise TypeError, "'dataset_id' parameter must be an Integer or String"
    end

    given_filters = params[:filters]

    @dataset_id   = dataset_id
    @data_url     = params[:data_url] || "/v1/datasets/#{dataset_id}/data"
    @genome_build = params[:genome_build]
    @fields       = params[:fields]
    @filters      = given_filters.kind_of?(SolveBio::Filter) ? given_filters.filters : (given_filters || [])
    @query        = params[:query]

    @response     = nil
    # limit defines the total number of results that will be returned
    # from a query involving 1 or more pagination requests.
    @limit        = params[:limit] || INT_MAX
    # page_size/page_offset are the low level API limit and offset params.
    @page_size    = params[:page_size] || DEFAULT_PAGE_SIZE
    # page_offset can only be set by execute()
    # It always contains the current absolute offset contained in the buffer.
    @page_offset  = nil
    # slice is set to tell the Query object that it is being sliced and "def each"
    # should not reset the page_offset to 0 before iterating.
    @slice        = nil

    # Any coercion failure or out-of-range value is reported as a TypeError.
    begin
        @limit = Integer(@limit)
        raise RangeError if @limit < 0
    rescue StandardError
        raise TypeError, "'limit' parameter must be an Integer >= 0"
    end

    begin
        @page_size = Integer(@page_size)
        raise RangeError if @page_size <= 0
    rescue StandardError
        raise TypeError, "'page_size' parameter must be an Integer > 0"
    end

    self
end

Public Instance Methods

[](key) click to toggle source

Retrieve an item or range from the set of results

# File lib/solvebio/query.rb, line 165
# Retrieves a single item or a range of items from the set of results.
#
# key - a non-negative Integer index, or a Range of non-negative Integers.
#       The end of a Range is treated as exclusive (0..5 covers five items).
#       When a slice is already set, the key is relative to that slice.
#
# Returns:
#   - Integer key: the item at that index, taken from the buffer when it is
#     already loaded and otherwise fetched through a cloned query; nil when
#     the key falls outside the current slice.
#   - Range key: [] for a reversed range; nil when the range starts outside
#     the current slice; an Array of buffered items when the buffer already
#     covers the range; otherwise a cloned Query with its limit and slice set.
#
# Raises TypeError for any other key type, IndexError for negative indexes.
def [](key)
    # Integer rather than Fixnum: Fixnum was removed in Ruby 3.2, and
    # is_a?(Integer) still matches Fixnum values on older interpreters.
    unless key.is_a?(Range) || key.is_a?(Integer)
        raise TypeError, "Expecting index value to be a Range or Integer; is #{key.class}"
    end

    if key.kind_of?(Range)
        start, stop = key.begin, key.end

        # Reverse ranges aren't supported
        return [] if start > stop

        if start < 0 || stop < 0
            raise IndexError, 'Negative indexing is not supported'
        end

        # If a slice is already set, the new slice should be relative to it
        if @slice
            start += @slice.begin
            stop = [@slice.begin + stop, @slice.end].min
            # Return nil if the user requests something outside the current slice
            return nil if start >= @slice.end
        end

        # Does the current buffer contain the desired range?
        if buffer && start >= @page_offset && stop < (@page_offset + buffer.length)
            # Cursor's buffer has the items already
            # Avoid a query and just return the buffered items.
            # Calculate the offsets relative to the buffer; the "- 1" turns the
            # exclusive stop into the inclusive end that Array#[] expects.
            start = start - @page_offset
            stop = stop - @page_offset - 1
            return buffer[start..stop]
        end

        # We need to make a few requests to get the data between start and stop.
        # We should respect the user's @limit (used by each()) if it is smaller than the given Range.
        # To prevent the state of page_size and page_offset from being stored, we'll clone this object first.
        q = clone()
        q.limit = [stop - start, @limit].min
        # Setting the slice will signal to "each" which page_offset to start at.
        q.slice = Range.new(start, stop)
        return q
    end

    if key < 0
        raise IndexError, 'Negative indexing is not supported'
    end

    # If a slice already exists, the key is relative to that slice
    if @slice
        key = key + @slice.begin
        # Return nil if the user requests something outside the slice
        return nil if key >= @slice.end
    end

    # If the value at key is already in the buffer, return it.
    if buffer && key >= @page_offset && key < (@page_offset + buffer.length)
        return buffer[key - @page_offset]
    end

    # Otherwise, use key as the new page_offset and fetch a new page of results
    q = clone()
    q.limit = [1, @limit].min
    q.execute(key)
    return q.buffer[0]
end
buffer() click to toggle source
# File lib/solvebio/query.rb, line 266
# Returns the :results entry of the last response, or nil when no
# response has been stored yet.
def buffer
    @response ? @response[:results] : nil
end
build_query() click to toggle source
# File lib/solvebio/query.rb, line 271
# Builds the Hash of query parameters from the current state.
#
# When @filters is set, it is run through Filter.process_filters; more than
# one resulting filter is wrapped in a single {:and => [...]} entry.
# :fields, :genome_build and :query are only included when they are set.
def build_query
    payload = {}

    if @filters
        processed = Filter.process_filters(@filters)
        payload[:filters] = processed.size > 1 ? [{:and => processed}] : processed
    end

    optional = {:fields => @fields, :genome_build => @genome_build, :query => @query}
    optional.each do |name, value|
        payload[name] = value if value
    end

    payload
end
clone(filters=[]) click to toggle source
# File lib/solvebio/query.rb, line 74
# Returns a new Query for the same dataset that carries over this query's
# data_url, genome_build, query, fields, limit and page_size.
#
# filters - extra filters appended after this query's own filters
#           (default: none).
def clone(filters=[])
    options = {
        :data_url => @data_url,
        :genome_build => @genome_build,
        :query => @query,
        :fields => @fields,
        :limit => @limit,
        :page_size => @page_size
    }
    copy = Query.new(@dataset_id, options)

    # Own filters first, then the additional ones; empty lists are skipped.
    [@filters, filters].each do |extra|
        copy.filters += extra unless extra.empty?
    end

    copy
end
count() click to toggle source

Returns the total number of results in the result-set. The count depends on the filters, but is independent of any limit. In SQL terms it is like: SELECT COUNT(*) FROM <depository> [WHERE condition]. See also size(), which does depend on the limit. Requires at least one request.

# File lib/solvebio/query.rb, line 123
# Returns the :total entry of the response, executing the query first
# when no response has been stored yet.
def count
    unless @response
        execute
    end
    return @response[:total]
end
Also aliased as: total
each(*args) { |buffer| ... } click to toggle source
# File lib/solvebio/query.rb, line 231
# Yields each result in turn, fetching further pages as the buffer runs out.
# Yields at most `size` items. Returns self when no block is given.
#
# Iteration always starts with a fresh request: at the beginning of the
# current slice when one is set (see def []), otherwise at offset 0.
def each(*args)
    return self unless block_given?

    execute(@slice ? @slice.begin : 0)

    # Position of the next item to yield within the current buffer.
    cursor = 0
    0.upto(size - 1).each do |_index|
        if cursor == buffer.length
            # Buffer exhausted: request the page that starts right after it
            # and begin reading from the top of the new buffer.
            execute(@page_offset + cursor)
            cursor = 0
        end

        yield buffer[cursor]
        cursor += 1
    end
end
empty?() click to toggle source
# File lib/solvebio/query.rb, line 142
# True when size is zero.
def empty?
    size.zero?
end
execute(offset=0) click to toggle source
# File lib/solvebio/query.rb, line 290
# Executes the current query starting at the given offset.
#
# offset - absolute offset of the first result to request (default: 0);
#          stored in @page_offset before the request is made.
#
# Posts the built query to @data_url through Client.post, stores the reply
# in @response, and returns the pair [params, response].
def execute(offset=0)
    request = build_query

    # Always set the page offset before querying.
    @page_offset = offset

    request[:offset] = @page_offset
    # The user's limit trumps the page limit if it's smaller
    request[:limit] = [@page_size, @limit].min

    SolveBio::logger.debug("Executing query with offset: #{request[:offset]} limit: #{request[:limit]}")
    # TODO: handle request errors and reset page_offset
    @response = Client.post(@data_url, request)
    SolveBio::logger.debug("Query response took #{@response[:took]}ms, buffer size: #{buffer.length}, total: #{@response[:total]}")

    return request, @response
end
filter(params={}, conn=:and) click to toggle source

Returns a clone of this Query instance with the given filter arguments combined with the existing set of filters using AND.

The params (and conn) arguments are simply passed to a new SolveBio::Filter object, whose filters are combined with any other filters using AND.

By default, everything is combined using AND. If you provide multiple filters in a single filter call, those are ANDed together. If you provide multiple filters in multiple filter calls, those are ANDed together.

If you want something different, use the F class, which supports the "&" (and), "|" (or) and "~" (not) operators. Then call filter once with the resulting Filter instance.

# File lib/solvebio/query.rb, line 103
# Returns a clone of this query with the filters of a new
# Filter.new(params, conn) appended to the existing ones.
def filter(params={}, conn=:and)
    extra = Filter.new(params, conn)
    clone(extra.filters)
end
inspect()

Force inspect to always call to_s. This happens automatically in Ruby < 2.0.0

Alias for: to_s
length()
Alias for: size
position(chromosome, position, exact=false) click to toggle source

Shortcut to do a single position filter on genomic datasets.

# File lib/solvebio/query.rb, line 113
# Returns a clone of this query with the filters of a GenomicFilter whose
# start and stop are both the given position.
def position(chromosome, position, exact=false)
    point_filter = GenomicFilter.new(chromosome, position, position, exact)
    clone(point_filter.filters)
end
range(chromosome, start, stop, exact=false) click to toggle source

Shortcut to do range queries on supported datasets.

# File lib/solvebio/query.rb, line 108
# Returns a clone of this query with the filters of a GenomicFilter that
# spans start to stop on the given chromosome.
def range(chromosome, start, stop, exact=false)
    span_filter = GenomicFilter.new(chromosome, start, stop, exact)
    clone(span_filter.filters)
end
size() click to toggle source

Returns the total number of results that will be retrieved given @limit set by the user. Requires at least one API request to retrieve the total count.

In SQL it is like: SELECT COUNT(*) FROM (

SELECT * FROM <table> [WHERE condition] [LIMIT number]

)

# File lib/solvebio/query.rb, line 137
# Returns the smaller of @limit and count, i.e. the number of results
# that will be retrieved once the user's limit is taken into account.
def size
    limit_and_total = [@limit, count]
    return limit_and_total.min
end
Also aliased as: length
to_h() click to toggle source

Convert the SolveBio::Query object to a Hash type by returning its first result (equivalent to self[0]).

# File lib/solvebio/query.rb, line 160
# Returns the first item of the result set (the same value as self[0]).
def to_h
    first_result = self[0]
    first_result
end
to_range(range_or_idx) click to toggle source
# File lib/solvebio/query.rb, line 261
# Returns range_or_idx unchanged when it is already a Range; otherwise
# returns the Range from the given index to index + 1.
def to_range(range_or_idx)
    return range_or_idx if range_or_idx.kind_of?(Range)

    Range.new(range_or_idx, range_or_idx + 1)
end
to_s() click to toggle source

Convert the SolveBio::Query object to a String type

# File lib/solvebio/query.rb, line 147
def to_s
    if @limit == 0 || count == 0
        return 'Query returned 0 results'
    end

    result = Tabulate.tabulate(buffer[0], ['Fields', 'Data'], ['right', 'left'], true)
    return "\n#{result}\n\n... #{(size - 1).pretty_int} more results."
end
Also aliased as: inspect
total()
Alias for: count