class Gini::Api::Client

Main class to operate on the Gini API

Attributes

log[R]
token[R]

Public Class Methods

new(options = {}) click to toggle source

Instantiate a new Gini::Api::Client object with OAuth capabilities

@param [Hash] options Hash of available config settings @option options [String] :client_id OAuth client_id @option options [String] :client_secret OAuth client_secret @option options [String] :oauth_site OAuth site to connect to (user.gini.net) @option options [String] :oauth_redirect Redirect URI @option options [Integer] :upload_timeout Upload timeout in seconds @option options [Integer] :processing_timeout API operational timeout in seconds @option options [String] :api_uri API URI (api.gini.net) @option options [String] :api_version API version to use (v1) @option options [Logger] :log logger object to use (initialized with STDOUT otherwise) @option options [String] :user_agent HTTP User-Agent (gini-api-ruby/VERSION (Faraday vFaraday::VERSION))

@example

api = Gini::Api::Client.new(
  client_id: 'my_client_id',
  client_secret: 'my_client_secret',
)
# File lib/gini-api/client.rb, line 36
# Build a client from the built-in defaults overlaid with the caller's options.
#
# Every key of the merged option hash is stored in an instance variable of
# the same name and exposed through an attr_reader defined on the class.
#
# @param [Hash] options Config settings; :client_id and :client_secret are mandatory
#
# @raise [Gini::Api::Error] If a mandatory option key is missing
def initialize(options = {})
  opts = {
    oauth_site: 'https://user.gini.net/',
    oauth_redirect: 'http://localhost',
    api_uri: 'https://api.gini.net',
    api_version: 'v1',
    api_type: 'json',
    upload_timeout: 90,
    processing_timeout: 180,
    user_agent: "gini-api-ruby/#{VERSION} (Faraday v#{Faraday::VERSION})"
  }.merge(options)

  # Only build the default STDOUT logger when the caller did not supply one
  opts[:log] ||= Logger.new(STDOUT)

  # Ensure mandatory keys are set
  [:client_id, :client_secret].each do |k|
    raise Gini::Api::Error.new("Mandatory option key is missing: #{k}") unless opts.key?(k)
  end

  # Populate instance variables from merged opts
  opts.each do |k, v|
    instance_variable_set("@#{k}", v)
    self.class.send(:attr_reader, k)
  end

  # Ensure STDOUT is flushed
  STDOUT.sync = true

  # Strip trailing slashes without touching the string object the caller
  # passed in (an in-place edit would also fail on a frozen string)
  @api_uri = @api_uri.sub(%r{/+$}, '')

  # Register parser (json+xml) based on API version
  register_parser

  @log.info('Gini API client initialized')
  @log.info("Target: #{@api_uri}")
end

Public Instance Methods

delete(id) click to toggle source

Delete document

@param [String] id document ID

# File lib/gini-api/client.rb, line 201
# Remove a document through the API and log the deletion.
#
# @param [String] id document ID
#
# @raise [Gini::Api::DocumentError] If the response status is not 204
def delete(id)
  result = request(:delete, "/documents/#{id}")

  if result.status != 204
    failure = Gini::Api::DocumentError.new(
      "Deletion of docId #{id} failed (code=#{result.status})",
      result
    )
    raise failure
  end

  @log.info("Deleted document #{id}")
end
get(id) click to toggle source

Get document by Id

@param [String] id document ID

@return [Gini::Api::Document] Return Gini::Api::Document object

# File lib/gini-api/client.rb, line 218
# Build a document object for the given document ID.
#
# @param [String] id document ID
#
# @return [Gini::Api::Document] Document bound to this client and /documents/<id>
def get(id)
  location = "/documents/#{id}"
  Gini::Api::Document.new(self, location)
end
list(options = {}) click to toggle source

List all documents

@param [Hash] options List options (offset and limit) @option options [Integer] :limit Maximum number of documents to return (defaults to 20) @option options [Integer] :offset Start offset. Defaults to 0

@return [Gini::Api::DocumentSet] Returns a DocumentSet with total, offset and a list of Document objects

# File lib/gini-api/client.rb, line 230
# Fetch a page of documents.
#
# @param [Hash] options List options
# @option options [Integer] :limit Maximum number of documents to return (defaults to 20)
# @option options [Integer] :offset Start offset (defaults to 0)
#
# @return [Gini::Api::DocumentSet] Set built from the parsed response
#
# @raise [Gini::Api::DocumentError] If the response status is not 200
def list(options = {})
  settings = { limit: 20, offset: 0 }.merge(options)
  # Integer() rejects values that are not valid integers
  limit, offset = settings.values_at(:limit, :offset).map { |value| Integer(value) }

  result = request(:get, "/documents?limit=#{limit}&next=#{offset}")
  return Gini::Api::DocumentSet.new(self, result.parsed) if result.status == 200

  raise Gini::Api::DocumentError.new(
    "Failed to get list of documents (code=#{result.status})",
    result
  )
end
login(opts) click to toggle source

Acquire OAuth2 token and populate @oauth (instance of Gini::Api::OAuth) and @token (OAuth2::AccessToken). Supports 2 strategies: username/password and authorization code

@param [Hash] opts Your authorization credentials @option opts [String] :auth_code OAuth authorization code. Will be exchanged for a token @option opts [String] :username API username @option opts [String] :password API password

@example

api.login(auth_code: '1234567890')

@example

api.login(username: 'me@example.com', password: 'secret')
# File lib/gini-api/client.rb, line 100
# Create the OAuth helper for the given credentials and keep its token.
#
# @param [Hash] opts Authorization credentials, passed on to Gini::Api::OAuth
#
# @return The token obtained from the OAuth helper
def login(opts)
  session = Gini::Api::OAuth.new(self, opts)
  @oauth  = session
  @token  = session.token
end
logout() click to toggle source

Destroy OAuth2 token

# File lib/gini-api/client.rb, line 107
# Destroy the OAuth2 token held by the OAuth helper.
#
# Does nothing (and returns nil) when no login has taken place, instead of
# failing with a NoMethodError on nil.
#
# @return Whatever the OAuth helper's destroy returns, or nil without a login
def logout
  @oauth.destroy if @oauth
end
register_parser() click to toggle source

Register OAuth2 response parser

# File lib/gini-api/client.rb, line 75
# Register the OAuth2 response parsers for the Gini media types.
#
# Three parsers are registered: JSON and XML for the configured API version,
# and JSON for the incubator version. Each parser hands back the raw body
# unchanged when parsing raises a StandardError.
def register_parser
  json_mime = version_header(:json)[:accept]
  OAuth2::Response.register_parser(:gini_json, [json_mime]) do |payload|
    begin
      MultiJson.load(payload, symbolize_keys: true)
    rescue StandardError
      payload
    end
  end

  xml_mime = version_header(:xml)[:accept]
  OAuth2::Response.register_parser(:gini_xml, [xml_mime]) do |payload|
    begin
      MultiXml.parse(payload)
    rescue StandardError
      payload
    end
  end

  incubator_mime = version_header(:json, :incubator)[:accept]
  OAuth2::Response.register_parser(:gini_incubator, [incubator_mime]) do |payload|
    begin
      MultiJson.load(payload, symbolize_keys: true)
    rescue StandardError
      payload
    end
  end
end
request(verb, resource, options = {}) click to toggle source

Request wrapper that sets URI and accept header

@param [Symbol] verb HTTP request verb (:get, :post, :put, :delete) @param [String] resource API resource like /documents @param [Hash] options Optional type and custom headers @option options [String] :type Type to pass to version_header (:xml, :json) @option options [Hash] :headers Custom headers. Must include accept

# File lib/gini-api/client.rb, line 130
# Send a request through the OAuth2 token with the versioned accept header.
#
# @param [Symbol] verb HTTP request verb (:get, :post, :put, :delete)
# @param [String] resource API resource like /documents
# @param [Hash] options Optional type and custom headers; not modified
# @option options [String] :type Type to pass to version_header (:xml, :json)
# @option options [Hash] :headers Custom headers; replaces the default accept header
#
# @return The response returned by the token for the given verb
#
# @raise [Gini::Api::RequestError] If the token raises OAuth2::Error
# @raise [Gini::Api::ProcessingError] If the call exceeds @processing_timeout
def request(verb, resource, options = {})
  # Work on a copy so that removing :type does not alter the caller's hash
  extras = options.dup
  type   = extras.delete(:type) || @api_type
  opts   = { headers: version_header(type) }.merge(extras)

  # Namespaced Timeout.timeout instead of the deprecated bare timeout helper
  Timeout.timeout(@processing_timeout) do
    @token.send(verb.to_sym, resource_to_location(resource).to_s, opts)
  end
rescue OAuth2::Error => e
  raise Gini::Api::RequestError.new(
    "API request failed: #{verb} #{resource} (code=#{e.response.status})",
    e.response
  )
rescue Timeout::Error => e
  raise Gini::Api::ProcessingError.new(
    "API request timed out: #{verb} #{resource} (#{e.message})"
  )
end
upload(file, options = {}, &block) click to toggle source

Upload a document

@param [String] file path or open filehandle of the document to upload @param [Hash] options Hash of available upload settings @option options [String] :doctype_hint Document type hint to optimize results or get incubator results @option options [String] :text Use given file-string as text upload @option options [Float] :interval Interval to poll progress

@return [Gini::Api::Document] Return Gini::Api::Document object for uploaded document

@example Upload and wait for completion

doc = api.upload('/tmp/myfile.pdf')

@example Upload with doctype hint

doc = api.upload('/tmp/myfile.pdf', doctype_hint: 'Receipt')

@example Upload and monitor progress

doc = api.upload('/tmp/myfile.pdf') { |d| puts "Progress: #{d.progress}" }

@example Upload a text string instead of a file

doc = api.upload('This is a text message i would love to get extractions from', text: true)
# File lib/gini-api/client.rb, line 168
# Upload a document, then poll it and attach the measured durations.
#
# @param [String] file path or open filehandle of the document to upload
# @param [Hash] options Upload settings
# @option options [String] :doctype_hint Document type hint (defaults to nil)
# @option options [Boolean] :text Treat file as a text upload (defaults to false)
# @option options [Float] :interval Polling interval (defaults to 0.5)
#
# @return [Gini::Api::Document] Document whose duration holds :upload, :processing and :total
#
# @raise [Gini::Api::UploadError] If the upload response status is not 201
def upload(file, options = {}, &block)
  settings = { doctype_hint: nil, text: false, interval: 0.5 }.merge(options)
  timings  = Hash.new(0)

  timings[:upload], response = upload_document(file, settings)

  # Anything but "201 Created" means there is no document to poll
  unless response.status == 201
    fail Gini::Api::UploadError.new(
      "Document upload failed with HTTP code #{response.status}",
      response
    )
  end

  document = Gini::Api::Document.new(self, response.headers['location'])
  timings[:processing] = poll_document(document, settings[:interval], &block)
  timings[:total]      = timings.values.reduce(:+)

  document.duration = timings
  document
end
version_header(type = @api_type, version = @api_version) click to toggle source

Version accept header based on @api_version

@param [Symbol, String] type Expected response type (:xml, :json) @param [Symbol, String] version API version (:v1, :incubator)

@return [Hash] Return hash containing the accept header

# File lib/gini-api/client.rb, line 118
# Build the accept header for a response type and API version.
#
# @param [Symbol, String] type Expected response type (defaults to @api_type)
# @param [Symbol, String] version API version (defaults to @api_version)
#
# @return [Hash] Hash with a single :accept entry
def version_header(type = @api_type, version = @api_version)
  media_type = "application/vnd.gini.#{version}+#{type}"
  { accept: media_type }
end

Private Instance Methods

poll_document(doc, interval, &block) click to toggle source

Poll document and duration

@param [Gini::Api::Document] doc Document instance to poll @param [Float] interval Polling interval for completion

@return [Float] Processing duration in seconds

# File lib/gini-api/client.rb, line 298
# Poll a document until doc.poll returns and measure how long that took.
#
# @param [Gini::Api::Document] doc Document instance to poll
# @param [Float] interval Polling interval, passed on to doc.poll
#
# @return [Float] Elapsed time as measured by Benchmark.realtime
#
# @raise [Gini::Api::ProcessingError] If polling exceeds @processing_timeout;
#   the error carries the document id in docid
def poll_document(doc, interval, &block)
  # Namespaced Timeout.timeout instead of the deprecated bare timeout helper
  Timeout.timeout(@processing_timeout) do
    Benchmark.realtime { doc.poll(interval, &block) }
  end
rescue Timeout::Error => e
  ex = Gini::Api::ProcessingError.new(e.message)
  ex.docid = doc.id
  raise ex
end
resource_to_location(resource) click to toggle source

Helper to convert resource to a valid location.

@param [String] resource URI to be converted

@return [URI::HTTPS] URI::HTTPS object created from resource

# File lib/gini-api/client.rb, line 280
# Turn a resource into an HTTPS location on the API host.
#
# Only the path and query of the resource are kept; the host always comes
# from @api_uri and is cached in @api_host after the first call.
#
# @param [String] resource URI to be converted
#
# @return [URI::HTTPS] Location built from the API host plus the resource's path and query
def resource_to_location(resource)
  source = URI.parse(resource)
  @api_host ||= URI.parse(@api_uri).host

  components = { host: @api_host, path: source.path, query: source.query }
  URI::HTTPS.build(components)
end
upload_connection() click to toggle source

Setup API upload connection

@return [Faraday] Faraday object to use in upload

# File lib/gini-api/client.rb, line 316
# Return the Faraday connection used for uploads, building it on first use.
#
# The connection targets @api_uri and is cached in @upload_connection.
#
# @return [Faraday] Faraday object to use in upload
def upload_connection
  return @upload_connection if @upload_connection

  @upload_connection = Faraday.new(url: @api_uri) do |stack|
    stack.use(Faraday::Request::Multipart)
    stack.use(Faraday::Request::UrlEncoded)
    stack.request(:retry, 3)
    stack.adapter(Faraday.default_adapter)
  end
end
upload_document(file, opts) click to toggle source

Helper to upload document

@param [String] file location of document, open filehandle, or text to be uploaded @param [Hash] opts Upload options (:text to upload file as a text string, :doctype_hint to set the doctype request parameter)

@return [Array] Upload duration in seconds and the response object from upload

# File lib/gini-api/client.rb, line 332
# Post a document to the upload connection and time the request.
#
# @param [String] file location of document, open filehandle, or (with
#   opts[:text]) the text to upload; the given string is never modified
# @param [Hash] opts Upload options
# @option opts [Boolean] :text Wrap file in a StringIO tagged as UTF-8
# @option opts [String] :doctype_hint Sent as the doctype request parameter when set
#
# @return [Array] Elapsed time from Benchmark.realtime and the response
def upload_document(file, opts)
  # Wrap raw text in a StringIO. Duplicate first so the caller's string is
  # not re-tagged in place and frozen strings do not raise.
  file = StringIO.new(file.dup.force_encoding('UTF-8')) if opts[:text]

  response = nil
  duration = Benchmark.realtime do
    response = upload_connection.post do |req|
      req.options[:timeout] = @upload_timeout
      req.url 'documents'
      req.params[:doctype] = opts[:doctype_hint] if opts[:doctype_hint]
      req.headers['Content-Type']  = 'multipart/form-data'
      req.headers['Authorization'] = "Bearer #{@token.token}"
      req.headers.merge!(version_header)
      req.body = { file: Faraday::UploadIO.new(file, 'application/octet-stream') }
    end
  end

  [duration, response]
end