class OMGF::HystericalRaisins

Basic REST interface. This is bug-for-bug compatible with an old app that's been deployed for years in a private LAN.

This started out as a WORM (write-once, read-many) system, but eventually gained the ability to handle deletes.

This was meant to behave somewhat like the MogileFS protocol (but is pure HTTP), so it redirects clients to the storage nodes and bypasses Ruby for bulk I/O when retrieving files. PUTs (writes) still go through Ruby, however.

Public Class Methods

new(opts) click to toggle source
# File lib/omgf/hysterical_raisins.rb, line 27
# Sets up the application from the +opts+ hash.
#
# Every setting is read with <tt>opts[...] || default</tt>, so a nil or
# false value in +opts+ selects the default.  The tracker pool is
# initialized only after all instance state has been assigned, and the
# regurgitator mixin is loaded only when a :db option is supplied.
def initialize(opts)
  # Rack env keys consulted by the request handlers
  @reproxy_header = opts[:reproxy_header] || "HTTP_X_OMGF_REPROXY"
  @put_overwrite_header = opts[:put_overwrite_header] || "HTTP_X_OMGF_FORCE"
  @reproxy_path = opts[:reproxy_path]

  # indexed by domain name when an upload does not name a class
  @default_class_cb = opts[:default_class_cb] || {}

  # 0x7fffffff is the largest signed 32-bit integer
  @get_paths_opts = {
    noverify: opts[:noverify],
    pathcount: opts[:pathcount] || 0x7fffffff,
  }

  @new_file_opts = {
    # :trailer is acceptable for :content_md5
    content_md5: opts[:content_md5],

    # largefile: auto-selects based on env["CONTENT_LENGTH"]
    largefile: opts[:largefile] || :stream,
  }

  @vp = OMGF::VerifyPaths.new(opts[:logger])
  @verify_timeout = opts[:verify_timeout] || 0.5

  tracker_opts = {
    domain: "any",
    hosts: opts[:hosts],
    fail_timeout: opts[:fail_timeout] || 0.5,

    # high defaults because of slow seeks on storage nodes (for size verify)
    timeout: opts[:timeout] || 30,
    get_file_data_timeout: opts[:get_file_data_timeout] || 30,
  }
  pool_init(tracker_opts)

  # we may use regurgitator for reads; it is required lazily so the
  # dependency is only needed when a :db option is given
  db = opts[:db]
  if db
    require 'omgf/regurgitator'
    extend OMGF::Regurgitator
    regurgitator_init(db)
  end
end

Public Instance Methods

call(env) click to toggle source

The entry point for Rack

# File lib/omgf/hysterical_raisins.rb, line 64
# The entry point for Rack: dispatches on env["REQUEST_METHOD"].
#
# GET, HEAD, PUT and DELETE go to the method of the same name; any other
# request method is answered with <tt>r(405)</tt>.
#
# MogileFS unknown-key, domain-not-found and unregistered-domain errors
# raised by a handler become <tt>r(404, "")</tt>.  Any other StandardError
# is logged (message, class and backtrace) and becomes <tt>r(500, "")</tt>.
#
# The error is logged through env["rack.logger"] when it is set, otherwise
# it is written to env["rack.errors"].  This keeps a missing logger from
# raising inside the handler and masking the original exception.
def call(env)
  case env["REQUEST_METHOD"]
  when "GET"
    get(env)
  when "HEAD"
    head(env)
  when "PUT"
    put(env)
  when "DELETE"
    delete(env)
  else
    r(405)
  end
rescue MogileFS::Backend::UnknownKeyError,
       MogileFS::Backend::DomainNotFoundError,
       MogileFS::Backend::UnregDomainError
  r(404, "")
rescue => e
  # splatting a nil backtrace contributes no lines
  lines = [ "#{e.message} (#{e.class})", *e.backtrace ]
  if (logger = env["rack.logger"])
    lines.each { |line| logger.error(line) }
  elsif (errors = env["rack.errors"])
    lines.each { |line| errors.puts(line) }
  end
  r(500, "")
end
delete(env) click to toggle source

DELETE /domain/key

# File lib/omgf/hysterical_raisins.rb, line 354
# Routes a DELETE request.
#
# A PATH_INFO of the form /$DOMAIN/$KEY is handed to delete_key with the
# two captured components; anything else is answered with r(404, "").
def delete(env)
  target = %r{\A/([^/]+)/(.+)\z}.match(env["PATH_INFO"])
  return r(404, "") unless target

  domain, key = target.captures
  delete_key(env, domain, key)
end
delete_key(env, domain, key) click to toggle source

DELETE /domain/key

# File lib/omgf/hysterical_raisins.rb, line 364
# Deletes +key+ from +domain+ through a client obtained from pool_use.
#
# Responds with r(204, "") on success, r(406, ...) when the domain is
# unregistered or not found, and r(404, "") when the key is unknown.
def delete_key(env, domain, key)
  begin
    pool_use(domain) do |client|
      client.delete(key)
    end
    r(204, "")
  rescue MogileFS::Backend::UnregDomainError,
         MogileFS::Backend::DomainNotFoundError
    r(406, "Invalid domain: #{domain}")
  rescue MogileFS::Backend::UnknownKeyError
    r(404, "")
  end
end
get(env) click to toggle source

GET /$DOMAIN?prefix=foo - lists keys

GET /$DOMAIN/$KEY - redirects to FIDs on storage nodes

# File lib/omgf/hysterical_raisins.rb, line 90
# Routes a GET request after collapsing repeated slashes in PATH_INFO.
#
#   /$DOMAIN/$KEY        -> redirect_key
#   /$DOMAIN, /$DOMAIN/  -> get_keys
#   /                    -> r(200, "")
#   anything else        -> r(404, "")
def get(env)
  path = env["PATH_INFO"].squeeze("/")

  if (m = %r{\A/([^/]+)/(.+)\z}.match(path))
    redirect_key(env, m[1], m[2])
  elsif (m = %r{\A/([^/]+)/?\z}.match(path))
    get_keys(env, m[1])
  elsif path == "/"
    r(200, "")
  else
    r(404, "")
  end
end
head(env) click to toggle source

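Returns metadata for a given domain/key. Any other path is passed to get, and only the status and headers of that response are returned (the body is dropped).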

# File lib/omgf/hysterical_raisins.rb, line 104
# Routes a HEAD request after collapsing repeated slashes in PATH_INFO.
#
# /$DOMAIN/$KEY is answered by stat_key.  Every other path is run through
# get, and its status and headers are returned with an empty body.
def head(env)
  path = env["PATH_INFO"].squeeze("/")
  m = %r{\A/([^/]+)/(.+)\z}.match(path)
  return stat_key(env, m[1], m[2]) if m

  # pass on headers from listing results, dropping the body
  status, headers = get(env)
  [ status, headers, [] ]
end
put(env) click to toggle source

PUT /domain/key

# File lib/omgf/hysterical_raisins.rb, line 251
# Routes a PUT request.
#
# A PATH_INFO of the form /$DOMAIN/$KEY is handed to put_key with the two
# captured components; anything else is answered with r(404, "").
def put(env)
  target = %r{\A/([^/]+)/(.+)\z}.match(env["PATH_INFO"])
  return r(404, "") unless target

  domain, key = target.captures
  put_key(env, domain, key)
end
put_key(env, domain, key) click to toggle source

PUT /$DOMAIN/$KEY

# File lib/omgf/hysterical_raisins.rb, line 269
# Stores the request body (env["rack.input"]) under +key+ in +domain+.
#
# Responses produced here:
# * r(403, "") when the reproxy header converts to a non-zero integer
# * r(406, ...) when bad_key?(key) is true or the key is longer than 128
# * r(403, "empty files forbidden") when CONTENT_LENGTH is exactly "0"
# * r(403, <existing paths>) when the key already has paths and the
#   overwrite header is not exactly "true"
# * r(406, "Invalid domain: ...") for unregistered/unknown domains
# * on success: 204 when get_paths returned a value (overwrite), else
#   201 if REMOTE_USER is set, else 200
#
# Any other StandardError triggers at most one retry of the whole upload
# after rewinding the input; if that is not possible the error is re-raised.
def put_key(env, domain, key)
  # nil.to_i is 0, so a missing reproxy header passes this check
  return r(403, "") if env[@reproxy_header].to_i != 0
  return r(406, "key `#{key}' is not URI-friendly") if bad_key?(key)
  return r(406, "key is too long") if key.size > 128

  clen = env["CONTENT_LENGTH"]

  # this was written before MogileFS supported empty files,
  # but empty files waste DB space so we don't support them
  # Not bothering with Transfer-Encoding: chunked, though...
  return r(403, "empty files forbidden") if "0" == clen

  params = query(env)
  input = env["rack.input"]
  # paths and retried are declared outside the begin block below so that
  # their values survive a `retry`
  paths = nil
  retried = false

  # prepare options for create_open/create_close:
  new_file_opts = @new_file_opts.dup

  # the original deployment of this created a class for every
  # domain with the same class having the same name as the domain
  new_file_opts[:class] = params['class'] || @default_class_cb[domain]

  # try to give a Content-Length to the tracker
  clen and new_file_opts[:content_length] = clen.to_i

  if /\bContent-MD5\b/i =~ env["HTTP_TRAILER"]
    # if the client will give the Content-MD5 as the trailer,
    # we must lazily populate it since we're not guaranteed to
    # have the trailer, yet (rack.input is lazily read on unicorn)
    new_file_opts[:content_md5] = lambda { env["HTTP_CONTENT_MD5"] }
  elsif cmd5 = env["HTTP_CONTENT_MD5"]
    # maybe the client gave the Content-MD5 in the header
    new_file_opts[:content_md5] = cmd5
  end

  begin
    pool_use(domain) do |mg|
      begin
        # TOCTOU issue, but probably not worth worrying about
        # Nothing we can do about it without explicit MogileFS support
        # or a 3rd-party locking daemon
        paths = mg.get_paths(key)
        if paths && paths[0]

          # overwriting existing files is not permitted by default
          if env[@put_overwrite_header] != "true"
            # show the existing paths in response
            # (this `return` exits put_key itself from inside the block)
            return r(403, paths.join("\n"))
          end
        end
      rescue MogileFS::Backend::UnknownKeyError
        # good, not clobbering anything
      end

      # finally, upload the file
      mg.new_file(key, new_file_opts) do |io|
        IO.copy_stream(input, io)
      end
    end # pool_use

    # should always return 201 if ! found, but we keep 200 for legacy
    # compat if they're not logged in (via REMOTE_USER)
    status = paths ? 204 : (env["REMOTE_USER"] ? 201 : 200)
    r(status, "")
  rescue MogileFS::Backend::UnregDomainError,
         MogileFS::Backend::DomainNotFoundError
    r(406, "Invalid domain: #{domain}")
  rescue => e
    # retry the upload once, and only when the input can be rewound
    if retried == false && input.respond_to?(:rewind)
      begin
        retried = true
        input.rewind # may raise on future perfectly compliant Rack servers
        # NOTE(review): if env["rack.logger"] is nil, the NoMethodError from
        # .warn is swallowed by the bare rescue below and the retry is
        # skipped -- confirm this is acceptable
        env["rack.logger"].warn("#{e.message} (#{e.class})")
        retry
      rescue
        # a failure while preparing the retry is ignored on purpose;
        # control falls through to the bare `raise` below
      end
    end

    raise
  end
end
query(env) click to toggle source
# File lib/omgf/hysterical_raisins.rb, line 374
# Parses env["QUERY_STRING"] with Rack::Utils.parse_query and returns the
# result.
def query(env)
  raw_query = env["QUERY_STRING"]
  Rack::Utils.parse_query(raw_query)
end
redirect_key(env, domain, key) click to toggle source

GET /$DOMAIN/$KEY

# File lib/omgf/hysterical_raisins.rb, line 132
# GET /$DOMAIN/$KEY
#
# Fetches the URIs for +key+ in +domain+ via mg_get_uris, passes them
# through @vp.verify and answers with a 302 whose Location is the first
# URI of the flattened result.  Responds with r(503, "") when that result
# has no usable first entry.
#
# When reproxy?(env, key, h, location) is false, the remaining URIs are
# added as X-Alt-Location-N headers (N counts from 0).
def redirect_key(env, domain, key)
  uris = mg_get_uris(env, domain, key, @get_paths_opts)

  # Array#flatten! returns nil when the array is already flat, which would
  # turn a usable flat result into a 503.  Array#flatten always returns an
  # array, so nested and flat results from verify behave the same.
  uris = @vp.verify(uris, 1, @verify_timeout).flatten

  dest = uris.shift
  return r(503, "") unless dest

  location = dest.to_s
  h = {
    'Content-Length' => '0',
    'Location' => location,
    'Content-Type' => 'text/html'
  }

  # reproxy? receives the header hash -- presumably so it can add its own
  # headers; confirm against its definition
  unless reproxy?(env, key, h, location)
    uris.each_with_index { |uri, i| h["X-Alt-Location-#{i}"] = uri.to_s }
  end
  [ 302, h, [] ]
end
stat_key(env, domain, key) click to toggle source

HEAD /$DOMAIN/$KEY

# File lib/omgf/hysterical_raisins.rb, line 116
# HEAD /$DOMAIN/$KEY
#
# Fetches the size and URIs for +key+ in +domain+ via mg_size_and_uris,
# passes the URIs through @vp.verify and answers 200 with Content-Length
# set to the size and Content-Type taken from key_mime_type.  Responds
# with r(503, "") when the flattened verify result has no first entry.
#
# The name given to key_mime_type is whatever filename(h, query(env))
# returns, falling back to +key+.  When reproxy?(...) is false, every URI
# is added as an X-Url-N header (N counts from 0).
def stat_key(env, domain, key)
  size, uris = mg_size_and_uris(env, domain, key, @get_paths_opts)

  # Array#flatten! returns nil when the array is already flat, which would
  # turn a usable flat result into a 503.  Array#flatten always returns an
  # array, so nested and flat results from verify behave the same.
  uris = @vp.verify(uris, 1, @verify_timeout).flatten

  return r(503, "") unless uris[0]

  h = { "Content-Length" => size.to_s }
  fn = filename(h, query(env)) || key
  h["Content-Type"] = key_mime_type(fn)
  unless reproxy?(env, key, h, uris[0].to_s)
    uris.each_with_index { |uri, i| h["X-Url-#{i}"] = uri.to_s }
  end
  [ 200, h, [] ]
end