class BELParser::Resource::ResourceURLReader

ResourceURLReader retrieves {Dataset datasets} and {Value values} from Annotation (i.e. belanno extension) and Namespace (i.e. belns extension) files. Value and encoding are stored in DBM database files to reduce the runtime memory usage (22 resources loaded, totaling 100MB memory usage).

Only supports resource identifiers with an HTTP or HTTPS scheme.

Constants

DEFAULT_RESOURCE_VALUE_DELIMITER

Public Class Methods

new(reuse_database_files = true) click to toggle source

Initializes a {ResourceURLReader}.

@param [Boolean] reuse_database_files specify +true+ to reuse existing database files (default); +false+ to create new database files
# File lib/bel_parser/resource/resource_url_reader.rb, line 33
# Prepares the reader's per-instance state.
#
# @param [Boolean] reuse_database_files when truthy, create_resource may
#   return values already present in a database file instead of re-parsing
#   the downloaded content (defaults to +true+)
def initialize(reuse_database_files = true)
  # Flag consulted by create_resource before parsing a resource.
  @reuse     = reuse_database_files
  # Class-level store mapping a URL to its "domain//keyword//name//type"
  # string; the handle is memoized by open_datasets_file.
  @datasets  = ResourceURLReader.open_datasets_file
  # In-memory cache of resources already read, keyed by URL.
  @resources = {}
end

Protected Class Methods

_temporary_datasets_file() click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 163
# Returns the path of the datasets database file, creating the
# 'belresources' directory under the system temporary directory if needed.
#
# @return [String] path ending in 'belresources/datasets.gdbm'
def self._temporary_datasets_file
  storage_dir = File.join(Dir.tmpdir, 'belresources').tap do |path|
    # mkdir_p is a no-op when the directory already exists.
    FileUtils.mkdir_p(path)
  end
  File.join(storage_dir, 'datasets.gdbm')
end
_temporary_resource_file(url) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 169
# Returns the path of the value database file for +url+, creating the
# 'belresources' directory under the system temporary directory if needed.
# The file name is derived from the URL through _hash_url.
#
# @param [String] url the resource URL
# @return [String] path of the form '<tmpdir>/belresources/<hash>.gdbm'
def self._temporary_resource_file(url)
  storage_dir = File.join(Dir.tmpdir, 'belresources')
  FileUtils.mkdir_p(storage_dir)
  file_name = "#{_hash_url(url)}.gdbm"
  File.join(storage_dir, file_name)
end
open_datasets_file() click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 147
# Returns the memoized datasets database handle, opening it on first use.
#
# On JRuby a DBM is created with no arguments; on every other engine the
# file returned by _temporary_datasets_file is opened.
# NOTE(review): what an argument-less ::DBM.new provides on JRuby is not
# visible here - confirm against the JRuby DBM implementation in use.
#
# @return [DBM] the shared datasets database
def self.open_datasets_file
  return @dataset_file if @dataset_file

  @dataset_file =
    if RUBY_ENGINE =~ /^jruby/i
      ::DBM.new
    else
      ::DBM.open(_temporary_datasets_file)
    end
end
open_resource_file(url) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 155
# Returns the value database handle for +url+, opening it on first use and
# remembering it in @resource_files so each URL is opened once.
#
# NOTE(review): @resource_files is not initialized in this method; it is
# assumed to be set up (e.g. to a Hash) elsewhere in the class - confirm.
#
# @param [String] url the resource URL
# @return [DBM] the database holding value => encoding pairs for +url+
def self.open_resource_file(url)
  existing = @resource_files[url]
  return existing if existing

  @resource_files[url] =
    if RUBY_ENGINE =~ /^jruby/i
      ::DBM.new
    else
      ::DBM.open(_temporary_resource_file(url))
    end
end

Private Class Methods

_hash_url(url) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 175
# Derives a file-name-safe token from +url+: the Base64 form of its SHA-1
# digest with the '/', '=' and newline characters removed.
#
# @param [String] url the resource URL
# @return [String] the stripped Base64 digest
def self._hash_url(url)
  raw_digest = Digest::SHA1.digest(url)
  encoded    = Base64.encode64(raw_digest)
  # '/' would act as a path separator; '=' padding and the trailing newline
  # added by encode64 are dropped as well.
  encoded.delete("/=\n")
end

Public Instance Methods

retrieve_resource(resource_identifier) click to toggle source

Retrieve the resource identified by resource_identifier.

@param [String] resource_identifier the resource identifier

@return [FileResource, nil] the file resource, or +nil+ when none of its types is set

# File lib/bel_parser/resource/resource_url_reader.rb, line 43
# Retrieve the resource (dataset) identified by +resource_identifier+.
#
# @param [String] resource_identifier the resource identifier
# @return [FileResource, nil] the dataset; +nil+ when the identifier could
#   not be resolved or when every entry of the dataset's +types+ is nil
def retrieve_resource(resource_identifier)
  dataset = read_resource(resource_identifier)[:dataset]
  # read_resource caches an empty resource whose :dataset is nil when the
  # URL cannot be fetched; guard so that case yields nil instead of raising
  # NoMethodError on nil.types.
  return nil if dataset.nil? || dataset.types.all?(&:nil?)

  dataset
end
retrieve_value_from_resource(resource_identifier, value) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 49
# Looks up a single value in the resource identified by
# +resource_identifier+.
#
# @param [String] resource_identifier the resource identifier
# @param [String] value the value to look up
# @return [Array<FileResourceValue>, nil] a one-element array wrapping the
#   value and its encoding; +nil+ when the resource has no entry for +value+
def retrieve_value_from_resource(resource_identifier, value)
  dataset, values = read_resource(resource_identifier).values_at(:dataset, :values)
  encoding        = values[value]
  return unless encoding

  [FileResourceValue.new(dataset, value, encoding)]
end
retrieve_values_from_resource(resource_identifier) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 56
# Enumerates every value of the resource identified by
# +resource_identifier+.
#
# @param [String] resource_identifier the resource identifier
# @return [Enumerator::Lazy, nil] lazily built FileResourceValue objects,
#   one per stored value/encoding pair; +nil+ when the resource holds no
#   values
def retrieve_values_from_resource(resource_identifier)
  resource = read_resource(resource_identifier)
  values   = resource[:values]
  return nil if values.size.zero?

  dataset = resource[:dataset]
  # Stay lazy so the whole value store is not materialized at once.
  values.lazy.map { |value, encoding| FileResourceValue.new(dataset, value, encoding) }
end

Protected Instance Methods

create_resource(url, line_enum) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 82
      def create_resource(url, line_enum)
        delimiter           = DEFAULT_RESOURCE_VALUE_DELIMITER
        dataset             = @datasets[url]
        values              = ResourceURLReader.open_resource_file(url)

        if @reuse && dataset && values.size > 0
          warn(
            <<-MSG.gsub(/^ {14}/, '')
              Warning - Reusing value database.
                URL:  #{url}
                File: #{ResourceURLReader._temporary_resource_file(url)}
            MSG
          )
          return {
            dataset: FileResource.new(url, *dataset.split('//')),
            values:  values
          }
        end

        value_section               = false
        type, name, keyword, domain = nil
        line_enum.each do |line|
          line.strip!
          case
          when line =~ /^#{Regexp.escape('[AnnotationDefinition]')}/
            type = Dataset::ANNOTATION
          when line =~ /^#{Regexp.escape('[Namespace]')}/
            type = Dataset::NAMESPACE
          when line =~ /^NameString *= *(.*)$/
            name = Regexp.last_match[1]
          when line =~ /^Keyword *= *(.*)$/
            keyword = Regexp.last_match[1]
          when line =~ /^DomainString *= *(.*)$/
            domain = Regexp.last_match[1]
          when line =~ /^DelimiterString *=(.*)$/
            delimiter = Regexp.last_match[1]
          when line =~ /^#{Regexp.escape('[Values]')}/
            dataset = FileResource.new(url, domain, keyword, name, type)
            value_section = true
          when value_section
            value, encoding    = line.strip.split(delimiter)
            values[value.to_s] = encoding.to_s
          end
        end

        resource = {
          dataset: FileResource.new(url, domain, keyword, name, type),
          values:  values
        }
        @datasets[url] = [domain, keyword, name, type].join('//')
        resource
      end
empty_resource() click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 135
# Builds the placeholder resource that read_resource caches for URLs it
# could not fetch: no dataset and an empty value mapping.
#
# @return [Hash] a fresh +{ dataset: nil, values: {} }+ on every call
def empty_resource
  placeholder = {}
  placeholder[:dataset] = nil
  placeholder[:values]  = {}
  placeholder
end
http_get(url) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 139
# Fetches the body at +url+ through _get.
#
# @param [String] url the URL to fetch
# @return [String, nil] the response body; +nil+ when +url+ cannot be
#   parsed as a URI or a SocketError is raised while fetching
def http_get(url)
  _get(URI.parse(url))
rescue URI::InvalidURIError, SocketError
  # Only these two failures are treated as "not resolvable"; any other
  # error still propagates to the caller.
  nil
end
read_resource(url) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 67
# Returns the resource for +url+, fetching and building it on first access
# and serving it from the @resources cache afterwards.
#
# @param [String] url the resource URL
# @return [Hash] the resource hash with +:dataset+ and +:values+ keys; the
#   empty resource when the URL could not be fetched
def read_resource(url)
  cached = @resources[url]
  return cached if cached

  body = http_get(url)
  # Failed fetches are cached too (as the empty resource), so the URL is
  # not requested again.
  @resources[url] =
    if body
      create_resource(url, body.each_line)
    else
      empty_resource
    end
end

Private Instance Methods

_get(url, &block) click to toggle source
# File lib/bel_parser/resource/resource_url_reader.rb, line 182
# Performs a GET request for +url+ and returns the response body.
#
# @param [URI] url the parsed URL; its host, port and scheme are used
# @param [Proc] block accepted for interface compatibility; it is not used
# @return [String] the response body
def _get(url, &block)
  http = Net::HTTP.new(url.host, url.port)
  # Enable TLS for https URLs; otherwise the request would be sent in plain
  # HTTP to the HTTPS port and fail.
  http.use_ssl = url.scheme == 'https'
  # request_get already returns the response object, so the obsolete
  # HTTPResponse#response hop is not needed.
  http.request_get(url).read_body
end