class URI::BLURI

A URI wrapper class with extra support for canonicalising URIs, including their query strings.

Constants

PATH_ESCAPE_MAPPINGS
PATH_UNESCAPE_MAPPINGS
REQUIRE_REGEX_ESCAPE

Public Class Methods

new(uri_str) click to toggle source
# File lib/uri/bluri.rb, line 30
# Parses +uri_str+ with Addressable::URI and keeps the result in @uri.
#
# Raises URI::InvalidURIError when valid_uri? does not return a truthy
# value for the parsed URI.
def initialize(uri_str)
  @uri = ::Addressable::URI.parse(uri_str)
  return if valid_uri?

  raise URI::InvalidURIError, "'#{uri_str}' not a valid URI"
end
parse(uri_str) click to toggle source
# File lib/uri/bluri.rb, line 61
# Builds a BLURI from +uri_str+.
#
# String input is tidied first to work around known URI spec breaks:
# surrounding whitespace is stripped, the whole string is lower-cased,
# spaces become "%20", and in mailto: URIs every "&" becomes "%26".
# Anything that is not a String is handed to BLURI.new untouched.
def self.parse(uri_str)
  return BLURI.new(uri_str) unless uri_str.is_a?(String)

  cleaned = uri_str.strip.downcase.gsub(" ", "%20")
  # Unencoded ampersands are only rewritten for mailto: URIs.
  cleaned.gsub!("&", "%26") if cleaned =~ /^mailto:.*&.*/
  BLURI.new(cleaned)
end
path_escape_char_regex() click to toggle source

Generate a regex which matches all characters in PATH_ESCAPE_MAPPINGS

# File lib/uri/bluri.rb, line 100
# Returns a Regexp character class matching every key of
# PATH_ESCAPE_MAPPINGS. Keys listed in REQUIRE_REGEX_ESCAPE are
# backslash-escaped inside the class. The Regexp is built once and
# memoised in @path_escape_char_regex.
def self.path_escape_char_regex
  return @path_escape_char_regex if @path_escape_char_regex

  members = PATH_ESCAPE_MAPPINGS.keys.map do |char|
    next char unless REQUIRE_REGEX_ESCAPE.include?(char)

    "\\#{char}"
  end

  @path_escape_char_regex = Regexp.new("[#{members.join}]")
end
path_unescape_code_regex() click to toggle source

Generate a regex which matches all escape sequences in PATH_UNESCAPE_MAPPINGS

# File lib/uri/bluri.rb, line 112
# Returns a Regexp that alternates over every key of
# PATH_UNESCAPE_MAPPINGS, each wrapped in a non-capturing group.
# The Regexp is built once and memoised in @path_unescape_code_regex.
def self.path_unescape_code_regex
  @path_unescape_code_regex ||= begin
    alternatives = PATH_UNESCAPE_MAPPINGS.keys.map { |code| "(?:#{code})" }
    Regexp.new(alternatives.join("|"))
  end
end

Public Instance Methods

canonicalize!(options = {}) click to toggle source
# File lib/uri/bluri.rb, line 74
# Canonicalises the wrapped URI in place and returns self.
#
# Steps, in order:
# * an "https" scheme is rewritten to "http";
# * trailing slashes are stripped from the path;
# * path characters found in PATH_ESCAPE_MAPPINGS are replaced by their
#   mapped values, then sequences found in PATH_UNESCAPE_MAPPINGS are
#   replaced by theirs;
# * the query is canonicalised via canonicalize_query!(options);
# * the fragment is cleared.
#
# options - Hash passed straight through to canonicalize_query!.
def canonicalize!(options = {})
  @uri.scheme = "http" if @uri.scheme == "https"

  # Build the new path with non-mutating calls and assign it once through
  # the setter. Mutating the string returned by the reader in place would
  # skip whatever bookkeeping the setter performs, and would raise if that
  # string happened to be frozen.
  path = @uri.path
  path = path.sub(%r{/*$}, "") if path =~ %r{/$}
  path = path.gsub(BLURI.path_escape_char_regex, PATH_ESCAPE_MAPPINGS)
  path = path.gsub(BLURI.path_unescape_code_regex, PATH_UNESCAPE_MAPPINGS)
  @uri.path = path

  canonicalize_query!(options)

  @uri.fragment = nil
  self
end
canonicalize_query!(options) click to toggle source
# File lib/uri/bluri.rb, line 87
# Filters and sorts the query parameters.
#
# options[:allow_query] decides which keys survive:
# * :all keeps every key;
# * otherwise it is treated as a key or (possibly nested) list of keys,
#   compared as strings; nil entries are ignored, so a missing option
#   drops every parameter.
#
# The surviving pairs are sorted by key, rebuilt with QueryHash[] and
# assigned through query_hash=.
def canonicalize_query!(options)
  permitted = options[:allow_query]

  unless permitted == :all
    whitelist = [permitted].flatten.compact.map(&:to_s)
    query_hash.keep_if { |key, _| whitelist.include?(key.to_s) }
  end

  self.query_hash = QueryHash[query_hash.sort_by { |key, _| key }]
end
has_query?() click to toggle source
# File lib/uri/bluri.rb, line 70
# Returns the value of +query+ when the URI scheme is http or https,
# and false for any other scheme. The result is therefore truthy only
# for http(s) URIs whose query is non-nil.
def has_query?
  return false unless %w(http https).include?(@uri.scheme)

  query
end
query=(query_str) click to toggle source
# File lib/uri/bluri.rb, line 56
# Sets the query string on the wrapped URI and discards the memoised
# query hash so it is re-parsed on next access. An empty string is
# stored as nil.
def query=(query_str)
  @query_hash = nil
  @uri.query = if query_str == ""
                 nil
               else
                 query_str
               end
end
query_hash() click to toggle source
# File lib/uri/bluri.rb, line 42
def query_hash
  @query_hash ||= CGI::parse(self.query || "").tap do |query_hash|
    # By default, CGI::parse produces lots of arrays. Usually they have a single element
    # in them. That's correct but not terribly usable. Fix it here.
    query_hash.each_pair { |k, v| query_hash[k] = v[0] if v.length == 1 }
    query_hash.extend QueryHash
  end
end
query_hash=(value) click to toggle source
# File lib/uri/bluri.rb, line 51
# Replaces the memoised query hash with +value+ and writes its string
# form (value.to_s) to the wrapped URI's query. An empty string form is
# stored as nil.
def query_hash=(value)
  @query_hash = value
  serialized = value.to_s
  @uri.query = serialized.empty? ? nil : serialized
end
valid_uri?() click to toggle source
# File lib/uri/bluri.rb, line 36
# Reports whether the wrapped URI uses one of the supported schemes
# (http, https or mailto). Returns nil when @uri is not set.
def valid_uri?
  %w[http https mailto].include?(@uri.scheme) if @uri
end