class URLGrey
Constants
- ABOUT_BLANK_URL
- AUTHORITY_TERMINATORS
- DEFAULT_PORTS
- HOST_CHROME_DEFAULT
- HOST_ESCAPE_CHARS
- HOST_NORMAL_CHARS
- PATH_PASS_CHARS
- PATH_UNESCAPE_CHARS
- QUERY_NORMAL_CHARS
- STANDARD_SCHEMES
- VERSION
Attributes
coerced[RW]
host[RW]
original[RW]
password[RW]
path[RW]
port[RW]
query[RW]
ref[RW]
scheme[RW]
slashes[RW]
username[RW]
Public Class Methods
new(_original)
click to toggle source
# File lib/url_grey.rb, line 28
# Builds a URLGrey from a raw URL string.
#
# Leading whitespace is removed from the input before it is stored in
# +original+; the stored text is then parsed into its components.
def initialize(_original)
  trimmed = _original.sub(%r{^\s*}, '')
  self.original = trimmed
  parse!
end
Public Instance Methods
fixed()
click to toggle source
# File lib/url_grey.rb, line 47
# Returns the fixed-up URL as a single string.
#
# An input equal to ABOUT_BLANK_URL is returned as that constant; any other
# input is rebuilt from the individually fixed components, in URL order.
def fixed
  return ABOUT_BLANK_URL if self.original == ABOUT_BLANK_URL

  [
    fixed_scheme,
    fixed_credentials,
    fixed_host,
    fixed_port,
    fixed_path,
    fixed_query,
    fixed_ref
  ].join
end
fixed_credentials()
click to toggle source
# File lib/url_grey.rb, line 53
# Returns the "user@" / "user:password@" prefix for the fixed URL, or an
# empty string when both the username and the password are empty.
def fixed_credentials
  user = self.username
  secret = self.password
  return "" if user.empty? && secret.empty?

  secret.empty? ? "#{user}@" : "#{user}:#{secret}@"
end
fixed_host()
click to toggle source
Based on FixupHost in components/url_formatter/url_fixer.cc.
# File lib/url_grey.rb, line 60
# Returns the host cleaned up for the fixed URL.
#
# Whitespace is removed and the host is lower-cased. Unless the host consists
# only of dots, leading dots are dropped and a run of trailing dots is
# collapsed to a single dot. An empty host on an "about" or "chrome" URL
# becomes HOST_CHROME_DEFAULT. ASCII hosts have every character outside
# HOST_NORMAL_CHARS percent-escaped; non-ASCII hosts are handed to
# SimpleIDN.to_ascii.
def fixed_host
  fixed = self.host.gsub(%r{\s}, '').downcase

  unless fixed.match(%r{^\.*$})
    fixed = fixed.sub(%r{^\.*}, '')
    fixed = fixed.sub(%r{(?<=\.)\.*$}, '')
  end

  if fixed.empty? && ["about", "chrome"].include?(self.scheme)
    fixed = HOST_CHROME_DEFAULT
  end

  return SimpleIDN.to_ascii(fixed) unless fixed.ascii_only?

  fixed.chars.map do |char|
    if HOST_NORMAL_CHARS.include?(char)
      char
    else
      # always two hex digits, so code points below 0x10 yield "%0X" not "%X"
      format('%%%02X', char.ord)
    end
  end.join("")
end
fixed_path()
click to toggle source
Based on CanonicalizePath in url/url_canon_path.cc.
# File lib/url_grey.rb, line 85
# Returns the path for the fixed URL.
#
# For schemes in STANDARD_SCHEMES, plus "about" and "chrome", a leading "/" is
# added when the path does not already start with one. Characters found in
# PATH_PASS_CHARS or PATH_UNESCAPE_CHARS, and ".", are kept as they are; every
# other character is percent-escaped byte by byte.
def fixed_path
  fixed = self.path
  if (fixed[0] != '/') && (STANDARD_SCHEMES + ["about", "chrome"]).include?(self.scheme)
    fixed = "/#{fixed}"
  end

  fixed.chars.map do |char|
    # TODO: if the dot is preceded by a slash, do directory stuff:
    #   google.com/abc/.././def -> google.com/def
    if char == "." || PATH_PASS_CHARS.include?(char) || PATH_UNESCAPE_CHARS.include?(char)
      char
    else
      # escape each byte of the character with exactly two hex digits, so
      # multi-byte characters become one "%XX" per byte and bytes below 0x10
      # keep their leading zero
      char.bytes.map { |byte| format('%%%02X', byte) }.join
    end
  end.join("")
end
fixed_port()
click to toggle source
# File lib/url_grey.rb, line 106
# Returns ":port" for the fixed URL, or an empty string when the port is
# empty or equals the DEFAULT_PORTS entry for the current scheme.
def fixed_port
  port_text = self.port
  return "" if port_text.empty?

  default_port = DEFAULT_PORTS[self.scheme.to_sym]
  port_text.to_i == default_port ? "" : ":#{port_text}"
end
fixed_query()
click to toggle source
# File lib/url_grey.rb, line 111
# Returns "?query" for the fixed URL, or an empty string when there is no
# query (nil).
#
# Bytes whose value appears among the code points of QUERY_NORMAL_CHARS are
# emitted as characters; every other byte is percent-escaped.
def fixed_query
  fixed = self.query
  return "" if fixed.nil?

  # computed once rather than once per byte
  normal_codes = QUERY_NORMAL_CHARS.unpack("U*")

  fixed = fixed.bytes.map do |byte|
    if normal_codes.include?(byte)
      [byte].pack("U")
    else
      # always two hex digits, so bytes below 0x10 yield "%0X" not "%X"
      format('%%%02X', byte)
    end
  end.join('')
  "?#{fixed}"
end
fixed_ref()
click to toggle source
# File lib/url_grey.rb, line 124
# Returns "#ref" for the fixed URL, or an empty string when there is no ref
# (nil).
def fixed_ref
  fragment = self.ref
  fragment.nil? ? "" : "\##{fragment}"
end
fixed_scheme()
click to toggle source
# File lib/url_grey.rb, line 129
# Returns the scheme prefix for the fixed URL.
#
# "about" is rewritten to "chrome". Schemes in STANDARD_SCHEMES (plus "about"
# and "chrome") are always followed by "://"; any other scheme is followed by
# ":" and the slashes that were present in the input.
def fixed_scheme
  name = self.scheme == "about" ? "chrome" : self.scheme
  slash_schemes = STANDARD_SCHEMES + ["about", "chrome"]
  return "#{name}://" if slash_schemes.include?(name)

  "#{name}:#{self.slashes}"
end
parts()
click to toggle source
# File lib/url_grey.rb, line 34
# Returns the parsed components as a Hash keyed by component name, in the
# order scheme, username, password, host, port, path, query, ref.
def parts
  names = [:scheme, :username, :password, :host, :port, :path, :query, :ref]
  names.each_with_object({}) do |name, components|
    components[name] = public_send(name)
  end
end
Private Instance Methods
find_scheme(text)
click to toggle source
# File lib/url_grey.rb, line 231
# Looks for a scheme at the front of +text+.
#
# Returns the lower-cased scheme, "" when the text before the first ":" is
# empty, or false when there is no ":", the candidate does not start with a
# letter, it contains characters other than letters, digits, "+" and "-", or
# the text after the ":" looks like a port.
def find_scheme(text)
  # the candidate is whatever precedes the first ':'
  scheme_match = text.match(%r{^(.*?):})
  return false unless scheme_match

  candidate = scheme_match[1].downcase
  return "" if candidate.empty?

  starts_with_letter = candidate.match(%r{^[a-z]})
  only_valid_chars = candidate.match(%r{^[+\-0-9a-z]*$})
  return false unless starts_with_letter && only_valid_chars

  # fix up segmentation for "www:123/"
  return false if has_port(text)

  candidate
end
has_port(text)
click to toggle source
# File lib/url_grey.rb, line 251
# Reports whether the text following the first ":" in +text+ looks like a
# port, i.e. consists only of digits up to the next "\", "/", "?" or "#"
# (or up to the end of the line when none of those follow).
#
# Returns true or false.
def has_port(text)
  return false unless text.include?(":")

  match = text.match(%r{:(.*?)[\\/\?#]}) || text.match(%r{:(.*)$})
  # return a strict boolean instead of leaking MatchData / nil
  !match[1].match(%r{\A\d+\z}).nil?
end
parse!()
click to toggle source
# File lib/url_grey.rb, line 144
# Splits the coerced URL into its components and stores each one on the
# instance: slashes, username, password, host, port, path, query and ref
# (the scheme is set by parse_scheme!).
#
# Missing username, password and port are stored as ""; missing query and ref
# are stored as nil.
def parse!
  parse_scheme!

  # everything after the first ':' is a run of slashes followed by the rest
  after_scheme = self.coerced.match(%r{:(.*)})[1]
  slash_run, remainder = after_scheme.match(%r{^([\\\/]*)(.*)$})[1..2]
  self.slashes = slash_run

  # authority terminators: '/', '\', '?', '#'
  terminated = ['/', '\\', '?', '#'].any? { |mark| remainder.include?(mark) }
  authority, full_path =
    if terminated
      remainder.match(%r{^(.*?)([\\\/?#].*)$})[1..2]
    else
      [remainder, ""]
    end

  # credentials are whatever precedes the last '@'
  user_info, server_info =
    if authority.include?("@")
      authority.match(%r{^(.*)@(.*)$})[1..2]
    else
      ["", authority]
    end

  # parse user_info: the first ':' separates username from password
  if user_info.include?(":")
    self.username, self.password = user_info.match(%r{^(.*?):(.*)$})[1..2]
  else
    self.username = user_info
    self.password = ""
  end

  # parse server_info: the last ':' starts the port, unless it sits before
  # the last ']' or the text opens with '[' and has no ']'
  last_colon = server_info.rindex(":")
  last_bracket = server_info.rindex("]")
  port_follows =
    if last_colon.nil?
      false
    elsif last_bracket
      last_colon > last_bracket
    else
      !server_info.start_with?("[")
    end

  if port_follows
    self.host, self.port = server_info.match(%r{^(.*):(.*)$})[1..2]
  else
    self.host = server_info
    self.port = ""
  end

  # parse full_path: ref follows the first '#', query follows the first '?'
  before_ref, ref_text =
    if full_path.include?("#")
      full_path.match(%r{^(.*?)#(.*)$})[1..2]
    else
      [full_path, nil]
    end
  self.ref = ref_text

  path_text, query_text =
    if before_ref.include?("?")
      before_ref.match(%r{^(.*?)\?(.*)$})[1..2]
    else
      [before_ref, nil]
    end
  self.path = path_text
  self.query = query_text
end
parse_scheme!()
click to toggle source
# File lib/url_grey.rb, line 211
# Works out the scheme of the original text and stores both the scheme and
# the coerced URL text on the instance.
#
# When no scheme is found and the text does not start with ";", the first ";"
# is tried as ":" and kept if that yields a scheme. If there is still no
# scheme, "ftp://" is prepended when the text starts with "ftp." (any case)
# and "http://" otherwise.
def parse_scheme!
  raw = self.original
  self.coerced = raw

  needs_repair = !find_scheme(raw) && (raw[0] != ";")
  if needs_repair
    repaired = raw.sub(";", ":")
    self.coerced = repaired if find_scheme(repaired)
  end

  unless find_scheme(self.coerced)
    prefix = self.coerced.match(%r{^ftp\.}i) ? "ftp://" : "http://"
    self.coerced = prefix + self.coerced
  end

  self.scheme = find_scheme(self.coerced) || ""
end