class TheFox::Sengi::Uri
Constants
- URI_CLASSES
Public Class Methods
new(url)
click to toggle source
# File lib/sengi/uri.rb, line 12
# Builds a wrapper around +url+.
#
# Every piece of bookkeeping state is reset first. The URL is then parsed
# with Kernel#URI; if parsing fails the object is left invalid (is_valid?
# returns false) instead of raising. A successfully parsed URL is passed
# through validate and, when still valid, normalised (append_slash,
# host_downcase, remove_fragment), after which the domain fields and the
# SHA256 hash of the normalised string are derived.
#
# url - the value handed to Kernel#URI.
def initialize(url)
  @uri = nil
  @uri_class = nil
  @hash = nil
  @is_blacklisted = nil
  @is_ignored = nil
  # NOTE(review): this is the String 'nil', not the nil object -- kept as is;
  # confirm that callers really expect the literal text.
  @is_ignored_reason = 'nil'
  @hash_id_key_name = nil
  @id = nil
  @key_name = nil

  @domain_nowww = nil
  @domain_nowww_hash = nil
  @domain_original_hash = nil
  @domain_hash_id_key_name = nil
  @domain_id = nil
  @domain_key_name = nil

  @request_id = nil
  @request_key_name = nil

  @response_id = nil
  @response_key_name = nil
  @response_size = 0
  @response_content_type = ''

  begin
    @uri = URI(url)
  rescue StandardError
    # Only ordinary errors (bad URI syntax, wrong argument type) mark the
    # object invalid; signals, exit requests and the like must propagate.
    @uri = nil
  end

  validate

  if is_valid?
    append_slash
    host_downcase
    remove_fragment
    domain_setup

    @uri_class = @uri.class
    @hash = Digest::SHA256.hexdigest(to_s)
    @hash_id_key_name = "urls:id:#{@hash}"
  end
end
Public Instance Methods
domain_hash_id_key_name()
click to toggle source
# File lib/sengi/uri.rb, line 122
# Returns the "domains:id:<hash>" key name computed by domain_setup,
# or nil when no domain information was derived.
def domain_hash_id_key_name
  @domain_hash_id_key_name
end
domain_id()
click to toggle source
# File lib/sengi/uri.rb, line 131
# Returns the domain id last assigned through domain_id=; nil until then.
def domain_id
  @domain_id
end
domain_id=(domain_id)
click to toggle source
# File lib/sengi/uri.rb, line 126
# Stores the domain id and derives the matching "domains:<id>" key name
# in the same step so the two never drift apart.
def domain_id=(domain_id)
  @domain_id = domain_id
  @domain_key_name = "domains:#{domain_id}"
end
domain_key_name()
click to toggle source
# File lib/sengi/uri.rb, line 135
# Returns the "domains:<id>" key name derived by domain_id=; nil until a
# domain id has been assigned.
def domain_key_name
  @domain_key_name
end
domain_nowww()
click to toggle source
# File lib/sengi/uri.rb, line 110
# Returns the host with a leading "www." removed, as computed by
# domain_setup; nil when no host was available.
def domain_nowww
  @domain_nowww
end
domain_nowww_hash()
click to toggle source
# File lib/sengi/uri.rb, line 114
# Returns the SHA256 hex digest of domain_nowww, as computed by
# domain_setup; nil when no host was available.
def domain_nowww_hash
  @domain_nowww_hash
end
domain_original_hash()
click to toggle source
# File lib/sengi/uri.rb, line 118
# Returns the SHA256 hex digest of the unmodified host, as computed by
# domain_setup; nil when no host was available.
def domain_original_hash
  @domain_original_hash
end
hash_id_key_name()
click to toggle source
# Replaces the hash-to-id key name (initialize derives it as
# "urls:id:<hash>").
#
# hash_id_key_name - the new key name; stored unchanged.
def hash_id_key_name=(hash_id_key_name)
  @hash_id_key_name = hash_id_key_name
end
# File lib/sengi/uri.rb, line 89
# Returns the "urls:id:<hash>" key name set up by initialize; nil for an
# invalid URI.
def hash_id_key_name
  @hash_id_key_name
end
id()
click to toggle source
# File lib/sengi/uri.rb, line 98
# Returns the URL id last assigned through id=; nil until then.
def id
  @id
end
id=(id)
click to toggle source
# File lib/sengi/uri.rb, line 93
# Stores the URL id and derives the matching "urls:<id>" key name.
def id=(id)
  @id = id
  @key_name = "urls:#{id}"
end
is_blacklisted()
click to toggle source
# File lib/sengi/uri.rb, line 65
# Returns the blacklist flag last assigned through is_blacklisted=;
# nil until one has been assigned.
def is_blacklisted
  @is_blacklisted
end
is_blacklisted=(is_blacklisted)
click to toggle source
# File lib/sengi/uri.rb, line 61
# Records the blacklist flag; the value is stored unchanged.
def is_blacklisted=(is_blacklisted)
  @is_blacklisted = is_blacklisted
end
is_ignored()
click to toggle source
# File lib/sengi/uri.rb, line 73
# Returns the ignore flag last assigned through is_ignored=; nil until
# one has been assigned.
def is_ignored
  @is_ignored
end
is_ignored=(is_ignored)
click to toggle source
# File lib/sengi/uri.rb, line 69
# Records the ignore flag; the value is stored unchanged.
def is_ignored=(is_ignored)
  @is_ignored = is_ignored
end
is_ignored_reason()
click to toggle source
# File lib/sengi/uri.rb, line 81
# Returns the reason recorded through is_ignored_reason=. Until one is
# assigned this is the String 'nil' that initialize sets.
def is_ignored_reason
  @is_ignored_reason
end
is_ignored_reason=(is_ignored_reason)
click to toggle source
# File lib/sengi/uri.rb, line 77
# Records why the URI is ignored; the value is stored unchanged.
def is_ignored_reason=(is_ignored_reason)
  @is_ignored_reason = is_ignored_reason
end
is_relative?(uri = nil)
click to toggle source
# File lib/sengi/uri.rb, line 233
# Tells whether this URI counts as relative.
#
# A URI whose class is exactly URI::Generic is always relative. Otherwise
# it is relative only with respect to +uri+: when +uri+ is given and the
# host of its wrapped URI (uri.ruri) equals this URI's host.
#
# uri - optional object responding to #ruri; defaults to nil.
def is_relative?(uri = nil)
  return true if @uri_class == URI::Generic
  return false if uri.nil?

  uri.ruri.host == @uri.host
end
is_valid?()
click to toggle source
# File lib/sengi/uri.rb, line 53
# Returns true while a parsed URI is held, false once parsing failed or
# validate discarded it (both leave @uri as nil).
def is_valid?
  @uri.nil? ? false : true
end
join(suburi)
click to toggle source
# File lib/sengi/uri.rb, line 229
# Resolves +suburi+ against this URI with URI.join and wraps the result in
# a new instance of the receiver's class.
#
# suburi - object responding to #ruri.
def join(suburi)
  merged = URI.join(@uri, suburi.ruri)
  self.class.new(merged.to_s)
end
key_name()
click to toggle source
request_id()
click to toggle source
# File lib/sengi/uri.rb, line 144
# Returns the request id last assigned through request_id=; nil until then.
def request_id
  @request_id
end
request_id=(request_id)
click to toggle source
# File lib/sengi/uri.rb, line 139
# Stores the request id and derives the matching "requests:<id>" key name.
def request_id=(request_id)
  @request_id = request_id
  @request_key_name = "requests:#{request_id}"
end
request_key_name()
click to toggle source
# File lib/sengi/uri.rb, line 148
# Returns the "requests:<id>" key name derived by request_id=; nil until
# a request id has been assigned.
def request_key_name
  @request_key_name
end
response_content_type()
click to toggle source
# File lib/sengi/uri.rb, line 177
# Returns the stored response content type; an empty String until one is
# assigned.
def response_content_type
  @response_content_type
end
response_content_type=(response_content_type)
click to toggle source
# File lib/sengi/uri.rb, line 173
# Stores the response content type, coerced with to_s so that nil becomes
# an empty String.
def response_content_type=(response_content_type)
  as_text = response_content_type.to_s
  @response_content_type = as_text
end
response_id()
click to toggle source
# File lib/sengi/uri.rb, line 157
# Returns the response id last assigned through response_id=; nil until then.
def response_id
  @response_id
end
response_id=(response_id)
click to toggle source
# File lib/sengi/uri.rb, line 152
# Stores the response id and derives the matching "responses:<id>" key name.
def response_id=(response_id)
  @response_id = response_id
  @response_key_name = "responses:#{response_id}"
end
response_key_name()
click to toggle source
# File lib/sengi/uri.rb, line 161
# Returns the "responses:<id>" key name derived by response_id=; nil until
# a response id has been assigned.
def response_key_name
  @response_key_name
end
response_size()
click to toggle source
# File lib/sengi/uri.rb, line 169
# Returns the stored response size: Integer 0 as set by initialize, or
# whatever response_size= stored afterwards.
def response_size
  @response_size
end
response_size=(response_size)
click to toggle source
# File lib/sengi/uri.rb, line 165
# Stores the response size, coerced with to_s.
#
# NOTE(review): initialize defaults @response_size to Integer 0 while this
# setter stores a String -- confirm which type callers expect.
def response_size=(response_size)
  as_text = response_size.to_s
  @response_size = as_text
end
ruri()
click to toggle source
# File lib/sengi/uri.rb, line 57
# Returns the wrapped URI object itself, or nil when the URI is invalid.
def ruri
  @uri
end
to_hash()
click to toggle source
# File lib/sengi/uri.rb, line 185
# Returns the SHA256 hex digest that initialize computed from the
# normalised URL string; nil for an invalid URI. Despite the name this is
# a String, not a Hash.
def to_hash
  @hash
end
to_http()
click to toggle source
# File lib/sengi/uri.rb, line 189
# Returns a copy of the wrapped URI whose scheme is forced to "http".
# The wrapped URI itself is left untouched.
def to_http
  @uri.clone.tap { |copy| copy.scheme = 'http' }
end
to_s()
click to toggle source
# File lib/sengi/uri.rb, line 181 def to_s "#{@uri}" end
weight(ref_uri = nil)
click to toggle source
# File lib/sengi/uri.rb, line 195
# Returns a numeric weight for this URI based on its class and, for plain
# HTTP, on how its host relates to a reference URI:
#
#   100 - class is exactly URI::Generic
#   200 - URI::HTTP and one host is contained in the other
#   250 - URI::HTTP otherwise
#   290 - URI::HTTPS
#   999 - anything else, including an invalid (nil) URI
#
# ref_uri - optional object responding to #ruri; defaults to nil.
def weight(ref_uri = nil)
  # An invalid URI has no wrapped object; treat it as having no host instead
  # of raising NoMethodError on nil. The same goes for an invalid reference.
  own_host = @uri.nil? ? nil : @uri.host
  ref_inner = ref_uri.nil? ? nil : ref_uri.ruri
  ref_host = ref_inner.nil? ? nil : ref_inner.host

  # "Subdomain" here is a plain substring test in either direction.
  is_subdomain = false
  if !own_host.nil? && !ref_host.nil?
    is_subdomain = own_host.include?(ref_host) || ref_host.include?(own_host)
  end

  # Exact class comparison on purpose: URI::HTTPS inherits from URI::HTTP,
  # which inherits from URI::Generic, and each must be ranked separately.
  return 100 if @uri_class == URI::Generic
  if @uri_class == URI::HTTP
    return is_subdomain ? 200 : 250
  end
  return 290 if @uri_class == URI::HTTPS

  999
end
Private Instance Methods
append_slash()
click to toggle source
# File lib/sengi/uri.rb, line 255
# Normalises a bare-host HTTP URL by adding a trailing slash. The wrapped
# URI is re-parsed with the slash appended only when its class is exactly
# URI::HTTP, its request_uri is "/", and the string form does not already
# end in "/".
def append_slash
  url = to_s

  return unless @uri.class == URI::HTTP
  return unless @uri.request_uri == '/'
  return if url[-1] == '/'

  @uri = URI("#{url}/")
end
domain_setup()
click to toggle source
# File lib/sengi/uri.rb, line 277
# Derives the domain related fields from the wrapped URI's host: the host
# without a leading "www.", SHA256 hex digests of both the stripped and the
# original host, and the "domains:id:<hash>" key name. Does nothing when
# there is no URI or the URI has no host.
def domain_setup
  return if @uri.nil? || @uri.host.nil?

  host = @uri.host
  @domain_nowww = host.sub(/^www\./, '')
  @domain_nowww_hash = Digest::SHA256.hexdigest(@domain_nowww)
  @domain_original_hash = Digest::SHA256.hexdigest(host)
  @domain_hash_id_key_name = "domains:id:#{@domain_nowww_hash}"
end
host_downcase()
click to toggle source
# File lib/sengi/uri.rb, line 267
# Lower-cases the host of the wrapped URI in place.
#
# URIs whose class is exactly URI::Generic are left alone, as are URIs
# without a host component (for example "http:/path"), which previously
# raised NoMethodError on nil.
def host_downcase
  return if @uri.class == URI::Generic

  host = @uri.host
  return if host.nil?

  @uri.host = host.downcase
end
remove_fragment()
click to toggle source
# File lib/sengi/uri.rb, line 273
# Drops the fragment ("#...") part from the wrapped URI in place.
def remove_fragment
  @uri.fragment = nil
end
validate()
click to toggle source
# File lib/sengi/uri.rb, line 240
# Discards the wrapped URI (sets it to nil) when it is not acceptable:
# either its lower-cased string form starts with "javascript:" or "tel:",
# or its class is not listed in URI_CLASSES.
def validate
  if is_valid?
    lowered = to_s.downcase
    @uri = nil if lowered.start_with?('javascript:', 'tel:')
  end

  # Re-check validity: the scheme filter above may just have cleared @uri.
  @uri = nil if is_valid? && !URI_CLASSES.include?(@uri.class)
end