class NetworkProfile::Extractor
Constants
- AUTO_LINK_RE
Logic from: github.com/tenderlove/rails_autolink/blob/master/lib/rails_autolink/helpers.rb
- BRACKETS
- HOST_PART
- TLD
- WORD_PATTERN
Public Class Methods
call(string)
click to toggle source
# File lib/network_profile/extractor.rb, line 12
# Convenience entry point: builds an extractor for +string+ and returns the
# result of #extracted_links! on it.
def self.call(string)
  extractor = new(string)
  extractor.extracted_links!
end
new(string)
click to toggle source
# File lib/network_profile/extractor.rb, line 16
# Stores the text to be searched; it is read later through @string.
def initialize(string)
  @string = string
end
Public Instance Methods
extracted_links!()
click to toggle source
# File lib/network_profile/extractor.rb, line 20
# Runs NetworkProfile.parse over every entry of #links and returns the non-nil
# results in order.
#
# A link whose parsing raises StandardError is not fatal: the exception is
# printed with `p` and that link is left out of the result.
def extracted_links!
  parsed = []

  links.each do |link|
    begin
      result = NetworkProfile.parse(link)
    rescue StandardError => error
      p error
      result = nil
    end

    # nil covers both "parse returned nil" and "parse raised".
    parsed << result unless result.nil?
  end

  parsed
end
links()
click to toggle source
# File lib/network_profile/extractor.rb, line 30
# Returns the distinct links found by scanning #mapped_string with
# AUTO_LINK_RE. The result is memoised in @links.
#
# For every match:
# * trailing characters matching the punctuation pattern are stripped one at
#   a time; a stripped character is put back (and stripping stops) when
#   BRACKETS maps it to a counterpart that occurs more often in the link than
#   the stripped character itself (presumably closing => opening bracket;
#   BRACKETS is defined outside this block);
# * 'https://' is prepended when the first capture group (the scheme) did not
#   take part in the match.
#
# The deduplicated list is what gets memoised, so every call returns the same
# array; nothing is cached if scanning raises.
def links
  return @links if @links

  # Built once instead of on every loop iteration; the pattern is unchanged.
  trailing_punctuation = %r{[^#{WORD_PATTERN}/-=&]$}

  found = []
  mapped_string.scan(AUTO_LINK_RE) do
    scheme = Regexp.last_match(1)
    href = Regexp.last_match(0)
    stripped = []

    while href.sub!(trailing_punctuation, '')
      # After sub!, the last match is the character that was just removed.
      stripped.push(Regexp.last_match(0))
      opening = BRACKETS[stripped.last]
      next unless opening && href.scan(opening).size > href.scan(stripped.last).size

      # The bracket balances one inside the link, so it belongs to the link.
      href << stripped.pop
      break
    end

    href = "https://#{href}" unless scheme
    found << href
  end

  @links = found.uniq
end
mapped_string()
click to toggle source
# File lib/network_profile/extractor.rb, line 54
# Returns a copy of @string with two kinds of loosely written URLs rewritten:
#
# 1. " host.tld/" (preceded by a space, no scheme) becomes " https://host.tld/".
# 2. A spaced-out " www . host . tld<path>" is collapsed to
#    " www.host.tld<path>", with every space removed from <path>. <path> runs
#    up to the next '<', non-breaking space or '"'.
#
# HOST_PART and TLD are defined outside this block; the code relies on them
# providing the named captures 'host' and 'tld'.
#
# Both patterns consume the leading space, so it is re-emitted in the
# replacement; without it the rewritten URL would be glued to the preceding
# token (e.g. to another URL). Spaces in the path are stripped with the core
# String#delete rather than ActiveSupport's String#remove.
def mapped_string
  bare_host = %r{ (#{HOST_PART}\.#{TLD}/)}
  spaced_www = %r{ www *\. +#{HOST_PART} *\. *#{TLD}(?<path>[^<\u00A0"]+)}

  with_schemes = @string.gsub(bare_host) do
    match = Regexp.last_match
    " https://#{match['host']}.#{match['tld']}/"
  end

  with_schemes.gsub(spaced_www) do
    match = Regexp.last_match
    " www.#{match['host']}.#{match['tld']}#{match['path'].delete(' ')}"
  end
end