class WebStat::FetchAsWeb
Public Class Methods
new(url)
click to toggle source
Initializes a new instance for the given URL. @param [String] url
# File lib/web_stat/fetch/fetch_as_web.rb, line 7
# Validates +url+, passes it through +original_url+, loads the document and
# parses it with Nokogiri.
#
# A URL ending in ".pdf" is opened with URI.open and read with PDF::Reader;
# its title and the text of its first page are wrapped in a small HTML
# skeleton. Any other URL is loaded through +get_url+.
#
# @param [String] url
# @raise [WebStat::INVALID_URL] when FetchAsWeb.url_valid? rejects the URL
def initialize(url)
  raise WebStat::INVALID_URL, url unless FetchAsWeb.url_valid?(url)

  @url = original_url(url)
  # Literal suffix check: unlike /\.pdf$/ it cannot match in front of an
  # embedded newline, and it mirrors the File.basename(@url, ".pdf") call below.
  if @url.end_with?(".pdf")
    title = nil
    body = nil
    URI.open(@url) do |io|
      reader = PDF::Reader.new(io)
      # Fall back to the file name when the PDF carries no :Title entry.
      title = reader.info.key?(:Title) ? reader.info[:Title] : File.basename(@url, ".pdf")
      # A PDF without pages leaves the body empty instead of raising NoMethodError.
      body = reader.pages.first&.text
    end
    @html = <<-"EOS"
      <html>
        <head>
          <title>#{title}</title>
        </head>
        <body>
          #{body}
        </body>
      </html>
    EOS
  else
    @html = get_url(@url)
  end
  @nokogiri = ::Nokogiri::HTML(@html)
end
url_valid?(url)
click to toggle source
Validates the URL.
# File lib/web_stat/fetch/fetch_as_web.rb, line 41
# Checks whether +url+ is an absolute http:// or https:// URL whose host ends
# in a dot followed by at least two letters. Matching is case-insensitive.
#
# The pattern is anchored with \A and \z so the whole string must match;
# with ^ and $ a multi-line value would pass as soon as any single line
# looked like a URL.
#
# @param [String] url
# @return [Boolean] true when the whole string matches the pattern
def url_valid?(url)
  %r{\Ahttps?://([a-z0-9][a-z0-9\-.]{0,61})\.([a-z]{2,})(.*)?\z}i.match?(url)
end