module SiteinfoParser
Constants
- VERSION
Public Class Methods
check_format(url = nil)
click to toggle source
# File lib/siteinfo_parser.rb, line 40
#
# Tells whether +url+ looks like an absolute HTTP(S) URL.
#
# url - the value to check (default: nil).
#
# Returns true only when +url+ is a String that begins with "http://" or
# "https://" (case-sensitive); returns false for nil, for non-String values
# and for every other string.
def self.check_format(url = nil)
  # The last checked value is still recorded on the module, as before.
  @url = url
  return false unless url.is_a?(String)

  # start_with? inspects the very beginning of the string. A regexp anchored
  # with "^" would also accept a scheme that merely starts a later line,
  # e.g. "javascript:alert(1)\nhttp://example.com".
  url.start_with?('http://', 'https://')
end
fetch(url = nil)
click to toggle source
# File lib/siteinfo_parser.rb, line 11
#
# Downloads the page at +url+ and collects its title, Open Graph and Twitter
# Card meta tags, meta description and a favicon URL.
#
# url - the page address as a String; it is validated with check_format.
#
# Returns a Hash with the String keys 'title', 'og_image', 'og_title',
# 'og_url', 'og_sitename', 'twitter_card', 'twitter_site', 'twitter_domain',
# 'description' and 'favicon'. Tags that are missing from the page yield ''.
# Returns the String 'url is invalid' when check_format rejects +url+.
# Errors raised while parsing the URL, downloading the page or parsing the
# document are not rescued here.
def self.fetch(url = nil)
  # The requested URL is still recorded on the module, as before; everything
  # below works on the local +url+ only.
  @url = url
  return 'url is invalid' unless check_format(url)

  # Open through the parsed URI instead of Kernel#open: Kernel#open treats a
  # leading "|" as a command to execute and no longer opens URLs on Ruby 3.0+.
  uri = URI.parse(url)
  # The block form lets open-uri close the response IO once it has been read.
  html = uri.open('r:binary', :allow_redirections => :safe) { |io| io.read }
  doc = Nokogiri::HTML.parse(html.toutf8, nil, 'utf-8')

  # Text of the first node matched by +selector+, or '' when nothing matches.
  content = lambda { |selector| doc.css(selector).first.to_s }

  # Twitter Card tags are commonly written with name="twitter:*"; try the
  # property="twitter:*" form first and fall back to name= when it is absent.
  twitter = lambda do |key|
    value = content.call(%(//meta[property="twitter:#{key}"]/@content))
    value = content.call(%(//meta[name="twitter:#{key}"]/@content)) if value.empty?
    value
  end

  # Line breaks are removed from the description; the capitalised
  # name="Description" variant is used when the lowercase one gives nothing.
  description = content.call('//meta[name="description"]/@content').delete("\r\n")
  if description.empty?
    description = content.call('//meta[name="Description"]/@content').delete("\r\n")
  end

  response = {}
  # doc.title is nil for a page without <title>; to_s keeps that case from raising.
  response['title'] = doc.title.to_s.strip
  response['og_image'] = content.call('//meta[property="og:image"]/@content')
  response['og_title'] = content.call('//meta[property="og:title"]/@content')
  response['og_url'] = content.call('//meta[property="og:url"]/@content')
  response['og_sitename'] = content.call('//meta[property="og:site_name"]/@content')
  response['twitter_card'] = twitter.call('card')
  response['twitter_site'] = twitter.call('site')
  response['twitter_domain'] = twitter.call('domain')
  response['description'] = description
  response['favicon'] = "https://www.google.com/s2/favicons?domain=#{uri.host}"
  response
end