module SiteinfoParser

Constants

VERSION

Public Class Methods

check_format(url = nil)
# File lib/siteinfo_parser.rb, line 40
# Reports whether +url+ looks like an absolute HTTP or HTTPS URL.
#
# Only the leading scheme is inspected; the rest of the string is not
# validated. The pattern is anchored with \A (start of string) rather than
# ^ (start of any line), so a multi-line value that merely contains an
# "http://" line further down is rejected.
#
# @param url [String, nil] candidate URL
# @return [Boolean] true when +url+ is string-like and begins with
#   "http://" or "https://"; false otherwise (including nil)
def self.check_format(url = nil)
  # The module-level @url is still assigned, exactly as before, so any code
  # that reads it after this call keeps seeing the last checked value.
  @url = url

  # Anything that is not string-like (nil, numbers, ...) is simply invalid.
  return false unless url.respond_to?(:to_str)

  !(url.to_str =~ %r{\Ahttps?://}).nil?
end
fetch(url = nil)
# File lib/siteinfo_parser.rb, line 11
# Downloads the page at +url+ and extracts basic site metadata from its HTML.
#
# @param url [String, nil] absolute http:// or https:// URL of the page
# @return [Hash{String => String}] on success, a hash with the keys
#   'title', 'og_image', 'og_title', 'og_url', 'og_sitename',
#   'twitter_card', 'twitter_site', 'twitter_domain', 'description' and
#   'favicon'; a value is an empty string when the page lacks that tag
# @return [String] the literal 'url is invalid' when check_format rejects +url+
# @raise [URI::InvalidURIError] when +url+ passes the scheme check but cannot
#   be parsed as a URI
def self.fetch(url = nil)
  # Still recorded on the module as before, but every read below uses the
  # local +url+ so a concurrent call cannot swap the value mid-request.
  @url = url
  return 'url is invalid' unless check_format(url)

  uri = URI.parse(url)

  # Open through the parsed URI instead of Kernel#open: the string is never
  # interpreted as a file path or pipe command, and the block form closes
  # the handle once the body has been read.
  # NOTE(review): :allow_redirections is not a stock open-uri option;
  # presumably it comes from the open_uri_redirections gem -- confirm.
  html = uri.open("r:binary", :allow_redirections => :safe) { |io| io.read }
  doc = Nokogiri::HTML.parse(html.toutf8, nil, 'utf-8')

  # Value of the content attribute of the first <meta> tag whose +attribute+
  # equals +value+, or "" when the page has no such tag.
  meta = lambda do |attribute, value|
    doc.css(%(//meta[#{attribute}="#{value}"]/@content)).first.to_s
  end

  # Same lookup by name, with every CR and LF character removed.
  one_line_meta = lambda do |value|
    meta.call('name', value).delete("\r\n")
  end

  response = {}
  # doc.title is nil for pages without a <title>; to_s keeps that case safe.
  response['title'] = doc.title.to_s.strip
  response['og_image'] = meta.call('property', 'og:image')
  response['og_title'] = meta.call('property', 'og:title')
  response['og_url'] = meta.call('property', 'og:url')
  response['og_sitename'] = meta.call('property', 'og:site_name')

  # NOTE(review): these are looked up by the "property" attribute only; tags
  # written as <meta name="twitter:..."> are not matched -- confirm intended.
  response['twitter_card'] = meta.call('property', 'twitter:card')
  response['twitter_site'] = meta.call('property', 'twitter:site')
  response['twitter_domain'] = meta.call('property', 'twitter:domain')

  # Fall back to the capitalised tag name when the lowercase one is empty.
  description = one_line_meta.call('description')
  description = one_line_meta.call('Description') if description.empty?
  response['description'] = description

  # Interpolation tolerates a URL without a host (nil becomes "").
  response['favicon'] = "https://www.google.com/s2/favicons?domain=#{uri.host}"

  response
end