class Panchira::Resolver
Resolver
is the class that actually gets attributes by fetching the designated URL. This class is the default resolver for pages.
To create your own resolver, first you make a class that extends Resolver
, and then register it by ::Panchira::Extensions::register()
. Then ::Panchira::fetch
will pick up your resolver when Resolver::applicable?()
is true.
Constants
- URL_REGEXP
URL pattern that a resolver tries to resolve. You must override this in subclasses to limit which URLs they resolve.
Public Class Methods
applicable?(url)
click to toggle source
Tells whether the URL is applicable for this resolver. ::Panchira::fetch
uses this method to choose a Resolver
for a URL.
# File lib/panchira/resolvers/resolver.rb, line 46
#
# Tell whether +url+ matches the URL_REGEXP of the receiver.
# ::Panchira::fetch uses this predicate to choose a resolver for a URL.
#
# @param url [String, nil] URL to test against the pattern.
# @return [Boolean] true when URL_REGEXP matches +url+, false otherwise
#   (including when +url+ is nil).
def applicable?(url)
  # Regexp#match? yields a strict boolean and does not populate $~,
  # which is all a predicate needs; the truthiness seen by callers is
  # the same as with the former `url =~ self::URL_REGEXP`.
  self::URL_REGEXP.match?(url)
end
new(url)
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 14
#
# Remember the URL this resolver instance will work on.
#
# @param url the URL of the page to resolve; stored as-is in @url.
def initialize(url)
  @url = url
end
Public Instance Methods
fetch()
click to toggle source
This method is called right after this Resolver
instance is created. It fetches the page from @url and returns a PanchiraResult
.
# File lib/panchira/resolvers/resolver.rb, line 20
#
# Fetch the page at @url and collect its attributes into a PanchiraResult.
#
# The canonical URL is resolved first; when it differs from @url the page
# is fetched again from the canonical address before the remaining
# attributes are parsed.
#
# @return [PanchiraResult] the populated result object.
def fetch
  PanchiraResult.new.tap do |outcome|
    @page = fetch_page(@url)

    outcome.canonical_url = parse_canonical_url
    # Parse everything else from the canonical page rather than the
    # originally requested one.
    @page = fetch_page(outcome.canonical_url) unless @url == outcome.canonical_url

    outcome.title = parse_title
    outcome.description = parse_description
    outcome.image = parse_image
    outcome.tags = parse_tags

    # Resolvers that define parse_authors (even privately) fill `authors`;
    # all others fill the single `author` field.
    if respond_to?(:parse_authors, true)
      outcome.authors = parse_authors
    else
      outcome.author = parse_author
    end

    outcome.circle = parse_circle
    outcome.resolver = parse_resolver
  end
end
Private Instance Methods
fetch_page(url)
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 53
#
# Download +url+ and parse the response body as an HTML document.
#
# The request carries the headers produced by +user_agent+ and +cookie+,
# and the charset reported for the response is handed to the parser.
#
# @param url [String] address of the page to download.
# @return the document returned by Nokogiri::HTML.parse.
def fetch_page(url)
  headers = { 'User-Agent' => user_agent, 'Cookie' => cookie }
  response = URI.parse(url).read(headers)

  Nokogiri::HTML.parse(response, url, response.charset)
end
parse_canonical_url()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 64
#
# Follow the canonical-URL hints of @page until they converge.
#
# Each round reads <link rel="canonical"> (falling back to og:url), then
# replaces @page with the page fetched from that address. The walk stops
# when a page offers no hint, when a hint repeats, or when more than five
# hints have already been followed.
#
# @return [String] the canonical URL, or @url when the first page offers
#   no hint at all.
def parse_canonical_url
  visited = []

  loop do
    candidate = @page.css('//link[rel="canonical"]/@href').to_s
    candidate = @page.css('//meta[property="og:url"]/@content').to_s if candidate.empty?

    # No hint on this page: settle on the last URL followed, if any.
    return visited.last || @url if candidate.empty?
    # Converged (hint already seen) or too many hops: stop here.
    return candidate if visited.include?(candidate) || visited.length > 5

    visited.push(candidate)
    @page = fetch_page(candidate)
  end
end
parse_circle()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 127
#
# Circle for the page. This implementation has none to report and
# always answers nil.
#
# @return [nil]
def parse_circle
  nil
end
parse_description()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 95
#
# Extract the page description from @page.
#
# Prefers the og:description meta tag and falls back to any meta tag whose
# name ends with "description". Only the first match is used: the fallback
# selector can hit several tags, and stringifying the whole node set would
# glue their contents together.
#
# @return [String] the description, or "" when no tag is present.
def parse_description
  # Query once and reuse the node set instead of selecting twice.
  og_description = @page.css('//meta[property="og:description"]/@content')
  return og_description.first.to_s unless og_description.empty?

  @page.css('//meta[name$="description"]/@content').first.to_s
end
parse_image()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 103
#
# Build the PanchiraImage for the page.
#
# The URL comes from +parse_image_url+; width and height are taken from
# whatever FastImage.size returns for that URL.
#
# @return [PanchiraImage] image with url, width and height assigned.
def parse_image
  PanchiraImage.new.tap do |picture|
    picture.url = parse_image_url
    picture.width, picture.height = FastImage.size(picture.url)
  end
end
parse_image_url()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 111
#
# Read the image URL from the first og:image meta tag of @page.
#
# @return [String] the tag's content, or "" when there is no og:image tag.
def parse_image_url
  og_images = @page.css('//meta[property="og:image"]/@content')
  og_images.first.to_s
end
parse_resolver()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 131
#
# Name the resolver that produced the result.
#
# @return [String] the receiver's class converted with +to_s+.
def parse_resolver
  resolver_class = self.class
  resolver_class.to_s
end
parse_title()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 87
#
# Extract the page title from @page.
#
# Prefers the og:title meta tag and falls back to the document's own
# title. Only the first og:title is used, so a page that repeats the tag
# does not yield the titles concatenated together.
#
# @return [String] the title, or "" when the page has neither.
def parse_title
  # Query once and reuse the node set instead of selecting twice.
  og_title = @page.css('//meta[property="og:title"]/@content')
  return og_title.first.to_s unless og_title.empty?

  @page.title.to_s
end
user_agent()
click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 135
#
# Value sent in the User-Agent request header.
#
# @return [String] a bot identifier that embeds VERSION and the project URL.
def user_agent
  "Mozilla/5.0 (compatible; PanchiraBot/#{VERSION}; +https://github.com/nuita/panchira)"
end