class Panchira::Resolver

Resolver is the class that actually retrieves a page's attributes by fetching the designated URL. It is the default resolver for pages.
To create your own resolver, first define a class that inherits from Resolver, then register it with ::Panchira::Extensions::register(). After that, ::Panchira::fetch will pick up your resolver whenever its applicable?() is true for the URL.

Constants

URL_REGEXP

The URL pattern that a resolver tries to resolve. Subclasses must override this to limit which URLs they resolve.

Public Class Methods

applicable?(url) click to toggle source

Tells whether the URL is applicable to this resolver. ::Panchira::fetch uses this method to choose a Resolver for a URL.

# File lib/panchira/resolvers/resolver.rb, line 46
# Reports whether this resolver can handle +url+.
#
# The pattern is read from URL_REGEXP on the receiver, so each subclass is
# tested against its own constant. The result is whatever =~ yields: the
# index of the first match, or nil when the URL does not match.
def applicable?(url)
  pattern = self::URL_REGEXP
  url =~ pattern
end
new(url) click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 14
# Remembers +url+ in @url for later use by the instance methods.
# Nothing is fetched at construction time.
def initialize(url)
  @url = url
end

Public Instance Methods

fetch() click to toggle source

This method is called right after the Resolver instance is created. It fetches the page at @url and returns a PanchiraResult.

# File lib/panchira/resolvers/resolver.rb, line 20
# Builds and returns the PanchiraResult for @url.
#
# Steps, in order:
# 1. load the page at @url into @page;
# 2. work out the canonical URL via parse_canonical_url;
# 3. reload @page from the canonical URL when it differs from @url;
# 4. fill the remaining attributes from the parse_* methods.
#
# When the resolver responds to parse_authors (private methods included),
# +authors+ is set from it; otherwise +author+ is set from parse_author.
def fetch
  PanchiraResult.new.tap do |res|
    @page = fetch_page(@url)
    res.canonical_url = parse_canonical_url

    # Make sure the attributes below are read from the canonical page.
    @page = fetch_page(res.canonical_url) unless @url == res.canonical_url

    res.title = parse_title
    res.description = parse_description
    res.image = parse_image
    res.tags = parse_tags

    if respond_to?(:parse_authors, true)
      res.authors = parse_authors
    else
      res.author = parse_author
    end

    res.circle = parse_circle
    res.resolver = parse_resolver
  end
end

Private Instance Methods

fetch_page(url) click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 53
# Downloads +url+ and returns it parsed by Nokogiri::HTML.
#
# The request carries the resolver's User-Agent and Cookie headers, and the
# charset reported for the response is handed to the parser.
# NOTE(review): +cookie+ is not defined in the code shown here; it is
# presumably provided elsewhere in the class -- confirm.
def fetch_page(url)
  headers = { 'User-Agent' => user_agent, 'Cookie' => cookie }
  response = URI.parse(url).read(headers)

  Nokogiri::HTML.parse(response, url, response.charset)
end
parse_author() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 123
# Returns the content of the page's first <meta name="author"> tag as a
# String, or an empty String when the page has none.
def parse_author
  author_attr = @page.css('//meta[name="author"]/@content').first
  author_attr.to_s
end
parse_canonical_url() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 64
# Follows the canonical URL declared by the current page until it converges,
# and returns the final URL as a String.
#
# Each round reads the first <link rel="canonical"> href of @page, falling
# back to the first og:url meta content, then:
# * returns the last URL followed (or @url if none) when nothing is declared;
# * returns the declared URL when it was already visited or when more than
#   five URLs have been followed;
# * otherwise fetches the declared URL into @page and repeats.
#
# Side effect: @page is replaced by the last page fetched here.
# NOTE(review): the declared URL is passed to fetch_page unchanged, so a
# relative canonical href is not resolved against the page URL.
def parse_canonical_url
  history = []

  loop do
    # Only the first match is used: stringifying the whole result set would
    # glue the values of duplicated tags together into a single bogus URL.
    declared = @page.css('//link[rel="canonical"]/@href').first.to_s
    declared = @page.css('//meta[property="og:url"]/@content').first.to_s if declared.empty?

    return history.last || @url if declared.empty?

    # Stop on convergence/cycles, and cap the number of hops.
    return declared if history.include?(declared) || history.length > 5

    history.push(declared)
    @page = fetch_page(declared)
  end
end
parse_circle() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 127
# Returns the value stored in the result's +circle+ attribute.
# This implementation has no circle information and always returns nil.
def parse_circle
  nil
end
parse_description() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 95
# Returns the page description as a String.
#
# Prefers the first og:description meta content; when the page has none,
# falls back to the first meta tag whose name ends with "description".
# Returns an empty String when neither is present.
def parse_description
  # Use the first match only: stringifying the whole result set would
  # concatenate the contents of duplicated tags.
  og_description = @page.css('//meta[property="og:description"]/@content').first
  return og_description.to_s if og_description

  @page.css('//meta[name$="description"]/@content').first.to_s
end
parse_image() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 103
# Builds the PanchiraImage for the current page.
#
# The URL comes from parse_image_url; width and height are taken from
# FastImage.size for that URL.
def parse_image
  PanchiraImage.new.tap do |img|
    img.url = parse_image_url
    img.width, img.height = FastImage.size(img.url)
  end
end
parse_image_url() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 111
# Returns the content of the page's first og:image meta tag as a String,
# or an empty String when the page has none.
def parse_image_url
  og_image = @page.css('//meta[property="og:image"]/@content').first
  og_image.to_s
end
parse_resolver() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 131
# Returns the name of the resolver class that produced the result, i.e. the
# receiver's class converted to a String.
def parse_resolver
  resolver_class = self.class
  resolver_class.to_s
end
parse_tags() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 115
# Returns the value stored in the result's +tags+ attribute.
# This implementation extracts no tags and always returns an empty Array.
def parse_tags
  []
end
parse_title() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 87
# Returns the page title as a String.
#
# Prefers the first og:title meta content; when the page has none, falls
# back to the document's own title (empty String if that is missing too).
def parse_title
  # Use the first match only: stringifying the whole result set would
  # concatenate the contents of duplicated og:title tags.
  og_title = @page.css('//meta[property="og:title"]/@content').first
  return og_title.to_s if og_title

  @page.title.to_s
end
user_agent() click to toggle source
# File lib/panchira/resolvers/resolver.rb, line 135
# Returns the User-Agent header value sent with page requests. It identifies
# the client as PanchiraBot, embeds VERSION, and links to the project page.
def user_agent
  bot_token = "PanchiraBot/#{VERSION}"
  "Mozilla/5.0 (compatible; #{bot_token}; +https://github.com/nuita/panchira)"
end