class Gembuild::GemScraper

This class is used to query for various information from rubygems.org.

@!attribute [r] agent

@return [Mechanize] the Mechanize agent

@!attribute [r] deps

@return [String] the rubygems URL for getting dependency information

@!attribute [r] gem

@return [String] the rubygems URL for the frontend

@!attribute [r] gemname

@return [String] the rubygem about which to query

@!attribute [r] url

@return [String] the rubygems URL to get version information

Attributes

agent[R]
deps[R]
gem[R]
gemname[R]
url[R]

Public Class Methods

new(gemname) click to toggle source

Creates a new GemScraper instance

@raise [Gembuild::UndefinedGemNameError] if the gemname is nil or empty

@example Create a new GemScraper object

Gembuild::GemScraper.new('mina')
# => #<Gembuild::GemScraper:0x00000002f8a500
#  @agent=
#   #<Mechanize
#    #<Mechanize::CookieJar:0x00000002f8a410
#     @store=
#      #<HTTP::CookieJar::HashStore:0x00000002f8a370
#       @gc_index=0,
#       @gc_threshold=150,
#       @jar={},
#       @logger=nil,
#       @mon_count=0,
#       @mon_mutex=#<Mutex:0x00000002f8a320>,
#       @mon_owner=nil>>
#    nil>,
#  @deps="https://rubygems.org/api/v1/dependencies?gems=mina",
#  @gem="https://rubygems.org/gems/mina",
#  @gemname="mina",
#  @url="https://rubygems.org/api/v1/versions/mina.json">

@param gemname [String] The gem about which to query.

@return [Gembuild::GemScraper] a new GemScraper instance

# File lib/gembuild/gem_scraper.rb, line 65
# Sets up a scraper for one gem: stores the name, creates the Mechanize
# agent and precomputes the three rubygems.org URLs used by the queries.
#
# @param gemname [String] the gem about which to query
# @raise [Gembuild::UndefinedGemNameError] if gemname is nil or empty
def initialize(gemname)
  name_missing = gemname.nil? || gemname.empty?
  raise Gembuild::UndefinedGemNameError if name_missing

  @gemname = gemname
  @agent = Mechanize.new

  # Version API, dependency API and human-facing frontend, in that order.
  @url = "https://rubygems.org/api/v1/versions/#{gemname}.json"
  @deps = "https://rubygems.org/api/v1/dependencies?gems=#{gemname}"
  @gem = "https://rubygems.org/gems/#{gemname}"
end

Public Instance Methods

format_description_from_response(response) click to toggle source

Gets a well-formed gem description from the parsed response.

@param response [Hash] The JSON parsed results from rubygems.org.

@return [String] the gem description or summary ending in a full-stop

# File lib/gembuild/gem_scraper.rb, line 125
# Gets a well-formed, single-line gem description from the parsed response.
#
# The :description value is used unless it is nil or empty, in which case
# the :summary value is used instead. A nil value is treated like an empty
# string rather than raising NoMethodError.
#
# @param response [Hash] The JSON parsed results from rubygems.org.
# @return [String] the gem description or summary ending in a full-stop
# @raise [KeyError] if :description (or :summary, when the fallback is
#   needed) is absent from the response
def format_description_from_response(response)
  description = response.fetch(:description).to_s
  description = response.fetch(:summary).to_s if description.empty?

  # Replace any run of whitespace, including newlines and tabs (which would
  # mess up a PKGBUILD), with a single space, then trim both ends.
  description = description.gsub(/[[:space:]]+/, ' ').strip

  # Ensure that the description ends in a full-stop.
  description.end_with?('.') ? description : "#{description}."
end
get_checksum_from_response(response) click to toggle source

Gets the sha256 checksum returned from the rubygems.org API.

@param response [Hash] The JSON parsed results from rubygems.org.

@return [String] the sha256 sum of the gem file

# File lib/gembuild/gem_scraper.rb, line 143
# Reads the sha256 checksum out of the parsed rubygems.org response.
#
# @param response [Hash] The JSON parsed results from rubygems.org.
# @return [String] the value stored under the :sha key
# @raise [KeyError] if the response hash has no :sha key
def get_checksum_from_response(response)
  sha256 = response.fetch(:sha)
  sha256
end
get_dependencies_for_version(version) click to toggle source

Get all other gem dependencies for the given version.

@param version [String|Gem::Version] The version for which to get the dependencies.

@return [Array] list of other gems upon which the gem depends

# File lib/gembuild/gem_scraper.rb, line 160
# Get all other gem dependencies for the given version.
#
# @param version [String, Gem::Version] The version for which to get the
#   dependencies.
# @return [Array] the first element of every dependency entry recorded for
#   that version (presumably the gem name — verify against the API payload)
# @raise [ArgumentError] if the dependency payload has no entry for the
#   requested version
def get_dependencies_for_version(version)
  # Gem::Version.create returns Gem::Version instances untouched and parses
  # anything else, so both documented input types end up comparable.
  version = Gem::Version.create(version)

  # NOTE(security): Marshal.load on a network response can instantiate
  # arbitrary objects; this is only as trustworthy as the endpoint in `deps`.
  payload = Marshal.load(agent.get(deps).body)

  entry = payload.find do |candidate|
    Gem::Version.new(candidate[:number]) == version
  end

  # Fail with a clear message instead of a NoMethodError on nil.
  if entry.nil?
    raise ArgumentError,
          "no dependency information for version #{version} at #{deps}"
  end

  entry[:dependencies].map(&:first)
end
get_licenses_from_response(response) click to toggle source

Get the array of licenses under which the gem is licensed.

@param response [Hash] The JSON parsed results from rubygems.org.

@return [Array] the licenses for the gem

# File lib/gembuild/gem_scraper.rb, line 151
# Get the array of licenses under which the gem is licensed.
#
# The value is passed through Array() so the documented Array return type
# holds even when :licenses is nil (becomes []) or a single String (becomes a
# one-element array).
# NOTE(review): presumably the API can report null for gems without license
# metadata — confirm against real responses.
#
# @param response [Hash] The JSON parsed results from rubygems.org.
# @return [Array] the licenses for the gem
# @raise [KeyError] if the response hash has no :licenses key
def get_licenses_from_response(response)
  Array(response.fetch(:licenses))
end
get_version_from_response(response) click to toggle source

Gets the version number from the parsed response.

@param response [Hash] The JSON parsed results from rubygems.org.

@return [Gem::Version] the current version of the gem

# File lib/gembuild/gem_scraper.rb, line 117
# Wraps the :number entry of the parsed response in a Gem::Version.
#
# @param response [Hash] The JSON parsed results from rubygems.org.
# @return [Gem::Version] the version built from the :number value
# @raise [KeyError] if the response hash has no :number key
def get_version_from_response(response)
  number = response.fetch(:number)
  Gem::Version.new(number)
end
query_latest_version() click to toggle source

Query the rubygems version api for the latest version.

@raise [Gembuild::GemNotFoundError] if the page returns a 404 (not found) error.

@example Query rubygems.org for version information

s = Gembuild::GemScraper.new('mina')
s.query_latest_version
# => {:authors=>"Rico Sta. Cruz, Michael Galero",
#  :built_at=>"2015-07-08T00:00:00.000Z",
#  :created_at=>"2015-07-08T13:13:33.292Z",
#  :description=>"Really fast deployer and server automation tool.",
#  :downloads_count=>18709,
#  :metadata=>{},
#  :number=>"0.3.7",
#  :summary=>"Really fast deployer and server automation tool.",
#  :platform=>"ruby",
#  :ruby_version=>">= 0",
#  :prerelease=>false,
#  :licenses=>[],
#  :requirements=>[],
#  :sha=>
#   "bd1fa2b56ed1aded882a12f6365a04496f5cf8a14c07f8c4f1f3cfc944ef34f6"
# }

@return [Hash] the information about the latest version of the gem

# File lib/gembuild/gem_scraper.rb, line 102
# Query the rubygems version api for the latest non-prerelease version.
#
# The body fetched from `url` is parsed as JSON with symbolized keys and the
# first entry whose :prerelease flag is not truthy is returned.
#
# @return [Hash] the information about the latest version of the gem
# @raise [Gembuild::GemNotFoundError] if Mechanize reports a response code
#   error, or if the list holds no non-prerelease entry (including an empty
#   list)
def query_latest_version
  versions = JSON.parse(agent.get(url).body, symbolize_names: true)

  # Skip any release marked as a "prerelease". Using find (rather than
  # shifting the list) cannot run off the end when nothing qualifies.
  latest = versions.find { |release| !release[:prerelease] }
  raise Gembuild::GemNotFoundError if latest.nil?

  latest
rescue Mechanize::ResponseCodeError
  # Net::HTTPNotFound was previously listed here too, but it is a response
  # class rather than an exception, so it could never match a raised error.
  raise Gembuild::GemNotFoundError
end
scrape!() click to toggle source

Quick method to get all important information in a single hash for later processing.

@return [Hash] hash containing all the information available from the rubygems.org APIs and website
# File lib/gembuild/gem_scraper.rb, line 191
# Collects everything the other query helpers provide into one hash.
#
# The latest version is queried once; the remaining values are derived from
# that response, from the dependency lookup for that version, and from the
# frontend homepage scrape.
#
# @return [Hash] hash with the keys :version, :description, :checksum,
#   :license, :dependencies and :homepage
def scrape!
  latest = query_latest_version
  number = get_version_from_response(latest)

  details = { version: number }
  details[:description] = format_description_from_response(latest)
  details[:checksum] = get_checksum_from_response(latest)
  details[:license] = get_licenses_from_response(latest)
  details[:dependencies] = get_dependencies_for_version(number)
  details[:homepage] = scrape_frontend_for_homepage_url
  details
end
scrape_frontend_for_homepage_url() click to toggle source

Scrape the rubygems.org frontend for the gem’s homepage URL.

@return [String] the homepage URL of the gem

# File lib/gembuild/gem_scraper.rb, line 175
# Scrape the rubygems.org frontend for the gem's homepage URL.
#
# Fetches the page at `gem`, parses it with Nokogiri and looks for the first
# anchor whose stripped text is exactly 'Homepage'.
#
# @return [String, nil] the href of that anchor, or nil when the page has no
#   such anchor (previously this case raised NoMethodError on nil)
def scrape_frontend_for_homepage_url
  html = agent.get(gem).body
  anchors = Nokogiri::HTML(html).css('a')

  homepage_link = anchors.find { |anchor| anchor.text.strip == 'Homepage' }

  # A page without a homepage anchor yields nil rather than crashing.
  homepage_link && homepage_link[:href]
end