class Bibliothecary::Parsers::Pypi

Constants

INSTALL_REGEXP
MANIFEST_REGEXP
NoEggSpecified

While the thing in the repo that PyPI is using might be either in egg format or wheel format, PyPI uses “egg” in the fragment of the VCS URL to specify what package in the PyPI index the VCS URL should be treated as.

PEP_508_NAME_REGEXP

Adapted from peps.python.org/pep-0508/#names

PIP_COMPILE_REGEXP

TODO: can this be a more specific regexp so it doesn’t match something like “.yarn/cache/create-require-npm-1.0.0.zip”?

REQUIREMENTS_REGEXP
REQUIRE_REGEXP

Capture Group 1 is package. Optional Group 2 is [extras]. Capture Group 3 is Version

Public Class Methods

map_dependencies(packages, type) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 147
# Turns a collection of (name, info) pairs into Dependency objects that all
# share the given +type+. Each requirement is derived with map_requirements.
# A nil (or false) +packages+ yields an empty array.
def self.map_dependencies(packages, type)
  (packages || []).map do |package_name, package_info|
    requirement = map_requirements(package_info)
    Dependency.new(name: package_name, requirement: requirement, type: type)
  end
end
map_requirements(info) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 158
# Normalises a dependency description into a requirement string.
#
# * Non-Hash values are returned unchanged.
# * A Hash with "version" returns that version.
# * A Hash with "git" returns "<git>#<ref>", or just "<git>" when no "ref" is
#   given (previously a missing ref raised TypeError from String#+).
# * Any other Hash returns "*".
def self.map_requirements(info)
  return info unless info.is_a?(Hash)
  return info["version"] if info["version"]

  git_url = info["git"]
  return "*" unless git_url

  ref = info["ref"]
  # Only append the fragment when a ref is actually pinned.
  ref ? "#{git_url}##{ref}" : git_url
end
mapping() click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 21
# Returns the table that associates filename matchers with parser settings.
# Keys are whatever match_filename / match_filenames return, or lambdas that
# test a path against PIP_COMPILE_REGEXP / MANIFEST_REGEXP. Values name the
# parser method and say whether the file is a "manifest" or a "lockfile".
def self.mapping
  purpose_specific_requirements = match_filenames(
    "requirements-dev.txt", "requirements/dev.txt",
    "requirements-docs.txt", "requirements/docs.txt",
    "requirements-test.txt", "requirements/test.txt",
    "requirements-tools.txt", "requirements/tools.txt"
  )
  pip_compile_candidate = ->(path) { PIP_COMPILE_REGEXP.match(path) }
  requirements_manifest = ->(path) { MANIFEST_REGEXP.match(path) }

  {
    purpose_specific_requirements => { kind: "manifest", parser: :parse_requirements_txt },
    # Only treated as a lockfile when the content looks like pip-compile output.
    pip_compile_candidate => { content_matcher: :pip_compile?, kind: "lockfile", parser: :parse_requirements_txt },
    requirements_manifest => { kind: "manifest", parser: :parse_requirements_txt, can_have_lockfile: false },
    # Some projects store frozen deps in requirements.frozen.
    match_filename("requirements.frozen") => { parser: :parse_requirements_txt, kind: "lockfile" },
    # Inferred from pip.
    match_filename("pip-resolved-dependencies.txt") => { kind: "lockfile", parser: :parse_requirements_txt },
    # Exported from pipdeptree --json.
    match_filename("pip-dependency-graph.json") => { kind: "lockfile", parser: :parse_dependency_tree_json },
    match_filename("setup.py") => { kind: "manifest", parser: :parse_setup_py, can_have_lockfile: false },
    match_filename("Pipfile") => { kind: "manifest", parser: :parse_pipfile },
    match_filename("Pipfile.lock") => { kind: "lockfile", parser: :parse_pipfile_lock },
    match_filename("pyproject.toml") => { kind: "manifest", parser: :parse_pyproject },
    match_filename("poetry.lock") => { kind: "lockfile", parser: :parse_poetry_lock },
    # Pip dependencies can be embedded in conda environment files.
    match_filename("environment.yml") => { parser: :parse_conda, kind: "manifest" },
    match_filename("environment.yaml") => { parser: :parse_conda, kind: "manifest" },
    match_filename("environment.yml.lock") => { parser: :parse_conda, kind: "lockfile" },
    match_filename("environment.yaml.lock") => { parser: :parse_conda, kind: "lockfile" },
  }
end
parse_conda(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 136
# Extracts the pip requirements embedded in a conda environment file and
# passes them, joined by newlines, to Pypi.parse_requirements_txt.
#
# Returns [] when the YAML document is empty or not a mapping, when it has no
# "dependencies" list, or when that list has no entry with a "pip" key.
# Previously the last two shapes raised NoMethodError / TypeError.
def self.parse_conda(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  contents = YAML.safe_load(file_contents)
  return [] unless contents.is_a?(Hash)

  # "dependencies" may be absent, or present but empty (which loads as nil).
  dependencies = contents["dependencies"]
  return [] unless dependencies.is_a?(Array)

  pip = dependencies.find { |dep| dep.is_a?(Hash) && dep["pip"] }
  return [] unless pip

  # Array() tolerates a single requirement written as a scalar.
  Pypi.parse_requirements_txt(Array(pip["pip"]).join("\n"))
end
parse_dependency_tree_json(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 233
# Parses +file_contents+ as JSON and converts every entry into a "runtime"
# Dependency, taking the name from package.package_name and the requirement
# from package.installed_version. Duplicate dependencies are removed.
def self.parse_dependency_tree_json(file_contents, options: {})
  entries = JSON.parse(file_contents)

  dependencies = entries.map do |entry|
    package_name = entry.dig("package", "package_name")
    installed_version = entry.dig("package", "installed_version")
    Dependency.new(name: package_name, requirement: installed_version, type: "runtime")
  end

  dependencies.uniq
end
parse_pep_508_dep_spec(dep) click to toggle source

Parses out only the name of a PEP 508 dependency specification (see peps.python.org/pep-0508/). The rest of the specification is left as-is, with any leading semicolons or spaces stripped.

# File lib/bibliothecary/parsers/pypi.rb, line 304
# Splits a dependency specification into [name, requirement] using
# PEP_508_NAME_REGEXP. Leading whitespace and semicolons are removed from the
# requirement, and an empty requirement becomes "*".
def self.parse_pep_508_dep_spec(dep)
  pieces = dep.split(PEP_508_NAME_REGEXP, 2)
  name, remainder = pieces.last(2).map(&:strip)
  requirement = remainder.sub(/^[\s;]*/, "")
  [name, requirement.empty? ? "*" : requirement]
end
parse_pipfile(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 97
# Parses a Pipfile (TOML): entries under "packages" become "runtime"
# dependencies and entries under "dev-packages" become "develop"
# dependencies, returned in that order.
def self.parse_pipfile(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  pipfile = Tomlrb.parse(file_contents)
  runtime_deps = map_dependencies(pipfile["packages"], "runtime")
  develop_deps = map_dependencies(pipfile["dev-packages"], "develop")
  runtime_deps + develop_deps
end
parse_pipfile_lock(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 172
# Parses a Pipfile.lock (JSON). Every top-level section except "_meta" is
# treated as a dependency group; the "default" group is reported as "runtime"
# and any other group keeps its own name as the dependency type.
def self.parse_pipfile_lock(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  lockfile = JSON.parse(file_contents)

  lockfile.flat_map do |section, packages|
    next [] if section == "_meta"

    dependency_type = section == "default" ? "runtime" : section
    packages.map do |package_name, package_info|
      Dependency.new(
        name: package_name,
        requirement: map_requirements(package_info),
        type: dependency_type,
      )
    end
  end
end
parse_poetry(file_contents, options: {}) click to toggle source

TODO: this was deprecated in 8.6.0. Remove this in any major version bump >= 9.*

# File lib/bibliothecary/parsers/pypi.rb, line 131
# Deprecated alias for parse_pyproject; prints a deprecation warning and
# delegates.
#
# +options+ is forwarded as the +options:+ keyword. Passing it positionally
# (as before) raises ArgumentError on Ruby 3, because parse_pyproject accepts
# only one positional argument.
def self.parse_poetry(file_contents, options: {})
  puts "Warning: parse_poetry() is deprecated, use parse_pyproject() instead."
  parse_pyproject(file_contents, options: options)
end
parse_poetry_lock(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 189
# Parses a poetry.lock (TOML) into one Dependency per "package" entry.
# Packages whose "category" is "dev" are typed "develop"; all others are
# typed "runtime". The requirement comes from map_requirements(package).
#
# A lock file without any "package" entries yields [] instead of raising
# NoMethodError on nil.
def self.parse_poetry_lock(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  manifest = Tomlrb.parse(file_contents)

  manifest.fetch("package", []).map do |package|
    Dependency.new(
      name: package["name"],
      requirement: map_requirements(package),
      type: package["category"] == "dev" ? "develop" : "runtime",
    )
  end
end
parse_pyproject(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 102
# Parses a pyproject.toml and collects dependencies from both the Poetry
# tables under [tool.poetry] and the PEP 621 [project] table. The two sources
# are combined rather than treated as mutually exclusive, and duplicates are
# removed from the result.
def self.parse_pyproject(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  pyproject = Tomlrb.parse(file_contents)
  collected = []

  # Poetry: [tool.poetry] dependencies.
  poetry = pyproject.fetch("tool", {}).fetch("poetry", {})
  collected.concat(map_dependencies(poetry["dependencies"], "runtime"))
  # Poetry 1.0.0-1.2.0 declared dev deps under "dev-dependencies".
  collected.concat(map_dependencies(poetry["dev-dependencies"], "develop"))
  # Poetry 1.2.0+ declares them per group; the "dev" group is typed "develop"
  # and every other group keeps its own name.
  poetry.fetch("group", {}).each do |group_name, group|
    dependency_type = group_name == "dev" ? "develop" : group_name
    collected.concat(map_dependencies(group.fetch("dependencies", {}), dependency_type))
  end

  # PEP 621: [project] dependencies, as [name, requirement] pairs.
  pep621_specs = pyproject.fetch("project", {}).fetch("dependencies", [])
  pep621_pairs = pep621_specs.map { |spec| parse_pep_508_dep_spec(spec) }
  collected.concat(map_dependencies(pep621_pairs, "runtime"))

  collected.uniq
end
parse_requirements_txt(file_contents, options: {}) click to toggle source

Parses a requirements.txt file, following pip's requirement-specifier format (pip.pypa.io/en/stable/cli/pip_install/#requirement-specifiers) and its Git VCS URL format (pip.pypa.io/en/stable/topics/vcs-support/#git). Invalid lines in requirements.txt are skipped.

# File lib/bibliothecary/parsers/pypi.rb, line 249
# Parses requirements.txt-style content into a de-duplicated list of
# Dependency objects.
#
# The dependency type is chosen from options[:filename]: names matching
# /dev/, /docs/ or /tools/ give "development", names matching /test/ give
# "test", and anything else (including no filename) gives "runtime".
#
# Lines containing "://" are handed to parse_requirements_txt_url and skipped
# when that raises URI::Error or NoEggSpecified. Other lines are matched
# against REQUIREMENTS_REGEXP after removing spaces; non-matching lines are
# ignored.
def self.parse_requirements_txt(file_contents, options: {})
  type = case options[:filename]
         # The patterns must be comma-separated: `/dev/ || /docs/ || /tools/`
         # evaluates to just /dev/, so docs/tools files were typed "runtime".
         when /dev/, /docs/, /tools/
           "development"
         when /test/
           "test"
         else
           "runtime"
         end

  deps = []
  file_contents.split("\n").each do |line|
    if line["://"]
      begin
        result = parse_requirements_txt_url(line, type)
      rescue URI::Error, NoEggSpecified
        next
      end

      deps << result
    elsif (match = line.delete(" ").match(REQUIREMENTS_REGEXP))
      deps << Dependency.new(
        name: match[1],
        requirement: match[-1],
        type: type,
      )
    end
  end

  deps.uniq
end
parse_requirements_txt_url(url, type=nil) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 281
# Builds a Dependency from a URL requirement line. The name is read from the
# "egg=" entry at the start of the URL fragment and the requirement is
# whatever follows an "@" in the URL path (nil when there is none).
#
# Raises NoEggSpecified when the URL has no fragment or the fragment does not
# start with an egg name. URI.parse may raise for malformed URLs.
def self.parse_requirements_txt_url(url, type=nil)
  parsed = URI.parse(url)
  fragment = parsed.fragment
  egg_name = fragment && fragment[/^egg=([^&]+)([&]|$)/, 1]
  raise NoEggSpecified, "No egg specified in #{url}" unless egg_name

  Dependency.new(name: egg_name, requirement: parsed.path[/@(.+)$/, 1], type: type)
end
parse_setup_py(file_contents, options: {}) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 210
# Extracts "runtime" dependencies from the text of a setup.py. The section
# captured by INSTALL_REGEXP is split into one requirement per line; lines
# starting with "#" and lines not matching REQUIRE_REGEXP are ignored.
# Returns [] when INSTALL_REGEXP does not match at all.
def self.parse_setup_py(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  install_section = file_contents.match(INSTALL_REGEXP)
  return [] unless install_section

  # Put requirements written on one line as 'a', 'b' onto separate lines.
  requirement_lines = install_section[1].gsub(/',(\s)?'/, "\n").split("\n")

  requirement_lines.each_with_object([]) do |line, deps|
    next if line.match(/^#/)

    requirement = line.match(REQUIRE_REGEXP)
    next unless requirement

    deps << Dependency.new(name: requirement[1], requirement: requirement[-1], type: "runtime")
  end
end
pip_compile?(file_contents) click to toggle source
# File lib/bibliothecary/parsers/pypi.rb, line 293
# Reports whether +file_contents+ carries the header that pip-compile writes
# into the files it generates. Any exception raised by the check results in
# false.
def self.pip_compile?(file_contents)
  pip_compile_banner = "This file is autogenerated by pip-compile"
  file_contents.include?(pip_compile_banner)
rescue Exception # rubocop:disable Lint/RescueException
  # Exception (not StandardError) is rescued on purpose: native libs can throw
  # a non-StandardError, and the matching phase must never raise. Errors are
  # only meant to surface during parsing, after a file has matched.
  false
end