class Bibliothecary::Parsers::Pypi
Constants
- INSTALL_REGEXP
- MANIFEST_REGEXP
- NoEggSpecified
While the thing in the repo that PyPI is using might be either in egg format or wheel format, PyPI uses “egg” in the fragment of the VCS URL to specify what package in the PyPI index the VCS URL should be treated as.
- PEP_508_NAME_REGEXP
Adapted from peps.python.org/pep-0508/#names
- PIP_COMPILE_REGEXP
TODO: can this be a more specific regexp so it doesn’t match something like “.yarn/cache/create-require-npm-1.0.0.zip”?
- REQUIREMENTS_REGEXP
- REQUIRE_REGEXP
Capture Group 1 is package. Optional Group 2 is [extras]. Capture Group 3 is Version
Public Class Methods
# File lib/bibliothecary/parsers/pypi.rb, line 147
# Builds one Dependency per entry of +packages+.
#
# packages - collection yielding name/info pairs (e.g. a Hash, or an Array of
#            two-element arrays); nil or false yields an empty result.
# type     - value stored as the type of every Dependency produced.
#
# Returns an Array of Dependency objects whose requirement is whatever
# map_requirements returns for the entry's info.
def self.map_dependencies(packages, type)
  entries = packages || []
  entries.map do |pkg_name, spec|
    Dependency.new(name: pkg_name, requirement: map_requirements(spec), type: type)
  end
end
# File lib/bibliothecary/parsers/pypi.rb, line 158
# Reduces a dependency entry to a single requirement value.
#
# info - either a Hash describing the dependency or any other value.
#
# Returns:
# * +info+ itself when it is not a Hash;
# * info["version"] when that key is truthy;
# * "<git>#<ref>" when info["git"] is set and info["ref"] is present,
#   or just the git value when there is no ref;
# * "*" for any other Hash.
def self.map_requirements(info)
  return info unless info.is_a?(Hash)
  return info["version"] if info["version"]
  return "*" unless info["git"]

  # A git source may omit "ref"; concatenating nil onto a String would raise
  # TypeError, so fall back to the bare repository URL in that case.
  info["ref"] ? "#{info['git']}##{info['ref']}" : info["git"]
end
# File lib/bibliothecary/parsers/pypi.rb, line 21
# Returns the Hash that associates each file matcher with a description of how
# to handle a matching file: its :kind ("manifest" or "lockfile"), the :parser
# method name, and optionally :content_matcher / :can_have_lockfile.
def self.mapping
  {
    match_filenames(
      "requirements-dev.txt", "requirements/dev.txt",
      "requirements-docs.txt", "requirements/docs.txt",
      "requirements-test.txt", "requirements/test.txt",
      "requirements-tools.txt", "requirements/tools.txt"
    ) => {
      kind: "manifest",
      parser: :parse_requirements_txt,
    },
    ->(p) { PIP_COMPILE_REGEXP.match(p) } => {
      content_matcher: :pip_compile?,
      kind: "lockfile",
      parser: :parse_requirements_txt,
    },
    ->(p) { MANIFEST_REGEXP.match(p) } => {
      kind: "manifest",
      parser: :parse_requirements_txt,
      can_have_lockfile: false,
    },
    match_filename("requirements.frozen") => {
      # pattern exists to store frozen deps in requirements.frozen
      parser: :parse_requirements_txt,
      kind: "lockfile",
    },
    match_filename("pip-resolved-dependencies.txt") => {
      # Inferred from pip
      kind: "lockfile",
      parser: :parse_requirements_txt,
    },
    match_filename("pip-dependency-graph.json") => {
      # Exported from pipdeptree --json
      kind: "lockfile",
      parser: :parse_dependency_tree_json,
    },
    match_filename("setup.py") => {
      kind: "manifest",
      parser: :parse_setup_py,
      can_have_lockfile: false,
    },
    match_filename("Pipfile") => {
      kind: "manifest",
      parser: :parse_pipfile,
    },
    match_filename("Pipfile.lock") => {
      kind: "lockfile",
      parser: :parse_pipfile_lock,
    },
    match_filename("pyproject.toml") => {
      kind: "manifest",
      parser: :parse_pyproject,
    },
    match_filename("poetry.lock") => {
      kind: "lockfile",
      parser: :parse_poetry_lock,
    },
    # Pip dependencies can be embedded in conda environment files
    match_filename("environment.yml") => {
      parser: :parse_conda,
      kind: "manifest",
    },
    match_filename("environment.yaml") => {
      parser: :parse_conda,
      kind: "manifest",
    },
    match_filename("environment.yml.lock") => {
      parser: :parse_conda,
      kind: "lockfile",
    },
    match_filename("environment.yaml.lock") => {
      parser: :parse_conda,
      kind: "lockfile",
    },
  }
end
# File lib/bibliothecary/parsers/pypi.rb, line 136
# Extracts the pip requirements embedded in a conda environment file.
#
# file_contents - String holding the YAML document.
# options       - accepted for interface compatibility; not used here.
#
# Looks for the first entry of the top-level "dependencies" list that is a
# Hash with a truthy "pip" key, joins its values with newlines and hands the
# result to Pypi.parse_requirements_txt.
#
# Returns that parser's result, or [] when the document is empty, is not a
# mapping, has no "dependencies" list, or has no pip section.
def self.parse_conda(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  contents = YAML.safe_load(file_contents)
  return [] unless contents.is_a?(Hash)

  # An environment file may legitimately omit "dependencies"; calling #find
  # on nil would otherwise raise NoMethodError.
  dependencies = contents["dependencies"]
  return [] unless dependencies.is_a?(Array)

  pip = dependencies.find { |dep| dep.is_a?(Hash) && dep["pip"] }
  return [] unless pip

  # Array() tolerates a single scalar requirement as well as a list.
  Pypi.parse_requirements_txt(Array(pip["pip"]).join("\n"))
end
# File lib/bibliothecary/parsers/pypi.rb, line 233
# Parses a JSON array of package entries into runtime dependencies.
#
# file_contents - String holding the JSON document.
# options       - accepted for interface compatibility; not used here.
#
# Each entry contributes its "package" => "package_name" as the name and
# "package" => "installed_version" as the requirement.
#
# Returns an Array of Dependency objects with duplicates removed.
def self.parse_dependency_tree_json(file_contents, options: {})
  entries = JSON.parse(file_contents)
  deps = entries.map do |entry|
    Dependency.new(
      name: entry.dig("package", "package_name"),
      requirement: entry.dig("package", "installed_version"),
      type: "runtime",
    )
  end
  deps.uniq
end
Parses just the package name out of a PEP 508 dependency specification (peps.python.org/pep-0508/).
The remainder is left as-is, with any leading semicolons or spaces stripped.
# File lib/bibliothecary/parsers/pypi.rb, line 304
# Splits a dependency specification string into a name and a requirement.
#
# dep - String specification; it is split (at most once) on
#       PEP_508_NAME_REGEXP and the last two pieces are taken as name and
#       requirement, each stripped of surrounding whitespace.
#
# Returns a two-element Array [name, requirement]. Leading whitespace and
# semicolons are removed from the requirement, and an empty requirement
# becomes "*".
def self.parse_pep_508_dep_spec(dep)
  pieces = dep.split(PEP_508_NAME_REGEXP, 2).last(2).map(&:strip)
  name = pieces[0]
  remainder = pieces[1].sub(/^[\s;]*/, "")
  [name, remainder.empty? ? "*" : remainder]
end
# File lib/bibliothecary/parsers/pypi.rb, line 97
# Parses a Pipfile (TOML) into dependencies.
#
# file_contents - String holding the TOML document.
# options       - accepted for interface compatibility; not used here.
#
# Returns the "packages" table mapped as "runtime" dependencies followed by
# the "dev-packages" table mapped as "develop" dependencies.
def self.parse_pipfile(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  pipfile = Tomlrb.parse(file_contents)
  sections = [["packages", "runtime"], ["dev-packages", "develop"]]
  sections.flat_map { |table, type| map_dependencies(pipfile[table], type) }
end
# File lib/bibliothecary/parsers/pypi.rb, line 172
# Parses a Pipfile.lock (JSON) into dependencies.
#
# file_contents - String holding the JSON document.
# options       - accepted for interface compatibility; not used here.
#
# Every top-level key except "_meta" is treated as a dependency group whose
# entries are name => info pairs. The group name becomes the dependency type,
# with "default" reported as "runtime".
#
# Returns an Array of Dependency objects in document order.
def self.parse_pipfile_lock(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  JSON.parse(file_contents).flat_map do |section, entries|
    next [] if section == "_meta"

    type = section == "default" ? "runtime" : section
    entries.map do |name, info|
      Dependency.new(name: name, requirement: map_requirements(info), type: type)
    end
  end
end
TODO: this was deprecated in 8.6.0. Remove this in any major version bump >= 9.*
# File lib/bibliothecary/parsers/pypi.rb, line 131
# Deprecated entry point kept for backward compatibility; prints a
# deprecation warning and delegates to parse_pyproject.
#
# file_contents - String passed through to parse_pyproject.
# options       - Hash passed through to parse_pyproject.
#
# Returns whatever parse_pyproject returns.
def self.parse_poetry(file_contents, options: {})
  puts "Warning: parse_poetry() is deprecated, use parse_pyproject() instead."
  # parse_pyproject declares +options+ as a keyword argument, so it must be
  # forwarded as one; passing the Hash positionally raises ArgumentError.
  parse_pyproject(file_contents, options: options)
end
# File lib/bibliothecary/parsers/pypi.rb, line 189
# Parses a poetry.lock (TOML) into dependencies.
#
# file_contents - String holding the TOML document.
# options       - accepted for interface compatibility; not used here.
#
# Each entry of the top-level "package" array becomes a Dependency named
# after its "name", with the requirement computed by map_requirements. An
# entry whose "category" is "dev" is typed "develop"; everything else is
# typed "runtime".
#
# Returns an Array of Dependency objects; [] when the document has no
# "package" array.
def self.parse_poetry_lock(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  manifest = Tomlrb.parse(file_contents)

  # A lock file with no locked packages has no "package" key; default to an
  # empty list instead of calling a method on nil.
  manifest.fetch("package", []).map do |package|
    Dependency.new(
      name: package["name"],
      requirement: map_requirements(package),
      type: package["category"] == "dev" ? "develop" : "runtime",
    )
  end
end
# File lib/bibliothecary/parsers/pypi.rb, line 102
# Parses a pyproject.toml into dependencies, combining Poetry and PEP 621
# declarations.
#
# file_contents - String holding the TOML document.
# options       - accepted for interface compatibility; not used here.
#
# Sources read, in order:
# * [tool.poetry] "dependencies"       -> type "runtime"
# * [tool.poetry] "dev-dependencies"   -> type "develop" (Poetry 1.0.0-1.2.0 style)
# * [tool.poetry.group.<name>] "dependencies" -> type <name>, with "dev"
#   reported as "develop" (Poetry 1.2.0+ style)
# * [project] "dependencies"           -> type "runtime", each entry split by
#   parse_pep_508_dep_spec
#
# Returns an Array of Dependency objects with duplicates removed.
def self.parse_pyproject(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  pyproject = Tomlrb.parse(file_contents)
  poetry = pyproject.fetch("tool", {}).fetch("poetry", {})

  runtime_deps = map_dependencies(poetry["dependencies"], "runtime")
  legacy_dev_deps = map_dependencies(poetry["dev-dependencies"], "develop")

  group_deps = poetry.fetch("group", {}).each_pair.flat_map do |group_name, group|
    type = group_name == "dev" ? "develop" : group_name
    map_dependencies(group.fetch("dependencies", {}), type)
  end

  pep621_specs = pyproject
    .fetch("project", {})
    .fetch("dependencies", [])
    .map { |spec| parse_pep_508_dep_spec(spec) }
  pep621_deps = map_dependencies(pep621_specs, "runtime")

  # Poetry and PEP 621 dependencies are deliberately merged rather than
  # treated as mutually exclusive.
  (runtime_deps + legacy_dev_deps + group_deps + pep621_deps).uniq
end
Parses a requirements.txt file, following the pip documentation on requirement specifiers (pip.pypa.io/en/stable/cli/pip_install/#requirement-specifiers) and on Git VCS support (pip.pypa.io/en/stable/topics/vcs-support/#git). Invalid lines in requirements.txt are skipped.
# File lib/bibliothecary/parsers/pypi.rb, line 249
# Parses requirements.txt-style content into dependencies.
#
# file_contents - String with one requirement per line.
# options       - Hash; options[:filename], when present, selects the
#                 dependency type: a name matching /dev/, /docs/ or /tools/
#                 gives "development", one matching /test/ gives "test",
#                 anything else (including no filename) gives "runtime".
#
# Lines containing "://" are handed to parse_requirements_txt_url and skipped
# when that raises URI::Error or NoEggSpecified. Other lines have their spaces
# removed and are matched against REQUIREMENTS_REGEXP; non-matching lines are
# ignored.
#
# Returns an Array of Dependency objects with duplicates removed.
def self.parse_requirements_txt(file_contents, options: {})
  # The patterns must be separate `when` alternatives: `/dev/ || /docs/ || /tools/`
  # is a single expression that evaluates to /dev/ only.
  type = case options[:filename]
         when /dev/, /docs/, /tools/
           "development"
         when /test/
           "test"
         else
           "runtime"
         end

  deps = []
  file_contents.split("\n").each do |line|
    if line["://"]
      begin
        deps << parse_requirements_txt_url(line, type)
      rescue URI::Error, NoEggSpecified
        next
      end
    elsif (match = line.delete(" ").match(REQUIREMENTS_REGEXP))
      deps << Dependency.new(
        name: match[1],
        requirement: match[-1],
        type: type,
      )
    end
  end

  deps.uniq
end
# File lib/bibliothecary/parsers/pypi.rb, line 281
# Builds a Dependency from a URL-style requirement line.
#
# url  - String URL; the package name is read from an "egg=<name>" entry at
#        the start of the URL fragment, and the requirement from whatever
#        follows the last "@"-introduced segment of the URL path (nil when the
#        path has none).
# type - value stored as the Dependency's type (default nil).
#
# Returns a Dependency.
# Raises NoEggSpecified when the URL has no fragment or the fragment does not
# start with an egg name; URI.parse may raise for an invalid URL.
def self.parse_requirements_txt_url(url, type = nil)
  uri = URI.parse(url)
  fragment = uri.fragment
  egg_name = fragment && fragment[/^egg=([^&]+)([&]|$)/, 1]
  raise NoEggSpecified, "No egg specified in #{url}" unless egg_name

  Dependency.new(name: egg_name, requirement: uri.path[/@(.+)$/, 1], type: type)
end
# File lib/bibliothecary/parsers/pypi.rb, line 210
# Extracts runtime dependencies from the text of a setup.py.
#
# file_contents - String holding the setup.py source.
# options       - accepted for interface compatibility; not used here.
#
# The first capture of INSTALL_REGEXP is taken as the requirement list.
# Quote-comma-quote separators in it are turned into newlines, then each
# resulting line is matched against REQUIRE_REGEXP. Lines starting with "#"
# and lines that do not match are skipped.
#
# Returns an Array of Dependency objects typed "runtime"; [] when
# INSTALL_REGEXP does not match.
def self.parse_setup_py(file_contents, options: {}) # rubocop:disable Lint/UnusedMethodArgument
  install_match = file_contents.match(INSTALL_REGEXP)
  return [] unless install_match

  requirement_lines = install_match[1].gsub(/',(\s)?'/, "\n").split("\n")
  requirement_lines.each_with_object([]) do |line, deps|
    next if line.match(/^#/)

    requirement = line.match(REQUIRE_REGEXP)
    next unless requirement

    deps << Dependency.new(
      name: requirement[1],
      requirement: requirement[-1],
      type: "runtime",
    )
  end
end
# File lib/bibliothecary/parsers/pypi.rb, line 293
# Content matcher: reports whether +file_contents+ carries the pip-compile
# "autogenerated" header text.
#
# Returns true or false; any exception raised while checking yields false.
def self.pip_compile?(file_contents)
  file_contents.include?("This file is autogenerated by pip-compile")
rescue Exception # rubocop:disable Lint/RescueException
  # Exception (not just StandardError) is rescued on purpose: native libs can
  # throw a non-StandardError, and matching must never raise - errors should
  # only surface later, during parsing of a matched file.
  false
end