class Spandx::Rubygems::Index

Constants

COMMON_LICENSES

Public Class Methods

new() click to toggle source
# File lib/spandx/rubygems/index.rb, line 8
# Builds an index anchored at Spandx::Rubygems.root.
#
# @dir points at the 'index' entry under the root, and @rubygems_file is a
# DataFile wrapping the 'rubygems.index' entry, created with an empty Hash
# as its default.
def initialize
  index_dir = Spandx::Rubygems.root.join('index')
  optimized_path = Spandx::Rubygems.root.join('rubygems.index')

  @dir = index_dir
  @rubygems_file = DataFile.new(optimized_path, default: {})
end

Public Instance Methods

each() { |key, value| ... } click to toggle source
# File lib/spandx/rubygems/index.rb, line 17
# Yields every entry of the optimized index as a (key, value) pair.
#
# When called without a block an Enumerator over the same pairs is returned
# instead of failing with LocalJumpError on the first entry.
#
# @yield [key, value] one entry of #to_h at a time
# @return [Enumerator] when no block is given; otherwise whatever
#   `to_h.each` returns
def each
  return enum_for(:each) unless block_given?

  to_h.each { |key, value| yield key, value }
end
licenses_for(name:, version:) click to toggle source
# File lib/spandx/rubygems/index.rb, line 13
# Looks up the entry stored for a gem release.
#
# @param name [Object] gem name, passed to index_key_for
# @param version [Object] gem version, passed to index_key_for
# @return the value stored in #to_h under the computed key, or a new empty
#   Array when the key is absent
def licenses_for(name:, version:)
  key = index_key_for(name, version)
  to_h.fetch(key) { [] }
end
to_h() click to toggle source
# File lib/spandx/rubygems/index.rb, line 21
# Exposes the contents of the optimized index.
#
# @return whatever `@rubygems_file.data` returns
def to_h
  data_file = @rubygems_file
  data_file.data
end
update!() click to toggle source
# File lib/spandx/rubygems/index.rb, line 25
# Runs the three rebuild steps in order: update_expanded_index!,
# sort_index!, then build_optimized_index!.
#
# @return the result of the final step (build_optimized_index!)
def update!
  steps = %i[update_expanded_index! sort_index! build_optimized_index!]
  # `send` is used because the steps are private; `last` keeps the return
  # value equal to that of the final step.
  steps.map { |step| send(step) }.last
end

Private Instance Methods

build_optimized_index!() click to toggle source
# File lib/spandx/rubygems/index.rb, line 43
# Rebuilds @rubygems_file from the per-prefix data files.
#
# Every line of every file returned by index_data_files is parsed as JSON.
# For each record the index key (from 'name' and 'version') and then the
# 'licenses' value are written to the writer yielded by
# `@rubygems_file.batch`. The batch is opened with `size:` set to the
# result of count_items_from for those files.
def build_optimized_index!
  data_files = index_data_files
  total = count_items_from(data_files)

  @rubygems_file.batch(size: total) do |writer|
    data_files.each do |path|
      IO.foreach(path) do |raw|
        name, version, licenses = JSON.parse(raw).values_at('name', 'version', 'licenses')
        # The first write must return an object that also responds to
        # #write, as the chained call requires.
        writer.write(index_key_for(name, version)).write(licenses)
      end
    end
  end
end
checkpoint!(tarfile) click to toggle source
# File lib/spandx/rubygems/index.rb, line 109
# Records that +tarfile+ has been fully indexed by appending its string
# form, followed by a newline, to the checkpoints file.
#
# The file lives at Spandx::Rubygems.root.join('checkpoints'), the same
# path the `checkpoints` reader uses. The entry was previously appended to
# 'checkpoints' relative to the current working directory, so it was only
# read back when the process happened to run from the root.
#
# @param tarfile [#to_s] the backup that has just been processed
def checkpoint!(tarfile)
  path = Spandx::Rubygems.root.join('checkpoints').to_s
  File.write(path, "#{tarfile}\n", mode: 'a')
  # Keep an already-loaded in-memory list in step with the file so that
  # indexed? sees the new entry without re-reading from disk.
  @checkpoints << tarfile.to_s if @checkpoints
end
checkpoints() click to toggle source
# File lib/spandx/rubygems/index.rb, line 100
# Returns the lines of the checkpoints file under Spandx::Rubygems.root,
# with line endings removed.
#
# The file is created empty if it does not exist yet. The result is
# memoized in @checkpoints, so later changes to the file are not picked up.
#
# @return [Array<String>]
def checkpoints
  return @checkpoints if @checkpoints

  path = Spandx::Rubygems.root.join('checkpoints').to_s
  FileUtils.touch(path) unless File.exist?(path)
  @checkpoints = IO.readlines(path).map(&:chomp)
end
count_items_from(filenames) click to toggle source
# File lib/spandx/rubygems/index.rb, line 61
# Counts the total number of lines across +filenames+.
#
# Lines are counted in-process with File.foreach rather than by shelling
# out to `wc -l`. This makes paths containing spaces or shell
# metacharacters safe and removes the dependency on an external binary.
# It also counts a final line that lacks a trailing newline, matching what
# a later line-by-line read of the same file yields.
#
# @param filenames [Array<String>] paths of the files to count
# @return [Integer] sum of the line counts; 0 for an empty list
# @raise [SystemCallError] if a file cannot be read (the `wc` version
#   silently counted such files as 0)
def count_items_from(filenames)
  filenames.sum { |path| File.foreach(path).count }
end
data_dir_for(index_key) click to toggle source
# File lib/spandx/rubygems/index.rb, line 125
# Returns the directory under @dir named after the first two characters of
# +index_key+.
#
# @param index_key [String]
# @return [String] joined path
def data_dir_for(index_key)
  prefix = index_key[0, 2]
  File.join(@dir, prefix)
end
data_file_for(key) click to toggle source
# File lib/spandx/rubygems/index.rb, line 129
# Returns the path of the file named 'data' inside the directory that
# data_dir_for gives for +key+.
#
# @param key [String]
# @return [String] joined path
def data_file_for(key)
  directory = data_dir_for(key)
  File.join(directory, 'data')
end
digest_for(components) click to toggle source
# File lib/spandx/rubygems/index.rb, line 113
# Computes the SHA1 hex digest of +components+ joined with '/'.
#
# A single value is treated as a one-element list, and nil as an empty one.
#
# @param components [Object, Array<Object>]
# @return [String] hexadecimal digest
def digest_for(components)
  joined = Array(components).join('/')
  Digest::SHA1.hexdigest(joined)
end
extract_licenses_from(licenses) click to toggle source
# File lib/spandx/rubygems/index.rb, line 83
# Turns the YAML text of a `licenses` field into a list of values.
#
# Empty markers ('--- []' and "--- \n...") and a document holding exactly
# one entry from COMMON_LICENSES are recognised by string comparison, so
# the YAML parser is skipped for them. Anything else goes through
# YAML.safe_load.
#
# A non-mutating `strip` replaces `strip!`, which returns nil when there is
# nothing to strip. That nil disabled every fast path, and input such as
# "--- \n..." then reached `nil.compact` and raised NoMethodError. The
# caller's string is no longer modified, so frozen strings are accepted.
#
# @param licenses [String, nil] raw YAML text; nil is treated as empty
# @return [Array] parsed entries with nils removed; [] when nothing is
#   present. A scalar document yields a one-element array.
def extract_licenses_from(licenses)
  stripped = licenses.to_s.strip

  return [] if stripped.empty?
  return [] if stripped == '--- []'
  return [] if stripped == "--- \n..."

  found = COMMON_LICENSES.find { |x| stripped == "---\n- #{x}" }
  return [found] if found

  # Array() covers documents that parse to nil or to a bare scalar.
  Array(YAML.safe_load(stripped)).compact
end
index_data_files() click to toggle source
# File lib/spandx/rubygems/index.rb, line 33
# Lists every path named 'data' at any depth beneath @dir.
#
# @return [Array<String>] matching paths, in the order Dir.glob reports them
def index_data_files
  pattern = "#{@dir}/**/data"
  Dir.glob(pattern)
end
index_key_for(name, version) click to toggle source
# File lib/spandx/rubygems/index.rb, line 57
# Builds the key under which a gem release is stored: the string forms of
# +name+ and +version+ separated by a hyphen.
#
# @param name [#to_s]
# @param version [#to_s]
# @return [String]
def index_key_for(name, version)
  format('%s-%s', name, version)
end
indexed?(tarfile) click to toggle source
# File lib/spandx/rubygems/index.rb, line 96
# Reports whether the string form of +tarfile+ appears in the list returned
# by `checkpoints`.
#
# @param tarfile [#to_s]
# @return [Boolean]
def indexed?(tarfile)
  name = tarfile.to_s
  checkpoints.any? { |entry| entry == name }
end
map_from(row) click to toggle source
# File lib/spandx/rubygems/index.rb, line 79
# Serialises one row as a JSON object with the keys name, version and
# licenses, in that order. The licenses value is the result of
# extract_licenses_from applied to the row's 'licenses' field.
#
# @param row [#[]] record exposing 'name', 'version' and 'licenses'
# @return [String] JSON text
def map_from(row)
  record = {
    name: row['name'],
    version: row['version'],
    licenses: extract_licenses_from(row['licenses'])
  }
  JSON.generate(record)
end
open_data(name, mode: 'a') { |file| ... } click to toggle source
# File lib/spandx/rubygems/index.rb, line 117
# Opens the data file that belongs to +name+ and yields it.
#
# The file location is derived from digest_for(name). Its directory is
# created first if needed, and the file is closed when the block returns.
#
# @param name [Object] value handed to digest_for
# @param mode [String] File.open mode; appends by default
# @yield [file] the opened File
# @return the value of the block
def open_data(name, mode: 'a')
  key = digest_for(name)
  directory = data_dir_for(key)
  FileUtils.mkdir_p(directory)

  File.open(data_file_for(key), mode) { |file| yield file }
end
sort_index!() click to toggle source
# File lib/spandx/rubygems/index.rb, line 37
# Removes duplicate lines from every index data file, keeping the first
# occurrence of each line and the original order. Despite the name, the
# lines are not sorted.
#
# This is done in Ruby instead of through an `awk ... && mv` shell command.
# File paths are therefore never interpreted by a shell, and a failure
# raises instead of being ignored as the `system` result was.
#
# Each file is rewritten through a sibling scratch file ("<file>1") that is
# then renamed over the original. Every output line ends with a newline.
#
# @return the list returned by index_data_files
def sort_index!
  index_data_files.each do |file|
    scratch = "#{file}1"
    seen = {}

    File.open(scratch, 'w') do |out|
      File.foreach(file, chomp: true) do |line|
        next if seen.key?(line)

        seen[line] = true
        out.puts(line)
      end
    end

    File.rename(scratch, file)
  end
end
update_expanded_index!() click to toggle source
# File lib/spandx/rubygems/index.rb, line 65
# Appends the rows of every not-yet-indexed backup to the per-prefix data
# files.
#
# Backups for which indexed? is true are skipped. Within a backup, rows
# whose licenses field extracts to an empty list are skipped. Every other
# row is written, as the line produced by map_from, to the data file
# opened for the row's name. Once all rows of a backup have been
# processed, the backup is recorded via checkpoint!.
def update_expanded_index!
  Backups.new.each do |backup|
    next if indexed?(backup)

    backup.each do |record|
      next if extract_licenses_from(record['licenses']).empty?

      open_data(record['name']) do |out|
        out.puts(map_from(record))
      end
    end
    checkpoint!(backup)
  end
end