module Elasticsearch::Git::Repository
Constants
- BLOBS_BATCH
- COMMMITS_BATCH
Public Instance Methods
as_indexed_json(options = {})
Representation of the repository as indexed JSON. Attention: the resulting hash can be very large.
# File lib/elasticsearch/git/repository.rb, line 271
def as_indexed_json(options = {})
  data = {}
  data[:blobs] = index_blobs_array
  data[:commits] = index_commits_array
  data
end
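A minimal usage sketch, assuming a class that mixes in this module (MyRepository and the path are hypothetical; see repository_for_indexing below):

repo = MyRepository.new
repo.repository_for_indexing('/var/repos/project.git')
doc = repo.as_indexed_json
doc.keys # => [:blobs, :commits]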
can_index_blob?(blob)
Returns true for text-like blobs whose size is less than 1 MB.
# File lib/elasticsearch/git/repository.rb, line 167
def can_index_blob?(blob)
  blob.text? && (blob.size && blob.size.to_i < 1048576)
end
client_for_indexing()
# File lib/elasticsearch/git/repository.rb, line 389
def client_for_indexing
  @client_for_indexing ||= Elasticsearch::Client.new retry_on_failure: 5
end
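The client is memoized per instance. One way to point it at a non-default cluster is to pre-assign the instance variable before the first indexing call; a sketch only, and the URL is an example:

repo.instance_variable_set(
  :@client_for_indexing,
  Elasticsearch::Client.new(url: 'http://search.internal:9200', retry_on_failure: 5)
)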
delete_blob(blob)
# File lib/elasticsearch/git/repository.rb, line 122
def delete_blob(blob)
  return unless blob.text?

  {
    delete: {
      _index: "#{self.class.index_name}",
      _type: self.class.name.underscore,
      _id: "#{repository_id}_#{blob.path}",
      _parent: project_id
    }
  }
end
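Note that delete_blob only builds a bulk-operation hash; nothing is sent to Elasticsearch until the hashes are handed to perform_bulk. A sketch (removed_blobs is a hypothetical collection of LiteBlob instances):

ops = removed_blobs.map { |blob| delete_blob(blob) }
perform_bulk(ops) # nil entries are compacted before the single bulk request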
index_blob(blob, target_sha)
# File lib/elasticsearch/git/repository.rb, line 134
def index_blob(blob, target_sha)
  return unless can_index_blob?(blob)

  {
    index: {
      _index: "#{self.class.index_name}",
      _type: self.class.name.underscore,
      _id: "#{repository_id}_#{blob.path}",
      _parent: project_id,
      data: {
        blob: {
          type: "blob",
          oid: blob.id,
          rid: repository_id,
          content: blob.data,
          commit_sha: target_sha,
          path: blob.path,

          # We're duplicating the file_name parameter here because
          # we need another analyzer for it.
          # Ideally this would be done with the copy_to: 'blob.file_name'
          # option, but that does not work in ES v2.3.*. We duplicate the
          # field so users don't have to install newer versions.
          # https://github.com/elastic/elasticsearch-mapper-attachments/issues/124
          file_name: blob.path,

          language: blob.language ? blob.language.name : "Text"
        }
      }
    }
  }
end
index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid) { |slice, length| ... }
Indexes all text-like blobs in the repository.

All data is stored in the global index. A repository's data can be selected by the 'rid' field; if you want, this field can be used to store a 'project' id.

blob {
  id         - unique id of the blob across all repositories
  oid        - blob id within the repository
  content    - blob content
  commit_sha - last actual commit sha
}

To search blobs, use type 'blob'.
# File lib/elasticsearch/git/repository.rb, line 87
def index_blobs(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
  from, to = parse_revs(from_rev, to_rev)
  diff = repository_for_indexing.diff(from, to)
  deltas = diff.deltas

  deltas.reverse.each_slice(BLOBS_BATCH) do |slice|
    bulk_operations = slice.map do |delta|
      if delta.status == :deleted
        # skip submodules (gitlink mode 160000)
        next if delta.old_file[:mode].to_s(8) == "160000"

        b = LiteBlob.new(repository_for_indexing, delta.old_file)
        delete_blob(b)
      else
        next if delta.new_file[:mode].to_s(8) == "160000"

        b = LiteBlob.new(repository_for_indexing, delta.new_file)
        index_blob(b, to)
      end
    end

    perform_bulk bulk_operations

    yield slice, deltas.length if block_given?
  end

  ObjectSpace.garbage_collect
end
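A usage sketch for incremental indexing with progress reporting (old_sha and new_sha are hypothetical revision ids):

repo.index_blobs(from_rev: old_sha, to_rev: new_sha) do |slice, total|
  puts "processed a batch of #{slice.length} deltas out of #{total}"
end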
index_blobs_array()
Builds an array of blob hashes: from the HEAD tree for bare repositories, otherwise from the repository's current index. Used by as_indexed_json.
# File lib/elasticsearch/git/repository.rb, line 279
def index_blobs_array
  result = []

  target_sha = repository_for_indexing.head.target.oid

  if repository_for_indexing.bare?
    tree = repository_for_indexing.lookup(target_sha).tree
    result.push(recurse_blobs_index_hash(tree))
  else
    repository_for_indexing.index.each do |blob|
      b = LiteBlob.new(repository_for_indexing, blob)
      result.push(
        {
          type: 'blob',
          id: "#{target_sha}_#{b.path}",
          rid: repository_id,
          oid: b.id,
          content: b.data,
          commit_sha: target_sha
        }
      ) if b.text?
    end
  end

  result
end
index_commit(commit)
# File lib/elasticsearch/git/repository.rb, line 216
def index_commit(commit)
  author    = commit.author
  committer = commit.committer

  {
    index: {
      _index: "#{self.class.index_name}",
      _type: self.class.name.underscore,
      _id: "#{repository_id}_#{commit.oid}",
      _parent: project_id,
      data: {
        commit: {
          type: "commit",
          rid: repository_id,
          sha: commit.oid,
          author: {
            name: encode!(author[:name]),
            email: encode!(author[:email]),
            time: author[:time].strftime('%Y%m%dT%H%M%S%z'),
          },
          committer: {
            name: encode!(committer[:name]),
            email: encode!(committer[:email]),
            time: committer[:time].strftime('%Y%m%dT%H%M%S%z'),
          },
          message: encode!(commit.message)
        }
      }
    }
  }
end
index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid) { |batch, length| ... }
Indexes all commits in the repository.

All data is stored in the global index. A repository's data can be filtered by the 'rid' field; if you want, this field can be used to store a 'project' id.

commit {
  sha - commit sha
  author {
    name  - commit author name
    email - commit author email
    time  - commit time
  }
  committer {
    name  - committer name
    email - committer email
    time  - commit time
  }
  message - commit message
}

To search commits, use type 'commit'.
# File lib/elasticsearch/git/repository.rb, line 193
def index_commits(from_rev: nil, to_rev: repository_for_indexing.last_commit.oid)
  from, to = parse_revs(from_rev, to_rev)
  range = [from, to].compact.join('..')

  out, err, status = Open3.capture3("git log #{range} --format=\"%H\"",
                                    chdir: repository_for_indexing.path)

  if status.success? && err.blank?
    # TODO: use rugged walker!!!
    commit_oids = out.split("\n")

    commit_oids.each_slice(COMMMITS_BATCH) do |batch|
      bulk_operations = batch.map do |commit|
        index_commit(repository_for_indexing.lookup(commit))
      end

      perform_bulk bulk_operations

      yield batch, commit_oids.length if block_given?
    end

    ObjectSpace.garbage_collect
  end
end
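As with index_blobs, a block may be passed for progress reporting; a sketch:

repo.index_commits do |batch, total|
  puts "indexed #{batch.length} of #{total} commits"
end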
index_commits_array()
Looks up every object id in the repository and collects a hash for each commit object.
# File lib/elasticsearch/git/repository.rb, line 332
def index_commits_array
  res = []

  repository_for_indexing.each_id do |oid|
    obj = repository_for_indexing.lookup(oid)
    if obj.type == :commit
      res.push(
        {
          type: 'commit',
          sha: obj.oid,
          author: obj.author,
          committer: obj.committer,
          message: encode!(obj.message)
        }
      )
    end
  end

  res
end
index_new_branch?(from)
# File lib/elasticsearch/git/repository.rb, line 265
def index_new_branch?(from)
  from == '0000000000000000000000000000000000000000'
end
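For context: git receive hooks report the all-zeros sha as the old revision when a branch is newly created, which is what this predicate detects. A sketch:

index_new_branch?('0' * 40)                                   # => true
index_new_branch?('d670460b4b4aece5915caf5c68d12f560a9fe3e4') # => false (an ordinary sha)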
parse_revs(from_rev, to_rev)
# File lib/elasticsearch/git/repository.rb, line 248
def parse_revs(from_rev, to_rev)
  from = if index_new_branch?(from_rev)
           if to_rev == repository_for_indexing.last_commit.oid
             nil
           else
             repository_for_indexing.merge_base(
               to_rev,
               repository_for_indexing.last_commit.oid
             )
           end
         else
           from_rev
         end

  return from, to_rev
end
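A sketch of the three cases handled here (the sha variables are hypothetical):

parse_revs('0' * 40, head_oid)  # new branch at HEAD     => [nil, head_oid] (full index)
parse_revs('0' * 40, older_oid) # new branch behind HEAD => [merge_base_oid, older_oid]
parse_revs(old_oid, new_oid)    # ordinary branch update => [old_oid, new_oid]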
path_to_repo()
# File lib/elasticsearch/git/repository.rb, line 370
def path_to_repo
  if @path_to_repo.blank?
    raise NotImplementedError, 'Please, define "path_to_repo" method, or set "path_to_repo" via "repository_for_indexing" method'
  else
    @path_to_repo
  end
end
perform_bulk(bulk_operations)
# File lib/elasticsearch/git/repository.rb, line 114
def perform_bulk(bulk_operations)
  bulk_operations.compact!
  return false if bulk_operations.empty?

  client_for_indexing.bulk body: bulk_operations
end
recurse_blobs_index_hash(tree, path = "")
# File lib/elasticsearch/git/repository.rb, line 306
def recurse_blobs_index_hash(tree, path = "")
  result = []

  tree.each_blob do |blob|
    blob[:path] = path + blob[:name]
    b = LiteBlob.new(repository_for_indexing, blob)
    result.push(
      {
        type: 'blob',
        id: "#{repository_for_indexing.head.target.oid}_#{path}#{blob[:name]}",
        rid: repository_id,
        oid: b.id,
        content: b.data,
        commit_sha: repository_for_indexing.head.target.oid
      }
    ) if b.text?
  end

  tree.each_tree do |nested_tree|
    result.push(recurse_blobs_index_hash(repository_for_indexing.lookup(nested_tree[:oid]), "#{nested_tree[:name]}/"))
  end

  result.flatten
end
repository_for_indexing(repo_path = nil)
# File lib/elasticsearch/git/repository.rb, line 379
def repository_for_indexing(repo_path = nil)
  return @rugged_repo_indexer if defined? @rugged_repo_indexer

  @path_to_repo ||= repo_path || path_to_repo
  set_repository_id
  @rugged_repo_indexer = Rugged::Repository.new(@path_to_repo)
end
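A minimal setup sketch, assuming the module can be mixed into a plain class (MyRepository and the path are hypothetical):

class MyRepository
  include Elasticsearch::Git::Repository
end

repo = MyRepository.new
repo.repository_for_indexing('/var/repos/project.git') # memoizes a Rugged::Repository
repo.index_commits
repo.index_blobs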
repository_id()
Intended to be overridden if needed.
# File lib/elasticsearch/git/repository.rb, line 365
def repository_id
  @repository_id
end
search(query, type: :all, page: 1, per: 20, options: {})
# File lib/elasticsearch/git/repository.rb, line 353
def search(query, type: :all, page: 1, per: 20, options: {})
  options[:repository_id] = repository_id if options[:repository_id].nil?
  self.class.search(query, type: type, page: page, per: per, options: options)
end
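A usage sketch (the query strings and pagination values are examples):

repo.search('def initialize', type: :blob, page: 1, per: 10)
repo.search('fix typo', type: :commit)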
set_repository_id(id = nil)
The repository id is used to distinguish data from different repositories. Update this value if needed.
# File lib/elasticsearch/git/repository.rb, line 360
def set_repository_id(id = nil)
  @repository_id = id || path_to_repo
end
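By default the repository path serves as the id; any value, for example a numeric project id, can be set instead (the value 42 is an example):

repo.set_repository_id(42) # subsequent documents carry rid: 42
repo.set_repository_id     # falls back to path_to_repo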