require 'retries'

# Flush stdout after every write so progress output appears in real time
# (these tasks are long-running and often watched live or piped to a log).
$stdout.sync = true

# Write +message+ both to the supplied logger and to stdout.
#
# logger   - a Logger instance to record the message in
# message  - the String to log
# log_type - :error routes the message to logger.error; any other value
#            (default :info) routes it to logger.info
def log(logger, message, log_type = :info)
  if log_type == :error
    logger.error(message)
  else
    logger.info(message)
  end
  # Echo to the console as well so progress is visible while the task runs.
  puts message
  $stdout.flush
end

desc 'Index a specific list of druids from a pre-assembly log YAML file, a remediate log file, or a simple CSV. Specify target to index into and log file to index from.'
# Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.yaml log_type=preassembly
# Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1_remediation.yaml log_type=remediate
# Run me: rake log_indexer RAILS_ENV=production target=revs_prod log_file=/tmp/mailander_1.csv log_type=csv
#   (the csv must contain a heading called "druid" with the druid to index)
task :log_indexer => :environment do |t, args|
  target = ENV['target'] # must pass in the target to specify the solr core to index into
  log_file_path = ENV['log_file'] # must specify pre-assembly log file to index from
  log_type = ENV['log_type'] || 'preassembly' # log type (either preassembly, csv, or remediate), defaults to preassembly

  raise 'You must specify a target and log file.' if target.blank? || log_file_path.blank?
  raise 'Log type must be preassembly, remediate or csv.' unless ['preassembly', 'remediate', 'csv'].include? log_type
  raise 'Log file not found.' unless File.readable? log_file_path

  target_config = Settings.SOLR_TARGETS[target]

  raise 'Target not found.' if target_config.nil?

  # Each YAML log type marks a finished object with a different key.
  if log_type.blank? || log_type == 'preassembly'
    log_completed = :pre_assem_finished
  elsif log_type == 'remediate'
    log_completed = :remediate_completed
  end

  output_log_file_name = "#{Rails.root}/log/#{File.basename(log_file_path, File.extname(log_file_path))}_indexer_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
  my_logger = Logger.new(output_log_file_name) # set up a new log file

  start_time = Time.now

  errors = 0
  indexed = 0

  # Build the list of druids to index. For YAML logs, take the :pid of each
  # document whose "completed" flag is set (compact drops the unfinished ones);
  # for CSV, take the non-nil values of the "druid" column.
  # NOTE: the original code never assigned the YAML branch's result to `druids`,
  # which made the preassembly/remediate path raise NameError below — fixed here.
  if ['preassembly', 'remediate'].include? log_type
    druids = YAML.load_stream(IO.read(log_file_path)).map { |obj| obj[:pid] if obj[log_completed] == true }.compact
  else
    csv = CSV.parse(IO.read(log_file_path), :headers => true)
    druids = csv.map { |row| row.to_hash.with_indifferent_access['druid'] }.delete_if { |druid| druid.nil? }
  end

  solr_server = target_config['url']

  log my_logger, "** Indexing #{druids.size} druids from #{log_file_path} into solr server #{solr_server} (target=#{target}).  Log file is of type #{log_type}."
  log my_logger, "Indexing started at #{start_time}"

  indexer = BaseIndexer.indexer_class.constantize.new

  counter = 0

  druids.each do |druid|
    druid.gsub!('druid:', '') # normalize to the bare druid id
    counter += 1

    begin
      # Retry transient indexing failures with exponential backoff before
      # recording the druid as errored.
      with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
        indexer.index(druid, {target => true})
        log my_logger, "#{counter} of #{druids.size}: #{druid}"
        indexed += 1
      end
    rescue => e
      log my_logger, "ERROR: Failed to index #{druid}: #{e.message}", :error
      errors += 1
    end
  end

  log my_logger, "Objects indexed: #{indexed} out of #{druids.size}"
  log(my_logger, "ERRORS Encountered, #{errors} objects not indexed") if errors > 0
  log my_logger, "Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time) / 60.0)} minutes"
  puts "Logged output at #{output_log_file_name}"
end

desc 'Delete a single druid. It will be deleted from all targets!'
# Run me: rake delete RAILS_ENV=production druid=oo000oo0001
task :delete => :environment do |t, args|
  druid = ENV['druid']

  raise 'You must specify a druid.' if druid.blank?

  # Interactive confirmation: this deletes the object from every target.
  print "Are you sure you wish to delete this druid from all targets? (y/n) "
  STDOUT.flush
  answer = STDIN.gets.chomp

  raise 'STOP!' unless answer && ['y', 'yes'].include?(answer.downcase)

  puts "** Delete #{druid} druid from all targets."

  indexer = BaseIndexer.indexer_class.constantize.new
  indexer.delete druid.gsub('druid:', '') # delete accepts the bare druid id
end

desc 'Index a single druid. Specify target to index into and druid to index.'
# Run me: rake index RAILS_ENV=production target=revs_prod druid=oo000oo0001
task :index => :environment do |t, args|
  target = ENV['target'] # must pass in the target to specify the solr core to index into
  druid = ENV['druid']

  raise 'You must specify a target and druid.' if target.blank? || druid.blank?

  target_config = Settings.SOLR_TARGETS[target]

  raise 'Target not found.' if target_config.nil?

  solr_server = target_config['url']

  puts "** Indexing #{druid} druid into solr server #{solr_server} (target=#{target})."

  indexer = BaseIndexer.indexer_class.constantize.new
  indexer.index(druid.gsub('druid:', ''), {target => true}) # index the bare druid id into the chosen target
end

desc 'Index an entire collection, including the collection itself and all of its members. Specify target to index into and collection druid to index.'
# Run me: rake collection_indexer RAILS_ENV=production target=revs_prod collection_druid=oo000oo0001
task :collection_indexer => :environment do |t, args|
  require 'harvestdor/indexer' # lazy-loaded: only this task needs the fetcher

  target = ENV['target'] # must pass in the target to specify the solr core to index into
  collection_druid = ENV['collection_druid']

  raise 'You must specify a target and collection druid.' if target.blank? || collection_druid.blank?

  target_config = Settings.SOLR_TARGETS[target]

  raise 'Target not found.' if target_config.nil?

  solr_server = target_config['url']

  output_log_file_name = "#{Rails.root}/log/collection_#{collection_druid}_indexer_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
  my_logger = Logger.new(output_log_file_name) # set up a new log file

  log my_logger, "** Indexing collection #{collection_druid} druid and all of its members into solr server #{solr_server} (target=#{target})."

  start_time = Time.now
  log my_logger, "Indexing started at #{start_time}"

  indexer = BaseIndexer.indexer_class.constantize.new

  # Fetcher resolves the list of member druids for the collection via the purl service.
  fetcher = Harvestdor::Indexer::PurlFetcher.new(url: Rails.application.config.fetcher_url)

  collection_druid = collection_druid.gsub('druid:', '') # normalize to the bare druid id

  # Index the collection object itself first, then every member.
  indexer.index(collection_druid, {target => true})
  log my_logger, "Indexed collection: #{collection_druid}"

  druids = fetcher.druids_from_collection(collection_druid)

  log my_logger, "** Found #{druids.size} members of the collection"

  counter = 0
  indexed = 0
  errors = 0

  druids.each do |druid|
    druid = druid.gsub('druid:', '')
    counter += 1

    begin
      # Retry transient indexing failures with exponential backoff.
      with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
        indexer.index(druid, {target => true})
        log my_logger, "#{counter} of #{druids.size}: #{druid}"
        indexed += 1
      end
    rescue => e
      log my_logger, "ERROR: Failed to index #{druid}: #{e.message}", :error
      errors += 1
    end
  end

  log my_logger, "Objects indexed: #{indexed} out of #{druids.size} + 1 collection druid"
  log(my_logger, "ERRORS Encountered, #{errors} objects not indexed") if errors > 0
  log my_logger, "Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time) / 60.0)} minutes"
  puts "Logged output at #{output_log_file_name}"
end

desc 'ReIndex just the druids that errored out from a previous batch index run. Specify target to index into and batch errored log file to index from.'
# Run me: rake reindexer RAILS_ENV=production target=revs_prod file=./log/index.log
task :reindexer => :environment do |t, args|
  target = ENV['target'] # must pass in the target to specify the solr core to index into
  file_path = ENV['file'] # must specify previous indexing log file to index from

  raise 'You must specify a target and file.' if target.blank? || file_path.blank?
  raise 'File not found.' unless File.readable? file_path

  target_config = Settings.SOLR_TARGETS[target]

  raise 'Target not found.' if target_config.nil?

  start_time = Time.now

  errors = 0
  indexed = 0

  solr_server = target_config['url']

  output_log_file_name = "#{Rails.root}/log/#{File.basename(file_path, File.extname(file_path))}_reindex_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
  my_logger = Logger.new(output_log_file_name) # set up a new log file

  log my_logger, "** Indexing errored out druids from #{file_path} into solr server #{solr_server} (target=#{target})."
  log my_logger, "Indexing started at #{start_time}"

  indexer = BaseIndexer.indexer_class.constantize.new

  counter = 0

  # Scan the previous run's log for lines mentioning "error" and pull the
  # first druid-shaped token (aa111aa1111) out of each such line.
  IO.readlines(file_path).each do |line|
    downcased_line = line.downcase

    if downcased_line.include? 'error'
      druid = downcased_line.scan(/[a-z][a-z][0-9][0-9][0-9][a-z][a-z][0-9][0-9][0-9][0-9]/).first

      unless druid.blank?
        begin
          counter += 1
          # Retry transient indexing failures with exponential backoff.
          with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
            indexer.index(druid, {target => true})
            log my_logger, "#{counter}: #{druid}"
            indexed += 1
          end
        rescue => e
          log my_logger, "ERROR: Failed to index #{druid}: #{e.message}", :error
          errors += 1
        end
      end
    end
  end

  log my_logger, "Objects indexed: #{indexed}"
  log(my_logger, "ERRORS Encountered, #{errors} objects not indexed") if errors > 0
  log my_logger, "Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time) / 60.0)} minutes"
  puts "Logged output at #{output_log_file_name}"
end

desc 'Delete the druids specified in the supplied text file (one druid per line, header not necessary). Be careful! It will delete from all targets.'
# Run me: rake delete_druids RAILS_ENV=production file=druid_list.txt
task :delete_druids => :environment do |t, args|
  file_path = ENV['file'] # must specify a file listing the druids to delete, one per line

  raise 'You must specify a druid file.' if file_path.blank?
  raise 'File not found.' unless File.readable? file_path

  # Interactive confirmation: this deletes every listed druid from every target.
  print "Are you sure you wish to delete all of the druids from all targets specified in #{file_path}? (y/n) "
  STDOUT.flush
  answer = STDIN.gets.chomp

  raise 'STOP!' unless answer && ['y', 'yes'].include?(answer.downcase)

  output_log_file_name = "#{Rails.root}/log/#{File.basename(file_path, File.extname(file_path))}_delete_#{Time.now.strftime('%Y%m%d-%H%M%S')}.log"
  my_logger = Logger.new(output_log_file_name) # set up a new log file

  start_time = Time.now

  errors = 0
  indexed = 0

  log my_logger, "** Deleting druids from #{file_path} in all targets."
  log my_logger, "Deleting started at #{start_time}"

  indexer = BaseIndexer.indexer_class.constantize.new

  counter = 0

  # Pull the first druid-shaped token (aa111aa1111) out of each line; lines
  # without one (e.g. a header row) are skipped.
  IO.readlines(file_path).each do |line|
    downcased_line = line.downcase
    druid = downcased_line.scan(/[a-z][a-z][0-9][0-9][0-9][a-z][a-z][0-9][0-9][0-9][0-9]/).first

    unless druid.blank?
      counter += 1

      begin
        # Retry transient delete failures with exponential backoff.
        with_retries(:max_tries => 5, :base_sleep_seconds => 3, :max_sleep_seconds => 60) do
          indexer.delete druid
          log my_logger, "#{counter}: #{druid}"
          indexed += 1
        end
      rescue => e
        log my_logger, "ERROR: Failed to delete #{druid}: #{e.message}", :error
        errors += 1
      end
    end
  end

  log my_logger, "Objects deleted: #{indexed}"
  log(my_logger, "ERRORS Encountered, #{errors} objects not deleted", :error) if errors > 0
  log my_logger, "Completed at #{Time.now}, total time was #{'%.2f' % ((Time.now - start_time) / 60.0)} minutes"
end