require 'cdmdexer'

namespace :cdmdexer do

# Runs a CDMDEXER::ETLWorker in the current process by calling #perform
# directly (nothing is enqueued). Takes no rake arguments: the Solr URL, the
# OAI / CONTENTdm endpoints, the set spec and the batch limits are all
# hard-coded below, so edit them here to point at a different collection.
desc 'Ingest a Collection Synchronously'
task :collection_sync do
  CDMDEXER::ETLWorker.new.perform(
    'solr_config' => { url: 'http://solr:8983/solr/mdl-1' },
    'oai_endpoint' => 'http://cdm16022.contentdm.oclc.org/oai/oai.php',
    'cdm_endpoint' => 'https://server16022.contentdm.oclc.org/dmwebservices/index.php',
    'set_spec' => 'mpls',
    'batch_size' => 10,
    'max_compounds' => 10
  )
end

# Enqueues one CDMDEXER::ETLWorker job via perform_async.
#
# Rake arguments (positional):
#   solr_url      - required; passed as the Solr config's url
#   oai_endpoint  - required
#   cdm_endpoint  - required
#   set_spec      - optional; the literal two-character string `""` is treated
#                   as "no set spec" and sent as nil
#   batch_size    - optional, defaults to 10
#   max_compounds - optional, defaults to 10
#
# Raises KeyError when a required argument is missing.
#
# The payload uses String keys, matching the keys the synchronous task in this
# file hands to ETLWorker#perform, so that it contains only JSON-native types.
# NOTE(review): this assumes the job backend serializes arguments to JSON
# (perform_async looks like Sidekiq) - confirm.
desc 'Launch a background job to index metadata from CONTENTdm to Solr.'
task :batch, %i[
  solr_url
  oai_endpoint
  cdm_endpoint
  set_spec
  batch_size
  max_compounds
] do |_t, args|
  # A quoted-empty placeholder lets callers skip set_spec while still
  # supplying the arguments that follow it.
  set_spec = args[:set_spec]
  set_spec = nil if set_spec == '""'

  config = {
    'solr_config' => { 'url' => args.fetch(:solr_url) },
    'oai_endpoint' => args.fetch(:oai_endpoint),
    'cdm_endpoint' => args.fetch(:cdm_endpoint),
    'set_spec' => set_spec,
    'batch_size' => args.fetch(:batch_size, 10),
    'max_compounds' => args.fetch(:max_compounds, 10)
  }

  CDMDEXER::ETLWorker.perform_async(config)
end

# Resolves a list of set specs through CDMDEXER::FilteredSetSpecs (optionally
# narrowed by a regex filter) and hands them, together with a shared ETL
# config, to CDMDEXER::ETLBySetSpecs#run!.
#
# Rake arguments (positional):
#   solr_url         - required
#   oai_endpoint     - required
#   cdm_endpoint     - required
#   set_spec_pattern - optional regex source; when omitted no filter is applied
#   inclusive        - optional; only the exact string 'true' (the default)
#                      makes the filter inclusive, anything else is exclusive
#   batch_size       - optional, defaults to 5
#   max_compounds    - optional, defaults to 10
#
# Raises KeyError when a required argument is missing.
#
# NOTE(review): the description says the pattern matches setSpec, but the
# filter below is built on the 'setName' field - confirm which is intended.
desc 'Launch an indexing worker for each collection with an optional regex
      pattern to match setSpec. Patterns can be inclusive or exclusive.'
task :by_collections, %i[
  solr_url
  oai_endpoint
  cdm_endpoint
  set_spec_pattern
  inclusive
  batch_size
  max_compounds
] do |_t, args|
  # Required args are read first so a missing one fails before any set specs
  # are looked up.
  solr_url = args.fetch(:solr_url)
  oai_endpoint = args.fetch(:oai_endpoint)
  cdm_endpoint = args.fetch(:cdm_endpoint)

  # Optional args
  pattern = args.fetch(:set_spec_pattern, false)
  inclusive = args.fetch(:inclusive, 'true') == 'true'

  # Define your own callback if you want to filter on other set related
  # fields; RegexFilterCallback is an example of how to build such a filter.
  set_specs =
    if pattern
      filter = CDMDEXER::RegexFilterCallback.new(
        field: 'setName',
        pattern: Regexp.new(pattern),
        inclusive: inclusive
      )
      CDMDEXER::FilteredSetSpecs.new(
        oai_base_url: oai_endpoint,
        callback: filter
      ).set_specs
    else
      CDMDEXER::FilteredSetSpecs.new(oai_base_url: oai_endpoint).set_specs
    end

  puts "Indexing Sets: '#{set_specs.join(', ')}'"

  etl_config = {
    solr_config: { url: solr_url },
    oai_endpoint: oai_endpoint,
    cdm_endpoint: cdm_endpoint,
    batch_size: args.fetch(:batch_size, 5),
    # max_compounds is a declared task argument, so this default can be
    # overridden from the command line.
    max_compounds: args.fetch(:max_compounds, 10)
  }

  CDMDEXER::ETLBySetSpecs.new(set_specs: set_specs, etl_config: etl_config).run!
end

# Enqueues one CDMDEXER::TransformWorker job via perform_async for a single
# [collection, id] pair.
#
# Rake arguments (positional, all required):
#   collection, id - combined into the one-element list of identifier pairs
#   solr_url       - passed as the Solr config's url
#   cdm_endpoint
#   oai_endpoint
#
# Raises KeyError when any argument is missing.
#
# The Solr config uses a String key so that the job arguments contain only
# JSON-native types.
# NOTE(review): this assumes the job backend serializes arguments to JSON
# (perform_async looks like Sidekiq) - confirm.
desc 'Launch a background job to index a single record.'
task :record, %i[
  collection
  id
  solr_url
  cdm_endpoint
  oai_endpoint
] do |_t, args|
  identifier = [args.fetch(:collection), args.fetch(:id)]

  CDMDEXER::TransformWorker.perform_async(
    [identifier],
    { 'url' => args.fetch(:solr_url) },
    args.fetch(:cdm_endpoint),
    args.fetch(:oai_endpoint)
  )
end

end