class DbSubsetter::Exporter

Manages exporting a subset of data

Constants

INSERT_BATCH_SIZE

The number of rows inserted into SQLite per batch; this size seems to be a reasonable balance of speed and memory usage.

SELECT_BATCH_SIZE

Attributes

database[R]
filter[RW]
max_filtered_rows[W]
output[R]
scramblers[R]
verbose[RW]
verbose?[RW]

Public Class Methods

new() click to toggle source
# File lib/db_subsetter/exporter.rb, line 46
# Sets up a new exporter: no scramblers, an empty page-count cache, a
# Database bound to this exporter, a fresh Filter, and verbose output on.
#
# Also switches $stdout to synchronous mode.
def initialize
  @scramblers = []
  @page_counts = {}
  @database = Database.new(self)
  @filter = Filter.new
  @verbose = true
  # A bare `$stdout.sync` only reads the flag; assign it so output is
  # actually written unbuffered.
  $stdout.sync = true
end

Public Instance Methods

add_scrambler(scrambler) click to toggle source
# File lib/db_subsetter/exporter.rb, line 34
# Appends +scrambler+ to the list of registered scramblers and returns
# that list.
def add_scrambler(scrambler)
  @scramblers.push(scrambler)
end
export(filename) click to toggle source
# File lib/db_subsetter/exporter.rb, line 16
# Exports the database's tables into a SQLite3 database opened at +filename+.
#
# When the database reports that it is not exportable, an ArgumentError is
# raised; if verbose, the issues for each table are written to STDERR first.
# Otherwise a +tables+ bookkeeping table is created in the output database
# and every exported table is asked to export itself.
def export(filename)
  if @database.exportable?
    puts "Exporting data...\n\n" if @verbose
    @output = SQLite3::Database.new(filename)
    @output.execute('CREATE TABLE tables (name TEXT, records_exported INTEGER, columns TEXT)')
    @database.exported_tables.each { |exported| exported.export }
  else
    if verbose?
      STDERR.puts "\nExportability issues:\n"
      @database.exportability_issues.each do |table_name, problems|
        STDERR.puts table_name
        problems.each do |problem|
          STDERR.puts "\t#{problem}"
        end
      end
    end
    raise ArgumentError, 'Database is not exportable as filtered!'
  end
end
ignore_tables(ignored) click to toggle source
# File lib/db_subsetter/exporter.rb, line 38
# Sends 'ignore!' to every table selected by +ignored+.
#
# +ignored+ is handed straight to limit_tables, which decides how to
# interpret it.
def ignore_tables(ignored)
  operation = 'ignore!'
  limit_tables(operation, ignored)
end
max_filtered_rows() click to toggle source
# File lib/db_subsetter/exporter.rb, line 55
# Returns the configured maximum number of filtered rows, falling back to
# 2000 when none has been set.
def max_filtered_rows
  return @max_filtered_rows if @max_filtered_rows

  2000
end
sanitize_row(table_name, row) click to toggle source

FIXME: revisit this API; passing a table name back seems wrong.

# File lib/db_subsetter/exporter.rb, line 60
# Runs +row+ through TypeHelper.cleanup_types and then through the
# registered scramblers for +table_name+, returning the resulting row.
def sanitize_row(table_name, row)
  scramble_row(table_name, TypeHelper.cleanup_types(row))
end
subset_full_tables(full_tables) click to toggle source
# File lib/db_subsetter/exporter.rb, line 42
# Sends 'subset_in_full!' to every table selected by +full_tables+.
#
# +full_tables+ is handed straight to limit_tables, which decides how to
# interpret it.
def subset_full_tables(full_tables)
  operation = 'subset_in_full!'
  limit_tables(operation, full_tables)
end

Private Instance Methods

limit_tables(operation, apply_to) click to toggle source
# File lib/db_subsetter/exporter.rb, line 74
# Sends +operation+ to the tables selected by +apply_to+.
#
# +apply_to+ may be:
# * an Array  - each element is looked up with find_table
# * a Symbol or String - looked up with find_table
# * a Regexp  - every table whose name matches receives the operation
#
# Any other type raises ArgumentError.
def limit_tables(operation, apply_to)
  case apply_to
  when Array
    apply_to.each { |table_name| @database.find_table(table_name).send(operation) }
  when Symbol, String
    @database.find_table(apply_to).send(operation)
  when Regexp
    @database.tables.each do |candidate|
      next unless candidate.name =~ apply_to

      candidate.send(operation)
    end
  else
    raise ArgumentError, "Don't know how to #{operation} a #{apply_to.class}"
  end
end
scramble_row(table_name, row) click to toggle source
# File lib/db_subsetter/exporter.rb, line 67
# Passes +row+ through each registered scrambler in order, feeding each
# scrambler the previous one's result, and returns the final row.
# With no scramblers the row is returned as given.
def scramble_row(table_name, row)
  scramblers.inject(row) do |current_row, scrambler|
    scrambler.scramble(table_name, current_row)
  end
end