class Embulk::Guess::QueryString

$ embulk guess -g "query_string" partial-config.yml

Public Instance Methods

guess_lines(config, sample_lines)
# File lib/embulk/guess/query_string.rb, line 10
# Guesses a "query_string" parser configuration from sample input lines.
#
# Each sample line is parsed with Parser::QueryString.parse using the
# "strip_quote", "strip_whitespace" and "capture" parser options. The sorted
# union of keys across all parsed records becomes the column list, and
# Guess::SchemaGuess.from_array_records supplies each column's type and
# (when present) format.
#
# NOTE(review): the guard below tolerates a config without a "parser" key,
# but the later config.param("parser", :hash) call passes no default --
# confirm how that call behaves when "parser" is absent.
#
# @param config the guess configuration; must respond to +fetch+ and +param+
# @param sample_lines an enumerable of sample input lines
# @return [Hash] +{}+ when the configured parser type is something other than
#   "query_string"; otherwise +{"parser" => {type:, columns:}}+
def guess_lines(config, sample_lines)
  # An absent parser section or an absent type is treated as "query_string".
  parser_type = config.fetch("parser", {}).fetch("type", "query_string")
  return {} if parser_type != "query_string"

  parser_config = config.param("parser", :hash)
  strip_quote = parser_config.param("strip_quote", :bool, default: true)
  strip_whitespace = parser_config.param("strip_whitespace", :bool, default: true)
  capture = parser_config.param("capture", :string, default: nil)
  options = {strip_quote: strip_quote, strip_whitespace: strip_whitespace, capture: capture}

  # A line for which the parser returns nil/false contributes an empty record.
  records = sample_lines.map do |line|
    parsed = Parser::QueryString.parse(line, options)
    parsed || {}
  end

  key_lists = records.map(&:keys)
  column_names = key_lists.flatten.uniq.sort

  # One row per record, aligned to column_names.
  samples = records.map do |record|
    column_names.map { |column_name| record[column_name] }
  end

  guessed_columns = Guess::SchemaGuess.from_array_records(column_names, samples)
  columns = guessed_columns.map do |guessed_column|
    entry = {name: guessed_column.name, type: guessed_column.type}
    # Only emit :format when the guessed column carries one.
    entry[:format] = guessed_column.format if guessed_column.format
    entry
  end

  {"parser" => {type: "query_string", columns: columns}}
end