class Embulk::Parser::QueryString
Public Class Methods
parse(line, options = {})
click to toggle source
# File lib/embulk/parser/query_string.rb, line 67
# Parses one input line into a Hash of query-string keys and values.
#
# Steps, in order: optional capture extraction, empty-line short-circuit,
# optional whitespace stripping, optional quote stripping, URI parsing.
#
# @param line [String] raw input line; it is never modified.
# @param options [Hash] recognised keys:
#   :capture          - pattern whose first capture group selects the part
#                       of the line to parse
#   :strip_whitespace - when truthy, strip leading/trailing whitespace
#   :strip_quote      - when truthy, drop one optional leading and one
#                       optional trailing quote character (' or ")
# @return [Hash, nil] the parsed values; nil when the line is empty, when
#   the capture yields nothing, when the query fails valid_query_string?,
#   or when parsing raises ArgumentError (a warning is logged in that case).
def self.parse(line, options = {})
  if options[:capture]
    # A failed match (or an unset first group) is handled like an empty line.
    # TODO: detect incorrect regexp given
    line = line.match(options[:capture]).to_a[1] || ""
  end
  return if line == ""

  # Non-destructive strip: `strip!` would alter the caller's string and
  # raise on a frozen one.
  line = line.strip if options[:strip_whitespace]
  line = line[/\A(?:["'])?(.*?)(?:["'])?\z/, 1] if options[:strip_quote]

  begin
    # Prefix with "?" so the whole line is treated as the query component.
    uri = Addressable::URI.parse("?#{line}")
    valid_query_string?(uri.query) ? uri.query_values(Hash) : nil
  rescue ArgumentError
    Embulk.logger.warn "Failed parse: #{line}"
    nil
  end
end
transaction(config) { |task, columns| ... }
click to toggle source
# File lib/embulk/parser/query_string.rb, line 9
# Builds the task hash and the output column list from the plugin
# configuration, then yields both to the given block.
#
# Task keys: "decoder" (dumped line-decoder task), "strip_quote" and
# "strip_whitespace" (both default to true), "capture" (defaults to nil).
# Columns come from the "columns" array (default: empty); each entry
# supplies "name" and "type".
#
# @param config the plugin configuration object
# @yield [task, columns] the task Hash and the Array of Column objects
# @return whatever the block returns
def self.transaction(config, &control)
  decoder_config = config.load_config(Java::LineDecoder::DecoderTask)

  task = { "decoder" => DataSource.from_java(decoder_config.dump) }
  task["strip_quote"] = config.param("strip_quote", :bool, default: true)
  task["strip_whitespace"] = config.param("strip_whitespace", :bool, default: true)
  task["capture"] = config.param("capture", :string, default: nil)

  columns = config.param("columns", :array, default: []).map do |definition|
    column_name = definition["name"]
    column_type = definition["type"].to_sym
    # The column index is passed as nil, exactly as before.
    Column.new(nil, column_name, column_type)
  end

  yield(task, columns)
end
valid_query_string?(qs)
click to toggle source
# File lib/embulk/parser/query_string.rb, line 53
# Checks that a query string holds only printable ASCII without whitespace.
#
# A warning is logged for the first problem found: whitespace is checked
# first, then any character outside the range 0x20-0x7e.
#
# @param qs [String] the query string to inspect
# @return [Boolean] true when the string is acceptable, false otherwise
def self.valid_query_string?(qs)
  warning =
    if qs.match(/[\s]/)
      "'#{qs}' contains unescaped space"
    elsif qs.match(/[^\x20-\x7e]/)
      "'#{qs}' contains non-ascii character (maybe unescaped)"
    end
  return true if warning.nil?

  Embulk.logger.warn warning
  false
end
Public Instance Methods
init()
click to toggle source
# File lib/embulk/parser/query_string.rb, line 31
# Copies the parsing options out of the task into @options and loads the
# line-decoder task into @decoder.
def init
  strip_quote = task["strip_quote"]
  strip_whitespace = task["strip_whitespace"]
  capture = task["capture"]
  @options = {
    strip_quote: strip_quote,
    strip_whitespace: strip_whitespace,
    capture: capture,
  }

  decoder_source = task.param("decoder", :hash)
  @decoder = decoder_source.load_task(Java::LineDecoder::DecoderTask)
end
run(file_input)
click to toggle source
# File lib/embulk/parser/query_string.rb, line 41
# Reads every line of every file in the input, hands each line to
# process_line, and finishes the page builder once the input is exhausted.
#
# @param file_input the file input handed to the parser
def run(file_input)
  line_decoder = Java::LineDecoder.new(file_input.to_java, @decoder)

  # Outer loop advances file by file; inner loop drains the current file
  # until poll returns a falsy value.
  while line_decoder.nextFile
    while (text = line_decoder.poll)
      process_line(text)
    end
  end

  page_builder.finish
end
Private Instance Methods
process_line(line)
click to toggle source
# File lib/embulk/parser/query_string.rb, line 95
# Parses one line and appends it to the page builder as a single row whose
# values follow the schema's column order.
#
# Lines that parse to nil are skipped. A key that is missing from the
# record, or whose value is empty, yields nil for that column.
#
# @param line [String] one decoded input line
# @raise [ConfigError] when a value cannot be cast to its column type
def process_line(line)
  record = self.class.parse(line, @options)
  return unless record

  # NOTE: this conversion is needless after Embulk 0.6.13
  values = schema.map do |column|
    value = record[column.name]
    (value.nil? || value.empty?) ? nil : cast_value(column, value)
  end

  page_builder.add(values)
end

# Converts one raw query value into the Ruby value for the column's type.
#
# @param column the schema column (its name and type are read)
# @param value the non-empty raw value taken from the parsed record
# @return [Integer, Time, Boolean, String, nil] nil for whitespace-only
#   :long and :timestamp values
# @raise [ConfigError] when the conversion fails
def cast_value(column, value)
  case column.type
  when :long
    # Base 10 is forced: without it Integer() honours radix prefixes, so
    # "010" is read as octal 8 and zero-padded input such as "08" is
    # rejected as invalid octal. Prefixed forms like "0x1A" are now
    # rejected instead of being read as hexadecimal.
    value.strip.empty? ? nil : Integer(value, 10)
  when :timestamp
    value.strip.empty? ? nil : Time.parse(value)
  when :boolean
    truthy_value?(value)
  else
    value.to_s
  end
rescue StandardError
  raise ConfigError, "Cast failed '#{value}' as '#{column.type}' (key is '#{column.name}')"
end
truthy_value?(str)
click to toggle source
# File lib/embulk/parser/query_string.rb, line 126
# Tells whether +str+ is one of the spellings accepted as boolean true.
#
# @param str the raw value to test
# @return [Boolean] true only for an exact match with one of the listed
#   spellings
def truthy_value?(str)
  # Same as Embulk csv parser
  # https://github.com/embulk/embulk/blob/v0.8.9/embulk-standards/src/main/java/org/embulk/standards/CsvParserPlugin.java#L35-L41
  case str
  when "true", "True", "TRUE",
       "yes", "Yes", "YES",
       "t", "T", "y", "Y",
       "on", "On", "ON",
       "1"
    true
  else
    false
  end
end