class FluentPluginS3Arrow::Schemas::AWSGlue
Public Class Methods
new(table_name, **options)
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 10 def initialize(table_name, **options) @table_name = table_name @database_name = options.delete(:database_name) || "default" @catalog_id = options.delete(:catalog_id) @client = Aws::Glue::Client.new(options) end
Public Instance Methods
to_arrow()
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 17 def to_arrow glue_schema = fetch_glue_schema convert_to_arrow_schema(glue_schema) end
Private Instance Methods
convert_to_arrow_field_description(glue_field)
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 39 def convert_to_arrow_field_description(glue_field) arrow_field = {name: glue_field.name} case glue_field.type when "boolean", "float", "double" arrow_field[:type] = glue_field.type when "tinyint" arrow_field[:type] = "int8" when "smallint" arrow_field[:type] = "int16" when "int" arrow_field[:type] = "int32" when "bigint" arrow_field[:type] = "int64" when /\Achar/,/\Avarchar/,"string" arrow_field[:type] = "string" when "binary" arrow_field[:type] = "binary" when "date" arrow_field[:type] = "date32" when /\Aarray/ arrow_field[:type] = "list" arrow_field[:field] = parse_array(glue_field.type) when /\Astruct/ arrow_field[:type] = "struct" arrow_field[:fields] = parse_struct(glue_field.type) else # TODO: Need support for MAP, DECIMAL, TIMESTAMP type. raise ConvertError, "Input type is not supported: #{glue_field.type}" end arrow_field end
convert_to_arrow_schema(glue_schema)
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 32 def convert_to_arrow_schema(glue_schema) arrow_schema_description = glue_schema.map do |glue_field| convert_to_arrow_field_description(glue_field) end Arrow::Schema.new(arrow_schema_description) end
each_struct_fields(str) { |name, type| ... }
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 87 def each_struct_fields(str) start, nest = 0, 0 name = "" str.each_char.with_index do |c, i| case c when ':' if nest == 0 name = str[start...i] start = i + 1 end when '<' nest += 1 when '>' nest -= 1 when ',' if nest == 0 type = str[start...i] yield(name, type) start = i + 1 end end end type = str[start..] yield(name, type) end
fetch_glue_schema()
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 23 def fetch_glue_schema glue_table = @client.get_table({ catalog_id: @catalog_id, database_name: @database_name, name: @table_name }) glue_table.table.storage_descriptor.columns end
parse_array(str)
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 71 def parse_array(str) matched = str.match(/\Aarray<(.*)>\z/) raise ConvertError, "Parse error on array type: #{str}" if matched.nil? convert_to_arrow_field_description(Field.new("", matched[1])) end
parse_struct(str)
click to toggle source
# File lib/fluent-plugin-s3-arrow/schemas/aws_glue.rb, line 77 def parse_struct(str) fields = [] matched = str.match(/\Astruct<(.*)>\z/) raise ConvertError, "Parse error on struct type: #{str}" if matched.nil? each_struct_fields(matched[1]) do |name, type| fields << convert_to_arrow_field_description(Field.new(name, type)) end fields end