module DataMetaByteSer

Generation of serialization artifacts such as Hadoop Writables, etc.

TODO this isn't a bad way, but better to use templating next time, such as ERB.

For command line details either check the new method's source or the README.rdoc file, the usage section.

Constants

BITSET_RW_METHODS

HDFS Reader and Writer for the BitSet.

BOOL_RW_METHODS

HDFS Reader and Writer for boolean Java type.

DTTM_RW_METHODS

HDFS Reader and Writer for the temporal type, the DateTime

ENUM_RW_METHODS

HDFS Reader and Writer for the Java Enums.

FLOAT_RW_METHODS

HDFS Reader and Writer for floating point Java types such as Float or Double.

INTEGRAL_RW_METHODS

HDFS Reader and Writer for integral Java types such as Integer or Long.

MAP_RW_METHODS

Read/write methods for the DataMeta DOM Maps, accidentally all the same as for the standard data types.

NUMERIC_RW_METHODS

HDFS Reader and Writer for the variable size Decimal data type.

RAW_RW_METHODS

HDFS Reader and Writer for the raw data type, the byte array.

RECORD_RW_METHODS

DataMeta DOM object renderer

STD_RW_METHODS

Read/write methods for the standard data types.

TEXT_RW_METHODS

HDFS Reader and Writer for textual Java types such as String.

URL_RW_METHODS

HDFS Reader and Writer for the URL.

VERSION

Current version

Public Class Methods

aggrBaseName(aggr) click to toggle source

Transforms the given full Java name for the aggregate class into base name to interpolate into methods

# File lib/dataMetaByteSer.rb, line 209
# Strips the package qualification off a full Java aggregate class name,
# e.g. "java.util.ArrayList" -> "ArrayList". Raises NoMethodError on input
# that is not a dotted identifier (no match yields nil).
def aggrBaseName(aggr)
    /^(?:\w+\.)+(?<base>\w+)$/.match(aggr)[:base]
end
aggrJavaFull(aggr) click to toggle source

Transforms the given DataMeta DOM aggregate type to full pathed Java class name

# File lib/dataMetaByteSer.rb, line 204
# Resolves a DataMeta DOM aggregate type to its fully qualified Java class
# name via the PojoLexer lookup table; unknown types raise ArgumentError.
def aggrJavaFull(aggr)
    klass = PojoLexer::AGGR_CLASSES[aggr]
    raise ArgumentError, "No Aggregate classes for type #{aggr}" unless klass
    klass
end
aggrNotSupported(fld, forWhat) click to toggle source
# File lib/dataMetaByteSer/util.rb, line 133
# Uniform failure for aggregate-typed fields in a context (forWhat) that
# cannot serialize them on the Hadoop layer. Always raises ArgumentError.
def aggrNotSupported(fld, forWhat)
    message = "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
    raise ArgumentError, message
end
genWritable(model, wriOut, ioOut, record, javaPackage, baseName) click to toggle source

generates writable via delegation

# File lib/dataMetaByteSer.rb, line 244
  # Generates the InOutable and the Writable Java sources for one DataMeta record.
  # Params:
  # * +model+ - the DataMeta DOM model the record belongs to.
  # * +wriOut+ - open IO to emit the Writable class source to.
  # * +ioOut+ - open IO to emit the InOutable class source to.
  # * +record+ - the DataMeta DOM record to render.
  # * +javaPackage+ - target Java package name.
  # * +baseName+ - unqualified Java class base name for the record.
  def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
    ctx = RendCtx.new.init(model, record, javaPackage, baseName)
    fields = record.fields
    wriName = writableClassName(baseName)
    ioName = inOutableClassName(baseName)
    # scan for imports needed
    hasOptional = fields.values.map{|f|
#      !model.records[f.dataType.type] &&
              !f.isRequired
    }.reduce(:|) # true if there is at least one optional field which isn't a record
    #fields.values.each { |f|
    #      ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
    #}

    # field keys (names) in the order of reading/writing to the in/out record
    keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
    reads = ''
    writes = ''
    # Optional fields are tracked via a BitSet null mask serialized ahead of the data;
    # these headers are empty strings when every field is required.
    writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
    readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
    indent = "\n#{' ' * 8}"
    # sorting provides predictable read/write order
    keysInOrder.each { |k|
      f = fields[k]
      ctx.fld = f
      rwRenderer = getRwRenderer(ctx)
#      unless ctx.refType.kind_of?(DataMetaDom::Record)
        # Read: optional fields advance the null-mask index and read null when flagged.
        reads <<  (
            indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
            "#{rwRenderer.r.call(ctx)});"
        )
# rendering of noReqFld - using the Veryfiable interface instead
#=begin
        # Write: non-primitive optional fields are guarded by a null check.
        writes << (indent + (f.isRequired ?
                (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
#%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
        unless f.isRequired
          writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
        end
#=end
#      end
    }
    writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
    # Emit the InOutable: a stateless singleton that reads/writes one record
    # to/from DataInput/DataOutput.
    ioOut.puts <<IN_OUTABLE_CLASS
package #{javaPackage};
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.InOutable;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {

    private static final #{ioName} INSTANCE = new #{ioName}();
    public static #{ioName} getInstance() { return INSTANCE; }
    private #{ioName}() {}

    @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
        val.verify();
        #{writeNullMaskHead}
#{writes}
    }

    @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
        #{readNullMaskHead}
#{reads}
        return val;
    }
    @Override public #{baseName} read(final DataInput in) throws IOException {
        return read(in, new #{baseName}());
    }
}
IN_OUTABLE_CLASS
      # Emit the Writable: Hadoop adapter delegating serialization to the InOutable singleton.
      wriOut.puts <<WRITABLE_CLASS
package #{javaPackage};
import org.apache.hadoop.io.Writable;
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.HdfsReadWrite;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {

    public #{wriName}(final #{baseName} value) {
        super(value);
    }

    public #{wriName}() {
        super(new #{baseName}()); // the value must be on the instance at all times,
// for example, when used with hadoop fs -text, this class will be used with default constructor
    }

    @Override public void write(final DataOutput out) throws IOException {
        #{ioName}.getInstance().write(out, getVal());
    }

    @Override public void readFields(final DataInput in) throws IOException {
        #{ioName}.getInstance().read(in, getVal());
    }
}
WRITABLE_CLASS

      ########assertValue();
  end
genWritables(model, outRoot) click to toggle source

Generates all the writables for the given model. Parameters:

  • model - the model to generate Writables from.

  • outRoot - destination directory name.

# File lib/dataMetaByteSer.rb, line 358
# Generates all the Writables and InOutables for the given model. Parameters:
# * +model+ - the model to generate Writables from.
# * +outRoot+ - destination directory name; package subdirectories are created as needed.
# Raises a RuntimeError for entities that are not DataMetaDom::Record.
def genWritables(model, outRoot)
  model.records.values.each { |e|
    javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
    destDir = File.join(outRoot, packagePath)
    FileUtils.mkdir_p destDir
    # Block form of File.open guarantees both handles are closed even when
    # generation raises; the previous sequential opens leaked the first
    # handle if the second open failed.
    File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb') { |wriOut|
      File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb') { |ioOut|
        case
          when e.kind_of?(DataMetaDom::Record)
            genWritable model, wriOut, ioOut, e, javaPackage, base
          else
            raise "Unsupported Entity: #{e.inspect}"
        end
      }
    }
  }
end
getRwRenderer(ctx) click to toggle source

Build the Read/Write operation renderer for the given context:

# File lib/dataMetaByteSer.rb, line 218
# Builds the Read/Write operation renderer for the given context: standard
# types resolve directly from STD_RW_METHODS; otherwise the field's type is
# looked up among the model's enums and records and dispatched by kind.
def getRwRenderer(ctx)
    dt = ctx.fld.dataType
    ctx.refType = nil # reset to avoid misrendering primitives
    stdRenderer = STD_RW_METHODS[dt.type]
    return stdRenderer if stdRenderer
    refKey = dt.type
    ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
    # `when Class` matches via Class#===, i.e. kind_of? on the subject
    case ctx.refType
        when DataMetaDom::Record then RECORD_RW_METHODS
        when DataMetaDom::Enum then ENUM_RW_METHODS
        when DataMetaDom::BitSet then BITSET_RW_METHODS
        when DataMetaDom::Mapping
            MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
            ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
        else
            raise "No renderer defined for field #{ctx.fld}"
    end
end
helpDataMetaBytesSerGen(file, errorText=nil) click to toggle source

Shortcut to help for the Hadoop Writables generator.

# File lib/dataMetaByteSer.rb, line 383
# Shortcut to help for the Hadoop Writables generator: delegates to the
# generic DataMetaDom help with this generator's title and usage line.
def helpDataMetaBytesSerGen(file, errorText=nil)
    title = 'DataMeta Serialization to/from Bytes'
    usage = '<DataMeta DOM source> <Target Directory>'
    DataMetaDom::help(file, title, usage, errorText)
end
inOutableClassName(baseName) click to toggle source

Builds a class name for a InOutable.

# File lib/dataMetaByteSer/util.rb, line 127
# Builds the InOutable class name for the given record base name.
def inOutableClassName(baseName)
    baseName + '_InOutable'
end
mapsNotSupported(fld) click to toggle source
# File lib/dataMetaByteSer/util.rb, line 129
# Consistent failure for map-typed fields: maps are not serializable on the
# Hadoop layer yet. Always raises ArgumentError naming the offending field.
def mapsNotSupported(fld)
    msg = "Field #{fld.name}: maps are not currently supported on Hadoop layer"
    raise ArgumentError, msg
end
writableClassName(baseName) click to toggle source

Builds a class name for a Writable.

# File lib/dataMetaByteSer/util.rb, line 122
# Builds the Writable class name for the given record base name.
def writableClassName(baseName)
    baseName + '_Writable'
end

Public Instance Methods

tmpVar(name) click to toggle source

Temporary/scratch var – avoiding collisions at all costs

# File lib/dataMetaByteSer.rb, line 241
# Scratch variable name, decorated with triple underscores on both sides to
# avoid collisions with generated field names.
def tmpVar(name)
    "___#{name}___"
end

Private Instance Methods

aggrBaseName(aggr) click to toggle source

Transforms the given full Java name for the aggregate class into base name to interpolate into methods

# File lib/dataMetaByteSer.rb, line 209
# Strips the package qualification off a full Java aggregate class name,
# e.g. "java.util.ArrayList" -> "ArrayList". Raises NoMethodError on input
# that is not a dotted identifier (no match yields nil).
def aggrBaseName(aggr)
    /^(?:\w+\.)+(?<base>\w+)$/.match(aggr)[:base]
end
aggrJavaFull(aggr) click to toggle source

Transforms the given DataMeta DOM aggregate type to full pathed Java class name

# File lib/dataMetaByteSer.rb, line 204
# Resolves a DataMeta DOM aggregate type to its fully qualified Java class
# name via the PojoLexer lookup table; unknown types raise ArgumentError.
def aggrJavaFull(aggr)
    klass = PojoLexer::AGGR_CLASSES[aggr]
    raise ArgumentError, "No Aggregate classes for type #{aggr}" unless klass
    klass
end
aggrNotSupported(fld, forWhat) click to toggle source
# File lib/dataMetaByteSer/util.rb, line 133
# Uniform failure for aggregate-typed fields in a context (forWhat) that
# cannot serialize them on the Hadoop layer. Always raises ArgumentError.
def aggrNotSupported(fld, forWhat)
    message = "Field #{fld.name}: aggregate types are not supported for #{forWhat} on Hadoop layer"
    raise ArgumentError, message
end
genWritable(model, wriOut, ioOut, record, javaPackage, baseName) click to toggle source

generates writable via delegation

# File lib/dataMetaByteSer.rb, line 244
  # Generates the InOutable and the Writable Java sources for one DataMeta record.
  # Params:
  # * +model+ - the DataMeta DOM model the record belongs to.
  # * +wriOut+ - open IO to emit the Writable class source to.
  # * +ioOut+ - open IO to emit the InOutable class source to.
  # * +record+ - the DataMeta DOM record to render.
  # * +javaPackage+ - target Java package name.
  # * +baseName+ - unqualified Java class base name for the record.
  def genWritable(model, wriOut, ioOut, record, javaPackage, baseName)
    ctx = RendCtx.new.init(model, record, javaPackage, baseName)
    fields = record.fields
    wriName = writableClassName(baseName)
    ioName = inOutableClassName(baseName)
    # scan for imports needed
    hasOptional = fields.values.map{|f|
#      !model.records[f.dataType.type] &&
              !f.isRequired
    }.reduce(:|) # true if there is at least one optional field which isn't a record
    #fields.values.each { |f|
    #      ctx << DataMetaDom::PojoLexer::JAVA_IMPORTS[f.dataType.type]
    #}

    # field keys (names) in the order of reading/writing to the in/out record
    keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
    reads = ''
    writes = ''
    # Optional fields are tracked via a BitSet null mask serialized ahead of the data;
    # these headers are empty strings when every field is required.
    writeNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(); int fldIndex = -1;' : ''
    readNullMaskHead = hasOptional ? 'final BitSet nullFlags = new BitSet(readLongArray(in), false); int fldIndex = -1;' : ''
    indent = "\n#{' ' * 8}"
    # sorting provides predictable read/write order
    keysInOrder.each { |k|
      f = fields[k]
      ctx.fld = f
      rwRenderer = getRwRenderer(ctx)
#      unless ctx.refType.kind_of?(DataMetaDom::Record)
        # Read: optional fields advance the null-mask index and read null when flagged.
        reads <<  (
            indent + (f.isRequired ? '' : 'fldIndex++;') + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                (f.isRequired ? '' : 'nullFlags.get(fldIndex) ? null : ')+
            "#{rwRenderer.r.call(ctx)});"
        )
# rendering of noReqFld - using the Veryfiable interface instead
#=begin
        # Write: non-primitive optional fields are guarded by a null check.
        writes << (indent + (f.isRequired ?
                (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
#%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                "if(val.#{DataMetaDom.getterName(ctx.fld)}() != null) ") + "#{rwRenderer.w.call(ctx)};")
        unless f.isRequired
          writeNullMaskHead << (indent + "fldIndex++; if(val.#{DataMetaDom.getterName(ctx.fld)}() == null) nullFlags.set(fldIndex);")
        end
#=end
#      end
    }
    writeNullMaskHead << ( indent + 'writeBitSet(out, nullFlags);') if hasOptional
    # Emit the InOutable: a stateless singleton that reads/writes one record
    # to/from DataInput/DataOutput.
    ioOut.puts <<IN_OUTABLE_CLASS
package #{javaPackage};
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.InOutable;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{ioName} extends InOutable<#{baseName}> {

    private static final #{ioName} INSTANCE = new #{ioName}();
    public static #{ioName} getInstance() { return INSTANCE; }
    private #{ioName}() {}

    @Override public void write(final DataOutput out, final #{baseName} val) throws IOException {
        val.verify();
        #{writeNullMaskHead}
#{writes}
    }

    @Override public #{baseName} read(final DataInput in, final #{baseName} val) throws IOException {
        #{readNullMaskHead}
#{reads}
        return val;
    }
    @Override public #{baseName} read(final DataInput in) throws IOException {
        return read(in, new #{baseName}());
    }
}
IN_OUTABLE_CLASS
      # Emit the Writable: Hadoop adapter delegating serialization to the InOutable singleton.
      wriOut.puts <<WRITABLE_CLASS
package #{javaPackage};
import org.apache.hadoop.io.Writable;
import org.ebay.datameta.dom.*;
import java.io.*;
import static org.ebay.datameta.ser.bytes.DataMetaHadoopUtil.*;
import static org.apache.hadoop.io.WritableUtils.*;
import org.ebay.datameta.ser.bytes.HdfsReadWrite;
#{ctx.importsText}
#{DataMetaDom::PojoLexer.classJavaDoc({})}public class #{wriName} extends HdfsReadWrite<#{baseName}> {

    public #{wriName}(final #{baseName} value) {
        super(value);
    }

    public #{wriName}() {
        super(new #{baseName}()); // the value must be on the instance at all times,
// for example, when used with hadoop fs -text, this class will be used with default constructor
    }

    @Override public void write(final DataOutput out) throws IOException {
        #{ioName}.getInstance().write(out, getVal());
    }

    @Override public void readFields(final DataInput in) throws IOException {
        #{ioName}.getInstance().read(in, getVal());
    }
}
WRITABLE_CLASS

      ########assertValue();
  end
genWritables(model, outRoot) click to toggle source

Generates all the writables for the given model. Parameters:

  • model - the model to generate Writables from.

  • outRoot - destination directory name.

# File lib/dataMetaByteSer.rb, line 358
# Generates all the Writables and InOutables for the given model. Parameters:
# * +model+ - the model to generate Writables from.
# * +outRoot+ - destination directory name; package subdirectories are created as needed.
# Raises a RuntimeError for entities that are not DataMetaDom::Record.
def genWritables(model, outRoot)
  model.records.values.each { |e|
    javaPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(e.name)
    destDir = File.join(outRoot, packagePath)
    FileUtils.mkdir_p destDir
    # Block form of File.open guarantees both handles are closed even when
    # generation raises; the previous sequential opens leaked the first
    # handle if the second open failed.
    File.open(File.join(destDir, "#{writableClassName(base)}.java"), 'wb') { |wriOut|
      File.open(File.join(destDir, "#{inOutableClassName(base)}.java"), 'wb') { |ioOut|
        case
          when e.kind_of?(DataMetaDom::Record)
            genWritable model, wriOut, ioOut, e, javaPackage, base
          else
            raise "Unsupported Entity: #{e.inspect}"
        end
      }
    }
  }
end
getRwRenderer(ctx) click to toggle source

Build the Read/Write operation renderer for the given context:

# File lib/dataMetaByteSer.rb, line 218
# Builds the Read/Write operation renderer for the given context: standard
# types resolve directly from STD_RW_METHODS; otherwise the field's type is
# looked up among the model's enums and records and dispatched by kind.
def getRwRenderer(ctx)
    dt = ctx.fld.dataType
    ctx.refType = nil # reset to avoid misrendering primitives
    stdRenderer = STD_RW_METHODS[dt.type]
    return stdRenderer if stdRenderer
    refKey = dt.type
    ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
    # `when Class` matches via Class#===, i.e. kind_of? on the subject
    case ctx.refType
        when DataMetaDom::Record then RECORD_RW_METHODS
        when DataMetaDom::Enum then ENUM_RW_METHODS
        when DataMetaDom::BitSet then BITSET_RW_METHODS
        when DataMetaDom::Mapping
            MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
            ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
        else
            raise "No renderer defined for field #{ctx.fld}"
    end
end
helpDataMetaBytesSerGen(file, errorText=nil) click to toggle source

Shortcut to help for the Hadoop Writables generator.

# File lib/dataMetaByteSer.rb, line 383
# Shortcut to help for the Hadoop Writables generator: delegates to the
# generic DataMetaDom help with this generator's title and usage line.
def helpDataMetaBytesSerGen(file, errorText=nil)
    title = 'DataMeta Serialization to/from Bytes'
    usage = '<DataMeta DOM source> <Target Directory>'
    DataMetaDom::help(file, title, usage, errorText)
end
inOutableClassName(baseName) click to toggle source

Builds a class name for a InOutable.

# File lib/dataMetaByteSer/util.rb, line 127
# Builds the InOutable class name for the given record base name.
def inOutableClassName(baseName)
    baseName + '_InOutable'
end
mapsNotSupported(fld) click to toggle source
# File lib/dataMetaByteSer/util.rb, line 129
# Consistent failure for map-typed fields: maps are not serializable on the
# Hadoop layer yet. Always raises ArgumentError naming the offending field.
def mapsNotSupported(fld)
    msg = "Field #{fld.name}: maps are not currently supported on Hadoop layer"
    raise ArgumentError, msg
end
writableClassName(baseName) click to toggle source

Builds a class name for a Writable.

# File lib/dataMetaByteSer/util.rb, line 122
# Builds the Writable class name for the given record base name.
def writableClassName(baseName)
    baseName + '_Writable'
end