module DataMetaByteSer::Py

(De)Serialization for Python

Constants

BOOLEAN_RW_METHODS

HDFS Reader and Writer for Booleans.

DTTM_RW_METHODS

HDFS Reader and Writer for the temporal type, DateTime.

ENUM_RW_METHODS

HDFS Reader and Writer for the enums.

FLOAT_RW_METHODS

HDFS Reader and Writer for floating point types.

INTEGRAL_RW_METHODS

HDFS Reader and Writer for the integral Python types.

MAP_RW_METHODS

Read/write methods for the DataMeta DOM Maps, incidentally all the same as those for the standard data types.

NOT_IMPLEMENTED_METHODS

Pseudo-implementers that just raise an error

NUMERIC_RW_METHODS

HDFS Reader and Writer for the variable-size Decimal data type.

PRIMITIVABLE_TYPES

Python has no primitivable types

RECORD_RW_METHODS

Read/write methods for DataMeta DOM records.

STD_RW_METHODS

Read/write methods for the standard data types; the sketch after this list shows the general shape of these read/write holders.

TEXT_RW_METHODS

HDFS Reader and Writer for textual Python types such as str.

URL_RW_METHODS

HDFS Reader and Writer for the URL type.
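
Each *_RW_METHODS constant maps a DataMeta type to a holder of two procs: a reader that renders the Python expression for reading the value from the input stream, and a writer that renders the statement for writing it out. The sketch below is illustrative only, assuming an RwHolder-style holder with r and w members (which is how the renderers are invoked in genWritable and getRwRenderer further down); the actual entries live in lib/dataMetaByteSer/python.rb.

# Hypothetical sketch of one entry in an RW-methods map; the key and the
# rendered Python snippets are illustrative, not the actual constant contents.
SKETCH_RW_METHODS = {
    text: RwHolder.new(
        lambda { |ctx| 'Text.readString(di)' },  # renders the Python read expression
        lambda { |ctx| "Text.writeString(do, val.#{DataMetaDom.getterName(ctx.fld)}())" }  # renders the Python write statement
    )
}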

Public Class Methods

aggrNotSupported(fld, forWhat)
# File lib/dataMetaByteSer/python.rb, line 35
def aggrNotSupported(fld, forWhat)
    raise ArgumentError, "Field #{fld.name}: aggregate types are not supported for #{forWhat} for Byte Array format"
end
aggrPyFull(aggr)

Full name of a Python aggregate for the given DataMeta DOM aggregate.

# File lib/dataMetaByteSer/python.rb, line 132
def aggrPyFull(aggr)
    case aggr
        when DataMetaDom::Field::LIST
            'List'
        when DataMetaDom::Field::SET
            'Set'
        when DataMetaDom::Field::DEQUE
            'Deque' # note this is different from Java
        else
            raise ArgumentError, "Aggregate type #{aggr} not supported for Python serialization"
    end
end
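
An illustrative use of the mapping (the deque case deliberately differs from the Java generator, as the comment above notes):

aggrPyFull(DataMetaDom::Field::LIST)   # => "List"
aggrPyFull(DataMetaDom::Field::DEQUE)  # => "Deque"
aggrPyFull(:bag)                       # raises ArgumentError (hypothetical unsupported aggregate kind)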
genWritable(model, wriOut, ioOut, record, pyPackage, baseName)

Generates one InOutable; Writables are currently not generated here.

# File lib/dataMetaByteSer/python.rb, line 252
        def genWritable(model, wriOut, ioOut, record, pyPackage, baseName)
            enumCount = model.enums.values.select{|e| e.kind_of?(DataMetaDom::Enum)}.size
            recImports = model.records.values.map{|r| # import all records
                p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
                %|from #{DataMetaXtra::Str.downCaseFirst(b)} import #{b}|
            }.join("\n")
#             ioImports = model.records.values.reject{|r| r.name == record.name}.map{|r| # import all InOutables except of this one
#                 p, b, pp = DataMetaDom::PojoLexer::assertNamespace(r.name)
#                 # since one InOutable may import another which may import another, and Python can't handle this,
#                 # catch the error. It's harmless because if it really failed to import, we'll know
#                 %|
# try:
#     from #{inOutablePy(DataMetaXtra::Str.downCaseFirst(b))} import #{inOutablePy(b)}
# except ImportError:
#     pass|
#             }.join("\n")
            ctx = RendCtx.new.init(model, record, pyPackage, baseName)
            fields = record.fields
            wriName = nil # writableClassName(baseName)
            ioName = inOutablePy(baseName)
            hasOptional = fields.values.map{|f|
#      !model.records[f.dataType.type] &&
                !f.isRequired
            }.reduce(:|) # true if there is at least one optional field which isn't a record
            keysInOrder = fields.each_key.map{|k| k.to_s}.sort.map{|k| k.to_sym}
            reads = ''
            writes = ''
            writeNullMaskHead = hasOptional ? "nullFlags = bitarray(#{fields.keys.size}); nullFlags.setall(False); fldIndex = -1" : ''
            readNullMaskHead = hasOptional ? 'nullFlags = DataMetaHadoopUtil.readBitArray(di); fldIndex = -1' : ''
            indent = "\n#{' ' * 8}"
            # sorting provides predictable read/write order
            keysInOrder.each { |k|
                f = fields[k]
                ctx.fld = f
                rwRenderer = getRwRenderer(ctx)
                reads <<  ( indent + (f.isRequired ? '' : "fldIndex += 1#{indent}") + "val.#{DataMetaDom.setterName(ctx.fld)}(" +
                        (f.isRequired ? '' : ' None if nullFlags[fldIndex] else ')+ "#{rwRenderer.r.call(ctx)})"
                )
                # noinspection RubyNestedTernaryOperatorsInspection
                writes << (indent + (f.isRequired ?
                        (PRIMITIVABLE_TYPES.member?(f.dataType.type) ? '' : ''):
#%Q<if(val.#{DataMetaDom::PojoLexer::getterName(ctx.fld)}() == null) throw noReqFld("#{f.name}"); >) :
                        "if(val.#{DataMetaDom.getterName(ctx.fld)}() is not None): ") + "#{rwRenderer.w.call(ctx)}")
                unless f.isRequired
                    writeNullMaskHead << (indent + "fldIndex += 1#{indent}if(val.#{DataMetaDom.getterName(ctx.fld)}() is None): nullFlags[fldIndex] = True")
                end
            }
            writeNullMaskHead << ( indent + 'DataMetaHadoopUtil.writeBitArray(do, nullFlags)') if hasOptional

            ioOut.puts <<IN_OUTABLE_CLASS

class #{ioName}(InOutable):

    def write(self, do, val):
        val.verify()
        #{writeNullMaskHead}
        #{writes}

    def readVal(self, di, val):
        #{readNullMaskHead}
        #{reads}
        return val

    def read(self, di):
        return self.readVal(di, #{baseName}())

IN_OUTABLE_CLASS
        end
genWritables(model, outRoot)

Generates all the writables for the given model. Parameters:

  • model - the model to generate Writables from.

  • outRoot - destination directory name.

# File lib/dataMetaByteSer/python.rb, line 327
        def genWritables(model, outRoot)
            firstRecord = model.records.values.first
            pyPackage, base, packagePath = DataMetaDom::PojoLexer::assertNamespace(firstRecord.name)
            # Next: replace dots with underscores. The path is also adjusted accordingly.
            #
            # Rationale for this, quoting PEP 8:
            #
            #    Package and Module Names
            #
            #    Modules should have short, all-lowercase names. Underscores can be used in the module name if it improves
            #    readability. Python packages should also have short, all-lowercase names, although the use of underscores
            #    is discouraged.
            #
            # Short, all-lowercase names are fine, but using underscores to improve readability when you have
            # a complex system and need long package names is "discouraged". Can't do that here: our system is
            # too complicated for strictly religious, "pythonic" Python.
            # A tool must be enabling, and in this case this irrational ruling gets in the way.
            # And dots are a no-no: Python can't resolve packages with complicated package structures and imports.
            #
            # Hence, we opt for long package names with underscores for distinctiveness and readability:
            pyPackage = pyPackage.gsub('.', '_')
            packagePath = packagePath.gsub('/', '_')
            destDir = File.join(outRoot, packagePath)
            FileUtils.mkdir_p destDir
            wriOut = nil # File.open(File.join(destDir, "#{writableClassName(base)}.py"), 'wb')
            serFile = File.join(destDir, 'serial.py')
            FileUtils.rm serFile if File.file?(serFile)
            ioOut = File.open(serFile, 'wb') # one huge serialization file
            ioOut.puts %|# This file is generated by DataMeta DOM. Do not edit manually!
#package #{pyPackage}

from hadoop.io import WritableUtils, InputStream, OutputStream, Text
from ebay_datameta_core.base import DateTime
from decimal import *
from collections import *
from bitarray import bitarray
from ebay_datameta_hadoop.base import *
from model import *

|
            begin
                model.records.values.each { |e|
                        _, base, _ = DataMetaDom::PojoLexer::assertNamespace(e.name)
                        case
                            when e.kind_of?(DataMetaDom::Record)
                                genWritable model, wriOut, ioOut, e, pyPackage, base
                            else
                                raise "Unsupported Entity: #{e.inspect}"
                        end
                }
            ensure
                begin
                    ioOut.close
                ensure
                    #wriOut.close
                end
            end
        end
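
A hedged usage sketch, assuming the DataMetaDom model API (Model.new and parse) and the require paths shown; the exact loading call may differ in your DataMetaDom version:

require 'dataMetaDom'
require 'dataMetaByteSer/python'

# Hypothetical driver: load a DataMeta DOM model and emit the Python
# serializers into ./pyOut/<package_path>/serial.py
model = DataMetaDom::Model.new
model.parse('example.dmDom')  # assumption: parse(fileName) loads the model source
DataMetaByteSer::Py.genWritables(model, 'pyOut')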
getRwRenderer(ctx)

Builds the Read/Write operation renderer for the given context.

# File lib/dataMetaByteSer/python.rb, line 229
def getRwRenderer(ctx)
    dt = ctx.fld.dataType
    ctx.refType = nil # reset to avoid misrendering primitives
    rwRenderer = STD_RW_METHODS[dt.type]
    return rwRenderer if rwRenderer
    refKey = dt.type
    ctx.refType = ctx.model.enums[refKey] || ctx.model.records[refKey]
    case
        when ctx.refType.kind_of?(DataMetaDom::Record)
            RECORD_RW_METHODS
        when ctx.refType.kind_of?(DataMetaDom::Enum)
            ENUM_RW_METHODS
        when ctx.refType.kind_of?(DataMetaDom::BitSet)
            NOT_IMPLEMENTED_METHODS
        when ctx.refType.kind_of?(DataMetaDom::Mapping)
            MAP_RW_METHODS[ctx.fType.type] || (raise ArgumentError, "No renderer found for the key type #{
            ctx.fType.type}, record #{ctx.rec}, field #{ctx.fld}")
        else
            raise "No renderer defined for field #{ctx.fld}"
    end
end
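
The returned holder is consumed by genWritable above: its r proc renders the Python read expression and its w proc renders the matching write statement for the current field. A minimal, hypothetical illustration:

# Illustrative only: :createdOn is a hypothetical field on the record held in ctx.
ctx.fld = ctx.rec.fields[:createdOn]
rwRenderer = getRwRenderer(ctx)
readExpr  = rwRenderer.r.call(ctx)  # Python expression that reads the value from di
writeStmt = rwRenderer.w.call(ctx)  # Python statement that writes the value to do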
inOutablePy(arg)

Builds a class name for an InOutable.

# File lib/dataMetaByteSer/python.rb, line 20
def inOutablePy(arg)
    klassName = case
                    when arg.kind_of?(String)
                        arg
                    else
                        _, s = DataMetaDom.splitNameSpace(arg.fType.type)
                        s
                end
    "#{klassName}_InOutable"
end
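
For a String argument the name is used as-is; otherwise the simple name is derived from the argument's fType. For example:

inOutablePy('Address')  # => "Address_InOutable"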
mapsNotSupported(fld)
# File lib/dataMetaByteSer/python.rb, line 31
def mapsNotSupported(fld)
    raise ArgumentError, "Field #{fld.name}: maps are not currently supported for Byte Array format"
end
writableClassName(baseName)

Builds a class name for a Writable.

# File lib/dataMetaByteSer/python.rb, line 16
def writableClassName(baseName); "#{baseName}_Writable" end
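
For example (note that, as genWritable states, Writables are currently not generated for Python, so this helper is effectively dormant):

writableClassName('Address')  # => "Address_Writable"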

Private Instance Methods

These are the same methods documented under Public Class Methods above; the module exposes them both ways, and their documentation and source are identical.