import org.jruby.Ruby; import org.jruby.RubyClass; import org.jruby.RubyObject; import org.jruby.RubyString; import org.jruby.RubyFixnum; import org.jruby.anno.JRubyClass; import org.jruby.anno.JRubyMethod; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject;
/** Murmur hash 2.0.
* * The murmur hash is a relative fast hash function from * http://murmurhash.googlepages.com/ for platforms with efficient * multiplication. * * http://d3s.mff.cuni.cz/~holub/sw/javamurmurhash/ * */
@JRubyClass(name=“Spark::Digest::Murmur2”) public class Murmur2 extends RubyObject {
public Murmur2(final Ruby ruby, RubyClass rubyClass) { super(ruby, rubyClass); } @JRubyMethod(required=1, optional=1, module=true) public static IRubyObject digest(ThreadContext context, IRubyObject self, IRubyObject[] args) { Ruby ruby = context.getRuntime(); RubyString keyString = (RubyString)args[0]; long seed; if(args.length > 1){ RubyFixnum rb_seed = (RubyFixnum)args[1]; seed = rb_seed.getLongValue(); } else{ seed = 0; } long hash = hash64(keyString.getBytes(), (int)keyString.length().getLongValue(), seed); RubyFixnum result = new RubyFixnum(ruby, hash); return result; } /** Generates 64 bit hash from byte array of the given length and seed. * * @param data byte array to hash * @param length length of the array to hash * @param seed initial seed value * @return 64 bit hash of the given array */ public static long hash64(final byte[] data, int length, long seed) { final long m = 0xc6a4a7935bd1e995L; final int r = 47; long h = (seed&0xffffffffl)^(length*m); int length8 = length/8; for (int i=0; i<length8; i++) { final int i8 = i*8; long k = ((long)data[i8+0]&0xff) +(((long)data[i8+1]&0xff)<<8) +(((long)data[i8+2]&0xff)<<16) +(((long)data[i8+3]&0xff)<<24) +(((long)data[i8+4]&0xff)<<32) +(((long)data[i8+5]&0xff)<<40) +(((long)data[i8+6]&0xff)<<48) +(((long)data[i8+7]&0xff)<<56); k *= m; k ^= k >>> r; k *= m; h ^= k; h *= m; } switch (length%8) { case 7: h ^= (long)(data[(length&~7)+6]&0xff) << 48; case 6: h ^= (long)(data[(length&~7)+5]&0xff) << 40; case 5: h ^= (long)(data[(length&~7)+4]&0xff) << 32; case 4: h ^= (long)(data[(length&~7)+3]&0xff) << 24; case 3: h ^= (long)(data[(length&~7)+2]&0xff) << 16; case 2: h ^= (long)(data[(length&~7)+1]&0xff) << 8; case 1: h ^= (long)(data[length&~7]&0xff); h *= m; }; h ^= h >>> r; h *= m; h ^= h >>> r; return h; }
}