From d5d325ececba1d87fcdaa99ea64dda6fe86e1bfb Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Tue, 12 Aug 2025 19:21:33 -0700 Subject: [PATCH 1/7] Added xxh3 64bit hash and switched Bytes over to using it. Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../hedera/pbj/runtime/hashing/XXH3_64.java | 443 ++++++++++++++++++ .../hedera/pbj/runtime/io/buffer/Bytes.java | 10 +- .../src/main/java/module-info.java | 1 + .../hedera/pbj/runtime/hashing/XXH3Test.java | 140 ++++++ 4 files changed, 588 insertions(+), 6 deletions(-) create mode 100644 pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java create mode 100644 pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3Test.java diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java new file mode 100644 index 000000000..5fbca6f8c --- /dev/null +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java @@ -0,0 +1,443 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.runtime.hashing; + +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; + +/** + * XXH3_64 is a 64-bit variant of the XXH3 hash function. + * It is designed to be fast and efficient, providing a good balance between speed and + * collision resistance. + * + *
It is recommended to use {@link #DEFAULT_INSTANCE} for most use cases. + * @see xxhash + */ +@SuppressWarnings({"DuplicatedCode", "NumericOverflow"}) +public class XXH3_64 { + /** Default instance of the XXH3_64 hasher with a seed of 0. */ + public static final XXH3_64 DEFAULT_INSTANCE = new XXH3_64(0); + + private static final VarHandle LONG_HANDLE = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + private static final VarHandle INT_HANDLE = + MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN); + private static final int BLOCK_LEN_EXP = 10; + private static final long SECRET_00 = 0xbe4ba423396cfeb8L; + private static final long SECRET_01 = 0x1cad21f72c81017cL; + private static final long SECRET_02 = 0xdb979083e96dd4deL; + private static final long SECRET_03 = 0x1f67b3b7a4a44072L; + private static final long SECRET_04 = 0x78e5c0cc4ee679cbL; + private static final long SECRET_05 = 0x2172ffcc7dd05a82L; + private static final long SECRET_06 = 0x8e2443f7744608b8L; + private static final long SECRET_07 = 0x4c263a81e69035e0L; + private static final long SECRET_08 = 0xcb00c391bb52283cL; + private static final long SECRET_09 = 0xa32e531b8b65d088L; + private static final long SECRET_10 = 0x4ef90da297486471L; + private static final long SECRET_11 = 0xd8acdea946ef1938L; + private static final long SECRET_12 = 0x3f349ce33f76faa8L; + private static final long SECRET_13 = 0x1d4f0bc7c7bbdcf9L; + private static final long SECRET_14 = 0x3159b4cd4be0518aL; + private static final long SECRET_15 = 0x647378d9c97e9fc8L; + private static final long SECRET_16 = 0xc3ebd33483acc5eaL; + private static final long SECRET_17 = 0xeb6313faffa081c5L; + private static final long SECRET_18 = 0x49daf0b751dd0d17L; + private static final long SECRET_19 = 0x9e68d429265516d3L; + private static final long SECRET_20 = 0xfca1477d58be162bL; + private static final long SECRET_21 = 0xce31d07ad1b8f88fL; + private static final long SECRET_22 = 
0x280416958f3acb45L; + private static final long SECRET_23 = 0x7e404bbbcafbd7afL; + private static final long INIT_ACC_0 = 0x00000000C2B2AE3DL; + private static final long INIT_ACC_1 = 0x9E3779B185EBCA87L; + private static final long INIT_ACC_2 = 0xC2B2AE3D27D4EB4FL; + private static final long INIT_ACC_3 = 0x165667B19E3779F9L; + private static final long INIT_ACC_4 = 0x85EBCA77C2B2AE63L; + private static final long INIT_ACC_5 = 0x0000000085EBCA77L; + private static final long INIT_ACC_6 = 0x27D4EB2F165667C5L; + private static final long INIT_ACC_7 = 0x000000009E3779B1L; + + private final long secret00; + private final long secret01; + private final long secret02; + private final long secret03; + private final long secret04; + private final long secret05; + private final long secret06; + private final long secret07; + private final long secret08; + private final long secret09; + private final long secret10; + private final long secret11; + private final long secret12; + private final long secret13; + private final long secret14; + private final long secret15; + private final long secret16; + private final long secret17; + private final long secret18; + private final long secret19; + private final long secret20; + private final long secret21; + private final long secret22; + private final long secret23; + + private final long[] secret; + + private final long secShift00; + private final long secShift01; + private final long secShift02; + private final long secShift03; + private final long secShift04; + private final long secShift05; + private final long secShift06; + private final long secShift07; + private final long secShift08; + private final long secShift09; + private final long secShift10; + private final long secShift11; + + private final long secShift16; + private final long secShift17; + private final long secShift18; + private final long secShift19; + private final long secShift20; + private final long secShift21; + private final long secShift22; + private 
final long secShift23; + + private final long secShiftFinal0; + private final long secShiftFinal1; + private final long secShiftFinal2; + private final long secShiftFinal3; + private final long secShiftFinal4; + private final long secShiftFinal5; + private final long secShiftFinal6; + private final long secShiftFinal7; + private final long secShift12; + private final long secShift13; + private final long secShift14; + private final long secShift15; + private final long bitflip00; + private final long bitflip12; + private final long bitflip34; + private final long bitflip56; + private final long hash0; + + @SuppressWarnings("NumericOverflow") + private XXH3_64(long seed) { + this.secret00 = SECRET_00 + seed; + this.secret01 = SECRET_01 - seed; + this.secret02 = SECRET_02 + seed; + this.secret03 = SECRET_03 - seed; + this.secret04 = SECRET_04 + seed; + this.secret05 = SECRET_05 - seed; + this.secret06 = SECRET_06 + seed; + this.secret07 = SECRET_07 - seed; + this.secret08 = SECRET_08 + seed; + this.secret09 = SECRET_09 - seed; + this.secret10 = SECRET_10 + seed; + this.secret11 = SECRET_11 - seed; + this.secret12 = SECRET_12 + seed; + this.secret13 = SECRET_13 - seed; + this.secret14 = SECRET_14 + seed; + this.secret15 = SECRET_15 - seed; + this.secret16 = SECRET_16 + seed; + this.secret17 = SECRET_17 - seed; + this.secret18 = SECRET_18 + seed; + this.secret19 = SECRET_19 - seed; + this.secret20 = SECRET_20 + seed; + this.secret21 = SECRET_21 - seed; + this.secret22 = SECRET_22 + seed; + this.secret23 = SECRET_23 - seed; + + this.secShift00 = (SECRET_00 >>> 24) + (SECRET_01 << 40) + seed; + this.secShift01 = (SECRET_01 >>> 24) + (SECRET_02 << 40) - seed; + this.secShift02 = (SECRET_02 >>> 24) + (SECRET_03 << 40) + seed; + this.secShift03 = (SECRET_03 >>> 24) + (SECRET_04 << 40) - seed; + this.secShift04 = (SECRET_04 >>> 24) + (SECRET_05 << 40) + seed; + this.secShift05 = (SECRET_05 >>> 24) + (SECRET_06 << 40) - seed; + this.secShift06 = (SECRET_06 >>> 24) + 
(SECRET_07 << 40) + seed; + this.secShift07 = (SECRET_07 >>> 24) + (SECRET_08 << 40) - seed; + this.secShift08 = (SECRET_08 >>> 24) + (SECRET_09 << 40) + seed; + this.secShift09 = (SECRET_09 >>> 24) + (SECRET_10 << 40) - seed; + this.secShift10 = (SECRET_10 >>> 24) + (SECRET_11 << 40) + seed; + this.secShift11 = (SECRET_11 >>> 24) + (SECRET_12 << 40) - seed; + + this.secShift16 = secret15 >>> 8 | secret16 << 56; + this.secShift17 = secret16 >>> 8 | secret17 << 56; + this.secShift18 = secret17 >>> 8 | secret18 << 56; + this.secShift19 = secret18 >>> 8 | secret19 << 56; + this.secShift20 = secret19 >>> 8 | secret20 << 56; + this.secShift21 = secret20 >>> 8 | secret21 << 56; + this.secShift22 = secret21 >>> 8 | secret22 << 56; + this.secShift23 = secret22 >>> 8 | secret23 << 56; + + this.secShiftFinal0 = secret01 >>> 24 | secret02 << 40; + this.secShiftFinal1 = secret02 >>> 24 | secret03 << 40; + this.secShiftFinal2 = secret03 >>> 24 | secret04 << 40; + this.secShiftFinal3 = secret04 >>> 24 | secret05 << 40; + this.secShiftFinal4 = secret05 >>> 24 | secret06 << 40; + this.secShiftFinal5 = secret06 >>> 24 | secret07 << 40; + this.secShiftFinal6 = secret07 >>> 24 | secret08 << 40; + this.secShiftFinal7 = secret08 >>> 24 | secret09 << 40; + + this.secret = new long[] { + secret00, secret01, secret02, secret03, secret04, secret05, secret06, secret07, + secret08, secret09, secret10, secret11, secret12, secret13, secret14, secret15, + secret16, secret17, secret18, secret19, secret20, secret21, secret22, secret23 + }; + + this.secShift12 = (SECRET_12 >>> 24) + (SECRET_13 << 40) + seed; + this.secShift13 = (SECRET_13 >>> 24) + (SECRET_14 << 40) - seed; + this.secShift14 = (SECRET_14 >>> 56) + (SECRET_15 << 8) + seed; + this.secShift15 = (SECRET_15 >>> 56) + (SECRET_16 << 8) - seed; + + this.bitflip00 = ((SECRET_00 >>> 32) ^ (SECRET_00 & 0xFFFFFFFFL)) + seed; + this.bitflip12 = (SECRET_01 ^ SECRET_02) - (seed ^ Long.reverseBytes(seed & 0xFFFFFFFFL)); + this.bitflip34 = 
(SECRET_03 ^ SECRET_04) + seed; + this.bitflip56 = (SECRET_05 ^ SECRET_06) - seed; + + this.hash0 = avalanche64(seed ^ (SECRET_07 ^ SECRET_08)); + } + + private static long rrmxmx(long h64, final long length) { + h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24); + h64 *= 0x9FB21C651E98DF25L; + h64 ^= (h64 >>> 35) + length; + h64 *= 0x9FB21C651E98DF25L; + return h64 ^ (h64 >>> 28); + } + + private static long mix16B(final byte[] input, final int offIn, final long sec0, final long sec1) { + long lo = (long) LONG_HANDLE.get(input, offIn); + long hi = (long) LONG_HANDLE.get(input, offIn + 8); + return mix2Accs(lo, hi, sec0, sec1); + } + + private static long avalanche64(long h64) { + h64 ^= h64 >>> 33; + h64 *= INIT_ACC_2; + h64 ^= h64 >>> 29; + h64 *= INIT_ACC_3; + return h64 ^ (h64 >>> 32); + } + + private static long avalanche3(long h64) { + h64 ^= h64 >>> 37; + h64 *= 0x165667919E3779F9L; + return h64 ^ (h64 >>> 32); + } + + private static long mix2Accs(final long lh, final long rh, long sec0, long sec8) { + return mix(lh ^ sec0, rh ^ sec8); + } + + private static long contrib(long a, long b) { + long k = a ^ b; + return (0xFFFFFFFFL & k) * (k >>> 32); + } + + private static long mixAcc(long acc, long sec) { + return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7; + } + + private static long mix(long a, long b) { + long x = a * b; + long y = Math.unsignedMultiplyHigh(a, b); + return x ^ y; + } + + /** + * Hashes a byte array to a 64-bit {@code long} value. + * + *
Equivalent to {@code hashToLong(input, (b, f) -> f.putBytes(b, off, len))}. + * + * @param input the byte array + * @param off the offset + * @param length the length + * @return the hash value + */ + public long hashBytesToLong(final byte[] input, final int off, final int length) { + if (length <= 16) { + if (length > 8) { + long lo = (long) LONG_HANDLE.get(input, off) ^ bitflip34; + long hi = (long) LONG_HANDLE.get(input, off + length - 8) ^ bitflip56; + long acc = length + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + if (length >= 4) { + long input1 = (int) INT_HANDLE.get(input, off); + long input2 = (int) INT_HANDLE.get(input, off + length - 4); + long keyed = (input2 & 0xFFFFFFFFL) ^ (input1 << 32) ^ bitflip12; + return XXH3_64.rrmxmx(keyed, length); + } + if (length != 0) { + int c1 = input[off] & 0xFF; + int c2 = input[off + (length >> 1)]; + int c3 = input[off + length - 1] & 0xFF; + long combined = ((c1 << 16) | (c2 << 24) | c3 | ((long) length << 8)) & 0xFFFFFFFFL; + return avalanche64(combined ^ bitflip00); + } + return hash0; + } + if (length <= 128) { + long acc = length * INIT_ACC_1; + + if (length > 32) { + if (length > 64) { + if (length > 96) { + acc += XXH3_64.mix16B(input, off + 48, secret12, secret13); + acc += XXH3_64.mix16B(input, off + length - 64, secret14, secret15); + } + acc += XXH3_64.mix16B(input, off + 32, secret08, secret09); + acc += XXH3_64.mix16B(input, off + length - 48, secret10, secret11); + } + acc += XXH3_64.mix16B(input, off + 16, secret04, secret05); + acc += XXH3_64.mix16B(input, off + length - 32, secret06, secret07); + } + acc += XXH3_64.mix16B(input, off, secret00, secret01); + acc += XXH3_64.mix16B(input, off + length - 16, secret02, secret03); + + return avalanche3(acc); + } + if (length <= 240) { + long acc = length * INIT_ACC_1; + acc += XXH3_64.mix16B(input, off, secret00, secret01); + acc += XXH3_64.mix16B(input, off + 16, secret02, secret03); + acc += XXH3_64.mix16B(input, off + 16 * 
2, secret04, secret05); + acc += XXH3_64.mix16B(input, off + 16 * 3, secret06, secret07); + acc += XXH3_64.mix16B(input, off + 16 * 4, secret08, secret09); + acc += XXH3_64.mix16B(input, off + 16 * 5, secret10, secret11); + acc += XXH3_64.mix16B(input, off + 16 * 6, secret12, secret13); + acc += XXH3_64.mix16B(input, off + 16 * 7, secret14, secret15); + + acc = avalanche3(acc); + + if (length >= 144) { + acc += XXH3_64.mix16B(input, off + 128, secShift00, secShift01); + if (length >= 160) { + acc += XXH3_64.mix16B(input, off + 144, secShift02, secShift03); + if (length >= 176) { + acc += XXH3_64.mix16B(input, off + 160, secShift04, secShift05); + if (length >= 192) { + acc += XXH3_64.mix16B(input, off + 176, secShift06, secShift07); + if (length >= 208) { + acc += XXH3_64.mix16B(input, off + 192, secShift08, secShift09); + if (length >= 224) { + acc += XXH3_64.mix16B(input, off + 208, secShift10, secShift11); + if (length >= 240) acc += XXH3_64.mix16B(input, off + 224, secShift12, secShift13); + } + } + } + } + } + } + acc += XXH3_64.mix16B(input, off + length - 16, secShift14, secShift15); + return avalanche3(acc); + } + + long acc0 = INIT_ACC_0; + long acc1 = INIT_ACC_1; + long acc2 = INIT_ACC_2; + long acc3 = INIT_ACC_3; + long acc4 = INIT_ACC_4; + long acc5 = INIT_ACC_5; + long acc6 = INIT_ACC_6; + long acc7 = INIT_ACC_7; + + final int nbBlocks = (length - 1) >>> BLOCK_LEN_EXP; + for (int n = 0; n < nbBlocks; n++) { + final int offBlock = off + (n << BLOCK_LEN_EXP); + for (int s = 0; s < 16; s += 1) { + int offStripe = offBlock + (s << 6); + + long b0 = (long) LONG_HANDLE.get(input, offStripe); + long b1 = (long) LONG_HANDLE.get(input, offStripe + 8); + long b2 = (long) LONG_HANDLE.get(input, offStripe + 8 * 2); + long b3 = (long) LONG_HANDLE.get(input, offStripe + 8 * 3); + long b4 = (long) LONG_HANDLE.get(input, offStripe + 8 * 4); + long b5 = (long) LONG_HANDLE.get(input, offStripe + 8 * 5); + long b6 = (long) LONG_HANDLE.get(input, offStripe + 8 * 6); + 
long b7 = (long) LONG_HANDLE.get(input, offStripe + 8 * 7); + + acc0 += b1 + contrib(b0, secret[s]); + acc1 += b0 + contrib(b1, secret[s + 1]); + acc2 += b3 + contrib(b2, secret[s + 2]); + acc3 += b2 + contrib(b3, secret[s + 3]); + acc4 += b5 + contrib(b4, secret[s + 4]); + acc5 += b4 + contrib(b5, secret[s + 5]); + acc6 += b7 + contrib(b6, secret[s + 6]); + acc7 += b6 + contrib(b7, secret[s + 7]); + } + + acc0 = mixAcc(acc0, secret16); + acc1 = mixAcc(acc1, secret17); + acc2 = mixAcc(acc2, secret18); + acc3 = mixAcc(acc3, secret19); + acc4 = mixAcc(acc4, secret20); + acc5 = mixAcc(acc5, secret21); + acc6 = mixAcc(acc6, secret22); + acc7 = mixAcc(acc7, secret23); + } + + final int nbStripes = ((length - 1) - (nbBlocks << BLOCK_LEN_EXP)) >>> 6; + final int offBlock = off + (nbBlocks << BLOCK_LEN_EXP); + for (int s = 0; s < nbStripes; s++) { + int offStripe = offBlock + (s << 6); + + long b0 = (long) LONG_HANDLE.get(input, offStripe); + long b1 = (long) LONG_HANDLE.get(input, offStripe + 8); + long b2 = (long) LONG_HANDLE.get(input, offStripe + 8 * 2); + long b3 = (long) LONG_HANDLE.get(input, offStripe + 8 * 3); + long b4 = (long) LONG_HANDLE.get(input, offStripe + 8 * 4); + long b5 = (long) LONG_HANDLE.get(input, offStripe + 8 * 5); + long b6 = (long) LONG_HANDLE.get(input, offStripe + 8 * 6); + long b7 = (long) LONG_HANDLE.get(input, offStripe + 8 * 7); + + acc0 += b1 + contrib(b0, secret[s]); + acc1 += b0 + contrib(b1, secret[s + 1]); + acc2 += b3 + contrib(b2, secret[s + 2]); + acc3 += b2 + contrib(b3, secret[s + 3]); + acc4 += b5 + contrib(b4, secret[s + 4]); + acc5 += b4 + contrib(b5, secret[s + 5]); + acc6 += b7 + contrib(b6, secret[s + 6]); + acc7 += b6 + contrib(b7, secret[s + 7]); + } + + { + int offStripe = off + length - 64; + + long b0 = (long) LONG_HANDLE.get(input, offStripe); + long b1 = (long) LONG_HANDLE.get(input, offStripe + 8); + long b2 = (long) LONG_HANDLE.get(input, offStripe + 8 * 2); + long b3 = (long) LONG_HANDLE.get(input, offStripe + 8 * 
3); + long b4 = (long) LONG_HANDLE.get(input, offStripe + 8 * 4); + long b5 = (long) LONG_HANDLE.get(input, offStripe + 8 * 5); + long b6 = (long) LONG_HANDLE.get(input, offStripe + 8 * 6); + long b7 = (long) LONG_HANDLE.get(input, offStripe + 8 * 7); + + acc0 += b1 + contrib(b0, secShift16); + acc1 += b0 + contrib(b1, secShift17); + acc2 += b3 + contrib(b2, secShift18); + acc3 += b2 + contrib(b3, secShift19); + acc4 += b5 + contrib(b4, secShift20); + acc5 += b4 + contrib(b5, secShift21); + acc6 += b7 + contrib(b6, secShift22); + acc7 += b6 + contrib(b7, secShift23); + } + + return finalizeHash(length, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7); + } + + private long finalizeHash( + long length, long acc0, long acc1, long acc2, long acc3, long acc4, long acc5, long acc6, long acc7) { + + long result64 = length * INIT_ACC_1 + + mix2Accs(acc0, acc1, secShiftFinal0, secShiftFinal1) + + mix2Accs(acc2, acc3, secShiftFinal2, secShiftFinal3) + + mix2Accs(acc4, acc5, secShiftFinal4, secShiftFinal5) + + mix2Accs(acc6, acc7, secShiftFinal6, secShiftFinal7); + + return avalanche3(result64); + } +} diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index 59aee2c6c..ec2b2538d 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -3,6 +3,7 @@ import static java.util.Objects.requireNonNull; +import com.hedera.pbj.runtime.hashing.XXH3_64; import com.hedera.pbj.runtime.io.DataEncodingException; import com.hedera.pbj.runtime.io.ReadableSequentialData; import com.hedera.pbj.runtime.io.UnsafeUtils; @@ -192,9 +193,10 @@ public static Bytes merge(@NonNull final Bytes bytes1, @NonNull final Bytes byte /** * Returns the first byte offset of {@code needle} inside {@code haystack}, * or –1 if it is not present. - * + *
* Offsets are *relative to the start of the Bytes slice*, so 0 means * “starts exactly at haystack.start”. + *
*/ public static int indexOf(@NonNull final Bytes haystack, @NonNull final Bytes needle) { requireNonNull(haystack); @@ -537,11 +539,7 @@ public boolean equals(@Nullable final Object o) { @Override public int hashCode() { if (hashCode == 0) { - int h = 1; - for (int i = start + length - 1; i >= start; i--) { - h = 31 * h + UnsafeUtils.getArrayByteNoChecks(buffer, i); - } - hashCode = h; + hashCode = (int) XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(buffer, start, length); } return hashCode; } diff --git a/pbj-core/pbj-runtime/src/main/java/module-info.java b/pbj-core/pbj-runtime/src/main/java/module-info.java index 63d5f648f..c0d1907eb 100644 --- a/pbj-core/pbj-runtime/src/main/java/module-info.java +++ b/pbj-core/pbj-runtime/src/main/java/module-info.java @@ -12,4 +12,5 @@ exports com.hedera.pbj.runtime.io.buffer; exports com.hedera.pbj.runtime.jsonparser; exports com.hedera.pbj.runtime.grpc; + exports com.hedera.pbj.runtime.hashing; } diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3Test.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3Test.java new file mode 100644 index 000000000..f489b9687 --- /dev/null +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3Test.java @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.runtime.hashing; + +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertTrue; + +import java.io.IOException; +import java.util.HexFormat; +import java.util.Random; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.stream.IntStream; +import org.junit.jupiter.api.Assumptions; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +public class XXH3Test { + + /** + * This test checks the hash of the string "hello world". The expected hash is computed using the command line + * tool `xxhsum` with the `-H3` option. 
+ */ + @Test + @DisplayName("Test for the string 'hello world'") + public void helloWorldTest() { + byte[] inputBytes = "hello world".getBytes(); + // Hash the whole array with the default XXH3_64 instance (NOTE(review): getBytes() uses the platform default charset) + long hash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(inputBytes, 0, inputBytes.length); + // hello world expected hash in hex produced with command line -> echo -n "hello world" | xxhsum -H3 + String expectedHash = "d447b1ea40e6988b"; + assertEquals(expectedHash, Long.toHexString(hash)); + } + + /** + * This test checks the hash of the byte sequence CAFEBABE, which is often used as a magic number in Java class + * files. + */ + @Test + @DisplayName("Test for the CAFEBABE byte sequence") + public void cafeBabyTest() { + byte[] inputBytes = HexFormat.of().parseHex("CAFEBABE"); + // Hash the whole array with the default XXH3_64 instance + long hash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(inputBytes, 0, inputBytes.length); + // CAFEBABE expected hash in hex produced with command line -> echo CAFEBABE | xxd -r -p | xxhsum -H3 + String expectedHash = "36afb8d0770d97ea"; + assertEquals(expectedHash, Long.toHexString(hash)); + } + + /** + * This test checks the hash of a large random data set against the `xxhsum` command line tool if it is available. + * It uses a large number of random byte arrays to ensure that the hash function behaves correctly across a wide + * range of inputs.
+ */ + @Test + @DisplayName("Test random data against xxhsum if available") + void testRandomDataAgainstXxhsumIfAvailable() { + Assumptions.assumeTrue(isXXHSumAvailable(), "xxhsum not available, skipping test"); + // 1,000 random arrays of 1 to 128 bytes each; the seeded Random is shared by the parallel workers, so the array drawn for a given index may differ between runs + Random random = new Random(18971947891479L); + final AtomicBoolean allMatch = new AtomicBoolean(true); + IntStream.range(0, 1_000).parallel().forEach(i -> { + byte[] randomData = new byte[1 + random.nextInt(128)]; + random.nextBytes(randomData); + long testCodeHashResult = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(randomData, 0, randomData.length); + long referenceExpectedHash = xxh364HashWithCommandLine(randomData, 0, randomData.length); + assertEquals( + referenceExpectedHash, + testCodeHashResult, + "Mismatch for random data " + i + ": Input: " + + HexFormat.of().formatHex(randomData) + + ", Expected xxhsum: " + Long.toHexString(referenceExpectedHash) + + ", XXH3_64: " + Long.toHexString(testCodeHashResult)); + if (testCodeHashResult != referenceExpectedHash) { + allMatch.set(false); + } + }); + assertTrue(allMatch.get()); + } + + /** + * This method checks if the `xxhsum` command line tool is available on the system. + * It does this by trying to execute `xxhsum --version` and checking for a zero exit code; a failure to start the process or an interrupted wait is reported as unavailable. + */ + public static boolean isXXHSumAvailable() { + try { + Process process = new ProcessBuilder("xxhsum", "--version") + .redirectErrorStream(true) + .start(); + int exitCode = process.waitFor(); + return exitCode == 0; + } catch (IOException | InterruptedException e) { + return false; + } + } + + /** + * This method computes the XXH3-64 hash of the given byte array using the `xxhsum` command line tool. + * It writes the bytes to the standard input of `xxhsum` and reads the output. + * + * @param bytes The byte array to hash. + * @param start The starting index in the byte array. + * @param length The number of bytes to hash. + * @return The computed hash as a long value.
+ */ + public static long xxh364HashWithCommandLine(final byte[] bytes, int start, int length) { + String result; + ProcessBuilder pb = new ProcessBuilder("xxhsum", "-H" + 3, "-"); + Process process; + try { + process = pb.start(); + // Write input and close output to signal EOF to xxhsum + try (var out = process.getOutputStream()) { + out.write(bytes, start, length); + out.flush(); + } + // Read result from input stream + String resultString1; + try (var in = process.getInputStream()) { + var resultBytes = in.readAllBytes(); + resultString1 = new String(resultBytes).trim(); + } + // Drain error stream to avoid blocking + try (var err = process.getErrorStream()) { + var errorBytes = err.readAllBytes(); + if (errorBytes.length > 0) { + String errorString = new String(errorBytes).trim(); + if (!errorString.isEmpty()) { + throw new RuntimeException("Error from xxhsum: " + errorString); + } + } + } + process.waitFor(); + result = resultString1; + } catch (IOException | InterruptedException e) { + throw new RuntimeException(e); + } + final String resultString = result; + final String resultHexString = resultString.substring(resultString.indexOf('_') + 1, resultString.indexOf(' ')); + return Long.parseUnsignedLong(resultHexString, 16); + } +} From b9410d6a4e8baf6371947596f275db0503d5bae5 Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Wed, 13 Aug 2025 18:32:17 -0700 Subject: [PATCH 2/7] Added xxh3 64bit hash streaming implementation of WritableSequentialData. Added JMH benchmark for model object hashCode(). Rewritten code generation to generate hashCode64() methods and hashCode() using xxhash. 
Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../com/hedera/pbj/compiler/impl/Common.java | 363 +++++++---- .../hedera/pbj/compiler/impl/MapField.java | 6 +- .../hedera/pbj/compiler/impl/SingleField.java | 11 + .../impl/generators/ModelGenerator.java | 44 +- .../protobuf/CodecWriteMethodGenerator.java | 2 +- .../hedera/pbj/runtime/hashing/XXH3_64.java | 583 ++++++++++++++++-- .../runtime/hashing/XXH3StreamingTest.java | 187 ++++++ .../jmh/ModelObjHashCodeBench.java | 76 +++ .../src/main/proto/hasheval.proto | 21 + 9 files changed, 1084 insertions(+), 209 deletions(-) create mode 100644 pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3StreamingTest.java create mode 100644 pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java index f77f48d1c..5f651b41a 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java @@ -1,13 +1,13 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.pbj.compiler.impl; -import com.hedera.pbj.compiler.impl.grammar.Protobuf3Parser; +import com.hedera.pbj.compiler.impl.Field.FieldType; +import com.hedera.pbj.compiler.impl.grammar.Protobuf3Parser.DocCommentContext; import edu.umd.cs.findbugs.annotations.NonNull; import java.io.File; import java.util.Arrays; import java.util.List; import java.util.Objects; -import java.util.regex.Pattern; import java.util.stream.Collectors; /** @@ -30,8 +30,6 @@ public final class Common { /** Number of bits used to represent the tag type */ static final int TAG_TYPE_BITS = 3; - private static final Pattern COMPARABLE_PATTERN = Pattern.compile("implements Comparable<\\w+>\\s*\\{"); - /** * Makes a tag value given a field number and 
wire type. * @@ -120,7 +118,7 @@ public static String camelToUpperSnake(String name) { * * @return clean comment */ - public static String buildCleanFieldJavaDoc(int fieldNumber, Protobuf3Parser.DocCommentContext docContext) { + public static String buildCleanFieldJavaDoc(int fieldNumber, DocCommentContext docContext) { final String cleanedComment = docContext == null ? "" : cleanJavaDocComment(docContext.getText()); final String fieldNumComment = "(" + fieldNumber + ") "; return fieldNumComment + cleanedComment; @@ -134,8 +132,7 @@ public static String buildCleanFieldJavaDoc(int fieldNumber, Protobuf3Parser.Doc * * @return clean comment */ - public static String buildCleanFieldJavaDoc( - ListIt is recommended to use {@link #DEFAULT_INSTANCE} for most use cases.
+ * + * @param seed the seed to use for hashing + */ @SuppressWarnings("NumericOverflow") - private XXH3_64(long seed) { + public XXH3_64(long seed) { this.secret00 = SECRET_00 + seed; this.secret01 = SECRET_01 - seed; this.secret02 = SECRET_02 + seed; @@ -200,51 +214,14 @@ private XXH3_64(long seed) { this.hash0 = avalanche64(seed ^ (SECRET_07 ^ SECRET_08)); } - private static long rrmxmx(long h64, final long length) { - h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24); - h64 *= 0x9FB21C651E98DF25L; - h64 ^= (h64 >>> 35) + length; - h64 *= 0x9FB21C651E98DF25L; - return h64 ^ (h64 >>> 28); - } - - private static long mix16B(final byte[] input, final int offIn, final long sec0, final long sec1) { - long lo = (long) LONG_HANDLE.get(input, offIn); - long hi = (long) LONG_HANDLE.get(input, offIn + 8); - return mix2Accs(lo, hi, sec0, sec1); - } - - private static long avalanche64(long h64) { - h64 ^= h64 >>> 33; - h64 *= INIT_ACC_2; - h64 ^= h64 >>> 29; - h64 *= INIT_ACC_3; - return h64 ^ (h64 >>> 32); - } - - private static long avalanche3(long h64) { - h64 ^= h64 >>> 37; - h64 *= 0x165667919E3779F9L; - return h64 ^ (h64 >>> 32); - } - - private static long mix2Accs(final long lh, final long rh, long sec0, long sec8) { - return mix(lh ^ sec0, rh ^ sec8); - } - - private static long contrib(long a, long b) { - long k = a ^ b; - return (0xFFFFFFFFL & k) * (k >>> 32); - } - - private static long mixAcc(long acc, long sec) { - return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7; - } - - private static long mix(long a, long b) { - long x = a * b; - long y = Math.unsignedMultiplyHigh(a, b); - return x ^ y; + /** + * Creates a new instance of {@link HashingWritableSequentialData} with the seed derived from the instance of + * {@link XXH3_64}. 
+ * + * @return a new instance of {@link HashingWritableSequentialData} + */ + public HashingWritableSequentialData hashingWritableSequentialData() { + return new HashingWritableSequentialData(); } /** @@ -429,6 +406,57 @@ public long hashBytesToLong(final byte[] input, final int off, final int length) return finalizeHash(length, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7); } + // ============================================================================================================= + // Private methods + // ============================================================================================================= + + private static long rrmxmx(long h64, final long length) { + h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24); + h64 *= 0x9FB21C651E98DF25L; + h64 ^= (h64 >>> 35) + length; + h64 *= 0x9FB21C651E98DF25L; + return h64 ^ (h64 >>> 28); + } + + private static long mix16B(final byte[] input, final int offIn, final long sec0, final long sec1) { + long lo = (long) LONG_HANDLE.get(input, offIn); + long hi = (long) LONG_HANDLE.get(input, offIn + 8); + return mix2Accs(lo, hi, sec0, sec1); + } + + private static long avalanche64(long h64) { + h64 ^= h64 >>> 33; + h64 *= INIT_ACC_2; + h64 ^= h64 >>> 29; + h64 *= INIT_ACC_3; + return h64 ^ (h64 >>> 32); + } + + private static long avalanche3(long h64) { + h64 ^= h64 >>> 37; + h64 *= 0x165667919E3779F9L; + return h64 ^ (h64 >>> 32); + } + + private static long mix2Accs(final long lh, final long rh, long sec0, long sec8) { + return mix(lh ^ sec0, rh ^ sec8); + } + + private static long contrib(long a, long b) { + long k = a ^ b; + return (0xFFFFFFFFL & k) * (k >>> 32); + } + + private static long mixAcc(long acc, long sec) { + return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7; + } + + private static long mix(long a, long b) { + long x = a * b; + long y = Math.unsignedMultiplyHigh(a, b); + return x ^ y; + } + private long finalizeHash( long length, long acc0, long acc1, long acc2, long acc3, long acc4, long acc5, long
acc6, long acc7) { @@ -440,4 +468,455 @@ private long finalizeHash( return avalanche3(result64); } + + public class HashingWritableSequentialData implements WritableSequentialData { + /** The size of the buffer used for writing data in bulk. */ + private static final int BULK_SIZE = 256; + /** The mask for the bulk size, used for wrapping around the buffer. */ + private static final int BULK_SIZE_MASK = BULK_SIZE - 1; + // Initial accumulator values for the hashing process. + private long acc0 = INIT_ACC_0; + private long acc1 = INIT_ACC_1; + private long acc2 = INIT_ACC_2; + private long acc3 = INIT_ACC_3; + private long acc4 = INIT_ACC_4; + private long acc5 = INIT_ACC_5; + private long acc6 = INIT_ACC_6; + private long acc7 = INIT_ACC_7; + /** The buffer used for writing data in bulk. */ + private final byte[] buffer = new byte[BULK_SIZE + 8]; + /** The offset in the buffer where the next write will occur. */ + private int offset = 0; + /** The total number of bytes written so far. */ + private long byteCount = 0; + + /** + * Constructs a new instance of the XXH3_64 hashing writable sequential data. + */ + private HashingWritableSequentialData() {} + + // ============================================================================================================= + // WritableSequentialData implementation + // ============================================================================================================= + + @Override + public void skip(long count) throws UncheckedIOException { + // Skip is not supported in this implementation. + throw new UnsupportedOperationException("Skip operation is not supported in this implementation."); + } + + @Override + public void limit(long limit) { + // Skip is not supported in this implementation. 
+ throw new UnsupportedOperationException("Skip operation is not supported in this implementation."); + } + + @Override + public long limit() { + return Long.MAX_VALUE; + } + + @Override + public long position() { + return byteCount; + } + + @Override + public long capacity() { + return Long.MAX_VALUE; + } + + /** {@inheritDoc} */ + @Override + public void writeVarLong(long value, boolean zigZag) throws BufferOverflowException, UncheckedIOException { + // We do not need to protobuf encode for hashing, so we just write the long directly. + writeLong(value); + } + + /** {@inheritDoc} */ + @Override + public void writeVarInt(int value, boolean zigZag) throws BufferOverflowException, UncheckedIOException { + // We do not need to protobuf encode for hashing, so we just write the int directly. + writeInt(value); + } + + /** {@inheritDoc} */ + @Override + public void writeDouble(double value, @NonNull ByteOrder byteOrder) + throws BufferOverflowException, UncheckedIOException { + // we are ignoring the byte order here as we always write in little-endian in hashes + writeLong(Double.doubleToRawLongBits(value)); + } + + /** {@inheritDoc} */ + @Override + public void writeDouble(double value) throws BufferOverflowException, UncheckedIOException { + writeLong(Double.doubleToRawLongBits(value)); + } + + /** {@inheritDoc} */ + @Override + public void writeFloat(float value, @NonNull ByteOrder byteOrder) + throws BufferOverflowException, UncheckedIOException { + // we are ignoring the byte order here as we always write in little-endian in hashes + writeInt(Float.floatToRawIntBits(value)); + } + + /** {@inheritDoc} */ + @Override + public void writeFloat(float value) throws BufferOverflowException, UncheckedIOException { + writeInt(Float.floatToRawIntBits(value)); + } + + /** {@inheritDoc} */ + @Override + public void writeLong(long value, @NonNull ByteOrder byteOrder) + throws BufferOverflowException, UncheckedIOException { + writeLong(value); + } + + /** {@inheritDoc} */ + 
@Override + public void writeLong(long value) throws BufferOverflowException, UncheckedIOException { + LONG_HANDLE.set(buffer, offset, value); + if (offset >= BULK_SIZE - 7) { + processBuffer(); + offset -= BULK_SIZE; + LONG_HANDLE.set(buffer, 0, value >>> (-offset << 3)); + } + offset += 8; + byteCount += 8; + } + + /** {@inheritDoc} */ + @Override + public void writeUnsignedInt(long value, @NonNull ByteOrder byteOrder) + throws BufferOverflowException, UncheckedIOException { + writeInt((int) value); + } + + /** {@inheritDoc} */ + @Override + public void writeUnsignedInt(long value) throws BufferOverflowException, UncheckedIOException { + writeInt((int) value); + } + + /** {@inheritDoc} */ + @Override + public void writeInt(int value, @NonNull ByteOrder byteOrder) + throws BufferOverflowException, UncheckedIOException { + writeInt(value); + } + + /** {@inheritDoc} */ + @Override + public void writeInt(int value) throws BufferOverflowException, UncheckedIOException { + INT_HANDLE.set(buffer, offset, value); + if (offset >= BULK_SIZE - 3) { + processBuffer(); + offset -= BULK_SIZE; + INT_HANDLE.set(buffer, 0, value >>> (-offset << 3)); + } + offset += 4; + byteCount += 4; + } + + /** {@inheritDoc} */ + @Override + public void writeBytes(@NonNull RandomAccessData src) throws BufferOverflowException, UncheckedIOException { + long offset = 0; + int length = Math.toIntExact(src.length()); + int remaining = length; + final int x = BULK_SIZE - this.offset; + byte[] temp = new byte[BULK_SIZE]; + if (length > x) { + int s = (int) ((byteCount - 1) >>> 6) & 12; + if (this.offset > 0) { + src.getBytes(offset, buffer, this.offset, x); + processBuffer(0, buffer, s); + this.offset = 0; + offset += x; + remaining -= x; + } + if (remaining > BULK_SIZE) { + do { + s += 4; + s &= 12; + src.getBytes(offset, temp, 0, BULK_SIZE); + processBuffer(0, temp, s); + offset += BULK_SIZE; + remaining -= BULK_SIZE; + } while (remaining > BULK_SIZE); + if (remaining < 64) { + int l = 64 - 
remaining; + src.getBytes(offset - l, buffer, BULK_SIZE - l, l); + } + } + } + src.getBytes(offset, buffer, this.offset, remaining); + this.offset += remaining; + byteCount += length; + } + + /** {@inheritDoc} */ + @Override + public void writeBytes(@NonNull BufferedData src) throws BufferOverflowException, UncheckedIOException { + long offset = src.position(); + int length = Math.toIntExact(src.remaining()); + int remaining = length; + final int x = BULK_SIZE - this.offset; + byte[] temp = new byte[BULK_SIZE]; + if (length > x) { + int s = (int) ((byteCount - 1) >>> 6) & 12; + if (this.offset > 0) { + src.getBytes(offset, buffer, this.offset, x); + processBuffer(0, buffer, s); + this.offset = 0; + offset += x; + remaining -= x; + } + if (remaining > BULK_SIZE) { + do { + s += 4; + s &= 12; + src.getBytes(offset, temp, 0, BULK_SIZE); + processBuffer(0, temp, s); + offset += BULK_SIZE; + remaining -= BULK_SIZE; + } while (remaining > BULK_SIZE); + if (remaining < 64) { + int l = 64 - remaining; + src.getBytes(offset - l, buffer, BULK_SIZE - l, l); + } + } + } + src.getBytes(offset, buffer, this.offset, remaining); + this.offset += remaining; + byteCount += length; + } + + /** {@inheritDoc} */ + @Override + public void writeBytes(@NonNull ByteBuffer srcBuffer) throws BufferOverflowException, UncheckedIOException { + int offset = srcBuffer.position(); + int length = srcBuffer.remaining(); + int remaining = length; + final int x = BULK_SIZE - this.offset; + if (length > x) { + int s = (int) ((byteCount - 1) >>> 6) & 12; + if (this.offset > 0) { + // Copy x bytes from srcBuffer to buffer at this.offset + srcBuffer.get(buffer, this.offset, x); + processBuffer(0, buffer, s); + this.offset = 0; + offset += x; + remaining -= x; + } + if (remaining > BULK_SIZE) { + do { + s += 4; + s &= 12; + // Copy BULK_SIZE bytes from srcBuffer to buffer + srcBuffer.get(buffer, 0, BULK_SIZE); + processBuffer(0, buffer, s); + offset += BULK_SIZE; + remaining -= BULK_SIZE; + } while 
(remaining > BULK_SIZE); + if (remaining < 64) { + int l = 64 - remaining; + // Copy l bytes from srcBuffer's previous position to buffer at BULK_SIZE - l + int prevPos = srcBuffer.position() - l; + int oldLimit = srcBuffer.limit(); + srcBuffer.limit(srcBuffer.position()); + srcBuffer.position(prevPos); + srcBuffer.get(buffer, BULK_SIZE - l, l); + srcBuffer.limit(oldLimit); + srcBuffer.position(offset + remaining); + } + } + } + // Copy remaining bytes from srcBuffer to buffer at this.offset + srcBuffer.get(buffer, this.offset, remaining); + this.offset += remaining; + byteCount += length; + } + + /** {@inheritDoc} */ + @Override + public void writeBytes(@NonNull byte[] src, int offset, int length) + throws BufferOverflowException, UncheckedIOException { + int remaining = length; + final int x = BULK_SIZE - this.offset; + if (length > x) { + int s = (int) ((byteCount - 1) >>> 6) & 12; + if (this.offset > 0) { + System.arraycopy(src, offset, buffer, this.offset, x); + processBuffer(0, buffer, s); + this.offset = 0; + offset += x; + remaining -= x; + } + if (remaining > BULK_SIZE) { + do { + s += 4; + s &= 12; + processBuffer(offset, src, s); + offset += BULK_SIZE; + remaining -= BULK_SIZE; + } while (remaining > BULK_SIZE); + if (remaining < 64) { + int l = 64 - remaining; + System.arraycopy(src, offset - l, buffer, BULK_SIZE - l, l); + } + } + } + System.arraycopy(src, offset, buffer, this.offset, remaining); + this.offset += remaining; + byteCount += length; + } + + /** {@inheritDoc} */ + @Override + public void writeByte(byte b) throws BufferOverflowException, UncheckedIOException { + if (offset >= BULK_SIZE) { + processBuffer(); + offset -= BULK_SIZE; + } + buffer[offset] = b; + offset += 1; + byteCount += 1; + } + + /** + * Resets the internal state of the hashing writable sequential data. + * This method clears the accumulated values and resets the buffer. 
+ */ + public void reset() { + acc0 = INIT_ACC_0; + acc1 = INIT_ACC_1; + acc2 = INIT_ACC_2; + acc3 = INIT_ACC_3; + acc4 = INIT_ACC_4; + acc5 = INIT_ACC_5; + acc6 = INIT_ACC_6; + acc7 = INIT_ACC_7; + offset = 0; + byteCount = 0; + } + + /** + * Computes the hash of the data written so far. + * + * @return the computed hash as a 64-bit long value + */ + public long computeHash() { + if (byteCount >= 0 && byteCount <= BULK_SIZE) { + return hashBytesToLong(buffer, 0, (int) byteCount); + } + LONG_HANDLE.set(buffer, BULK_SIZE, (long) LONG_HANDLE.get(buffer, 0)); + + long acc0Loc = acc0; + long acc1Loc = acc1; + long acc2Loc = acc2; + long acc3Loc = acc3; + long acc4Loc = acc4; + long acc5Loc = acc5; + long acc6Loc = acc6; + long acc7Loc = acc7; + + for (int off = 0, s = (((int) byteCount - 1) >>> 6) & 12; + off + 64 <= (((int) byteCount - 1) & BULK_SIZE_MASK); + off += 64, s += 1) { + + long b0 = (long) LONG_HANDLE.get(buffer, off); + long b1 = (long) LONG_HANDLE.get(buffer, off + 8); + long b2 = (long) LONG_HANDLE.get(buffer, off + 8 * 2); + long b3 = (long) LONG_HANDLE.get(buffer, off + 8 * 3); + long b4 = (long) LONG_HANDLE.get(buffer, off + 8 * 4); + long b5 = (long) LONG_HANDLE.get(buffer, off + 8 * 5); + long b6 = (long) LONG_HANDLE.get(buffer, off + 8 * 6); + long b7 = (long) LONG_HANDLE.get(buffer, off + 8 * 7); + + acc0Loc += b1 + contrib(b0, secret[s]); + acc1Loc += b0 + contrib(b1, secret[s + 1]); + acc2Loc += b3 + contrib(b2, secret[s + 2]); + acc3Loc += b2 + contrib(b3, secret[s + 3]); + acc4Loc += b5 + contrib(b4, secret[s + 4]); + acc5Loc += b4 + contrib(b5, secret[s + 5]); + acc6Loc += b7 + contrib(b6, secret[s + 6]); + acc7Loc += b6 + contrib(b7, secret[s + 7]); + } + + { + long b0 = (long) LONG_HANDLE.get(buffer, (offset - (64)) & BULK_SIZE_MASK); + long b1 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8)) & BULK_SIZE_MASK); + long b2 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8 * 2)) & BULK_SIZE_MASK); + long b3 = (long) 
LONG_HANDLE.get(buffer, (offset - (64 - 8 * 3)) & BULK_SIZE_MASK); + long b4 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8 * 4)) & BULK_SIZE_MASK); + long b5 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8 * 5)) & BULK_SIZE_MASK); + long b6 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8 * 6)) & BULK_SIZE_MASK); + long b7 = (long) LONG_HANDLE.get(buffer, (offset - (64 - 8 * 7)) & BULK_SIZE_MASK); + + acc0Loc += b1 + contrib(b0, secShift16); + acc1Loc += b0 + contrib(b1, secShift17); + acc2Loc += b3 + contrib(b2, secShift18); + acc3Loc += b2 + contrib(b3, secShift19); + acc4Loc += b5 + contrib(b4, secShift20); + acc5Loc += b4 + contrib(b5, secShift21); + acc6Loc += b7 + contrib(b6, secShift22); + acc7Loc += b6 + contrib(b7, secShift23); + } + + return finalizeHash(byteCount, acc0Loc, acc1Loc, acc2Loc, acc3Loc, acc4Loc, acc5Loc, acc6Loc, acc7Loc); + } + + // ============================================================================================================= + // Internal methods for processing the buffer and computing the hash + // ============================================================================================================= + + private void processBuffer() { + int s = (int) ((byteCount - 1) >>> 6) & 12; + processBuffer(0, buffer, s); + } + + private void mixAcc() { + acc0 = XXH3_64.mixAcc(acc0, secret16); + acc1 = XXH3_64.mixAcc(acc1, secret17); + acc2 = XXH3_64.mixAcc(acc2, secret18); + acc3 = XXH3_64.mixAcc(acc3, secret19); + acc4 = XXH3_64.mixAcc(acc4, secret20); + acc5 = XXH3_64.mixAcc(acc5, secret21); + acc6 = XXH3_64.mixAcc(acc6, secret22); + acc7 = XXH3_64.mixAcc(acc7, secret23); + } + + private void processBuffer(int off, byte[] buffer, int s) { + for (int i = 0; i < 4; ++i) { + int o = off + (i << 6); + long b0 = (long) LONG_HANDLE.get(buffer, o); + long b1 = (long) LONG_HANDLE.get(buffer, o + 8); + long b2 = (long) LONG_HANDLE.get(buffer, o + 8 * 2); + long b3 = (long) LONG_HANDLE.get(buffer, o + 8 * 3); + long b4 = 
(long) LONG_HANDLE.get(buffer, o + 8 * 4); + long b5 = (long) LONG_HANDLE.get(buffer, o + 8 * 5); + long b6 = (long) LONG_HANDLE.get(buffer, o + 8 * 6); + long b7 = (long) LONG_HANDLE.get(buffer, o + 8 * 7); + processBuffer(b0, b1, b2, b3, b4, b5, b6, b7, s + i); + } + if (s == 12) { + mixAcc(); + } + } + + private void processBuffer(long b0, long b1, long b2, long b3, long b4, long b5, long b6, long b7, int s) { + acc0 += b1 + contrib(b0, secret[s]); + acc1 += b0 + contrib(b1, secret[s + 1]); + acc2 += b3 + contrib(b2, secret[s + 2]); + acc3 += b2 + contrib(b3, secret[s + 3]); + acc4 += b5 + contrib(b4, secret[s + 4]); + acc5 += b4 + contrib(b5, secret[s + 5]); + acc6 += b7 + contrib(b6, secret[s + 6]); + acc7 += b6 + contrib(b7, secret[s + 7]); + } + } } diff --git a/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3StreamingTest.java b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3StreamingTest.java new file mode 100644 index 000000000..ce2093007 --- /dev/null +++ b/pbj-core/pbj-runtime/src/test/java/com/hedera/pbj/runtime/hashing/XXH3StreamingTest.java @@ -0,0 +1,187 @@ +package com.hedera.pbj.runtime.hashing; + +import static org.junit.jupiter.api.Assertions.assertEquals; + +import com.hedera.pbj.runtime.hashing.XXH3_64.HashingWritableSequentialData; +import com.hedera.pbj.runtime.io.buffer.BufferedData; +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.runtime.io.buffer.RandomAccessData; +import java.lang.invoke.MethodHandles; +import java.lang.invoke.VarHandle; +import java.nio.ByteOrder; +import java.util.Random; +import org.junit.jupiter.api.DisplayName; +import org.junit.jupiter.api.Test; + +public class XXH3StreamingTest { + /** VarHandle for reading and writing longs in little-endian byte order. 
*/ + private static final VarHandle LONG_HANDLE = + MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN); + /** VarHandle for reading and writing integers in little-endian byte order. */ + private static final VarHandle INT_HANDLE = + MethodHandles.byteArrayViewVarHandle(int[].class, ByteOrder.LITTLE_ENDIAN); + + @Test + @DisplayName("Test for 32-bit integer an float handling in XXH3 streaming") + public void testLimitAndCapacity() { + final var hashingStream = XXH3_64.DEFAULT_INSTANCE.hashingWritableSequentialData(); + assertEquals(Long.MAX_VALUE, hashingStream.limit()); + assertEquals(Long.MAX_VALUE, hashingStream.capacity()); + hashingStream.writeInt(123456789); + assertEquals(Integer.BYTES, hashingStream.position()); + assertEquals(Long.MAX_VALUE, hashingStream.limit()); + assertEquals(Long.MAX_VALUE, hashingStream.capacity()); + hashingStream.reset(); + assertEquals(0, hashingStream.position()); + } + + /** + * Test for 32-bit integer and float handling in XXH3 streaming. 
+ */ + @Test + @DisplayName("Test for 32-bit integer an float handling in XXH3 streaming") + public void test32Bit() { + final int value = 123456789; + final byte[] bytes = new byte[4]; + INT_HANDLE.set(bytes, 0, value); + final long simpleHash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(bytes, 0, 4); + final var hashingStream = XXH3_64.DEFAULT_INSTANCE.hashingWritableSequentialData(); + hashingStream.writeInt(value); + assertEquals(simpleHash, hashingStream.computeHash()); + assertEquals(Integer.BYTES, hashingStream.position()); + hashingStream.reset(); + hashingStream.writeInt(value, ByteOrder.LITTLE_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeInt(value, ByteOrder.BIG_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeUnsignedInt(value); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeUnsignedInt(value, ByteOrder.LITTLE_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeUnsignedInt(value, ByteOrder.BIG_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeVarInt(value, true); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeVarInt(value, false); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // Convert int to float and write it + final float floatValue = Float.intBitsToFloat(value); + hashingStream.writeFloat(floatValue); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeFloat(floatValue, ByteOrder.LITTLE_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeFloat(floatValue, ByteOrder.BIG_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + 
hashingStream.reset(); + } + + @Test + @DisplayName("Test for 64-bit long handling in XXH3 streaming") + public void test64Bit() { + final long value = 1234567890123456789L; + final byte[] bytes = new byte[8]; + LONG_HANDLE.set(bytes, 0, value); + final long simpleHash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(bytes, 0, 8); + final var hashingStream = XXH3_64.DEFAULT_INSTANCE.hashingWritableSequentialData(); + hashingStream.writeLong(value); + assertEquals(simpleHash, hashingStream.computeHash()); + assertEquals(Long.BYTES, hashingStream.position()); + hashingStream.reset(); + hashingStream.writeLong(value, ByteOrder.LITTLE_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeLong(value, ByteOrder.BIG_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeVarLong(value, true); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeVarLong(value, false); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // Convert long to double and write it + final double doubleValue = Double.longBitsToDouble(value); + hashingStream.writeDouble(doubleValue); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeDouble(doubleValue, ByteOrder.LITTLE_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeDouble(doubleValue, ByteOrder.BIG_ENDIAN); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + } + + @Test + @DisplayName("Test for byte methods in XXH3 streaming") + public void testByteMethods() { + final byte[] bytes = new byte[128]; + new Random(91824819480L).nextBytes(bytes); + final long simpleHash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(bytes, 0, bytes.length); + final HashingWritableSequentialData hashingStream = 
XXH3_64.DEFAULT_INSTANCE.hashingWritableSequentialData(); + // byte arrays + hashingStream.writeBytes(bytes, 0, bytes.length); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + hashingStream.writeBytes(bytes); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + for (byte aByte : bytes) { + hashingStream.writeByte(aByte); + } + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // BufferedData + BufferedData bufferedData = BufferedData.wrap(bytes); + hashingStream.writeBytes(bufferedData); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // ByteBuffer + java.nio.ByteBuffer byteBuffer = java.nio.ByteBuffer.wrap(bytes); + hashingStream.writeBytes(byteBuffer); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // RandomAccessData + RandomAccessData randomAccessData = Bytes.wrap(bytes); + hashingStream.writeBytes(randomAccessData); + assertEquals(simpleHash, hashingStream.computeHash()); + hashingStream.reset(); + // === subsets === + int offset = 10; + int length = 50; + final long simpleSubsetHash = XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(bytes, offset, length); + // byte arrays + hashingStream.writeBytes(bytes, offset, length); + assertEquals(simpleSubsetHash, hashingStream.computeHash()); + hashingStream.reset(); + for (int i = offset; i < offset + length; i++) { + hashingStream.writeByte(bytes[i]); + } + assertEquals(simpleSubsetHash, hashingStream.computeHash()); + hashingStream.reset(); + // BufferedData + BufferedData bufferedSubsetData = BufferedData.wrap(bytes, offset, length); + hashingStream.writeBytes(bufferedSubsetData); + assertEquals(simpleSubsetHash, hashingStream.computeHash()); + hashingStream.reset(); + // ByteBuffer + java.nio.ByteBuffer byteSubsetBuffer = java.nio.ByteBuffer.wrap(bytes, offset, length); + hashingStream.writeBytes(byteSubsetBuffer); + assertEquals(simpleSubsetHash, 
hashingStream.computeHash()); + hashingStream.reset(); + // RandomAccessData + RandomAccessData randomSubsetAccessData = Bytes.wrap(bytes, offset, length); + hashingStream.writeBytes(randomSubsetAccessData); + assertEquals(simpleSubsetHash, hashingStream.computeHash()); + hashingStream.reset(); + } +} diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java new file mode 100644 index 000000000..45893044c --- /dev/null +++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java @@ -0,0 +1,76 @@ +// SPDX-License-Identifier: Apache-2.0 +package com.hedera.pbj.integration.jmh; + +import com.hedera.pbj.runtime.io.buffer.Bytes; +import com.hedera.pbj.test.proto.pbj.Hasheval; +import com.hedera.pbj.test.proto.pbj.Hasheval2; +import com.hedera.pbj.test.proto.pbj.Suit; +import com.hedera.pbj.test.proto.pbj.TimestampTest; +import java.util.concurrent.TimeUnit; +import org.openjdk.jmh.annotations.Benchmark; +import org.openjdk.jmh.annotations.BenchmarkMode; +import org.openjdk.jmh.annotations.Fork; +import org.openjdk.jmh.annotations.Measurement; +import org.openjdk.jmh.annotations.Mode; +import org.openjdk.jmh.annotations.OperationsPerInvocation; +import org.openjdk.jmh.annotations.OutputTimeUnit; +import org.openjdk.jmh.annotations.Scope; +import org.openjdk.jmh.annotations.State; +import org.openjdk.jmh.annotations.Warmup; +import org.openjdk.jmh.infra.Blackhole; + +@SuppressWarnings("unused") +@State(Scope.Benchmark) +@Fork(1) +@Warmup(iterations = 4, time = 2) +@Measurement(iterations = 5, time = 2) +@OutputTimeUnit(TimeUnit.NANOSECONDS) +@BenchmarkMode(Mode.AverageTime) +public class ModelObjHashCodeBench { + + @Benchmark + public void simpleObject(Blackhole blackhole) { + TimestampTest tst = new TimestampTest(987L, 123); + blackhole.consume(tst.hashCode()); + } + + @Benchmark + public void 
bigObject(Blackhole blackhole) { + Hasheval2 complexObj = new Hasheval2( + 13, + -1262, + 2236, + 326, + -27, + 123f, + 7L, + -7L, + 123L, + 234L, + -345L, + 456.789D, + true, + Suit.ACES, + new Hasheval( + 1109840, + -1414, + 25151, + 31515, + -236, + 123f, + 7347L, + -7L, + 1233474347347L, + 234L, + -345L, + 456.789D, + true, + Suit.ACES, + new TimestampTest(987L, 123), + "FooBarKKKKHHHHOIOIOI", + Bytes.wrap(new byte[] {127, 2, 3, 123, 48, 6, 7, (byte) 255})), + "FooBarKKKKHHHHOIOIOI", + Bytes.wrap(new byte[] {81, 52, 13, 94, 85, 66, 7, (byte) 255})); + blackhole.consume(complexObj.hashCode()); + } +} diff --git a/pbj-integration-tests/src/main/proto/hasheval.proto b/pbj-integration-tests/src/main/proto/hasheval.proto index b819a1428..008c08b5f 100644 --- a/pbj-integration-tests/src/main/proto/hasheval.proto +++ b/pbj-integration-tests/src/main/proto/hasheval.proto @@ -33,3 +33,24 @@ message Hasheval { string text = 16; bytes bytesField = 17; } + + +message Hasheval2 { + int32 int32Number = 1; + sint32 sint32Number = 2; + uint32 uint32Number = 3; + fixed32 fixed32Number = 4; + sfixed32 sfixed32Number = 5; + float floatNumber = 6; + int64 int64Number = 7; + sint64 sint64Number = 8; + uint64 uint64Number = 9; + fixed64 fixed64Number = 10; + sfixed64 sfixed64Number = 11; + double doubleNumber = 12; + bool booleanField = 13; + Suit enumSuit = 14; + Hasheval subObject = 15; + string text = 16; + bytes bytesField = 17; +} \ No newline at end of file From c23d57d9b9e8042d1559995d348757fcf01964fb Mon Sep 17 00:00:00 2001 From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> Date: Thu, 14 Aug 2025 08:14:54 -0700 Subject: [PATCH 3/7] Refactored and moved hashing generation to separate class. 
Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com> --- .../com/hedera/pbj/compiler/impl/Common.java | 338 -------------- .../impl/generators/ModelGenerator.java | 62 +-- .../generators/ModelHashCodeGenerator.java | 415 ++++++++++++++++++ 3 files changed, 416 insertions(+), 399 deletions(-) create mode 100644 pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/generators/ModelHashCodeGenerator.java diff --git a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java index 5f651b41a..4f6ac436a 100644 --- a/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java +++ b/pbj-core/pbj-compiler/src/main/java/com/hedera/pbj/compiler/impl/Common.java @@ -205,344 +205,6 @@ public static String javaPrimitiveToObjectType(String primitiveFieldType) { }; } - /** - * Recursively calculates the hashcode for a message fields. - * - * @param fields The fields of this object. - * @param generatedCodeSoFar The accumulated hash code so far. - * @return The generated code for getting the hashCode value. - */ - public static String getFieldsHashCode(final Listxxhsum command line tool.
+ */
+ public static final long DEFAULT_SEED = 0;
/** Default instance of the XXH3_64 hasher with a seed of 0. */
- public static final XXH3_64 DEFAULT_INSTANCE = new XXH3_64(0);
+ public static final XXH3_64 DEFAULT_INSTANCE = new XXH3_64(DEFAULT_SEED);
/** VarHandle for reading and writing longs in little-endian byte order. */
private static final VarHandle LONG_HANDLE =
MethodHandles.byteArrayViewVarHandle(long[].class, ByteOrder.LITTLE_ENDIAN);
@@ -66,10 +74,10 @@ public class XXH3_64 {
private static final long INIT_ACC_6 = 0x27D4EB2F165667C5L;
private static final long INIT_ACC_7 = 0x000000009E3779B1L;
// Private constants for the mix and avalanche functions, derived from seed and secret values.
- private final long secret00;
- private final long secret01;
- private final long secret02;
- private final long secret03;
+ public final long secret00;
+ public final long secret01;
+ public final long secret02;
+ public final long secret03;
private final long secret04;
private final long secret05;
private final long secret06;
@@ -123,12 +131,12 @@ public class XXH3_64 {
private final long secShift13;
private final long secShift14;
private final long secShift15;
- private final long bitflip00;
- private final long bitflip12;
- private final long bitflip34;
- private final long bitflip56;
+ public final long bitflip00;
+ public final long bitflip12;
+ public final long bitflip34;
+ public final long bitflip56;
/** Precomputed hash value used when the input length is 0. */
- private final long hash0;
+ public final long hash0;
/**
* Creates a new instance of {@link XXH3_64} with the specified seed.
@@ -224,6 +232,82 @@ public HashingWritableSequentialData hashingWritableSequentialData() {
return new HashingWritableSequentialData();
}
+ /**
+ * Hashes a pair of longs to a 64-bit {@code long} value.
+ *
+ * + * Equivalent to {@code hashBytesToLong(input1 || input2)}, where {@code ||} denotes concatenation + * and the longs are interpreted as little-endian byte sequences. + *
+ * + * @param input1 the first long data to hash + * @param input2 the second long data to hash + * @return the hash value + */ + public long hash(long input1, long input2) { + long lo = input1 ^ bitflip34; + long hi = input2 ^ bitflip56; + long acc = 16 + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + + /** + * Hashes a pair of long and double to a 64-bit {@code long} value. + * + * @param input1 the long data to hash + * @param input2 the double data to hash + * @return the hash value + */ + public long hash(long input1, double input2) { + long lo = input1 ^ bitflip34; + long hi = Double.doubleToRawLongBits(input2) ^ bitflip56; + long acc = 16 + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + + /** + * Hashes a pair of long and float to a 64-bit {@code long} value. + * + * @param input1 the long data to hash + * @param input2 the float data to hash + * @return the hash value + */ + public long hash(long input1, float input2) { + long lo = input1 ^ bitflip34; + long hi = Float.floatToRawIntBits(input2) ^ bitflip56; + long acc = 16 + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + + /** + * Hashes a pair of long and String to a 64-bit {@code long} value. + * + * @param input1 the long data to hash + * @param input2 the String data to hash + * @return the hash value + */ + public long hash(long input1, String input2) { + long lo = input1 ^ bitflip34; + byte[] stringBytes = input2.getBytes(StandardCharsets.UTF_8); + long hi = hashBytesToLong(stringBytes,0,stringBytes.length) ^ bitflip56; + long acc = 16 + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + + /** + * Hashes a pair of long and Bytes to a 64-bit {@code long} value. 
+ * + * @param input1 the long data to hash + * @param input2 the Bytes data to hash + * @return the hash value + */ + public long hash(long input1, Bytes input2) { + long lo = input1 ^ bitflip34; + long hi = input2.hashCode64() ^ bitflip56; + long acc = 16 + Long.reverseBytes(lo) + hi + mix(lo, hi); + return avalanche3(acc); + } + /** * Hashes a byte array to a 64-bit {@code long} value. * @@ -410,7 +494,7 @@ public long hashBytesToLong(final byte[] input, final int off, final int length) // Private methods // ============================================================================================================= - private static long rrmxmx(long h64, final long length) { + public static long rrmxmx(long h64, final long length) { h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24); h64 *= 0x9FB21C651E98DF25L; h64 ^= (h64 >>> 35) + length; @@ -424,7 +508,7 @@ private static long mix16B(final byte[] input, final int offIn, final long sec0, return mix2Accs(lo, hi, sec0, sec1); } - private static long avalanche64(long h64) { + public static long avalanche64(long h64) { h64 ^= h64 >>> 33; h64 *= INIT_ACC_2; h64 ^= h64 >>> 29; @@ -432,7 +516,7 @@ private static long avalanche64(long h64) { return h64 ^ (h64 >>> 32); } - private static long avalanche3(long h64) { + public static long avalanche3(long h64) { h64 ^= h64 >>> 37; h64 *= 0x165667919E3779F9L; return h64 ^ (h64 >>> 32); @@ -451,7 +535,7 @@ private static long mixAcc(long acc, long sec) { return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7; } - private static long mix(long a, long b) { + public static long mix(long a, long b) { long x = a * b; long y = Math.unsignedMultiplyHigh(a, b); return x ^ y; diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java index ec2b2538d..0dbafcd57 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java +++ 
b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java @@ -67,7 +67,7 @@ public final class Bytes implements RandomAccessData, Comparable+ * This hash code has to match how the field would be hashed if it was a normal field in the schema + *
*/ @Override public int hashCode() { - int hashCode = 1; - - hashCode = 31 * hashCode + Integer.hashCode(field); - hashCode = 31 * hashCode + Integer.hashCode(wireType.ordinal()); - hashCode = 31 * hashCode + bytes.hashCode(); - - // Shifts: 30, 27, 16, 20, 5, 18, 10, 24, 30 - hashCode += hashCode << 30; - hashCode ^= hashCode >>> 27; - hashCode += hashCode << 16; - hashCode ^= hashCode >>> 20; - hashCode += hashCode << 5; - hashCode ^= hashCode >>> 18; - hashCode += hashCode << 10; - hashCode ^= hashCode >>> 24; - hashCode += hashCode << 30; + return (int)hashCode64(); + } - return hashCode; + /** + * A `SixtyFourBitHashable.hashCode64()` implementation that computes a 64-bit hash code using all the members of + * the UnknownField record: the `field`, the `wireType`, and the `bytes`. + * The implementation should remain stable over time because this is a public API. + *+ * This hash code has to match how the field would be hashed if it was a normal field in the schema + *
+ * + * @return a 64-bit hash code for this UnknownField object + */ + @Override + public long hashCode64() { + return bytes.hashCode64(); } /** diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/SixtyFourBitHashable.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/SixtyFourBitHashable.java new file mode 100644 index 000000000..8f9ef54a4 --- /dev/null +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/SixtyFourBitHashable.java @@ -0,0 +1,13 @@ +package com.hedera.pbj.runtime.hashing; + +/** + * Interface for objects that can be hashed to a 64-bit long value. + */ +public interface SixtyFourBitHashable { + /** + * Hash this object to a 64-bit long value. + * + * @return the 64-bit hash value + */ + long hashCode64(); +} diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java index 1cd81d433..cb96e9f39 100644 --- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java +++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/hashing/XXH3_64.java @@ -1,7 +1,6 @@ // SPDX-License-Identifier: Apache-2.0 package com.hedera.pbj.runtime.hashing; -import com.hedera.pbj.runtime.Utf8Tools; import com.hedera.pbj.runtime.io.WritableSequentialData; import com.hedera.pbj.runtime.io.buffer.BufferedData; import com.hedera.pbj.runtime.io.buffer.Bytes; @@ -24,7 +23,7 @@ * @see xxhash */ @SuppressWarnings({"DuplicatedCode", "NumericOverflow"}) -public class XXH3_64 { +public final class XXH3_64 { /** * The default seed value used for hashing. ZERO is chosen as this is the default for xxhash and used in tools lile *xxhsum command line tool.
@@ -490,11 +489,216 @@ public long hashBytesToLong(final byte[] input, final int off, final int length)
return finalizeHash(length, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7);
}
+    /**
+     * Hashes a CharSequence as raw UTF-16 code units to a 64-bit {@code long} value.
+     *
+     * <p>The sequence is read through {@code charAt} only; each {@code char} is counted as two bytes of input.
+     *
+     * @param charSequence the character sequence to hash, must not be null
+     * @return the hash value
+     */
+    public long hashCharsToLong(final CharSequence charSequence) {
+        int len = charSequence.length(); // length in chars; every length mixed below is in bytes (len << 1)
+        if (len <= 8) { // at most 8 chars
+            if (len > 4) { // 5..8 chars: two 4-char words, from the start and from the end (they may overlap)
+                long lo = getLong(charSequence, 0) ^ bitflip34;
+                long hi = getLong(charSequence, len - 4) ^ bitflip56;
+                long acc = (len << 1) + Long.reverseBytes(lo) + hi + mix(lo, hi);
+                return avalanche3(acc);
+            }
+            if (len >= 2) { // 2..4 chars: two 2-char ints, from the start and from the end (they may overlap)
+                long input1 = getInt(charSequence, 0);
+                long input2 = getInt(charSequence, len - 2);
+                long keyed = (input2 & 0xFFFFFFFFL) ^ (input1 << 32) ^ bitflip12;
+                return rrmxmx(keyed, len << 1);
+            }
+            if (len != 0) { // exactly one char
+                long c = charSequence.charAt(0);
+                long combined = (c << 16) | (c >>> 8) | 512L; // 512L == 2 << 8; presumably the 2-byte length term
+                return avalanche64(combined ^ bitflip00);
+            }
+            return hash0; // empty sequence
+        }
+        if (len <= 64) { // 9..64 chars: up to eight 8-char windows, taken alternately from the front and the back
+            long acc = len * (INIT_ACC_1 << 1);
+
+            if (len > 16) {
+                if (len > 32) {
+                    if (len > 48) {
+                        acc += mix16B(charSequence, 24, secret12, secret13);
+                        acc += mix16B(charSequence, len - 32, secret14, secret15);
+                    }
+                    acc += mix16B(charSequence, 16, secret08, secret09);
+                    acc += mix16B(charSequence, len - 24, secret10, secret11);
+                }
+                acc += mix16B(charSequence, 8, secret04, secret05);
+                acc += mix16B(charSequence, len - 16, secret06, secret07);
+            }
+            acc += mix16B(charSequence, 0, secret00, secret01);
+            acc += mix16B(charSequence, len - 8, secret02, secret03);
+
+            return avalanche3(acc);
+        }
+        if (len <= 120) { // 65..120 chars: eight windows over the first 64 chars, then one per further 8 chars
+            long acc = len * (INIT_ACC_1 << 1);
+            acc += mix16B(charSequence, 0, secret00, secret01);
+            acc += mix16B(charSequence, 8, secret02, secret03);
+            acc += mix16B(charSequence, 16, secret04, secret05);
+            acc += mix16B(charSequence, 24, secret06, secret07);
+            acc += mix16B(charSequence, 32, secret08, secret09);
+            acc += mix16B(charSequence, 40, secret10, secret11);
+            acc += mix16B(charSequence, 48, secret12, secret13);
+            acc += mix16B(charSequence, 56, secret14, secret15);
+
+            acc = avalanche3(acc);
+
+            if (len >= 72) { // each nesting level consumes the next complete 8-char window
+                acc += mix16B(charSequence, 64, secShift00, secShift01);
+                if (len >= 80) {
+                    acc += mix16B(charSequence, 72, secShift02, secShift03);
+                    if (len >= 88) {
+                        acc += mix16B(charSequence, 80, secShift04, secShift05);
+                        if (len >= 96) {
+                            acc += mix16B(charSequence, 88, secShift06, secShift07);
+                            if (len >= 104) {
+                                acc += mix16B(charSequence, 96, secShift08, secShift09);
+                                if (len >= 112) {
+                                    acc += mix16B(charSequence, 104, secShift10, secShift11);
+                                    if (len >= 120) acc += mix16B(charSequence, 112, secShift12, secShift13);
+                                }
+                            }
+                        }
+                    }
+                }
+            }
+            acc += mix16B(charSequence, len - 8, secShift14, secShift15); // last 8 chars, may overlap a window above
+            return avalanche3(acc);
+        }
+
+        long acc0 = INIT_ACC_0; // more than 120 chars: eight running accumulators
+        long acc1 = INIT_ACC_1;
+        long acc2 = INIT_ACC_2;
+        long acc3 = INIT_ACC_3;
+        long acc4 = INIT_ACC_4;
+        long acc5 = INIT_ACC_5;
+        long acc6 = INIT_ACC_6;
+        long acc7 = INIT_ACC_7;
+
+        final int nbBlocks = (len - 1) >>> (BLOCK_LEN_EXP - 1); // shift is BLOCK_LEN_EXP - 1: offsets count chars
+        for (int n = 0; n < nbBlocks; n++) {
+            final int offBlock = n << (BLOCK_LEN_EXP - 1);
+            for (int s = 0; s < 16; s += 1) { // 16 stripes per block
+                int offStripe = offBlock + (s << 5); // each stripe is 32 chars, read as eight 4-char words
+
+                long b0 = getLong(charSequence, offStripe);
+                long b1 = getLong(charSequence, offStripe + 4);
+                long b2 = getLong(charSequence, offStripe + 4 * 2);
+                long b3 = getLong(charSequence, offStripe + 4 * 3);
+                long b4 = getLong(charSequence, offStripe + 4 * 4);
+                long b5 = getLong(charSequence, offStripe + 4 * 5);
+                long b6 = getLong(charSequence, offStripe + 4 * 6);
+                long b7 = getLong(charSequence, offStripe + 4 * 7);
+
+                acc0 += b1 + contrib(b0, secret[s]); // each accumulator also adds the neighbouring word of its pair
+                acc1 += b0 + contrib(b1, secret[s + 1]);
+                acc2 += b3 + contrib(b2, secret[s + 2]);
+                acc3 += b2 + contrib(b3, secret[s + 3]);
+                acc4 += b5 + contrib(b4, secret[s + 4]);
+                acc5 += b4 + contrib(b5, secret[s + 5]);
+                acc6 += b7 + contrib(b6, secret[s + 6]);
+                acc7 += b6 + contrib(b7, secret[s + 7]);
+            }
+
+            acc0 = mixAcc(acc0, secret16); // re-mix every accumulator once per completed block
+            acc1 = mixAcc(acc1, secret17);
+            acc2 = mixAcc(acc2, secret18);
+            acc3 = mixAcc(acc3, secret19);
+            acc4 = mixAcc(acc4, secret20);
+            acc5 = mixAcc(acc5, secret21);
+            acc6 = mixAcc(acc6, secret22);
+            acc7 = mixAcc(acc7, secret23);
+        }
+
+        final int nbStripes = ((len - 1) - (nbBlocks << (BLOCK_LEN_EXP - 1))) >>> 5; // stripes after the last block
+        final int offBlock = nbBlocks << (BLOCK_LEN_EXP - 1);
+        for (int s = 0; s < nbStripes; s++) {
+            int offStripe = offBlock + (s << 5);
+
+            long b0 = getLong(charSequence, offStripe);
+            long b1 = getLong(charSequence, offStripe + 4);
+            long b2 = getLong(charSequence, offStripe + 4 * 2);
+            long b3 = getLong(charSequence, offStripe + 4 * 3);
+            long b4 = getLong(charSequence, offStripe + 4 * 4);
+            long b5 = getLong(charSequence, offStripe + 4 * 5);
+            long b6 = getLong(charSequence, offStripe + 4 * 6);
+            long b7 = getLong(charSequence, offStripe + 4 * 7);
+
+            acc0 += b1 + contrib(b0, secret[s]);
+            acc1 += b0 + contrib(b1, secret[s + 1]);
+            acc2 += b3 + contrib(b2, secret[s + 2]);
+            acc3 += b2 + contrib(b3, secret[s + 3]);
+            acc4 += b5 + contrib(b4, secret[s + 4]);
+            acc5 += b4 + contrib(b5, secret[s + 5]);
+            acc6 += b7 + contrib(b6, secret[s + 6]);
+            acc7 += b6 + contrib(b7, secret[s + 7]);
+        }
+
+        { // final stripe: always the last 32 chars, which may overlap stripes already consumed above
+            int offStripe = len - 32;
+
+            long b0 = getLong(charSequence, offStripe);
+            long b1 = getLong(charSequence, offStripe + 4);
+            long b2 = getLong(charSequence, offStripe + 4 * 2);
+            long b3 = getLong(charSequence, offStripe + 4 * 3);
+            long b4 = getLong(charSequence, offStripe + 4 * 4);
+            long b5 = getLong(charSequence, offStripe + 4 * 5);
+            long b6 = getLong(charSequence, offStripe + 4 * 6);
+            long b7 = getLong(charSequence, offStripe + 4 * 7);
+
+            acc0 += b1 + contrib(b0, secShift16);
+            acc1 += b0 + contrib(b1, secShift17);
+            acc2 += b3 + contrib(b2, secShift18);
+            acc3 += b2 + contrib(b3, secShift19);
+            acc4 += b5 + contrib(b4, secShift20);
+            acc5 += b4 + contrib(b5, secShift21);
+            acc6 += b7 + contrib(b6, secShift22);
+            acc7 += b6 + contrib(b7, secShift23);
+        }
+
+        return finalizeHash((long) len << 1, acc0, acc1, acc2, acc3, acc4, acc5, acc6, acc7); // length in bytes
+    }
+
// =============================================================================================================
// Private methods
// =============================================================================================================
- public static long rrmxmx(long h64, final long length) {
+
+    /**
+     * Packs four consecutive UTF-16 chars of a {@link CharSequence} into one {@code long}, first char lowest.
+     *
+     * @param charSequence the sequence to read from
+     * @param off index of the first of the four chars
+     * @return the chars at {@code off..off+3} in bits 0-15, 16-31, 32-47 and 48-63 respectively
+     */
+    private static long getLong(final CharSequence charSequence, int off) {
+        final long c0 = charSequence.charAt(off);
+        final long c1 = charSequence.charAt(off + 1);
+        final long c2 = charSequence.charAt(off + 2);
+        return c0 | (c1 << 16) | (c2 << 32) | ((long) charSequence.charAt(off + 3) << 48);
+    }
+
+    /**
+     * Packs two consecutive UTF-16 chars of a {@link CharSequence} into one {@code int}, first char lowest.
+     *
+     * @param charSequence the sequence to read from
+     * @param off index of the first of the two chars
+     * @return the char at {@code off} in bits 0-15 and the char at {@code off + 1} in bits 16-31
+     */
+    private static int getInt(CharSequence charSequence, int off) {
+        return charSequence.charAt(off) | (charSequence.charAt(off + 1) << 16);
+    }
+
+ static long rrmxmx(long h64, final long length) {
h64 ^= Long.rotateLeft(h64, 49) ^ Long.rotateLeft(h64, 24);
h64 *= 0x9FB21C651E98DF25L;
h64 ^= (h64 >>> 35) + length;
@@ -508,7 +712,14 @@ private static long mix16B(final byte[] input, final int offIn, final long sec0,
return mix2Accs(lo, hi, sec0, sec1);
}
- public static long avalanche64(long h64) {
+
+    private static long mix16B(final CharSequence input, final int offIn, final long sec0, final long sec1) {
+        final long low = getLong(input, offIn); // chars offIn .. offIn + 3
+        final long high = getLong(input, offIn + 4); // chars offIn + 4 .. offIn + 7
+        return mix2Accs(low, high, sec0, sec1);
+    }
+
+ static long avalanche64(long h64) {
h64 ^= h64 >>> 33;
h64 *= INIT_ACC_2;
h64 ^= h64 >>> 29;
@@ -516,7 +727,7 @@ public static long avalanche64(long h64) {
return h64 ^ (h64 >>> 32);
}
- public static long avalanche3(long h64) {
+ static long avalanche3(long h64) {
h64 ^= h64 >>> 37;
h64 *= 0x165667919E3779F9L;
return h64 ^ (h64 >>> 32);
@@ -535,7 +746,7 @@ private static long mixAcc(long acc, long sec) {
return (acc ^ (acc >>> 47) ^ sec) * INIT_ACC_7;
}
- public static long mix(long a, long b) {
+ static long mix(long a, long b) {
long x = a * b;
long y = Math.unsignedMultiplyHigh(a, b);
return x ^ y;
@@ -553,6 +764,10 @@ private long finalizeHash(
return avalanche3(result64);
}
+ /**
+ * A writable sequential data implementation that hashes data using the XXH3_64 algorithm.
+ * It buffers writes in bulk and processes them to compute the hash incrementally.
+ */
public class HashingWritableSequentialData implements WritableSequentialData {
/** The size of the buffer used for writing data in bulk. */
private static final int BULK_SIZE = 256;
diff --git a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java
index 0dbafcd57..2a70d4c91 100644
--- a/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java
+++ b/pbj-core/pbj-runtime/src/main/java/com/hedera/pbj/runtime/io/buffer/Bytes.java
@@ -3,6 +3,7 @@
import static java.util.Objects.requireNonNull;
+import com.hedera.pbj.runtime.hashing.SixtyFourBitHashable;
import com.hedera.pbj.runtime.hashing.XXH3_64;
import com.hedera.pbj.runtime.io.DataEncodingException;
import com.hedera.pbj.runtime.io.ReadableSequentialData;
@@ -30,7 +31,7 @@
* An immutable representation of a byte array. This class is designed to be efficient and usable across threads.
*/
@SuppressWarnings("unused")
-public final class Bytes implements RandomAccessData, Comparable Code direct from HalfDiskHashMap, only change is NUM_BUCKETS Code direct from HalfDiskHashMap, only change is NUM_BUCKETS
+ * Goal:
+ *
+ * Pattern:
+ *
+ * If the sequence is empty (no non-defaults), finish() returns the canonical empty-hash for XXH3 with seed=0.
+ *
+ * Notes:
+ *
+ *
+ *
+ *
+ *
+ *
+ *
+ * {@code
+ * // Pseudo-example showing how your generator should produce code for a model:
+ * final class ExampleModel {
+ * long a; // default 0
+ * long b; // default 0
+ * int c; // default 0
+ * double d; // default 0.0
+ * // ...
+ *
+ * // Convert each field to a 64-bit value deterministically (little-endian representation in spirit).
+ * // Only include the field if it is non-default for that field.
+ * public long hashCode64() {
+ * long $xx_acc = 0L;
+ * long $xx_total = 0L;
+ * long $xx_carry = 0L;
+ * boolean $xx_haveCarry = false;
+ * int $xx_pairIndex = 0;
+ *
+ * // Field a (long)
+ * if (a != 0L) {
+ * final long v = a; // already a 64-bit value
+ * if (!$xx_haveCarry) { $xx_carry = v; $xx_haveCarry = true; $xx_total += 8; }
+ * else { $xx_acc += XXH3FieldHash.mixPair($xx_carry, v, $xx_pairIndex++); $xx_haveCarry = false; $xx_total += 8; }
+ * }
+ *
+ * // Field b (long)
+ * if (b != 0L) {
+ * final long v = b;
+ * if (!$xx_haveCarry) { $xx_carry = v; $xx_haveCarry = true; $xx_total += 8; }
+ * else { $xx_acc += XXH3FieldHash.mixPair($xx_carry, v, $xx_pairIndex++); $xx_haveCarry = false; $xx_total += 8; }
+ * }
+ *
+ * // Field c (int) -> widen to 64-bits in a stable way
+ * if (c != 0) {
+ * final long v = ((long) c) & 0xFFFFFFFFL; // LE semantics if desired
+ * if (!$xx_haveCarry) { $xx_carry = v; $xx_haveCarry = true; $xx_total += 8; }
+ * else { $xx_acc += XXH3FieldHash.mixPair($xx_carry, v, $xx_pairIndex++); $xx_haveCarry = false; $xx_total += 8; }
+ * }
+ *
+ * // Field d (double)
+ * if (Double.doubleToRawLongBits(d) != 0L) {
+ * final long v = Double.doubleToRawLongBits(d);
+ * if (!$xx_haveCarry) { $xx_carry = v; $xx_haveCarry = true; $xx_total += 8; }
+ * else { $xx_acc += XXH3FieldHash.mixPair($xx_carry, v, $xx_pairIndex++); $xx_haveCarry = false; $xx_total += 8; }
+ * }
+ *
+ * // ... repeat for all fields ...
+ *
+ * if ($xx_haveCarry) {
+ * $xx_acc += XXH3FieldHash.mixTail8($xx_carry, $xx_pairIndex);
+ * }
+ * return XXH3FieldHash.finish($xx_acc, $xx_total);
+ * }
+ * }
+ * }
+ *
+ */
+public final class XXH3FieldHash {
+
+    private XXH3FieldHash() {}
+
+    // XXH3 constants (seed=0), inlined to avoid touching an instance.
+    // We use only the first 16 for the rotating pair schedule.
+    private static final long SECRET_00 = 0xbe4ba423396cfeb8L;
+    private static final long SECRET_01 = 0x1cad21f72c81017cL;
+    private static final long SECRET_02 = 0xdb979083e96dd4deL;
+    private static final long SECRET_03 = 0x1f67b3b7a4a44072L;
+    private static final long SECRET_04 = 0x78e5c0cc4ee679cbL;
+    private static final long SECRET_05 = 0x2172ffcc7dd05a82L;
+    private static final long SECRET_06 = 0x8e2443f7744608b8L;
+    private static final long SECRET_07 = 0x4c263a81e69035e0L;
+    private static final long SECRET_08 = 0xcb00c391bb52283cL;
+    private static final long SECRET_09 = 0xa32e531b8b65d088L;
+    private static final long SECRET_10 = 0x4ef90da297486471L;
+    private static final long SECRET_11 = 0xd8acdea946ef1938L;
+    private static final long SECRET_12 = 0x3f349ce33f76faa8L;
+    private static final long SECRET_13 = 0x1d4f0bc7c7bbdcf9L;
+    private static final long SECRET_14 = 0x3159b4cd4be0518aL;
+    private static final long SECRET_15 = 0x647378d9c97e9fc8L;
+
+    // INIT_ACC_1 from xxh3; finish() multiplies it by the byte count to form the length bias.
+    private static final long INIT_ACC_1 = 0x9E3779B185EBCA87L;
+
+    // Canonical hash for empty input with seed=0 (matches XXH3_64.DEFAULT_INSTANCE.hash0)
+    // Computed as avalanche64(0 ^ (SECRET_07 ^ SECRET_08)).
+    private static final long EMPTY_HASH =
+            XXH3_64.avalanche64(SECRET_07 ^ SECRET_08);
+
+    // Small fixed table for rotating secret pairs. index = (pairIndex & 7) << 1
+    private static final long[] SECRET_PAIRS = {
+        SECRET_00, SECRET_01,
+        SECRET_02, SECRET_03,
+        SECRET_04, SECRET_05,
+        SECRET_06, SECRET_07,
+        SECRET_08, SECRET_09,
+        SECRET_10, SECRET_11,
+        SECRET_12, SECRET_13,
+        SECRET_14, SECRET_15
+    };
+
+    private static long mix2(long lo, long hi, long s0, long s1) {
+        // Equivalent to the XXH3 "mix2Accs" core: mix(lo ^ s0, hi ^ s1)
+        return XXH3_64.mix(lo ^ s0, hi ^ s1);
+    }
+
+    /**
+     * Mixes a pair of 64-bit values using the secret pair selected by {@code pairIndex & 7} (repeats every 8 pairs).
+     *
+     * @param lo first value in the pair
+     * @param hi second value in the pair
+     * @param pairIndex 0-based index of the pair (increment by 1 each time you call mixPair)
+     * @return the mixed value, which the caller adds to its running accumulator
+     */
+    public static long mixPair(long lo, long hi, int pairIndex) {
+        final int base = (pairIndex & 7) << 1; // 0..14
+        final long s0 = SECRET_PAIRS[base];
+        final long s1 = SECRET_PAIRS[base + 1];
+        return mix2(lo, hi, s0, s1); // single shared mixing core instead of a duplicated expression
+    }
+
+    /**
+     * Mixes a single 64-bit leftover ("tail") value that has no partner to form a pair.
+     * Applies rrmxmx to the value XORed with the first secret of the slot selected by {@code pairIndex & 7}.
+     *
+     * @param v tail value (one leftover long)
+     * @param pairIndex the next pair index (i.e., the count of full pairs already processed)
+     * @return the mixed value, which the caller adds to its running accumulator
+     */
+    public static long mixTail8(long v, int pairIndex) {
+        final int base = (pairIndex & 7) << 1;
+        final long s0 = SECRET_PAIRS[base];
+        // rrmxmx takes a "length" tweak; 8 is passed because the tail is one 8-byte word
+        return XXH3_64.rrmxmx(v ^ s0, 8);
+    }
+
+    /**
+     * Finish the hash. If totalBytes==0 returns the canonical XXH3 empty hash (seed=0).
+     * Otherwise adds a length bias of {@code totalBytes * INIT_ACC_1} and finishes with avalanche3.
+     *
+     * @param acc running accumulator, i.e. the sum of the mixPair/mixTail8 results
+     * @param totalBytes total output bytes you've conceptually written (8 per non-default field)
+     * @return final 64-bit hash
+     */
+    public static long finish(long acc, long totalBytes) {
+        if (totalBytes == 0) {
+            return EMPTY_HASH;
+        }
+        // XXH3-like finish: add a length bias and avalanche3
+        return XXH3_64.avalanche3(acc + totalBytes * INIT_ACC_1);
+    }
+
+    /**
+     * Convert int primitive to a 64-bit value by zero-extension (the upper 32 bits are always 0),
+     * suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the value to convert
+     * @return a 64-bit representation of the value
+     */
+    public static long toLong(final int value) {
+        // Mask after widening so negative ints do not sign-extend into the upper 32 bits
+        return ((long) value) & 0xFFFFFFFFL; // zero-extend to 64 bits
+    }
+
+    /**
+     * Convert double primitive to its raw IEEE 754 bit pattern as a 64-bit value,
+     * suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the value to convert
+     * @return a 64-bit representation of the value
+     */
+    public static long toLong(final double value) {
+        // Raw bits: NaN payloads are preserved rather than canonicalized
+        return Double.doubleToRawLongBits(value);
+    }
+
+    /**
+     * Convert float primitive to its raw IEEE 754 bit pattern, zero-extended to a 64-bit value,
+     * suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the value to convert
+     * @return a 64-bit representation of the value
+     */
+    public static long toLong(final float value) {
+        // Raw bits: NaN payloads are preserved; the mask stops the sign bit from sign-extending
+        return Float.floatToRawIntBits(value) & 0xFFFFFFFFL; // zero-extend to 64 bits
+    }
+
+    /**
+     * Convert boolean primitive to a 64-bit value in a stable way, suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the value to convert
+     * @return a 64-bit representation of the value
+     */
+    public static long toLong(final boolean value) {
+        // Fixed encoding so the result never depends on JVM internals
+        return value ? 1L : 0L; // 1 for true, 0 for false
+    }
+
+    /**
+     * Convert a byte array to a 64-bit value in a stable way using XXH3 64 hashing,
+     * suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the byte array to convert, must not be null
+     * @return a 64-bit representation of the byte array
+     */
+    public static long toLong(final byte[] value) {
+        // Hash the whole array with the default XXH3_64 instance
+        return XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(value, 0, value.length);
+    }
+
+    /**
+     * Convert a String to a 64-bit value in a stable way using XXH3 64 hashing of its UTF-16 chars,
+     * suitable for combining with mixPair or mixTail8.
+     *
+     * @param value the String to convert
+     * @return a 64-bit representation of the String
+     */
+    public static long toLong(final String value) {
+        return XXH3_64.DEFAULT_INSTANCE.hashCharsToLong(value);
+    }
+}
\ No newline at end of file
diff --git a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java
index 45893044c..e9485c531 100644
--- a/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java
+++ b/pbj-integration-tests/src/jmh/java/com/hedera/pbj/integration/jmh/ModelObjHashCodeBench.java
@@ -1,7 +1,9 @@
// SPDX-License-Identifier: Apache-2.0
package com.hedera.pbj.integration.jmh;
+import com.hedera.pbj.integration.EverythingTestData;
import com.hedera.pbj.runtime.io.buffer.Bytes;
+import com.hedera.pbj.test.proto.pbj.Everything;
import com.hedera.pbj.test.proto.pbj.Hasheval;
import com.hedera.pbj.test.proto.pbj.Hasheval2;
import com.hedera.pbj.test.proto.pbj.Suit;
@@ -34,6 +36,12 @@ public void simpleObject(Blackhole blackhole) {
blackhole.consume(tst.hashCode());
}
+    @Benchmark
+    public void everythingObject(Blackhole blackhole) {
+        final Everything freshCopy = EverythingTestData.EVERYTHING.copyBuilder().build();
+        blackhole.consume(freshCopy.hashCode());
+    }
+
@Benchmark
public void bigObject(Blackhole blackhole) {
Hasheval2 complexObj = new Hasheval2(
From a5d1b7be77b499ffa54481b05de73b9baa62e9ff Mon Sep 17 00:00:00 2001
From: Jasper Potts <1466205+jasperpotts@users.noreply.github.com>
Date: Mon, 18 Aug 2025 16:04:41 -0700
Subject: [PATCH 7/7] Added hashing benchmark
Signed-off-by: Jasper Potts <1466205+jasperpotts@users.noreply.github.com>
---
.../pbj/runtime/hashing/XxhBenchmark.java | 74 +++++++++++++++++++
1 file changed, 74 insertions(+)
create mode 100644 pbj-core/pbj-runtime/src/jmh/java/com/hedera/pbj/runtime/hashing/XxhBenchmark.java
diff --git a/pbj-core/pbj-runtime/src/jmh/java/com/hedera/pbj/runtime/hashing/XxhBenchmark.java b/pbj-core/pbj-runtime/src/jmh/java/com/hedera/pbj/runtime/hashing/XxhBenchmark.java
new file mode 100644
index 000000000..f1d4a1232
--- /dev/null
+++ b/pbj-core/pbj-runtime/src/jmh/java/com/hedera/pbj/runtime/hashing/XxhBenchmark.java
@@ -0,0 +1,74 @@
+// SPDX-License-Identifier: Apache-2.0
+package com.hedera.pbj.runtime.hashing;
+
+import java.util.Random;
+import java.util.concurrent.TimeUnit;
+import org.openjdk.jmh.annotations.Benchmark;
+import org.openjdk.jmh.annotations.BenchmarkMode;
+import org.openjdk.jmh.annotations.Fork;
+import org.openjdk.jmh.annotations.Level;
+import org.openjdk.jmh.annotations.Measurement;
+import org.openjdk.jmh.annotations.Mode;
+import org.openjdk.jmh.annotations.OperationsPerInvocation;
+import org.openjdk.jmh.annotations.OutputTimeUnit;
+import org.openjdk.jmh.annotations.Param;
+import org.openjdk.jmh.annotations.Scope;
+import org.openjdk.jmh.annotations.Setup;
+import org.openjdk.jmh.annotations.State;
+import org.openjdk.jmh.annotations.Warmup;
+import org.openjdk.jmh.infra.Blackhole;
+
+/**
+ * Benchmark for XXH3_64 hashing functions.
+ * Measures {@code hashBytesToLong} on random byte arrays and {@code hashCharsToLong} on random alphanumeric
+ * strings; each invocation hashes {@code SAMPLES} pre-generated inputs of {@code length} bytes or chars.
+ */
+@State(Scope.Benchmark)
+@Fork(1)
+@Warmup(iterations = 4, time = 2)
+@Measurement(iterations = 5, time = 2)
+@OutputTimeUnit(TimeUnit.NANOSECONDS)
+@BenchmarkMode(Mode.AverageTime)
+public class XxhBenchmark {
+    private static final String CHAR_POOL = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+    public static final int SAMPLES = 10_000;
+
+    /** Size of every generated input, injected by JMH: bytes per byte[] sample and chars per String sample. */
+    @Param({"4", "8", "16", "32", "48", "64", "120", "1024"})
+    public int length;
+
+    private final byte[][] byteInputData = new byte[SAMPLES][];
+    private final String[] stringInputData = new String[SAMPLES];
+
+    @Setup(Level.Trial)
+    public void init() {
+        final Random random = new Random(45155315113511L); // fixed seed so every trial hashes the same inputs
+        for (int i = 0; i < SAMPLES; i++) {
+            // byte[] sample: length random bytes
+            byteInputData[i] = new byte[length];
+            random.nextBytes(byteInputData[i]);
+            // String sample: length chars drawn from CHAR_POOL
+            StringBuilder builder = new StringBuilder(length);
+            for (int j = 0; j < length; j++) {
+                builder.append(CHAR_POOL.charAt(random.nextInt(CHAR_POOL.length())));
+            }
+            stringInputData[i] = builder.toString();
+        }
+    }
+
+    @Benchmark
+    @OperationsPerInvocation(SAMPLES)
+    public void testBytesHashing(final Blackhole blackhole) {
+        for (int i = 0; i < SAMPLES; i++) {
+            blackhole.consume(XXH3_64.DEFAULT_INSTANCE.hashBytesToLong(byteInputData[i], 0, byteInputData[i].length));
+        }
+    }
+
+    @Benchmark
+    @OperationsPerInvocation(SAMPLES)
+    public void testStringHashing(final Blackhole blackhole) {
+        for (int i = 0; i < SAMPLES; i++) {
+            blackhole.consume(XXH3_64.DEFAULT_INSTANCE.hashCharsToLong(stringInputData[i]));
+        }
+    }
+}