From 814380ffd14927dc9b13b3d277fde3061301ee5b Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 28 May 2025 17:22:06 +0200 Subject: [PATCH 01/44] init gearhash package --- packages/gearhash/README.md | 3 ++ packages/gearhash/asconfig.json | 22 ++++++++++++++ packages/gearhash/assembly/index.ts | 5 ++++ packages/gearhash/assembly/tsconfig.json | 6 ++++ packages/gearhash/build/.gitignore | 2 ++ packages/gearhash/index.html | 10 +++++++ packages/gearhash/package.json | 25 ++++++++++++++++ packages/gearhash/pnpm-lock.yaml | 38 ++++++++++++++++++++++++ packages/gearhash/tests/index.js | 4 +++ pnpm-workspace.yaml | 1 + 10 files changed, 116 insertions(+) create mode 100644 packages/gearhash/README.md create mode 100644 packages/gearhash/asconfig.json create mode 100644 packages/gearhash/assembly/index.ts create mode 100644 packages/gearhash/assembly/tsconfig.json create mode 100644 packages/gearhash/build/.gitignore create mode 100644 packages/gearhash/index.html create mode 100644 packages/gearhash/package.json create mode 100644 packages/gearhash/pnpm-lock.yaml create mode 100644 packages/gearhash/tests/index.js diff --git a/packages/gearhash/README.md b/packages/gearhash/README.md new file mode 100644 index 0000000000..e6f17fa01f --- /dev/null +++ b/packages/gearhash/README.md @@ -0,0 +1,3 @@ +JS and WASM implementations of https://github.com/srijs/rust-gearhash + +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. 
\ No newline at end of file diff --git a/packages/gearhash/asconfig.json b/packages/gearhash/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/gearhash/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts new file mode 100644 index 0000000000..66a9aafd88 --- /dev/null +++ b/packages/gearhash/assembly/index.ts @@ -0,0 +1,5 @@ +// The entry file of your WebAssembly module. + +export function add(a: i32, b: i32): i32 { + return a + b; +} diff --git a/packages/gearhash/assembly/tsconfig.json b/packages/gearhash/assembly/tsconfig.json new file mode 100644 index 0000000000..f81c3d55e6 --- /dev/null +++ b/packages/gearhash/assembly/tsconfig.json @@ -0,0 +1,6 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": [ + "./**/*.ts" + ] +} \ No newline at end of file diff --git a/packages/gearhash/build/.gitignore b/packages/gearhash/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/gearhash/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/packages/gearhash/index.html b/packages/gearhash/index.html new file mode 100644 index 0000000000..c170ddeb9a --- /dev/null +++ b/packages/gearhash/index.html @@ -0,0 +1,10 @@ + + + + + + + diff --git a/packages/gearhash/package.json b/packages/gearhash/package.json new file mode 100644 index 0000000000..c11bf7aeca --- /dev/null +++ b/packages/gearhash/package.json @@ -0,0 +1,25 @@ +{ + "name": 
"@huggingface/gearhash", + "version": "0.0.1", + "scripts": { + "build": "tsc", + "asbuild:debug": "asc assembly/index.ts --target debug", + "asbuild:release": "asc assembly/index.ts --target release", + "asbuild": "npm run asbuild:debug && npm run asbuild:release", + "test": "node tests", + "start": "npx serve ." + }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} \ No newline at end of file diff --git a/packages/gearhash/pnpm-lock.yaml b/packages/gearhash/pnpm-lock.yaml new file mode 100644 index 0000000000..f0edbf2226 --- /dev/null +++ b/packages/gearhash/pnpm-lock.yaml @@ -0,0 +1,38 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.36 + +packages: + + assemblyscript@0.27.36: + resolution: {integrity: sha512-1qX2zf6p7l/mNYv8r21jC/Yft7kX7XKR3xUHw41zvV4xad5lyC8w7jZiwZBGoy64VKZLc+bTDJDWi8Kb70YrHA==} + engines: {node: '>=18', npm: '>=10'} + hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.36: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/packages/gearhash/tests/index.js b/packages/gearhash/tests/index.js new file mode 100644 index 0000000000..769a0b0bff --- /dev/null +++ b/packages/gearhash/tests/index.js @@ -0,0 +1,4 @@ +import assert from "assert"; +import { add } from "../build/debug.js"; +assert.strictEqual(add(1, 2), 3); 
+console.log("ok"); diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 08e651bb73..5d89bd2418 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -14,3 +14,4 @@ packages: - "packages/ollama-utils" - "packages/mcp-client" - "packages/tiny-agents" + - "packages/gearhash" From 68b32be01eba3b8162a5897cdd0a0a2bc66f0f10 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 28 May 2025 17:35:56 +0200 Subject: [PATCH 02/44] fix build command --- packages/gearhash/package.json | 47 +++++++++++++++++----------------- 1 file changed, 23 insertions(+), 24 deletions(-) diff --git a/packages/gearhash/package.json b/packages/gearhash/package.json index c11bf7aeca..ad2419b2e8 100644 --- a/packages/gearhash/package.json +++ b/packages/gearhash/package.json @@ -1,25 +1,24 @@ { - "name": "@huggingface/gearhash", - "version": "0.0.1", - "scripts": { - "build": "tsc", - "asbuild:debug": "asc assembly/index.ts --target debug", - "asbuild:release": "asc assembly/index.ts --target release", - "asbuild": "npm run asbuild:debug && npm run asbuild:release", - "test": "node tests", - "start": "npx serve ." - }, - "dependencies": { - "assemblyscript": "^0.27.36" - }, - "type": "module", - "exports": { - ".": { - "import": "./build/release.js", - "types": "./build/release.d.ts" - } - }, - "devDependencies": { - "assemblyscript": "^0.27.36" - } -} \ No newline at end of file + "name": "@huggingface/gearhash", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests", + "start": "npx serve ." 
+ }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} From 14a9ef41f0cd21b39aebde716bcea2c3f62fbe82 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 15:52:35 +0200 Subject: [PATCH 03/44] Create gearhash function from rust source --- packages/gearhash/assembly/index.ts | 18 ++++++++- packages/gearhash/assembly/table.ts | 57 +++++++++++++++++++++++++++++ 2 files changed, 73 insertions(+), 2 deletions(-) create mode 100644 packages/gearhash/assembly/table.ts diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts index 66a9aafd88..659e958263 100644 --- a/packages/gearhash/assembly/index.ts +++ b/packages/gearhash/assembly/index.ts @@ -1,5 +1,19 @@ // The entry file of your WebAssembly module. -export function add(a: i32, b: i32): i32 { - return a + b; +import { DEFAULT_TABLE } from "./table"; + +export { DEFAULT_TABLE }; + +// Function to find the next match in the buffer +export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + hash = (hash << 1) + table[b]; + + if ((hash & mask) == 0) { + return i + 1; + } + } + + return -1; // Return -1 to indicate no match found (equivalent to None in Rust) } diff --git a/packages/gearhash/assembly/table.ts b/packages/gearhash/assembly/table.ts new file mode 100644 index 0000000000..22a9e52df9 --- /dev/null +++ b/packages/gearhash/assembly/table.ts @@ -0,0 +1,57 @@ +/* eslint-disable @typescript-eslint/no-loss-of-precision */ + +// Define the Table type as a static array of u64 values +export const DEFAULT_TABLE: StaticArray = [ + 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, + 0x1dc636dce936d94b, 0x207a4c4e5554d5b6, 
0xa474b34628239acb, 0x3b06a83e1ca3b912, 0x90e78d6c2f02baf7, + 0xe1c92df7150d9a8a, 0x8e95053a1086d3ad, 0x5a2ef4f1b83a0722, 0xa50fac949f807fae, 0x0e7303eb80d8d681, + 0x99b07edc1570ad0f, 0x689d2fb555fd3076, 0x00005082119ea468, 0xc4b08306a88fcc28, 0x3eb0678af6374afd, + 0xf19f87ab86ad7436, 0xf2129fbfbe6bc736, 0x481149575c98a4ed, 0x0000010695477bc5, 0x1fba37801a9ceacc, + 0x3bf06fd663a49b6d, 0x99687e9782e3874b, 0x79a10673aa50d8e3, 0xe4accf9e6211f420, 0x2520e71f87579071, + 0x2bd5d3fd781a8a9b, 0x00de4dcddd11c873, 0xeaa9311c5a87392f, 0xdb748eb617bc40ff, 0xaf579a8df620bf6f, + 0x86a6e5da1b09c2b1, 0xcc2fc30ac322a12e, 0x355e2afec1f74267, 0x2d99c8f4c021a47b, 0xbade4b4a9404cfc3, + 0xf7b518721d707d69, 0x3286b6587bf32c20, 0x0000b68886af270c, 0xa115d6e4db8a9079, 0x484f7e9c97b2e199, + 0xccca7bb75713e301, 0xbf2584a62bb0f160, 0xade7e813625dbcc8, 0x000070940d87955a, 0x8ae69108139e626f, + 0xbd776ad72fde38a2, 0xfb6b001fc2fcc0cf, 0xc7a474b8e67bc427, 0xbaf6f11610eb5d58, 0x09cb1f5b6de770d1, + 0xb0b219e6977d4c47, 0x00ccbc386ea7ad4a, 0xcc849d0adf973f01, 0x73a3ef7d016af770, 0xc807d2d386bdbdfe, + 0x7f2ac9966c791730, 0xd037a86bc6c504da, 0xf3f17c661eaa609d, 0xaca626b04daae687, 0x755a99374f4a5b07, + 0x90837ee65b2caede, 0x6ee8ad93fd560785, 0x0000d9e11053edd8, 0x9e063bb2d21cdbd7, 0x07ab77f12a01d2b2, + 0xec550255e6641b44, 0x78fb94a8449c14c6, 0xc7510e1bc6c0f5f5, 0x0000320b36e4cae3, 0x827c33262c8b1a2d, + 0x14675f0b48ea4144, 0x267bd3a6498deceb, 0xf1916ff982f5035e, 0x86221b7ff434fb88, 0x9dbecee7386f49d8, + 0xea58f8cac80f8f4a, 0x008d198692fc64d8, 0x6d38704fbabf9a36, 0xe032cb07d1e7be4c, 0x228d21f6ad450890, + 0x635cb1bfc02589a5, 0x4620a1739ca2ce71, 0xa7e7dfe3aae5fb58, 0x0c10ca932b3c0deb, 0x2727fee884afed7b, + 0xa2df1c6df9e2ab1f, 0x4dcdd1ac0774f523, 0x000070ffad33e24e, 0xa2ace87bc5977816, 0x9892275ab4286049, + 0xc2861181ddf18959, 0xbb9972a042483e19, 0xef70cd3766513078, 0x00000513abfc9864, 0xc058b61858c94083, + 0x09e850859725e0de, 0x9197fb3bf83e7d94, 0x7e1e626d12b64bce, 0x520c54507f7b57d1, 0xbee1797174e22416, + 
0x6fd9ac3222e95587, 0x0023957c9adfbf3e, 0xa01c7d7e234bbe15, 0xaba2c758b8a38cbb, 0x0d1fa0ceec3e2b30, + 0x0bb6a58b7e60b991, 0x4333dd5b9fa26635, 0xc2fd3b7d4001c1a3, 0xfb41802454731127, 0x65a56185a50d18cb, + 0xf67a02bd8784b54f, 0x696f11dd67e65063, 0x00002022fca814ab, 0x8cd6be912db9d852, 0x695189b6e9ae8a57, + 0xee9453b50ada0c28, 0xd8fc5ea91a78845e, 0xab86bf191a4aa767, 0x0000c6b5c86415e5, 0x267310178e08a22e, + 0xed2d101b078bca25, 0x3b41ed84b226a8fb, 0x13e622120f28dc06, 0xa315f5ebfb706d26, 0x8816c34e3301bace, + 0xe9395b9cbb71fdae, 0x002ce9202e721648, 0x4283db1d2bb3c91c, 0xd77d461ad2b1a6a5, 0xe2ec17e46eeb866b, + 0xb8e0be4039fbc47c, 0xdea160c4d5299d04, 0x7eec86c8d28c3634, 0x2119ad129f98a399, 0xa6ccf46b61a283ef, + 0x2c52cedef658c617, 0x2db4871169acdd83, 0x0000f0d6f39ecbe9, 0x3dd5d8c98d2f9489, 0x8a1872a22b01f584, + 0xf282a4c40e7b3cf2, 0x8020ec2ccb1ba196, 0x6693b6e09e59e313, 0x0000ce19cc7c83eb, 0x20cb5735f6479c3b, + 0x762ebf3759d75a5b, 0x207bfe823d693975, 0xd77dc112339cd9d5, 0x9ba7834284627d03, 0x217dc513e95f51e9, + 0xb27b1a29fc5e7816, 0x00d5cd9831bb662d, 0x71e39b806d75734c, 0x7e572af006fb1a23, 0xa2734f2f6ae91f85, + 0xbf82c6b5022cddf2, 0x5c3beac60761a0de, 0xcdc893bb47416998, 0x6d1085615c187e01, 0x77f8ae30ac277c5d, + 0x917c6b81122a2c91, 0x5b75b699add16967, 0x0000cf6ae79a069b, 0xf3c40afa60de1104, 0x2063127aa59167c3, + 0x621de62269d1894d, 0xd188ac1de62b4726, 0x107036e2154b673c, 0x0000b85f28553a1d, 0xf2ef4e4c18236f3d, + 0xd9d6de6611b9f602, 0xa1fc7955fb47911c, 0xeb85fd032f298dbd, 0xbe27502fb3befae1, 0xe3034251c4cd661e, + 0x441364d354071836, 0x0082b36c75f2983e, 0xb145910316fa66f0, 0x021c069c9847caf7, 0x2910dfc75a4b5221, + 0x735b353e1c57a8b5, 0xce44312ce98ed96c, 0xbc942e4506bdfa65, 0xf05086a71257941b, 0xfec3b215d351cead, + 0x00ae1055e0144202, 0xf54b40846f42e454, 0x00007fd9c8bcbcc8, 0xbfbd9ef317de9bfe, 0xa804302ff2854e12, + 0x39ce4957a5e5d8d4, 0xffb9e2a45637ba84, 0x55b9ad1d9ea0818b, 0x00008acbf319178a, 0x48e2bfc8d0fbfb38, + 0x8be39841e848b5e8, 0x0e2712160696a08b, 0xd51096e84b44242a, 
0x1101ba176792e13a, 0xc22e770f4531689d, + 0x1689eff272bbc56c, 0x00a92a197f5650ec, 0xbc765990bda1784e, 0xc61441e392fcb8ae, 0x07e13a2ced31e4a0, + 0x92cbe984234e9d4d, 0x8f4ff572bb7d8ac5, 0x0b9670c00b963bd0, 0x62955a581a03eb01, 0x645f83e5ea000254, + 0x41fce516cd88f299, 0xbbda9748da7a98cf, 0x0000aab2fe4845fa, 0x19761b069bf56555, 0x8b8f5e8343b6ad56, + 0x3e5d1cfd144821d9, 0xec5c1e2ca2b0cd8f, 0xfaf7e0fea7fbb57f, 0x000000d3ba12961b, 0xda3f90178401b18e, + 0x70ff906de33a5feb, 0x0527d5a7c06970e7, 0x22d8e773607c13e9, 0xc9ab70df643c3bac, 0xeda4c6dc8abe12e3, + 0xecef1f410033e78a, 0x0024c2b274ac72cb, 0x06740d954fa900b4, 0x1d7a299b323d6304, 0xb3c37cb298cbead5, + 0xc986e3c76178739b, 0x9fabea364b46f58a, 0x6da214c5af85cc56, 0x17a43ed8b7a38f84, 0x6eccec511d9adbeb, + 0xf9cab30913335afb, 0x4a5e60c5f415eed2, 0x00006967503672b4, 0x9da51d121454bb87, 0x84321e13b9bbc816, + 0xfb3d6fb6ab2fdd8d, 0x60305eed8e160a8d, 0xcbbf4b14e9946ce8, 0x00004f63381b10c3, 0x07d5b7816fcc4e10, + 0xe5a536726a6a8155, 0x57afb23447a07fdd, 0x18f346f7abc9d394, 0x636dc655d61ad33d, 0xcc8bab4939f7f3f6, + 0x63c7a906c1dd187b, +]; From a2cb917cc88c13d403707f79a47695ae4c2ac24c Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 16:37:30 +0200 Subject: [PATCH 04/44] switch to gearhash-wasm package --- .../{gearhash => gearhash-wasm}/README.md | 0 .../{gearhash => gearhash-wasm}/asconfig.json | 0 packages/gearhash-wasm/assembly/index.ts | 2 + packages/gearhash-wasm/assembly/next-match.ts | 17 ++++++ .../assembly/table.ts | 0 .../assembly/tsconfig.json | 0 .../build/.gitignore | 0 .../{gearhash => gearhash-wasm}/index.html | 0 .../{gearhash => gearhash-wasm}/package.json | 2 +- .../pnpm-lock.yaml | 0 packages/gearhash-wasm/tests/index.js | 4 ++ packages/gearhash/assembly/index.ts | 55 ++++++++++++++++++- packages/gearhash/tests/index.js | 4 -- pnpm-workspace.yaml | 2 +- 14 files changed, 78 insertions(+), 8 deletions(-) rename packages/{gearhash => gearhash-wasm}/README.md (100%) rename packages/{gearhash => 
gearhash-wasm}/asconfig.json (100%) create mode 100644 packages/gearhash-wasm/assembly/index.ts create mode 100644 packages/gearhash-wasm/assembly/next-match.ts rename packages/{gearhash => gearhash-wasm}/assembly/table.ts (100%) rename packages/{gearhash => gearhash-wasm}/assembly/tsconfig.json (100%) rename packages/{gearhash => gearhash-wasm}/build/.gitignore (100%) rename packages/{gearhash => gearhash-wasm}/index.html (100%) rename packages/{gearhash => gearhash-wasm}/package.json (92%) rename packages/{gearhash => gearhash-wasm}/pnpm-lock.yaml (100%) create mode 100644 packages/gearhash-wasm/tests/index.js delete mode 100644 packages/gearhash/tests/index.js diff --git a/packages/gearhash/README.md b/packages/gearhash-wasm/README.md similarity index 100% rename from packages/gearhash/README.md rename to packages/gearhash-wasm/README.md diff --git a/packages/gearhash/asconfig.json b/packages/gearhash-wasm/asconfig.json similarity index 100% rename from packages/gearhash/asconfig.json rename to packages/gearhash-wasm/asconfig.json diff --git a/packages/gearhash-wasm/assembly/index.ts b/packages/gearhash-wasm/assembly/index.ts new file mode 100644 index 0000000000..3b224ae616 --- /dev/null +++ b/packages/gearhash-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +export { DEFAULT_TABLE } from "./table"; +export { nextMatch } from "./next-match"; diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts new file mode 100644 index 0000000000..17c65b887c --- /dev/null +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -0,0 +1,17 @@ +// The entry file of your WebAssembly module. 
+ +import { DEFAULT_TABLE } from "./table"; + +// Function to find the next match in the buffer +export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0, table: StaticArray = DEFAULT_TABLE): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + hash = (hash << 1) + table[b]; + + if ((hash & mask) == 0) { + return i + 1; + } + } + + return -1; // Return -1 to indicate no match found (equivalent to None in Rust) +} diff --git a/packages/gearhash/assembly/table.ts b/packages/gearhash-wasm/assembly/table.ts similarity index 100% rename from packages/gearhash/assembly/table.ts rename to packages/gearhash-wasm/assembly/table.ts diff --git a/packages/gearhash/assembly/tsconfig.json b/packages/gearhash-wasm/assembly/tsconfig.json similarity index 100% rename from packages/gearhash/assembly/tsconfig.json rename to packages/gearhash-wasm/assembly/tsconfig.json diff --git a/packages/gearhash/build/.gitignore b/packages/gearhash-wasm/build/.gitignore similarity index 100% rename from packages/gearhash/build/.gitignore rename to packages/gearhash-wasm/build/.gitignore diff --git a/packages/gearhash/index.html b/packages/gearhash-wasm/index.html similarity index 100% rename from packages/gearhash/index.html rename to packages/gearhash-wasm/index.html diff --git a/packages/gearhash/package.json b/packages/gearhash-wasm/package.json similarity index 92% rename from packages/gearhash/package.json rename to packages/gearhash-wasm/package.json index ad2419b2e8..3417394eca 100644 --- a/packages/gearhash/package.json +++ b/packages/gearhash-wasm/package.json @@ -1,5 +1,5 @@ { - "name": "@huggingface/gearhash", + "name": "@huggingface/gearhash-wasms", "version": "0.0.1", "scripts": { "build:debug": "asc assembly/index.ts --target debug", diff --git a/packages/gearhash/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml similarity index 100% rename from packages/gearhash/pnpm-lock.yaml rename to packages/gearhash-wasm/pnpm-lock.yaml diff --git 
a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js new file mode 100644 index 0000000000..c7f06f4c03 --- /dev/null +++ b/packages/gearhash-wasm/tests/index.js @@ -0,0 +1,4 @@ +import assert from "assert"; +import { nextMatch } from "../build/debug.js"; +assert.strictEqual(nextMatch(new Uint8Array([1, 2, 3]), 0xaf2900n), 3); +console.log("ok"); diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts index 659e958263..1635f22c39 100644 --- a/packages/gearhash/assembly/index.ts +++ b/packages/gearhash/assembly/index.ts @@ -1,8 +1,12 @@ // The entry file of your WebAssembly module. -import { DEFAULT_TABLE } from "./table"; +import type { StaticArray } from "@assemblyscript/runtime"; -export { DEFAULT_TABLE }; +// Define the Table type as a static array of u64 values +export const DEFAULT_TABLE: StaticArray = [ + 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, + // ... existing code ... 
+]; // Function to find the next match in the buffer export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { @@ -17,3 +21,50 @@ export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticAr return -1; // Return -1 to indicate no match found (equivalent to None in Rust) } + +// Hasher class that maintains hash state +export class Hasher { + private hash: u64; + private table: StaticArray; + + constructor(table: StaticArray = DEFAULT_TABLE) { + this.table = table; + this.hash = 0; + } + + // Update the hash state by processing all the bytes in the given slice + update(buf: Uint8Array): void { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + this.hash = (this.hash << 1) + this.table[b]; + } + } + + // Match the current hash state against the given mask + isMatch(mask: u64): boolean { + return (this.hash & mask) == 0; + } + + // Process the given byte slice until a match is found for the given mask + nextMatch(buf: Uint8Array, mask: u64): i32 { + for (let i = 0; i < buf.length; i++) { + const b = buf[i]; + this.hash = (this.hash << 1) + this.table[b]; + + if ((this.hash & mask) == 0) { + return i + 1; + } + } + return -1; + } + + // Get the current hash value + getHash(): u64 { + return this.hash; + } + + // Set the hash value to the given integer + setHash(hash: u64): void { + this.hash = hash; + } +} diff --git a/packages/gearhash/tests/index.js b/packages/gearhash/tests/index.js deleted file mode 100644 index 769a0b0bff..0000000000 --- a/packages/gearhash/tests/index.js +++ /dev/null @@ -1,4 +0,0 @@ -import assert from "assert"; -import { add } from "../build/debug.js"; -assert.strictEqual(add(1, 2), 3); -console.log("ok"); diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index 5d89bd2418..e6016bff8c 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -14,4 +14,4 @@ packages: - "packages/ollama-utils" - "packages/mcp-client" - "packages/tiny-agents" - - 
"packages/gearhash" + - "packages/gearhash-wasm" From 97f3e0bf047d364380551cc353e87a4ddfd9d17f Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:17:40 +0200 Subject: [PATCH 05/44] v 1.0 --- packages/gearhash-wasm/README.md | 56 ++++++++++++++++++- packages/gearhash-wasm/assembly/next-match.ts | 14 +++-- packages/gearhash-wasm/tests/index.js | 18 +++++- 3 files changed, 82 insertions(+), 6 deletions(-) diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index e6f17fa01f..14942afc2e 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -1,3 +1,57 @@ JS and WASM implementations of https://github.com/srijs/rust-gearhash -Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. \ No newline at end of file +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. + +## Usage + +```javascript +import { nextMatch } from '@huggingface/gearhash-wasm'; + +// Create a Uint8Array of data to search through +const data = new Uint8Array(1000000); // Example: 1MB of data +// ... fill data with your content ... 
+ +// Search for a pattern with a specific mask +const mask = 0x0000d90003530000n; // Example mask as a BigInt +const matchResult = nextMatch(data, mask); + +// matchIndex will be the position where the pattern was found +// or -1 if no match was found +``` + +The `nextMatch` function takes two parameters: +- `data`: A Uint8Array containing the data to search through +- `mask`: A BigInt representing the pattern mask to search for + +The function returns an object with the `position` (i32) and `hash` (u64) properties + +You can continuously feed data like this: + +```javascript +let hash = 0n; +const mask = 0x0000d90003530000n; + +let position = 0; +for await (const chunk of dataSource) { + let index = 0; + while (1) { + let match = nextMatch(chunk.subArray(index), mask, hash); + + if (match.position !== -1) { + console.log({ + position: match.position + position, + hash: match.hash + }) + + index += match.position; + position = 0; + hash = 0n; + } else { + position += chunk.length - index; + break; + } + } +} + +console.log(position, "bytes without a match, ending hash: ", hash); +``` \ No newline at end of file diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 17c65b887c..180d11a6a4 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -2,16 +2,22 @@ import { DEFAULT_TABLE } from "./table"; +// Interface for the match result +export class MatchResult { + position: i32 = -1; + hash: u64 = 0; +} + // Function to find the next match in the buffer -export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0, table: StaticArray = DEFAULT_TABLE): i32 { +export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResult { for (let i = 0; i < buf.length; i++) { const b = buf[i]; - hash = (hash << 1) + table[b]; + hash = (hash << 1) + DEFAULT_TABLE[b]; if ((hash & mask) == 0) { - return i + 1; + return { position: i + 1, hash }; 
} } - return -1; // Return -1 to indicate no match found (equivalent to None in Rust) + return { position: -1, hash }; // Return -1 position to indicate no match found, along with the final hash } diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index c7f06f4c03..1b962c543c 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,4 +1,20 @@ import assert from "assert"; import { nextMatch } from "../build/debug.js"; -assert.strictEqual(nextMatch(new Uint8Array([1, 2, 3]), 0xaf2900n), 3); + +// Simple seeded random number generator +function seededRandom(seed) { + return function () { + seed = (seed * 16807) % 2147483647; + return (seed - 1) / 2147483646; + }; +} + +// Create seeded random data +const seed = 12345; // Fixed seed for deterministic results +const random = seededRandom(seed); +const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); + +// Test with a known mask +assert.deepStrictEqual(nextMatch(randomData, 0xaf2900n), { position: 128, hash: 11757411513747408525n }); +assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0xaf2900n), { position: 184, hash: 7438883163016807155n }); console.log("ok"); From 512801f4f33fe95e13d87b67170651a07849a564 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:20:40 +0200 Subject: [PATCH 06/44] remove extra file --- packages/gearhash/assembly/index.ts | 70 ----------------------------- 1 file changed, 70 deletions(-) delete mode 100644 packages/gearhash/assembly/index.ts diff --git a/packages/gearhash/assembly/index.ts b/packages/gearhash/assembly/index.ts deleted file mode 100644 index 1635f22c39..0000000000 --- a/packages/gearhash/assembly/index.ts +++ /dev/null @@ -1,70 +0,0 @@ -// The entry file of your WebAssembly module. 
- -import type { StaticArray } from "@assemblyscript/runtime"; - -// Define the Table type as a static array of u64 values -export const DEFAULT_TABLE: StaticArray = [ - 0xb088d3a9e840f559, 0x5652c7f739ed20d6, 0x45b28969898972ab, 0x6b0a89d5b68ec777, 0x368f573e8b7a31b7, - // ... existing code ... -]; - -// Function to find the next match in the buffer -export function nextMatch(hash: u64, buf: Uint8Array, mask: u64, table: StaticArray = DEFAULT_TABLE): i32 { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - hash = (hash << 1) + table[b]; - - if ((hash & mask) == 0) { - return i + 1; - } - } - - return -1; // Return -1 to indicate no match found (equivalent to None in Rust) -} - -// Hasher class that maintains hash state -export class Hasher { - private hash: u64; - private table: StaticArray; - - constructor(table: StaticArray = DEFAULT_TABLE) { - this.table = table; - this.hash = 0; - } - - // Update the hash state by processing all the bytes in the given slice - update(buf: Uint8Array): void { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - this.hash = (this.hash << 1) + this.table[b]; - } - } - - // Match the current hash state against the given mask - isMatch(mask: u64): boolean { - return (this.hash & mask) == 0; - } - - // Process the given byte slice until a match is found for the given mask - nextMatch(buf: Uint8Array, mask: u64): i32 { - for (let i = 0; i < buf.length; i++) { - const b = buf[i]; - this.hash = (this.hash << 1) + this.table[b]; - - if ((this.hash & mask) == 0) { - return i + 1; - } - } - return -1; - } - - // Get the current hash value - getHash(): u64 { - return this.hash; - } - - // Set the hash value to the given integer - setHash(hash: u64): void { - this.hash = hash; - } -} From 07a384d278075b5807abea05893ee3ba09533668 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:23:01 +0200 Subject: [PATCH 07/44] bigger mask --- packages/gearhash-wasm/tests/index.js | 7 +++++-- 1 file changed, 5 
insertions(+), 2 deletions(-) diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index 1b962c543c..5f70c7ca7c 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -15,6 +15,9 @@ const random = seededRandom(seed); const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); // Test with a known mask -assert.deepStrictEqual(nextMatch(randomData, 0xaf2900n), { position: 128, hash: 11757411513747408525n }); -assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0xaf2900n), { position: 184, hash: 7438883163016807155n }); +assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n }); +assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0x0000d90003530000n), { + position: 331, + hash: 9546224108073667431n, +}); console.log("ok"); From 3306f2c22a542313b83acbfb6689e1f1a57e2614 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 30 May 2025 17:49:12 +0200 Subject: [PATCH 08/44] add nextMatches function --- packages/gearhash-wasm/README.md | 47 ++++++-- packages/gearhash-wasm/assembly/index.ts | 2 +- packages/gearhash-wasm/assembly/next-match.ts | 23 ++++ packages/gearhash-wasm/tests/index.js | 104 +++++++++++++++++- 4 files changed, 160 insertions(+), 16 deletions(-) diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index 14942afc2e..cf72dafbd3 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -13,10 +13,8 @@ const data = new Uint8Array(1000000); // Example: 1MB of data // Search for a pattern with a specific mask const mask = 0x0000d90003530000n; // Example mask as a BigInt -const matchResult = nextMatch(data, mask); - -// matchIndex will be the position where the pattern was found -// or -1 if no match was found +const match = nextMatch(data, mask); +const allMatches = nextMatches(data, mask).matches; ``` The `nextMatch` function takes 
two parameters: @@ -31,7 +29,7 @@ You can continuously feed data like this: let hash = 0n; const mask = 0x0000d90003530000n; -let position = 0; +let length = 0; // extra length not processed for await (const chunk of dataSource) { let index = 0; while (1) { @@ -39,19 +37,48 @@ for await (const chunk of dataSource) { if (match.position !== -1) { console.log({ - position: match.position + position, + length: match.position + length, hash: match.hash }) index += match.position; - position = 0; + length = 0; hash = 0n; } else { - position += chunk.length - index; + length += chunk.length - index; break; } } } -console.log(position, "bytes without a match, ending hash: ", hash); -``` \ No newline at end of file +console.log(length, "bytes without a match, ending hash: ", hash); +``` + +or, more performant with `nextMatches`: + +```javascript +let hash = 0n; +const mask = 0x0000d90003530000n; + +let length = 0; +for await (const chunk of dataSource) { + const result = nextMatches(chunk, mask, hash); + let lastPosition = 0; + for (const match of result.matches) { + console.log({ + length: match.position - lastPosition + length, + hash: match.hash + }); + + length = 0; + lastPosition = match.position; + } + length = result.remaining; + hash = result.hash; +} + +console.log(length, "bytes without a match, ending hash: ", hash); +``` + +## Possible improvements + diff --git a/packages/gearhash-wasm/assembly/index.ts b/packages/gearhash-wasm/assembly/index.ts index 3b224ae616..447e7776f7 100644 --- a/packages/gearhash-wasm/assembly/index.ts +++ b/packages/gearhash-wasm/assembly/index.ts @@ -1,2 +1,2 @@ export { DEFAULT_TABLE } from "./table"; -export { nextMatch } from "./next-match"; +export { nextMatch, nextMatches } from "./next-match"; diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 180d11a6a4..1093f77a80 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ 
b/packages/gearhash-wasm/assembly/next-match.ts @@ -21,3 +21,26 @@ export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResul return { position: -1, hash }; // Return -1 position to indicate no match found, along with the final hash } + +export class NextMatchesResult { + matches: MatchResult[] = []; + hash: u64 = 0; + remaining: i32 = 0; +} + +export function nextMatches(buf: Uint8Array, mask: u64, hash: u64 = 0): NextMatchesResult { + const result = new NextMatchesResult(); + + let match = nextMatch(buf, mask, hash); + let position = 0; + while (match.position !== -1) { + result.matches.push(match); + position += match.position; + match = nextMatch(buf.subarray(position), mask, 0); + } + + result.remaining = buf.length - position; + result.hash = match.hash; + + return result; +} diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index 5f70c7ca7c..d3d220da00 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,5 +1,5 @@ import assert from "assert"; -import { nextMatch } from "../build/debug.js"; +import { nextMatch, nextMatches } from "../build/debug.js"; // Simple seeded random number generator function seededRandom(seed) { @@ -12,12 +12,106 @@ function seededRandom(seed) { // Create seeded random data const seed = 12345; // Fixed seed for deterministic results const random = seededRandom(seed); -const randomData = new Uint8Array(1000000).map(() => Math.floor(random() * 256)); +const randomData = new Uint8Array(150_000).map(() => Math.floor(random() * 256)); // Test with a known mask assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n }); -assert.deepStrictEqual(nextMatch(randomData.subarray(128), 0x0000d90003530000n), { - position: 331, - hash: 9546224108073667431n, +assert.deepStrictEqual(nextMatch(randomData.subarray(459), 0x0000d90003530000n), { + position: 3658, + hash: 4043712133052525799n, 
}); + +assert.deepStrictEqual(nextMatches(randomData, 0x0000d90003530000n), { + remaining: 1206, + hash: 18262966296195680063n, + matches: [ + { position: 459, hash: 9546224108073667431n }, + { position: 3658, hash: 4043712133052525799n }, + { position: 2013, hash: 6111702085179831561n }, + { position: 1593, hash: 12901166541873917249n }, + { position: 1566, hash: 7692186462913612151n }, + { position: 211, hash: 16543980755458487441n }, + { position: 1778, hash: 15644384556715661587n }, + { position: 566, hash: 9793366463237592247n }, + { position: 2079, hash: 11221321116171663064n }, + { position: 2940, hash: 1564726223525919786n }, + { position: 809, hash: 15395839328876515337n }, + { position: 946, hash: 10585747199093122759n }, + { position: 854, hash: 4479393852251501569n }, + { position: 436, hash: 15702966577303948694n }, + { position: 2165, hash: 17148900940125069205n }, + { position: 273, hash: 11505890591385615424n }, + { position: 1459, hash: 10774060112464860369n }, + { position: 158, hash: 2233823235057951370n }, + { position: 7, hash: 1983310208686139647n }, + { position: 1926, hash: 4499661659570185271n }, + { position: 1529, hash: 16090517590946392505n }, + { position: 1751, hash: 12536054222087023458n }, + { position: 1222, hash: 334146166487300408n }, + { position: 2230, hash: 6981431015531396608n }, + { position: 826, hash: 11877997991061156988n }, + { position: 33, hash: 8454422284689001989n }, + { position: 1731, hash: 15095819886766624527n }, + { position: 8842, hash: 6362744947164356842n }, + { position: 928, hash: 3627691864743766239n }, + { position: 684, hash: 1137480049753900759n }, + { position: 5301, hash: 10541554813326859395n }, + { position: 2546, hash: 14704288147532701373n }, + { position: 11856, hash: 9653226176528805511n }, + { position: 650, hash: 12714262162290274678n }, + { position: 1346, hash: 2525679969999819421n }, + { position: 353, hash: 2532749299807420736n }, + { position: 1091, hash: 693561665209300041n }, + { 
position: 729, hash: 11014435606385442344n }, + { position: 1204, hash: 10083883741570968570n }, + { position: 1671, hash: 12308901096302322810n }, + { position: 1362, hash: 13399339535394154305n }, + { position: 1858, hash: 792389713896955383n }, + { position: 2248, hash: 15568664728418446816n }, + { position: 1790, hash: 4328805983976714464n }, + { position: 634, hash: 722305044694988273n }, + { position: 741, hash: 17978970776495983968n }, + { position: 901, hash: 5911861036065769110n }, + { position: 302, hash: 1334790489764850513n }, + { position: 1435, hash: 16174119877357924758n }, + { position: 61, hash: 12103430617785210167n }, + { position: 1, hash: 35334639850667n }, + { position: 2074, hash: 7449519750512442798n }, + { position: 2061, hash: 1805950971475184864n }, + { position: 1612, hash: 5837797879339327135n }, + { position: 3281, hash: 6649572008787195357n }, + { position: 39, hash: 16137242368496690753n }, + { position: 263, hash: 8133543763164586431n }, + { position: 2333, hash: 17019949823094703325n }, + { position: 1160, hash: 8949503946391874147n }, + { position: 641, hash: 18344573417262448121n }, + { position: 2588, hash: 13345294745157777411n }, + { position: 3116, hash: 7832639641689314418n }, + { position: 4671, hash: 13762161036402935807n }, + { position: 276, hash: 10924644382434953404n }, + { position: 4430, hash: 9045519457622973922n }, + { position: 32, hash: 4188636638659752674n }, + { position: 2470, hash: 1184167847892138852n }, + { position: 694, hash: 11699508361075635892n }, + { position: 1703, hash: 9012268790677532920n }, + { position: 47, hash: 6528251874505412319n }, + { position: 2672, hash: 8484789019946020371n }, + { position: 202, hash: 1365160724288031760n }, + { position: 467, hash: 10426152000837661087n }, + { position: 496, hash: 3605417399306471847n }, + { position: 3777, hash: 8410473338876477323n }, + { position: 80, hash: 3693273711429567121n }, + { position: 813, hash: 9224216742837123228n }, + { position: 3115, 
hash: 5150752707627454542n }, + { position: 806, hash: 8797260981186887018n }, + { position: 4915, hash: 1483374079741560715n }, + { position: 2118, hash: 1742900153494554703n }, + { position: 1515, hash: 4635371751468227093n }, + { position: 2393, hash: 15282968615371427111n }, + { position: 4331, hash: 4659818917792066036n }, + { position: 1188, hash: 3862441883651577693n }, + { position: 2663, hash: 8524789558855117254n }, + ], +}); + console.log("ok"); From 12f9e97c89ad3bdf261b5c030ceb8ce3a8647254 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 12:31:25 +0200 Subject: [PATCH 09/44] (wip) xet chunk code generated by cursor to fix --- .../gearhash-wasm/assembly/xet-chunker.ts | 172 ++++++++++++++++++ 1 file changed, 172 insertions(+) create mode 100644 packages/gearhash-wasm/assembly/xet-chunker.ts diff --git a/packages/gearhash-wasm/assembly/xet-chunker.ts b/packages/gearhash-wasm/assembly/xet-chunker.ts new file mode 100644 index 0000000000..2e501cc815 --- /dev/null +++ b/packages/gearhash-wasm/assembly/xet-chunker.ts @@ -0,0 +1,172 @@ +import { nextMatch } from "./next-match"; + +// Constants +const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB +const MINIMUM_CHUNK_DIVISOR: usize = 8; +const MAXIMUM_CHUNK_MULTIPLIER: usize = 2; +const HASH_WINDOW_SIZE: usize = 64; + +export class Chunk { + hash: Uint8Array; + data: Uint8Array; + + constructor(hash: Uint8Array, data: Uint8Array) { + this.hash = hash; + this.data = data; + } +} + +// Type for the next() method return value +export class NextResult { + chunk: Chunk | null; + bytesConsumed: usize; + + constructor(chunk: Chunk | null, bytesConsumed: usize) { + this.chunk = chunk; + this.bytesConsumed = bytesConsumed; + } +} + +export class XetChunker { + private minimumChunk: usize; + private maximumChunk: usize; + private mask: u64; + private chunkBuf: Uint8Array; + private curChunkLen: usize; + private hash: u64; + + constructor(targetChunkSize: usize = TARGET_CHUNK_SIZE) { + // Validate target 
chunk size is a power of 2 + assert((targetChunkSize & (targetChunkSize - 1)) == 0, "Target chunk size must be a power of 2"); + assert(targetChunkSize > HASH_WINDOW_SIZE, "Target chunk size must be greater than hash window size"); + assert(targetChunkSize < u32.MAX_VALUE, "Target chunk size must be less than u32.MAX_VALUE"); + + let mask = (targetChunkSize - 1) as u64; + // Shift mask left by leading zeros count + mask = mask << (64 - clz(mask)); + + this.minimumChunk = targetChunkSize / MINIMUM_CHUNK_DIVISOR; + this.maximumChunk = targetChunkSize * MAXIMUM_CHUNK_MULTIPLIER; + this.mask = mask; + this.chunkBuf = new Uint8Array(this.maximumChunk); + this.curChunkLen = 0; + this.hash = 0; + } + + next(data: Uint8Array, isFinal: boolean): NextResult { + const nBytes = data.length; + let createChunk = false; + let consumeLen: usize = 0; + + if (nBytes != 0) { + // Skip minimum chunk size + if (this.curChunkLen + HASH_WINDOW_SIZE < this.minimumChunk) { + const maxAdvance = min(this.minimumChunk - this.curChunkLen - HASH_WINDOW_SIZE - 1, nBytes - consumeLen); + consumeLen += maxAdvance; + this.curChunkLen += maxAdvance; + } + + // Calculate read end + const readEnd = min(nBytes, consumeLen + this.maximumChunk - this.curChunkLen); + + let bytesToNextBoundary: usize; + const matchResult = nextMatch(data.subarray(consumeLen, readEnd), this.mask, this.hash); + + if (matchResult.position != -1) { + bytesToNextBoundary = matchResult.position; + createChunk = true; + this.hash = matchResult.hash; + } else { + bytesToNextBoundary = readEnd - consumeLen; + this.hash = matchResult.hash; + } + + // Check if we hit maximum chunk + if (bytesToNextBoundary + this.curChunkLen >= this.maximumChunk) { + bytesToNextBoundary = this.maximumChunk - this.curChunkLen; + createChunk = true; + } + + this.curChunkLen += bytesToNextBoundary; + consumeLen += bytesToNextBoundary; + + // Copy data to chunk buffer + this.chunkBuf.set(data.subarray(0, consumeLen), this.curChunkLen - consumeLen); + } + 
+ if (createChunk || (isFinal && this.curChunkLen > 0)) { + const chunkData = this.chunkBuf.subarray(0, this.curChunkLen); + const chunk = new Chunk(computeDataHash(chunkData), chunkData); + this.curChunkLen = 0; + this.hash = 0; + return new NextResult(chunk, consumeLen); + } + + return new NextResult(null, consumeLen); + } + + nextBlock(data: Uint8Array, isFinal: boolean): Chunk[] { + const chunks: Chunk[] = []; + let pos: usize = 0; + + while (pos < data.length) { + const result = this.next(data.subarray(pos), isFinal); + if (result.chunk) { + chunks.push(result.chunk); + } + pos += result.bytesConsumed; + } + + return chunks; + } + + finish(): Chunk | null { + return this.next(new Uint8Array(0), true).chunk; + } +} + +// Simple SHA-256 implementation for data hashing +function computeDataHash(data: Uint8Array): Uint8Array { + // TODO: Replace with actual SHA-256 implementation + // For now, using a simple hash function for demonstration + const hash = new Uint8Array(32); + for (let i = 0; i < data.length; i++) { + hash[i % 32] ^= data[i]; + } + return hash; +} + +// Helper function to find minimum of two numbers +function min(a: usize, b: usize): usize { + return a < b ? 
a : b; +} + +// Helper function to count leading zeros +function clz(x: u64): u32 { + let n: u32 = 0; + if (x == 0) return 64; + if ((x & 0xffffffff00000000) == 0) { + n += 32; + x <<= 32; + } + if ((x & 0xffff000000000000) == 0) { + n += 16; + x <<= 16; + } + if ((x & 0xff00000000000000) == 0) { + n += 8; + x <<= 8; + } + if ((x & 0xf000000000000000) == 0) { + n += 4; + x <<= 4; + } + if ((x & 0xc000000000000000) == 0) { + n += 2; + x <<= 2; + } + if ((x & 0x8000000000000000) == 0) { + n += 1; + } + return n; +} From 36348feb5c5d9bdc9974bf6da3bd5ad6bec28919 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 15:11:45 +0200 Subject: [PATCH 10/44] add blake3-wasm and xetchunk-wasm --- packages/blake3-wasm/assembly/blake3.ts | 361 ++++++++++++++++++ packages/blake3-wasm/assembly/index.ts | 2 + packages/blake3-wasm/assembly/tsconfig.json | 4 + packages/blake3-wasm/package.json | 23 ++ packages/blake3-wasm/pnpm-lock.yaml | 38 ++ packages/gearhash-wasm/assembly/blake3.ts | 357 +++++++++++++++++ packages/gearhash-wasm/package.json | 11 +- packages/gearhash-wasm/pnpm-lock.yaml | 6 + packages/xetchunk-wasm/assembly/index.ts | 2 + packages/xetchunk-wasm/assembly/next-match.ts | 28 ++ packages/xetchunk-wasm/assembly/tsconfig.json | 4 + .../assembly/xet-chunker.ts | 10 +- packages/xetchunk-wasm/package.json | 23 ++ packages/xetchunk-wasm/pnpm-lock.yaml | 42 ++ pnpm-workspace.yaml | 2 + tsconfig.json | 13 + 16 files changed, 918 insertions(+), 8 deletions(-) create mode 100644 packages/blake3-wasm/assembly/blake3.ts create mode 100644 packages/blake3-wasm/assembly/index.ts create mode 100644 packages/blake3-wasm/assembly/tsconfig.json create mode 100644 packages/blake3-wasm/package.json create mode 100644 packages/blake3-wasm/pnpm-lock.yaml create mode 100644 packages/gearhash-wasm/assembly/blake3.ts create mode 100644 packages/xetchunk-wasm/assembly/index.ts create mode 100644 packages/xetchunk-wasm/assembly/next-match.ts create mode 100644 
packages/xetchunk-wasm/assembly/tsconfig.json rename packages/{gearhash-wasm => xetchunk-wasm}/assembly/xet-chunker.ts (94%) create mode 100644 packages/xetchunk-wasm/package.json create mode 100644 packages/xetchunk-wasm/pnpm-lock.yaml create mode 100644 tsconfig.json diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts new file mode 100644 index 0000000000..46c19b4db3 --- /dev/null +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -0,0 +1,361 @@ +// Import AssemblyScript types +import type { usize, u32, u8, u64 } from "assemblyscript"; +import { StaticArray } from "assemblyscript"; + +// Constants from the reference implementation +const OUT_LEN: usize = 32; +// const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +//const KEYED_HASH: u32 = 1 << 4; +//const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +// const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: StaticArray = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. +function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { + state[a] = state[a] + state[b] + mx; + state[d] = rotl32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + my; + state[d] = rotl32(state[d] ^ state[a], 8); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 7); +} + +// Rotate left by n bits +function rotl32(x: u32, n: u32): u32 { + return (x << n) | (x >>> (32 - n)); +} + +function round(state: StaticArray, m: StaticArray): void { + // Mix the columns. 
+ g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. + g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +function permute(m: StaticArray): void { + const permuted = new StaticArray(16); + for (let i = 0; i < 16; i++) { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + for (let i = 0; i < 16; i++) { + m[i] = permuted[i]; + } +} + +function compress( + chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 +): StaticArray { + const counter_low = counter as u32; + const counter_high = (counter >> 32) as u32; + const state = new StaticArray(16); + + // Initialize state + for (let i = 0; i < 8; i++) { + state[i] = chaining_value[i]; + state[i + 8] = IV[i]; + } + state[12] = counter_low; + state[13] = counter_high; + state[14] = block_len; + state[15] = flags; + + const block = new StaticArray(16); + for (let i = 0; i < 16; i++) { + block[i] = block_words[i]; + } + + // Apply rounds + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + + // Final mixing + for (let i = 0; i < 8; i++) { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + + return state; +} + +function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { + for (let i = 0; i < words.length; i++) { + const offset = i * 4; + words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); + } +} + +export class Blake3Hasher { + private chunk_state: ChunkState; + private key_words: StaticArray; + private cv_stack: StaticArray>; + private 
cv_stack_len: u8; + private flags: u32; + + constructor() { + this.key_words = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.key_words[i] = IV[i]; + } + this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.cv_stack = new StaticArray>(54); + for (let i = 0; i < 54; i++) { + this.cv_stack[i] = new StaticArray(8); + } + this.cv_stack_len = 0; + this.flags = 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.chunk_state.len() == CHUNK_LEN) { + const chunk_cv = this.chunk_state.output().chaining_value(); + const total_chunks = this.chunk_state.chunk_counter + 1; + this.add_chunk_chaining_value(chunk_cv, total_chunks); + this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); + } + + const want = CHUNK_LEN - this.chunk_state.len(); + const take = min(want, input.length - inputPos); + this.chunk_state.update(input.subarray(inputPos, inputPos + take)); + inputPos += take; + } + } + + finalize(out: Uint8Array): void { + let output = this.chunk_state.output(); + let parent_nodes_remaining = this.cv_stack_len; + + while (parent_nodes_remaining > 0) { + parent_nodes_remaining--; + output = parent_output( + this.cv_stack[parent_nodes_remaining], + output.chaining_value(), + this.key_words, + this.flags + ); + } + + output.root_output_bytes(out); + } + + private add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { + let mut_new_cv = new_cv; + let mut_total_chunks = total_chunks; + + while ((mut_total_chunks & 1) == 0) { + mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); + mut_total_chunks >>= 1; + } + + this.push_stack(mut_new_cv); + } + + private push_stack(cv: StaticArray): void { + for (let i = 0; i < 8; i++) { + this.cv_stack[this.cv_stack_len][i] = cv[i]; + } + this.cv_stack_len++; + } + + private pop_stack(): StaticArray { + this.cv_stack_len--; + return this.cv_stack[this.cv_stack_len]; + } +} + +class ChunkState { 
+ chaining_value: StaticArray; + chunk_counter: u64; + block: Uint8Array; + block_len: u8; + blocks_compressed: u8; + flags: u32; + + constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { + this.chaining_value = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } + this.chunk_counter = chunk_counter; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + this.blocks_compressed = 0; + this.flags = flags; + } + + len(): usize { + return BLOCK_LEN * this.blocks_compressed + this.block_len; + } + + start_flag(): u32 { + return this.blocks_compressed == 0 ? CHUNK_START : 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.block_len == BLOCK_LEN) { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + const compressed = compress( + this.chaining_value, + block_words, + this.chunk_counter, + BLOCK_LEN, + this.flags | this.start_flag() + ); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = compressed[i]; + } + this.blocks_compressed++; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + } + + const want = BLOCK_LEN - this.block_len; + const take = min(want, input.length - inputPos); + for (let i = 0; i < take; i++) { + this.block[this.block_len + i] = input[inputPos + i]; + } + this.block_len += take; + inputPos += take; + } + } + + output(): Output { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + return new Output( + this.chaining_value, + block_words, + this.chunk_counter, + this.block_len, + this.flags | this.start_flag() | CHUNK_END + ); + } +} + +class Output { + input_chaining_value: StaticArray; + block_words: StaticArray; + counter: u64; + block_len: u32; + flags: u32; + + constructor( + input_chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 + ) { + 
this.input_chaining_value = input_chaining_value; + this.block_words = block_words; + this.counter = counter; + this.block_len = block_len; + this.flags = flags; + } + + chaining_value(): StaticArray { + const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); + const result = new StaticArray(8); + for (let i = 0; i < 8; i++) { + result[i] = compressed[i]; + } + return result; + } + + root_output_bytes(out: Uint8Array): void { + let output_block_counter: u64 = 0; + for (let i = 0; i < out.length; i += 2 * OUT_LEN) { + const words = compress( + this.input_chaining_value, + this.block_words, + output_block_counter, + this.block_len, + this.flags | ROOT + ); + const out_block = out.subarray(i, i + 2 * OUT_LEN); + for (let j = 0; j < words.length; j++) { + const word = words[j]; + const offset = j * 4; + if (offset < out_block.length) { + out_block[offset] = word & 0xff; + if (offset + 1 < out_block.length) { + out_block[offset + 1] = (word >> 8) & 0xff; + if (offset + 2 < out_block.length) { + out_block[offset + 2] = (word >> 16) & 0xff; + if (offset + 3 < out_block.length) { + out_block[offset + 3] = (word >> 24) & 0xff; + } + } + } + } + } + output_block_counter++; + } + } +} + +function parent_output( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): Output { + const block_words = new StaticArray(16); + for (let i = 0; i < 8; i++) { + block_words[i] = left_child_cv[i]; + block_words[i + 8] = right_child_cv[i]; + } + return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); +} + +function parent_cv( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): StaticArray { + return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); +} + +function min(a: usize, b: usize): usize { + return a < b ? 
a : b; +} diff --git a/packages/blake3-wasm/assembly/index.ts b/packages/blake3-wasm/assembly/index.ts new file mode 100644 index 0000000000..8183303929 --- /dev/null +++ b/packages/blake3-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +// Re-export everything from blake3.ts +export * from "./blake3"; diff --git a/packages/blake3-wasm/assembly/tsconfig.json b/packages/blake3-wasm/assembly/tsconfig.json new file mode 100644 index 0000000000..8131d68a0a --- /dev/null +++ b/packages/blake3-wasm/assembly/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": ["./**/*.ts"] +} diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json new file mode 100644 index 0000000000..7d258a5372 --- /dev/null +++ b/packages/blake3-wasm/package.json @@ -0,0 +1,23 @@ +{ + "name": "@huggingface/blake3-wasm", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests" + }, + "dependencies": { + "assemblyscript": "^0.27.36" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": "^0.27.36" + } +} diff --git a/packages/blake3-wasm/pnpm-lock.yaml b/packages/blake3-wasm/pnpm-lock.yaml new file mode 100644 index 0000000000..d18c304a0e --- /dev/null +++ b/packages/blake3-wasm/pnpm-lock.yaml @@ -0,0 +1,38 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.37 + +packages: + + assemblyscript@0.27.37: + resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + engines: {node: '>=18', npm: '>=10'} + 
hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.37: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/packages/gearhash-wasm/assembly/blake3.ts b/packages/gearhash-wasm/assembly/blake3.ts new file mode 100644 index 0000000000..44239e5390 --- /dev/null +++ b/packages/gearhash-wasm/assembly/blake3.ts @@ -0,0 +1,357 @@ +// Constants from the reference implementation +const OUT_LEN: usize = 32; +const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +const KEYED_HASH: u32 = 1 << 4; +const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: StaticArray = [ + 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, +]; + +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. 
+function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { + state[a] = state[a] + state[b] + mx; + state[d] = rotl32(state[d] ^ state[a], 16); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 12); + state[a] = state[a] + state[b] + my; + state[d] = rotl32(state[d] ^ state[a], 8); + state[c] = state[c] + state[d]; + state[b] = rotl32(state[b] ^ state[c], 7); +} + +// Rotate left by n bits +function rotl32(x: u32, n: u32): u32 { + return (x << n) | (x >>> (32 - n)); +} + +function round(state: StaticArray, m: StaticArray): void { + // Mix the columns. + g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. + g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +function permute(m: StaticArray): void { + const permuted = new StaticArray(16); + for (let i = 0; i < 16; i++) { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + for (let i = 0; i < 16; i++) { + m[i] = permuted[i]; + } +} + +function compress( + chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 +): StaticArray { + const counter_low = counter as u32; + const counter_high = (counter >> 32) as u32; + const state = new StaticArray(16); + + // Initialize state + for (let i = 0; i < 8; i++) { + state[i] = chaining_value[i]; + state[i + 8] = IV[i]; + } + state[12] = counter_low; + state[13] = counter_high; + state[14] = block_len; + state[15] = flags; + + const block = new StaticArray(16); + for (let i = 0; i < 16; i++) { + block[i] = block_words[i]; + } + + // Apply rounds + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + round(state, block); + permute(block); + 
round(state, block); + permute(block); + round(state, block); + + // Final mixing + for (let i = 0; i < 8; i++) { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + + return state; +} + +function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { + for (let i = 0; i < words.length; i++) { + const offset = i * 4; + words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); + } +} + +export class Blake3Hasher { + private chunk_state: ChunkState; + private key_words: StaticArray; + private cv_stack: StaticArray>; + private cv_stack_len: u8; + private flags: u32; + + constructor() { + this.key_words = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.key_words[i] = IV[i]; + } + this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.cv_stack = new StaticArray>(54); + for (let i = 0; i < 54; i++) { + this.cv_stack[i] = new StaticArray(8); + } + this.cv_stack_len = 0; + this.flags = 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.chunk_state.len() == CHUNK_LEN) { + const chunk_cv = this.chunk_state.output().chaining_value(); + const total_chunks = this.chunk_state.chunk_counter + 1; + this.add_chunk_chaining_value(chunk_cv, total_chunks); + this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); + } + + const want = CHUNK_LEN - this.chunk_state.len(); + const take = min(want, input.length - inputPos); + this.chunk_state.update(input.subarray(inputPos, inputPos + take)); + inputPos += take; + } + } + + finalize(out: Uint8Array): void { + let output = this.chunk_state.output(); + let parent_nodes_remaining = this.cv_stack_len; + + while (parent_nodes_remaining > 0) { + parent_nodes_remaining--; + output = parent_output( + this.cv_stack[parent_nodes_remaining], + output.chaining_value(), + this.key_words, + this.flags + ); + } + + output.root_output_bytes(out); + } + + private 
add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { + let mut_new_cv = new_cv; + let mut_total_chunks = total_chunks; + + while ((mut_total_chunks & 1) == 0) { + mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); + mut_total_chunks >>= 1; + } + + this.push_stack(mut_new_cv); + } + + private push_stack(cv: StaticArray): void { + for (let i = 0; i < 8; i++) { + this.cv_stack[this.cv_stack_len][i] = cv[i]; + } + this.cv_stack_len++; + } + + private pop_stack(): StaticArray { + this.cv_stack_len--; + return this.cv_stack[this.cv_stack_len]; + } +} + +class ChunkState { + chaining_value: StaticArray; + chunk_counter: u64; + block: Uint8Array; + block_len: u8; + blocks_compressed: u8; + flags: u32; + + constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { + this.chaining_value = new StaticArray(8); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } + this.chunk_counter = chunk_counter; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + this.blocks_compressed = 0; + this.flags = flags; + } + + len(): usize { + return BLOCK_LEN * this.blocks_compressed + this.block_len; + } + + start_flag(): u32 { + return this.blocks_compressed == 0 ? 
CHUNK_START : 0; + } + + update(input: Uint8Array): void { + let inputPos = 0; + while (inputPos < input.length) { + if (this.block_len == BLOCK_LEN) { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + const compressed = compress( + this.chaining_value, + block_words, + this.chunk_counter, + BLOCK_LEN, + this.flags | this.start_flag() + ); + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = compressed[i]; + } + this.blocks_compressed++; + this.block = new Uint8Array(BLOCK_LEN); + this.block_len = 0; + } + + const want = BLOCK_LEN - this.block_len; + const take = min(want, input.length - inputPos); + for (let i = 0; i < take; i++) { + this.block[this.block_len + i] = input[inputPos + i]; + } + this.block_len += take; + inputPos += take; + } + } + + output(): Output { + const block_words = new StaticArray(16); + words_from_little_endian_bytes(this.block, block_words); + return new Output( + this.chaining_value, + block_words, + this.chunk_counter, + this.block_len, + this.flags | this.start_flag() | CHUNK_END + ); + } +} + +class Output { + input_chaining_value: StaticArray; + block_words: StaticArray; + counter: u64; + block_len: u32; + flags: u32; + + constructor( + input_chaining_value: StaticArray, + block_words: StaticArray, + counter: u64, + block_len: u32, + flags: u32 + ) { + this.input_chaining_value = input_chaining_value; + this.block_words = block_words; + this.counter = counter; + this.block_len = block_len; + this.flags = flags; + } + + chaining_value(): StaticArray { + const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); + const result = new StaticArray(8); + for (let i = 0; i < 8; i++) { + result[i] = compressed[i]; + } + return result; + } + + root_output_bytes(out: Uint8Array): void { + let output_block_counter: u64 = 0; + for (let i = 0; i < out.length; i += 2 * OUT_LEN) { + const words = compress( + this.input_chaining_value, 
+ this.block_words, + output_block_counter, + this.block_len, + this.flags | ROOT + ); + const out_block = out.subarray(i, i + 2 * OUT_LEN); + for (let j = 0; j < words.length; j++) { + const word = words[j]; + const offset = j * 4; + if (offset < out_block.length) { + out_block[offset] = word & 0xff; + if (offset + 1 < out_block.length) { + out_block[offset + 1] = (word >> 8) & 0xff; + if (offset + 2 < out_block.length) { + out_block[offset + 2] = (word >> 16) & 0xff; + if (offset + 3 < out_block.length) { + out_block[offset + 3] = (word >> 24) & 0xff; + } + } + } + } + } + output_block_counter++; + } + } +} + +function parent_output( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): Output { + const block_words = new StaticArray(16); + for (let i = 0; i < 8; i++) { + block_words[i] = left_child_cv[i]; + block_words[i + 8] = right_child_cv[i]; + } + return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); +} + +function parent_cv( + left_child_cv: StaticArray, + right_child_cv: StaticArray, + key_words: StaticArray, + flags: u32 +): StaticArray { + return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); +} + +function min(a: usize, b: usize): usize { + return a < b ? a : b; +} diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 3417394eca..766b1f7b8c 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -1,5 +1,5 @@ { - "name": "@huggingface/gearhash-wasms", + "name": "@huggingface/gearhash-wasm", "version": "0.0.1", "scripts": { "build:debug": "asc assembly/index.ts --target debug", @@ -9,8 +9,15 @@ "start": "npx serve ." 
}, "dependencies": { - "assemblyscript": "^0.27.36" + "assemblyscript": "^0.27.36", + "@huggingface/blake3-wasm": "workspace:*", + "@huggingface/xetchunk-wasm": "workspace:*" }, + "keywords": [ + "gearhash", + "assemblyscript", + "wasm" + ], "type": "module", "exports": { ".": { diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index f0edbf2226..c86f2350d3 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -8,6 +8,12 @@ importers: .: dependencies: + '@huggingface/blake3-wasm': + specifier: workspace:* + version: link:../blake3-wasm + '@huggingface/xetchunk-wasm': + specifier: workspace:* + version: link:../xetchunk-wasm assemblyscript: specifier: ^0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts new file mode 100644 index 0000000000..e4bd372b33 --- /dev/null +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -0,0 +1,2 @@ +export * from "./xet-chunker"; +export * from "./next-match"; diff --git a/packages/xetchunk-wasm/assembly/next-match.ts b/packages/xetchunk-wasm/assembly/next-match.ts new file mode 100644 index 0000000000..5cf17752c1 --- /dev/null +++ b/packages/xetchunk-wasm/assembly/next-match.ts @@ -0,0 +1,28 @@ +export class MatchResult { + position: i32; + hash: u64; + + constructor(position: i32, hash: u64) { + this.position = position; + this.hash = hash; + } +} + +export function nextMatch(data: Uint8Array, mask: u64, hash: u64): MatchResult { + const nBytes = data.length; + let pos: usize = 0; + + while (pos < nBytes) { + // Update hash with next byte + hash = ((hash << 1) | data[pos]) & mask; + + // Check if we found a match + if (hash == 0) { + return new MatchResult(pos, hash); + } + + pos++; + } + + return new MatchResult(-1, hash); +} diff --git a/packages/xetchunk-wasm/assembly/tsconfig.json b/packages/xetchunk-wasm/assembly/tsconfig.json new file mode 100644 index 0000000000..8131d68a0a 
--- /dev/null +++ b/packages/xetchunk-wasm/assembly/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": ["./**/*.ts"] +} diff --git a/packages/gearhash-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts similarity index 94% rename from packages/gearhash-wasm/assembly/xet-chunker.ts rename to packages/xetchunk-wasm/assembly/xet-chunker.ts index 2e501cc815..2054a20694 100644 --- a/packages/gearhash-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,4 +1,5 @@ import { nextMatch } from "./next-match"; +import { Blake3Hasher } from "@huggingface/blake3-wasm"; // Constants const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB @@ -125,14 +126,11 @@ export class XetChunker { } } -// Simple SHA-256 implementation for data hashing function computeDataHash(data: Uint8Array): Uint8Array { - // TODO: Replace with actual SHA-256 implementation - // For now, using a simple hash function for demonstration + const hasher = new Blake3Hasher(); + hasher.update(data); const hash = new Uint8Array(32); - for (let i = 0; i < data.length; i++) { - hash[i % 32] ^= data[i]; - } + hasher.finalize(hash); return hash; } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json new file mode 100644 index 0000000000..ee20e8a094 --- /dev/null +++ b/packages/xetchunk-wasm/package.json @@ -0,0 +1,23 @@ +{ + "name": "@huggingface/xetchunk-wasm", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "npm run build:debug && npm run build:release", + "test": "node tests" + }, + "dependencies": { + "@huggingface/blake3-wasm": "workspace:*" + }, + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + } + }, + "devDependencies": { + "assemblyscript": 
"^0.27.36" + } +} diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml new file mode 100644 index 0000000000..23a82a8008 --- /dev/null +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -0,0 +1,42 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + dependencies: + '@huggingface/blake3-wasm': + specifier: workspace:* + version: link:../blake3-wasm + devDependencies: + assemblyscript: + specifier: ^0.27.36 + version: 0.27.37 + +packages: + + assemblyscript@0.27.37: + resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + engines: {node: '>=18', npm: '>=10'} + hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.37: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index e6016bff8c..bc118ffdff 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -15,3 +15,5 @@ packages: - "packages/mcp-client" - "packages/tiny-agents" - "packages/gearhash-wasm" + - "packages/blake3-wasm" + - "packages/xetchunk-wasm" diff --git a/tsconfig.json b/tsconfig.json new file mode 100644 index 0000000000..fbe8ff6fda --- /dev/null +++ b/tsconfig.json @@ -0,0 +1,13 @@ +{ + "compilerOptions": { + "target": "ESNext", + "module": "ESNext", + "moduleResolution": "node", + "strict": true, + "esModuleInterop": true, + "skipLibCheck": true, + "forceConsistentCasingInFileNames": true, + "lib": ["ESNext"], + "types": ["assemblyscript"] + } +} From 07b115dc5a0427f2d9a32fdbc2c334af986ff44c 
Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:22:41 +0200 Subject: [PATCH 11/44] fix TS refs --- packages/README.md | 3 + packages/blake3-wasm/assembly/blake3.ts | 4 - packages/blake3-wasm/assembly/tsconfig.json | 2 +- packages/blake3-wasm/package.json | 5 +- packages/gearhash-wasm/assembly/blake3.ts | 357 ------------------ packages/gearhash-wasm/package.json | 4 +- packages/xetchunk-wasm/assembly/tsconfig.json | 2 +- packages/xetchunk-wasm/package.json | 5 +- 8 files changed, 13 insertions(+), 369 deletions(-) create mode 100644 packages/README.md delete mode 100644 packages/gearhash-wasm/assembly/blake3.ts diff --git a/packages/README.md b/packages/README.md new file mode 100644 index 0000000000..020157a990 --- /dev/null +++ b/packages/README.md @@ -0,0 +1,3 @@ +A WASM implementation of blake3 using assemblyscript. + +Implementation based on https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs \ No newline at end of file diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 46c19b4db3..8f1ddb537f 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -1,7 +1,3 @@ -// Import AssemblyScript types -import type { usize, u32, u8, u64 } from "assemblyscript"; -import { StaticArray } from "assemblyscript"; - // Constants from the reference implementation const OUT_LEN: usize = 32; // const KEY_LEN: usize = 32; diff --git a/packages/blake3-wasm/assembly/tsconfig.json b/packages/blake3-wasm/assembly/tsconfig.json index 8131d68a0a..33daff5dac 100644 --- a/packages/blake3-wasm/assembly/tsconfig.json +++ b/packages/blake3-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git 
a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 7d258a5372..cdb861f379 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -4,8 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", - "test": "node tests" + "build": "pnpm run build:debug && npm run build:release", + "test": "node tests", + "prepare": "pnpm run build" }, "dependencies": { "assemblyscript": "^0.27.36" diff --git a/packages/gearhash-wasm/assembly/blake3.ts b/packages/gearhash-wasm/assembly/blake3.ts deleted file mode 100644 index 44239e5390..0000000000 --- a/packages/gearhash-wasm/assembly/blake3.ts +++ /dev/null @@ -1,357 +0,0 @@ -// Constants from the reference implementation -const OUT_LEN: usize = 32; -const KEY_LEN: usize = 32; -const BLOCK_LEN: usize = 64; -const CHUNK_LEN: usize = 1024; - -const CHUNK_START: u32 = 1 << 0; -const CHUNK_END: u32 = 1 << 1; -const PARENT: u32 = 1 << 2; -const ROOT: u32 = 1 << 3; -const KEYED_HASH: u32 = 1 << 4; -const DERIVE_KEY_CONTEXT: u32 = 1 << 5; -const DERIVE_KEY_MATERIAL: u32 = 1 << 6; - -const IV: StaticArray = [ - 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, -]; - -const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; - -// The mixing function, G, which mixes either a column or a diagonal. 
-function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { - state[a] = state[a] + state[b] + mx; - state[d] = rotl32(state[d] ^ state[a], 16); - state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 12); - state[a] = state[a] + state[b] + my; - state[d] = rotl32(state[d] ^ state[a], 8); - state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 7); -} - -// Rotate left by n bits -function rotl32(x: u32, n: u32): u32 { - return (x << n) | (x >>> (32 - n)); -} - -function round(state: StaticArray, m: StaticArray): void { - // Mix the columns. - g(state, 0, 4, 8, 12, m[0], m[1]); - g(state, 1, 5, 9, 13, m[2], m[3]); - g(state, 2, 6, 10, 14, m[4], m[5]); - g(state, 3, 7, 11, 15, m[6], m[7]); - // Mix the diagonals. - g(state, 0, 5, 10, 15, m[8], m[9]); - g(state, 1, 6, 11, 12, m[10], m[11]); - g(state, 2, 7, 8, 13, m[12], m[13]); - g(state, 3, 4, 9, 14, m[14], m[15]); -} - -function permute(m: StaticArray): void { - const permuted = new StaticArray(16); - for (let i = 0; i < 16; i++) { - permuted[i] = m[MSG_PERMUTATION[i]]; - } - for (let i = 0; i < 16; i++) { - m[i] = permuted[i]; - } -} - -function compress( - chaining_value: StaticArray, - block_words: StaticArray, - counter: u64, - block_len: u32, - flags: u32 -): StaticArray { - const counter_low = counter as u32; - const counter_high = (counter >> 32) as u32; - const state = new StaticArray(16); - - // Initialize state - for (let i = 0; i < 8; i++) { - state[i] = chaining_value[i]; - state[i + 8] = IV[i]; - } - state[12] = counter_low; - state[13] = counter_high; - state[14] = block_len; - state[15] = flags; - - const block = new StaticArray(16); - for (let i = 0; i < 16; i++) { - block[i] = block_words[i]; - } - - // Apply rounds - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - 
round(state, block); - permute(block); - round(state, block); - - // Final mixing - for (let i = 0; i < 8; i++) { - state[i] ^= state[i + 8]; - state[i + 8] ^= chaining_value[i]; - } - - return state; -} - -function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { - for (let i = 0; i < words.length; i++) { - const offset = i * 4; - words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); - } -} - -export class Blake3Hasher { - private chunk_state: ChunkState; - private key_words: StaticArray; - private cv_stack: StaticArray>; - private cv_stack_len: u8; - private flags: u32; - - constructor() { - this.key_words = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.key_words[i] = IV[i]; - } - this.chunk_state = new ChunkState(this.key_words, 0, 0); - this.cv_stack = new StaticArray>(54); - for (let i = 0; i < 54; i++) { - this.cv_stack[i] = new StaticArray(8); - } - this.cv_stack_len = 0; - this.flags = 0; - } - - update(input: Uint8Array): void { - let inputPos = 0; - while (inputPos < input.length) { - if (this.chunk_state.len() == CHUNK_LEN) { - const chunk_cv = this.chunk_state.output().chaining_value(); - const total_chunks = this.chunk_state.chunk_counter + 1; - this.add_chunk_chaining_value(chunk_cv, total_chunks); - this.chunk_state = new ChunkState(this.key_words, total_chunks, this.flags); - } - - const want = CHUNK_LEN - this.chunk_state.len(); - const take = min(want, input.length - inputPos); - this.chunk_state.update(input.subarray(inputPos, inputPos + take)); - inputPos += take; - } - } - - finalize(out: Uint8Array): void { - let output = this.chunk_state.output(); - let parent_nodes_remaining = this.cv_stack_len; - - while (parent_nodes_remaining > 0) { - parent_nodes_remaining--; - output = parent_output( - this.cv_stack[parent_nodes_remaining], - output.chaining_value(), - this.key_words, - this.flags - ); - } - - output.root_output_bytes(out); - } - - private 
add_chunk_chaining_value(new_cv: StaticArray, total_chunks: u64): void { - let mut_new_cv = new_cv; - let mut_total_chunks = total_chunks; - - while ((mut_total_chunks & 1) == 0) { - mut_new_cv = parent_cv(this.pop_stack(), mut_new_cv, this.key_words, this.flags); - mut_total_chunks >>= 1; - } - - this.push_stack(mut_new_cv); - } - - private push_stack(cv: StaticArray): void { - for (let i = 0; i < 8; i++) { - this.cv_stack[this.cv_stack_len][i] = cv[i]; - } - this.cv_stack_len++; - } - - private pop_stack(): StaticArray { - this.cv_stack_len--; - return this.cv_stack[this.cv_stack_len]; - } -} - -class ChunkState { - chaining_value: StaticArray; - chunk_counter: u64; - block: Uint8Array; - block_len: u8; - blocks_compressed: u8; - flags: u32; - - constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { - this.chaining_value = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = key_words[i]; - } - this.chunk_counter = chunk_counter; - this.block = new Uint8Array(BLOCK_LEN); - this.block_len = 0; - this.blocks_compressed = 0; - this.flags = flags; - } - - len(): usize { - return BLOCK_LEN * this.blocks_compressed + this.block_len; - } - - start_flag(): u32 { - return this.blocks_compressed == 0 ? 
CHUNK_START : 0; - } - - update(input: Uint8Array): void { - let inputPos = 0; - while (inputPos < input.length) { - if (this.block_len == BLOCK_LEN) { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); - const compressed = compress( - this.chaining_value, - block_words, - this.chunk_counter, - BLOCK_LEN, - this.flags | this.start_flag() - ); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = compressed[i]; - } - this.blocks_compressed++; - this.block = new Uint8Array(BLOCK_LEN); - this.block_len = 0; - } - - const want = BLOCK_LEN - this.block_len; - const take = min(want, input.length - inputPos); - for (let i = 0; i < take; i++) { - this.block[this.block_len + i] = input[inputPos + i]; - } - this.block_len += take; - inputPos += take; - } - } - - output(): Output { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); - return new Output( - this.chaining_value, - block_words, - this.chunk_counter, - this.block_len, - this.flags | this.start_flag() | CHUNK_END - ); - } -} - -class Output { - input_chaining_value: StaticArray; - block_words: StaticArray; - counter: u64; - block_len: u32; - flags: u32; - - constructor( - input_chaining_value: StaticArray, - block_words: StaticArray, - counter: u64, - block_len: u32, - flags: u32 - ) { - this.input_chaining_value = input_chaining_value; - this.block_words = block_words; - this.counter = counter; - this.block_len = block_len; - this.flags = flags; - } - - chaining_value(): StaticArray { - const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); - const result = new StaticArray(8); - for (let i = 0; i < 8; i++) { - result[i] = compressed[i]; - } - return result; - } - - root_output_bytes(out: Uint8Array): void { - let output_block_counter: u64 = 0; - for (let i = 0; i < out.length; i += 2 * OUT_LEN) { - const words = compress( - this.input_chaining_value, 
- this.block_words, - output_block_counter, - this.block_len, - this.flags | ROOT - ); - const out_block = out.subarray(i, i + 2 * OUT_LEN); - for (let j = 0; j < words.length; j++) { - const word = words[j]; - const offset = j * 4; - if (offset < out_block.length) { - out_block[offset] = word & 0xff; - if (offset + 1 < out_block.length) { - out_block[offset + 1] = (word >> 8) & 0xff; - if (offset + 2 < out_block.length) { - out_block[offset + 2] = (word >> 16) & 0xff; - if (offset + 3 < out_block.length) { - out_block[offset + 3] = (word >> 24) & 0xff; - } - } - } - } - } - output_block_counter++; - } - } -} - -function parent_output( - left_child_cv: StaticArray, - right_child_cv: StaticArray, - key_words: StaticArray, - flags: u32 -): Output { - const block_words = new StaticArray(16); - for (let i = 0; i < 8; i++) { - block_words[i] = left_child_cv[i]; - block_words[i + 8] = right_child_cv[i]; - } - return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); -} - -function parent_cv( - left_child_cv: StaticArray, - right_child_cv: StaticArray, - key_words: StaticArray, - flags: u32 -): StaticArray { - return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); -} - -function min(a: usize, b: usize): usize { - return a < b ? a : b; -} diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 766b1f7b8c..be57a01a40 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -4,9 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", + "build": "pnpm run build:debug && npm run build:release", "test": "node tests", - "start": "npx serve ." 
+ "prepare": "pnpm run build" }, "dependencies": { "assemblyscript": "^0.27.36", diff --git a/packages/xetchunk-wasm/assembly/tsconfig.json b/packages/xetchunk-wasm/assembly/tsconfig.json index 8131d68a0a..33daff5dac 100644 --- a/packages/xetchunk-wasm/assembly/tsconfig.json +++ b/packages/xetchunk-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index ee20e8a094..3929f2ef8f 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -4,8 +4,9 @@ "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", - "build": "npm run build:debug && npm run build:release", - "test": "node tests" + "build": "pnpm run build:debug && npm run build:release", + "test": "node tests", + "prepare": "pnpm run build" }, "dependencies": { "@huggingface/blake3-wasm": "workspace:*" From 2664cba2be3ed620884dd57a5b00e8d2820f0569 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:24:39 +0200 Subject: [PATCH 12/44] use builtins --- packages/blake3-wasm/assembly/blake3.ts | 4 --- .../xetchunk-wasm/assembly/xet-chunker.ts | 35 ------------------- 2 files changed, 39 deletions(-) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 8f1ddb537f..eadddaa2de 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -351,7 +351,3 @@ function parent_cv( ): StaticArray { return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } - -function min(a: usize, b: usize): usize { - return a < b ? 
a : b; -} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 2054a20694..6816064efd 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -133,38 +133,3 @@ function computeDataHash(data: Uint8Array): Uint8Array { hasher.finalize(hash); return hash; } - -// Helper function to find minimum of two numbers -function min(a: usize, b: usize): usize { - return a < b ? a : b; -} - -// Helper function to count leading zeros -function clz(x: u64): u32 { - let n: u32 = 0; - if (x == 0) return 64; - if ((x & 0xffffffff00000000) == 0) { - n += 32; - x <<= 32; - } - if ((x & 0xffff000000000000) == 0) { - n += 16; - x <<= 16; - } - if ((x & 0xff00000000000000) == 0) { - n += 8; - x <<= 8; - } - if ((x & 0xf000000000000000) == 0) { - n += 4; - x <<= 4; - } - if ((x & 0xc000000000000000) == 0) { - n += 2; - x <<= 2; - } - if ((x & 0x8000000000000000) == 0) { - n += 1; - } - return n; -} From 031dc0d6ee7c4f4046af76d1779f4b7963e9e2cf Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:33:40 +0200 Subject: [PATCH 13/44] package org --- packages/blake3-wasm/package.json | 12 ++++++-- packages/blake3-wasm/pnpm-lock.yaml | 2 +- packages/gearhash-wasm/package.json | 9 +++--- packages/gearhash-wasm/pnpm-lock.yaml | 8 +----- packages/xetchunk-wasm/assembly/index.ts | 1 - packages/xetchunk-wasm/assembly/next-match.ts | 28 ------------------- .../xetchunk-wasm/assembly/xet-chunker.ts | 4 +-- packages/xetchunk-wasm/package.json | 14 +++++++++- packages/xetchunk-wasm/pnpm-lock.yaml | 3 ++ 9 files changed, 33 insertions(+), 48 deletions(-) delete mode 100644 packages/xetchunk-wasm/assembly/next-match.ts diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index cdb861f379..32b92cc077 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -8,14 +8,20 @@ "test": "node tests", "prepare": "pnpm 
run build" }, - "dependencies": { - "assemblyscript": "^0.27.36" - }, + "keywords": [ + "blake3", + "assemblyscript", + "assembly", + "wasm" + ], "type": "module", "exports": { ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/blake3-wasm/pnpm-lock.yaml b/packages/blake3-wasm/pnpm-lock.yaml index d18c304a0e..f96d25e4f8 100644 --- a/packages/blake3-wasm/pnpm-lock.yaml +++ b/packages/blake3-wasm/pnpm-lock.yaml @@ -7,7 +7,7 @@ settings: importers: .: - dependencies: + devDependencies: assemblyscript: specifier: ^0.27.36 version: 0.27.37 diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index be57a01a40..54b4f3b690 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -8,14 +8,10 @@ "test": "node tests", "prepare": "pnpm run build" }, - "dependencies": { - "assemblyscript": "^0.27.36", - "@huggingface/blake3-wasm": "workspace:*", - "@huggingface/xetchunk-wasm": "workspace:*" - }, "keywords": [ "gearhash", "assemblyscript", + "assembly", "wasm" ], "type": "module", @@ -23,6 +19,9 @@ ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index c86f2350d3..cf8533a3b9 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -7,13 +7,7 @@ settings: importers: .: - dependencies: - '@huggingface/blake3-wasm': - specifier: workspace:* - version: link:../blake3-wasm - '@huggingface/xetchunk-wasm': - specifier: workspace:* - version: link:../xetchunk-wasm + devDependencies: assemblyscript: specifier: ^0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index e4bd372b33..d889a1ba0a 
100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1,2 +1 @@ export * from "./xet-chunker"; -export * from "./next-match"; diff --git a/packages/xetchunk-wasm/assembly/next-match.ts b/packages/xetchunk-wasm/assembly/next-match.ts deleted file mode 100644 index 5cf17752c1..0000000000 --- a/packages/xetchunk-wasm/assembly/next-match.ts +++ /dev/null @@ -1,28 +0,0 @@ -export class MatchResult { - position: i32; - hash: u64; - - constructor(position: i32, hash: u64) { - this.position = position; - this.hash = hash; - } -} - -export function nextMatch(data: Uint8Array, mask: u64, hash: u64): MatchResult { - const nBytes = data.length; - let pos: usize = 0; - - while (pos < nBytes) { - // Update hash with next byte - hash = ((hash << 1) | data[pos]) & mask; - - // Check if we found a match - if (hash == 0) { - return new MatchResult(pos, hash); - } - - pos++; - } - - return new MatchResult(-1, hash); -} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 6816064efd..fac83a1f8e 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,5 +1,5 @@ -import { nextMatch } from "./next-match"; -import { Blake3Hasher } from "@huggingface/blake3-wasm"; +import { nextMatch } from "@huggingface/gearhash-wasm/assembly"; +import { Blake3Hasher } from "@huggingface/blake3-wasm/assembly"; // Constants const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 3929f2ef8f..668e3da2c7 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -8,14 +8,26 @@ "test": "node tests", "prepare": "pnpm run build" }, + "keywords": [ + "xet", + "chunk", + "chunking", + "assemblyscript", + "assembly", + "wasm" + ], "dependencies": { - "@huggingface/blake3-wasm": "workspace:*" + "@huggingface/blake3-wasm": 
"workspace:*", + "@huggingface/gearhash-wasm": "workspace:*" }, "type": "module", "exports": { ".": { "import": "./build/release.js", "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" } }, "devDependencies": { diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml index 23a82a8008..32bdab0b41 100644 --- a/packages/xetchunk-wasm/pnpm-lock.yaml +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -11,6 +11,9 @@ importers: '@huggingface/blake3-wasm': specifier: workspace:* version: link:../blake3-wasm + '@huggingface/gearhash-wasm': + specifier: workspace:* + version: link:../gearhash-wasm devDependencies: assemblyscript: specifier: ^0.27.36 From 01d7472d0157dd766461424298460844c6e05536 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:46:34 +0200 Subject: [PATCH 14/44] also provide direct wasm exports --- packages/blake3-wasm/package.json | 3 +++ packages/gearhash-wasm/package.json | 3 +++ packages/xetchunk-wasm/package.json | 3 +++ 3 files changed, 9 insertions(+) diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 32b92cc077..4085980507 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -22,6 +22,9 @@ }, "./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 54b4f3b690..ecc3ef79ff 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -22,6 +22,9 @@ }, "./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 668e3da2c7..1842aa1c47 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -28,6 +28,9 @@ }, 
"./assembly": { "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" } }, "devDependencies": { From 2b9b57ae8498f92795079cac7d9eb52d706c53f8 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:53:10 +0200 Subject: [PATCH 15/44] proper build outputs --- packages/blake3-wasm/asconfig.json | 22 ++++++++++++++++++++++ packages/blake3-wasm/build/.gitignore | 2 ++ packages/gearhash-wasm/index.html | 10 ---------- packages/xetchunk-wasm/asconfig.json | 22 ++++++++++++++++++++++ packages/xetchunk-wasm/build/.gitignore | 2 ++ 5 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 packages/blake3-wasm/asconfig.json create mode 100644 packages/blake3-wasm/build/.gitignore delete mode 100644 packages/gearhash-wasm/index.html create mode 100644 packages/xetchunk-wasm/asconfig.json create mode 100644 packages/xetchunk-wasm/build/.gitignore diff --git a/packages/blake3-wasm/asconfig.json b/packages/blake3-wasm/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/blake3-wasm/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/blake3-wasm/build/.gitignore b/packages/blake3-wasm/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/blake3-wasm/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/packages/gearhash-wasm/index.html b/packages/gearhash-wasm/index.html deleted file mode 100644 index c170ddeb9a..0000000000 --- a/packages/gearhash-wasm/index.html +++ /dev/null @@ -1,10 +0,0 @@ - - - - - - - diff --git 
a/packages/xetchunk-wasm/asconfig.json b/packages/xetchunk-wasm/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/xetchunk-wasm/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/xetchunk-wasm/build/.gitignore b/packages/xetchunk-wasm/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/xetchunk-wasm/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From af2ccc1e00a5289f18317edfb2207e7d00907ede Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 4 Jun 2025 16:54:50 +0200 Subject: [PATCH 16/44] add exports for JS --- packages/blake3-wasm/assembly/blake3.ts | 4 ++++ packages/xetchunk-wasm/assembly/xet-chunker.ts | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index eadddaa2de..8fd20946c4 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -351,3 +351,7 @@ function parent_cv( ): StaticArray { return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } + +export function createBlake3Hasher(): Blake3Hasher { + return new Blake3Hasher(); +} diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index fac83a1f8e..2145684eab 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -133,3 +133,7 @@ function computeDataHash(data: Uint8Array): Uint8Array { hasher.finalize(hash); return hash; } + +export function 
createXetChunker(targetChunkSize: usize = TARGET_CHUNK_SIZE): XetChunker { + return new XetChunker(targetChunkSize); +} From f1a31cb2191cd686d9282f21376a041848916de0 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 11:37:37 +0200 Subject: [PATCH 17/44] add test for blake3 + simple function --- packages/blake3-wasm/assembly/blake3.ts | 53 +++++--- packages/blake3-wasm/tests/index.js | 163 ++++++++++++++++++++++++ 2 files changed, 198 insertions(+), 18 deletions(-) create mode 100644 packages/blake3-wasm/tests/index.js diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 8fd20946c4..d864765385 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -1,8 +1,8 @@ // Constants from the reference implementation -const OUT_LEN: usize = 32; +const OUT_LEN: i32 = 32; // const KEY_LEN: usize = 32; -const BLOCK_LEN: usize = 64; -const CHUNK_LEN: usize = 1024; +const BLOCK_LEN: i32 = 64; +const CHUNK_LEN: i32 = 1024; const CHUNK_START: u32 = 1 << 0; const CHUNK_END: u32 = 1 << 1; @@ -16,10 +16,10 @@ const IV: StaticArray = [ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, ]; -const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; +const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; // The mixing function, G, which mixes either a column or a diagonal. 
-function g(state: StaticArray, a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32): void { +function g(state: StaticArray, a: i32, b: i32, c: i32, d: i32, mx: u32, my: u32): void { state[a] = state[a] + state[b] + mx; state[d] = rotl32(state[d] ^ state[a], 16); state[c] = state[c] + state[d]; @@ -115,7 +115,7 @@ function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray; private cv_stack: StaticArray>; @@ -123,17 +123,19 @@ export class Blake3Hasher { private flags: u32; constructor() { - this.key_words = new StaticArray(8); + const key_words = new StaticArray(8); for (let i = 0; i < 8; i++) { - this.key_words[i] = IV[i]; + key_words[i] = IV[i]; } - this.chunk_state = new ChunkState(this.key_words, 0, 0); + this.key_words = key_words; + this.chunk_state = new ChunkState(key_words, 0, 0); this.cv_stack = new StaticArray>(54); + this.cv_stack_len = 0; + this.flags = 0; + for (let i = 0; i < 54; i++) { this.cv_stack[i] = new StaticArray(8); } - this.cv_stack_len = 0; - this.flags = 0; } update(input: Uint8Array): void { @@ -205,17 +207,18 @@ class ChunkState { constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { this.chaining_value = new StaticArray(8); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = key_words[i]; - } this.chunk_counter = chunk_counter; this.block = new Uint8Array(BLOCK_LEN); this.block_len = 0; this.blocks_compressed = 0; this.flags = flags; + + for (let i = 0; i < 8; i++) { + this.chaining_value[i] = key_words[i]; + } } - len(): usize { + len(): i32 { return BLOCK_LEN * this.blocks_compressed + this.block_len; } @@ -249,7 +252,7 @@ class ChunkState { for (let i = 0; i < take; i++) { this.block[this.block_len + i] = input[inputPos + i]; } - this.block_len += take; + this.block_len += take as u8; inputPos += take; } } @@ -352,6 +355,20 @@ function parent_cv( return parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value(); } -export function createBlake3Hasher(): Blake3Hasher 
{ - return new Blake3Hasher(); +export function blake3(input: Uint8Array): Uint8Array { + const hasher = new Blake3Hasher(); + hasher.update(input); + const output = new Uint8Array(32); + hasher.finalize(output); + return output; +} + +export function blake3Hex(input: Uint8Array): string { + const hash = blake3(input); + const hex = new Array(64); + for (let i = 0; i < 32; i++) { + hex[i * 2] = (hash[i] >> 4).toString(16); + hex[i * 2 + 1] = (hash[i] & 0x0f).toString(16); + } + return hex.join(""); } diff --git a/packages/blake3-wasm/tests/index.js b/packages/blake3-wasm/tests/index.js new file mode 100644 index 0000000000..8f5b95b9f6 --- /dev/null +++ b/packages/blake3-wasm/tests/index.js @@ -0,0 +1,163 @@ +// Adapted from https://github.com/mcmilk/BLAKE3-tests/blob/11a8abeceac93b5eba664eae3679efb4ffa5bc0a/blake3_test.c + +import { blake3Hex } from "../build/debug.js"; + +const buffer = new Uint8Array(102400); +let i = 0; +let j = 0; + +for (i = 0, j = 0; i < buffer.length; i++, j++) { + if (j === 251) { + j = 0; + } + buffer[i] = j; +} + +const testCases = [ + { + buf: buffer.slice(0, 0), + expected: "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", + }, + { + buf: buffer.slice(0, 1), + expected: "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213", + }, + { + buf: buffer.slice(0, 2), + expected: "7b7015bb92cf0b318037702a6cdd81dee41224f734684c2c122cd6359cb1ee63", + }, + { + buf: buffer.slice(0, 3), + expected: "e1be4d7a8ab5560aa4199eea339849ba8e293d55ca0a81006726d184519e647f", + }, + { + buf: buffer.slice(0, 4), + expected: "f30f5ab28fe047904037f77b6da4fea1e27241c5d132638d8bedce9d40494f32", + }, + { + buf: buffer.slice(0, 5), + expected: "b40b44dfd97e7a84a996a91af8b85188c66c126940ba7aad2e7ae6b385402aa2", + }, + { + buf: buffer.slice(0, 6), + expected: "06c4e8ffb6872fad96f9aaca5eee1553eb62aed0ad7198cef42e87f6a616c844", + }, + { + buf: buffer.slice(0, 7), + expected: "3f8770f387faad08faa9d8414e9f449ac68e6ff0417f673f602a646a891419fe", 
+ }, + { + buf: buffer.slice(0, 8), + expected: "2351207d04fc16ade43ccab08600939c7c1fa70a5c0aaca76063d04c3228eaeb", + }, + { + buf: buffer.slice(0, 63), + expected: "e9bc37a594daad83be9470df7f7b3798297c3d834ce80ba85d6e207627b7db7b", + }, + { + buf: buffer.slice(0, 64), + expected: "4eed7141ea4a5cd4b788606bd23f46e212af9cacebacdc7d1f4c6dc7f2511b98", + }, + { + buf: buffer.slice(0, 65), + expected: "de1e5fa0be70df6d2be8fffd0e99ceaa8eb6e8c93a63f2d8d1c30ecb6b263dee", + }, + { + buf: buffer.slice(0, 127), + expected: "d81293fda863f008c09e92fc382a81f5a0b4a1251cba1634016a0f86a6bd640d", + }, + { + buf: buffer.slice(0, 128), + expected: "f17e570564b26578c33bb7f44643f539624b05df1a76c81f30acd548c44b45ef", + }, + { + buf: buffer.slice(0, 129), + expected: "683aaae9f3c5ba37eaaf072aed0f9e30bac0865137bae68b1fde4ca2aebdcb12", + }, + { + buf: buffer.slice(0, 1023), + expected: "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd1", + }, + { + buf: buffer.slice(0, 1024), + expected: "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af", + }, + { + buf: buffer.slice(0, 1025), + expected: "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b844", + }, + { + buf: buffer.slice(0, 2048), + expected: "e776b6028c7cd22a4d0ba182a8bf62205d2ef576467e838ed6f2529b85fba24a", + }, + { + buf: buffer.slice(0, 2049), + expected: "5f4d72f40d7a5f82b15ca2b2e44b1de3c2ef86c426c95c1af0b6879522563030", + }, + { + buf: buffer.slice(0, 3072), + expected: "b98cb0ff3623be03326b373de6b9095218513e64f1ee2edd2525c7ad1e5cffd2", + }, + { + buf: buffer.slice(0, 3073), + expected: "7124b49501012f81cc7f11ca069ec9226cecb8a2c850cfe644e327d22d3e1cd3", + }, + { + buf: buffer.slice(0, 4096), + expected: "015094013f57a5277b59d8475c0501042c0b642e531b0a1c8f58d2163229e969", + }, + { + buf: buffer.slice(0, 4097), + expected: "9b4052b38f1c5fc8b1f9ff7ac7b27cd242487b3d890d15c96a1c25b8aa0fb995", + }, + { + buf: buffer.slice(0, 5120), + expected: 
"9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833a", + }, + { + buf: buffer.slice(0, 5121), + expected: "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff9", + }, + { + buf: buffer.slice(0, 6144), + expected: "3e2e5b74e048f3add6d21faab3f83aa44d3b2278afb83b80b3c35164ebeca205", + }, + { + buf: buffer.slice(0, 6145), + expected: "f1323a8631446cc50536a9f705ee5cb619424d46887f3c376c695b70e0f0507f", + }, + { + buf: buffer.slice(0, 7168), + expected: "61da957ec2499a95d6b8023e2b0e604ec7f6b50e80a9678b89d2628e99ada77a", + }, + { + buf: buffer.slice(0, 7169), + expected: "a003fc7a51754a9b3c7fae0367ab3d782dccf28855a03d435f8cfe74605e7817", + }, + { + buf: buffer.slice(0, 8192), + expected: "aae792484c8efe4f19e2ca7d371d8c467ffb10748d8a5a1ae579948f718a2a63", + }, + { + buf: buffer.slice(0, 8193), + expected: "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3bb", + }, + { + buf: buffer.slice(0, 102400), + expected: "bc3e3d41a1146b069abffad3c0d44860cf664390afce4d9661f7902e7943e085", + }, +]; + +for (const testCase of testCases) { + const result = blake3Hex(testCase.buf); + console.log(result); + + if (result !== testCase.expected) { + console.error(`Test case failed: ${testCase.buf.length} bytes`); + console.error(`Expected: ${testCase.expected}`); + console.error(`Actual: ${result}`); + process.exit(1); + } +} + +console.log("All test cases passed"); From fef842369290b6b8c5c8d5a72658e6dada525a90 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 12:00:18 +0200 Subject: [PATCH 18/44] add rust reference implementation in package --- packages/blake3-wasm/.npmignore | 1 + packages/blake3-wasm/vendor/Cargo.lock | 7 + packages/blake3-wasm/vendor/Cargo.toml | 12 + packages/blake3-wasm/vendor/README.md | 27 ++ packages/blake3-wasm/vendor/src/blake3.rs | 376 ++++++++++++++++++ packages/blake3-wasm/vendor/src/lib.rs | 3 + packages/blake3-wasm/vendor/src/main.rs | 22 + packages/blake3-wasm/vendor/target/.gitignore | 2 + 8 files 
changed, 450 insertions(+) create mode 100644 packages/blake3-wasm/.npmignore create mode 100644 packages/blake3-wasm/vendor/Cargo.lock create mode 100644 packages/blake3-wasm/vendor/Cargo.toml create mode 100644 packages/blake3-wasm/vendor/README.md create mode 100644 packages/blake3-wasm/vendor/src/blake3.rs create mode 100644 packages/blake3-wasm/vendor/src/lib.rs create mode 100644 packages/blake3-wasm/vendor/src/main.rs create mode 100644 packages/blake3-wasm/vendor/target/.gitignore diff --git a/packages/blake3-wasm/.npmignore b/packages/blake3-wasm/.npmignore new file mode 100644 index 0000000000..5657f6ea7d --- /dev/null +++ b/packages/blake3-wasm/.npmignore @@ -0,0 +1 @@ +vendor \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/Cargo.lock b/packages/blake3-wasm/vendor/Cargo.lock new file mode 100644 index 0000000000..9f0162bf75 --- /dev/null +++ b/packages/blake3-wasm/vendor/Cargo.lock @@ -0,0 +1,7 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "blake3-example" +version = "0.1.0" diff --git a/packages/blake3-wasm/vendor/Cargo.toml b/packages/blake3-wasm/vendor/Cargo.toml new file mode 100644 index 0000000000..7f31968ed3 --- /dev/null +++ b/packages/blake3-wasm/vendor/Cargo.toml @@ -0,0 +1,12 @@ +[package] +name = "blake3-example" +version = "0.1.0" +edition = "2021" + +[lib] +name = "reference_impl" +path = "src/lib.rs" + +[[bin]] +name = "blake3-example" +path = "src/main.rs" \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/README.md b/packages/blake3-wasm/vendor/README.md new file mode 100644 index 0000000000..46cce0d076 --- /dev/null +++ b/packages/blake3-wasm/vendor/README.md @@ -0,0 +1,27 @@ +# BLAKE3 Example + +This is a simple example that demonstrates using the BLAKE3 hash function with empty input. + +## Prerequisites + +- Rust and Cargo installed on your system. 
You can install them from [rustup.rs](https://rustup.rs/) + +## Running the Example + +1. Open a terminal in this directory +2. Run the following command: + ```bash + cargo run + ``` + +The program will output a 32-byte hash in hexadecimal format. For empty input, the expected output should be: +``` +af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262 +``` + +## What the Code Does + +1. Creates a new BLAKE3 hasher +2. Updates it with empty input +3. Finalizes the hash into a 32-byte buffer +4. Prints the hash in hexadecimal format \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/src/blake3.rs b/packages/blake3-wasm/vendor/src/blake3.rs new file mode 100644 index 0000000000..bc701784f8 --- /dev/null +++ b/packages/blake3-wasm/vendor/src/blake3.rs @@ -0,0 +1,376 @@ +// From https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs + +//! This is the reference implementation of BLAKE3. It is used for testing and +//! as a readable example of the algorithms involved. Section 5.1 of [the BLAKE3 +//! spec](https://github.com/BLAKE3-team/BLAKE3-specs/blob/master/blake3.pdf) +//! discusses this implementation. You can render docs for this implementation +//! by running `cargo doc --open` in this directory. +//! +//! # Example +//! +//! ``` +//! let mut hasher = reference_impl::Hasher::new(); +//! hasher.update(b"abc"); +//! hasher.update(b"def"); +//! let mut hash = [0; 32]; +//! hasher.finalize(&mut hash); +//! let mut extended_hash = [0; 500]; +//! hasher.finalize(&mut extended_hash); +//! assert_eq!(hash, extended_hash[..32]); +//! 
``` + +use core::cmp::min; + +const OUT_LEN: usize = 32; +const KEY_LEN: usize = 32; +const BLOCK_LEN: usize = 64; +const CHUNK_LEN: usize = 1024; + +const CHUNK_START: u32 = 1 << 0; +const CHUNK_END: u32 = 1 << 1; +const PARENT: u32 = 1 << 2; +const ROOT: u32 = 1 << 3; +const KEYED_HASH: u32 = 1 << 4; +const DERIVE_KEY_CONTEXT: u32 = 1 << 5; +const DERIVE_KEY_MATERIAL: u32 = 1 << 6; + +const IV: [u32; 8] = [ + 0x6A09E667, 0xBB67AE85, 0x3C6EF372, 0xA54FF53A, 0x510E527F, 0x9B05688C, 0x1F83D9AB, 0x5BE0CD19, +]; + +const MSG_PERMUTATION: [usize; 16] = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; + +// The mixing function, G, which mixes either a column or a diagonal. +fn g(state: &mut [u32; 16], a: usize, b: usize, c: usize, d: usize, mx: u32, my: u32) { + state[a] = state[a].wrapping_add(state[b]).wrapping_add(mx); + state[d] = (state[d] ^ state[a]).rotate_right(16); + state[c] = state[c].wrapping_add(state[d]); + state[b] = (state[b] ^ state[c]).rotate_right(12); + state[a] = state[a].wrapping_add(state[b]).wrapping_add(my); + state[d] = (state[d] ^ state[a]).rotate_right(8); + state[c] = state[c].wrapping_add(state[d]); + state[b] = (state[b] ^ state[c]).rotate_right(7); +} + +fn round(state: &mut [u32; 16], m: &[u32; 16]) { + // Mix the columns. + g(state, 0, 4, 8, 12, m[0], m[1]); + g(state, 1, 5, 9, 13, m[2], m[3]); + g(state, 2, 6, 10, 14, m[4], m[5]); + g(state, 3, 7, 11, 15, m[6], m[7]); + // Mix the diagonals. 
+ g(state, 0, 5, 10, 15, m[8], m[9]); + g(state, 1, 6, 11, 12, m[10], m[11]); + g(state, 2, 7, 8, 13, m[12], m[13]); + g(state, 3, 4, 9, 14, m[14], m[15]); +} + +fn permute(m: &mut [u32; 16]) { + let mut permuted = [0; 16]; + for i in 0..16 { + permuted[i] = m[MSG_PERMUTATION[i]]; + } + *m = permuted; +} + +fn compress( + chaining_value: &[u32; 8], + block_words: &[u32; 16], + counter: u64, + block_len: u32, + flags: u32, +) -> [u32; 16] { + let counter_low = counter as u32; + let counter_high = (counter >> 32) as u32; + #[rustfmt::skip] + let mut state = [ + chaining_value[0], chaining_value[1], chaining_value[2], chaining_value[3], + chaining_value[4], chaining_value[5], chaining_value[6], chaining_value[7], + IV[0], IV[1], IV[2], IV[3], + counter_low, counter_high, block_len, flags, + ]; + let mut block = *block_words; + + round(&mut state, &block); // round 1 + permute(&mut block); + round(&mut state, &block); // round 2 + permute(&mut block); + round(&mut state, &block); // round 3 + permute(&mut block); + round(&mut state, &block); // round 4 + permute(&mut block); + round(&mut state, &block); // round 5 + permute(&mut block); + round(&mut state, &block); // round 6 + permute(&mut block); + round(&mut state, &block); // round 7 + + for i in 0..8 { + state[i] ^= state[i + 8]; + state[i + 8] ^= chaining_value[i]; + } + state +} + +fn first_8_words(compression_output: [u32; 16]) -> [u32; 8] { + compression_output[0..8].try_into().unwrap() +} + +fn words_from_little_endian_bytes(bytes: &[u8], words: &mut [u32]) { + debug_assert_eq!(bytes.len(), 4 * words.len()); + for (four_bytes, word) in bytes.chunks_exact(4).zip(words) { + *word = u32::from_le_bytes(four_bytes.try_into().unwrap()); + } +} + +// Each chunk or parent node can produce either an 8-word chaining value or, by +// setting the ROOT flag, any number of final output bytes. The Output struct +// captures the state just prior to choosing between those two possibilities. 
+struct Output { + input_chaining_value: [u32; 8], + block_words: [u32; 16], + counter: u64, + block_len: u32, + flags: u32, +} + +impl Output { + fn chaining_value(&self) -> [u32; 8] { + first_8_words(compress( + &self.input_chaining_value, + &self.block_words, + self.counter, + self.block_len, + self.flags, + )) + } + + fn root_output_bytes(&self, out_slice: &mut [u8]) { + let mut output_block_counter = 0; + for out_block in out_slice.chunks_mut(2 * OUT_LEN) { + let words = compress( + &self.input_chaining_value, + &self.block_words, + output_block_counter, + self.block_len, + self.flags | ROOT, + ); + // The output length might not be a multiple of 4. + for (word, out_word) in words.iter().zip(out_block.chunks_mut(4)) { + out_word.copy_from_slice(&word.to_le_bytes()[..out_word.len()]); + } + output_block_counter += 1; + } + } +} + +struct ChunkState { + chaining_value: [u32; 8], + chunk_counter: u64, + block: [u8; BLOCK_LEN], + block_len: u8, + blocks_compressed: u8, + flags: u32, +} + +impl ChunkState { + fn new(key_words: [u32; 8], chunk_counter: u64, flags: u32) -> Self { + Self { + chaining_value: key_words, + chunk_counter, + block: [0; BLOCK_LEN], + block_len: 0, + blocks_compressed: 0, + flags, + } + } + + fn len(&self) -> usize { + BLOCK_LEN * self.blocks_compressed as usize + self.block_len as usize + } + + fn start_flag(&self) -> u32 { + if self.blocks_compressed == 0 { + CHUNK_START + } else { + 0 + } + } + + fn update(&mut self, mut input: &[u8]) { + while !input.is_empty() { + // If the block buffer is full, compress it and clear it. More + // input is coming, so this compression is not CHUNK_END. 
+ if self.block_len as usize == BLOCK_LEN { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&self.block, &mut block_words); + self.chaining_value = first_8_words(compress( + &self.chaining_value, + &block_words, + self.chunk_counter, + BLOCK_LEN as u32, + self.flags | self.start_flag(), + )); + self.blocks_compressed += 1; + self.block = [0; BLOCK_LEN]; + self.block_len = 0; + } + + // Copy input bytes into the block buffer. + let want = BLOCK_LEN - self.block_len as usize; + let take = min(want, input.len()); + self.block[self.block_len as usize..][..take].copy_from_slice(&input[..take]); + self.block_len += take as u8; + input = &input[take..]; + } + } + + fn output(&self) -> Output { + let mut block_words = [0; 16]; + words_from_little_endian_bytes(&self.block, &mut block_words); + Output { + input_chaining_value: self.chaining_value, + block_words, + counter: self.chunk_counter, + block_len: self.block_len as u32, + flags: self.flags | self.start_flag() | CHUNK_END, + } + } +} + +fn parent_output( + left_child_cv: [u32; 8], + right_child_cv: [u32; 8], + key_words: [u32; 8], + flags: u32, +) -> Output { + let mut block_words = [0; 16]; + block_words[..8].copy_from_slice(&left_child_cv); + block_words[8..].copy_from_slice(&right_child_cv); + Output { + input_chaining_value: key_words, + block_words, + counter: 0, // Always 0 for parent nodes. + block_len: BLOCK_LEN as u32, // Always BLOCK_LEN (64) for parent nodes. + flags: PARENT | flags, + } +} + +fn parent_cv( + left_child_cv: [u32; 8], + right_child_cv: [u32; 8], + key_words: [u32; 8], + flags: u32, +) -> [u32; 8] { + parent_output(left_child_cv, right_child_cv, key_words, flags).chaining_value() +} + +/// An incremental hasher that can accept any number of writes. 
+pub struct Hasher { + chunk_state: ChunkState, + key_words: [u32; 8], + cv_stack: [[u32; 8]; 54], // Space for 54 subtree chaining values: + cv_stack_len: u8, // 2^54 * CHUNK_LEN = 2^64 + flags: u32, +} + +impl Hasher { + fn new_internal(key_words: [u32; 8], flags: u32) -> Self { + Self { + chunk_state: ChunkState::new(key_words, 0, flags), + key_words, + cv_stack: [[0; 8]; 54], + cv_stack_len: 0, + flags, + } + } + + /// Construct a new `Hasher` for the regular hash function. + pub fn new() -> Self { + Self::new_internal(IV, 0) + } + + /// Construct a new `Hasher` for the keyed hash function. + pub fn new_keyed(key: &[u8; KEY_LEN]) -> Self { + let mut key_words = [0; 8]; + words_from_little_endian_bytes(key, &mut key_words); + Self::new_internal(key_words, KEYED_HASH) + } + + /// Construct a new `Hasher` for the key derivation function. The context + /// string should be hardcoded, globally unique, and application-specific. + pub fn new_derive_key(context: &str) -> Self { + let mut context_hasher = Self::new_internal(IV, DERIVE_KEY_CONTEXT); + context_hasher.update(context.as_bytes()); + let mut context_key = [0; KEY_LEN]; + context_hasher.finalize(&mut context_key); + let mut context_key_words = [0; 8]; + words_from_little_endian_bytes(&context_key, &mut context_key_words); + Self::new_internal(context_key_words, DERIVE_KEY_MATERIAL) + } + + fn push_stack(&mut self, cv: [u32; 8]) { + self.cv_stack[self.cv_stack_len as usize] = cv; + self.cv_stack_len += 1; + } + + fn pop_stack(&mut self) -> [u32; 8] { + self.cv_stack_len -= 1; + self.cv_stack[self.cv_stack_len as usize] + } + + // Section 5.1.2 of the BLAKE3 spec explains this algorithm in more detail. + fn add_chunk_chaining_value(&mut self, mut new_cv: [u32; 8], mut total_chunks: u64) { + // This chunk might complete some subtrees. For each completed subtree, + // its left child will be the current top entry in the CV stack, and + // its right child will be the current value of `new_cv`. 
Pop each left + // child off the stack, merge it with `new_cv`, and overwrite `new_cv` + // with the result. After all these merges, push the final value of + // `new_cv` onto the stack. The number of completed subtrees is given + // by the number of trailing 0-bits in the new total number of chunks. + while total_chunks & 1 == 0 { + new_cv = parent_cv(self.pop_stack(), new_cv, self.key_words, self.flags); + total_chunks >>= 1; + } + self.push_stack(new_cv); + } + + /// Add input to the hash state. This can be called any number of times. + pub fn update(&mut self, mut input: &[u8]) { + while !input.is_empty() { + // If the current chunk is complete, finalize it and reset the + // chunk state. More input is coming, so this chunk is not ROOT. + if self.chunk_state.len() == CHUNK_LEN { + let chunk_cv = self.chunk_state.output().chaining_value(); + let total_chunks = self.chunk_state.chunk_counter + 1; + self.add_chunk_chaining_value(chunk_cv, total_chunks); + self.chunk_state = ChunkState::new(self.key_words, total_chunks, self.flags); + } + + // Compress input bytes into the current chunk state. + let want = CHUNK_LEN - self.chunk_state.len(); + let take = min(want, input.len()); + self.chunk_state.update(&input[..take]); + input = &input[take..]; + } + } + + /// Finalize the hash and write any number of output bytes. + pub fn finalize(&self, out_slice: &mut [u8]) { + // Starting with the Output from the current chunk, compute all the + // parent chaining values along the right edge of the tree, until we + // have the root Output. 
+ let mut output = self.chunk_state.output(); + let mut parent_nodes_remaining = self.cv_stack_len as usize; + while parent_nodes_remaining > 0 { + parent_nodes_remaining -= 1; + output = parent_output( + self.cv_stack[parent_nodes_remaining], + output.chaining_value(), + self.key_words, + self.flags, + ); + } + output.root_output_bytes(out_slice); + } +} diff --git a/packages/blake3-wasm/vendor/src/lib.rs b/packages/blake3-wasm/vendor/src/lib.rs new file mode 100644 index 0000000000..874b108ebf --- /dev/null +++ b/packages/blake3-wasm/vendor/src/lib.rs @@ -0,0 +1,3 @@ +mod blake3; + +pub use blake3::*; \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/src/main.rs b/packages/blake3-wasm/vendor/src/main.rs new file mode 100644 index 0000000000..ff9aa8915d --- /dev/null +++ b/packages/blake3-wasm/vendor/src/main.rs @@ -0,0 +1,22 @@ +use std::io::Write; + +fn main() { + // Create a new hasher + let mut hasher = reference_impl::Hasher::new(); + + // Update with empty input + hasher.update(b""); + + // Create a buffer for the output + let mut output = [0u8; 32]; + + // Get the hash + hasher.finalize(&mut output); + + // Print the hash in hex format + let mut stdout = std::io::stdout(); + for byte in output { + write!(stdout, "{:02x}", byte).unwrap(); + } + println!(); +} \ No newline at end of file diff --git a/packages/blake3-wasm/vendor/target/.gitignore b/packages/blake3-wasm/vendor/target/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/blake3-wasm/vendor/target/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore From 48996e101f84121025bc9db5b25886c93da8f88a Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 12:18:15 +0200 Subject: [PATCH 19/44] fix mixing function --- packages/blake3-wasm/assembly/blake3.ts | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index d864765385..cf341e33d9 100644 
--- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -21,18 +21,13 @@ const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, // The mixing function, G, which mixes either a column or a diagonal. function g(state: StaticArray, a: i32, b: i32, c: i32, d: i32, mx: u32, my: u32): void { state[a] = state[a] + state[b] + mx; - state[d] = rotl32(state[d] ^ state[a], 16); + state[d] = rotr(state[d] ^ state[a], 16); state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 12); + state[b] = rotr(state[b] ^ state[c], 12); state[a] = state[a] + state[b] + my; - state[d] = rotl32(state[d] ^ state[a], 8); + state[d] = rotr(state[d] ^ state[a], 8); state[c] = state[c] + state[d]; - state[b] = rotl32(state[b] ^ state[c], 7); -} - -// Rotate left by n bits -function rotl32(x: u32, n: u32): u32 { - return (x << n) | (x >>> (32 - n)); + state[b] = rotr(state[b] ^ state[c], 7); } function round(state: StaticArray, m: StaticArray): void { From eff0eeb7ad6b3f49070f268f7106cccb7cf59ac7 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Fri, 13 Jun 2025 12:40:54 +0200 Subject: [PATCH 20/44] Blake 3 workspnpm --filter blake3-wasm build && pnpm --filter blake3-wasm test --- packages/blake3-wasm/assembly/blake3.ts | 6 +++++- packages/blake3-wasm/tests/index.js | 12 ++++++------ packages/blake3-wasm/vendor/src/main.rs | 10 +++++++++- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index cf341e33d9..880abace75 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -106,7 +106,11 @@ function compress( function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { for (let i = 0; i < words.length; i++) { const offset = i * 4; - words[i] = bytes[offset] | (bytes[offset + 1] << 8) | (bytes[offset + 2] << 16) | (bytes[offset + 3] << 24); + words[i] = + 
bytes[offset] | + ((bytes[offset + 1] as u32) << 8) | + ((bytes[offset + 2] as u32) << 16) | + ((bytes[offset + 3] as u32) << 24); } } diff --git a/packages/blake3-wasm/tests/index.js b/packages/blake3-wasm/tests/index.js index 8f5b95b9f6..55463e9495 100644 --- a/packages/blake3-wasm/tests/index.js +++ b/packages/blake3-wasm/tests/index.js @@ -76,15 +76,15 @@ const testCases = [ }, { buf: buffer.slice(0, 1023), - expected: "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd1", + expected: "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd11", }, { buf: buffer.slice(0, 1024), - expected: "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af", + expected: "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af7", }, { buf: buffer.slice(0, 1025), - expected: "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b844", + expected: "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b8444", }, { buf: buffer.slice(0, 2048), @@ -112,11 +112,11 @@ const testCases = [ }, { buf: buffer.slice(0, 5120), - expected: "9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833a", + expected: "9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833", }, { buf: buffer.slice(0, 5121), - expected: "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff9", + expected: "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff", }, { buf: buffer.slice(0, 6144), @@ -140,7 +140,7 @@ const testCases = [ }, { buf: buffer.slice(0, 8193), - expected: "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3bb", + expected: "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3b", }, { buf: buffer.slice(0, 102400), diff --git a/packages/blake3-wasm/vendor/src/main.rs b/packages/blake3-wasm/vendor/src/main.rs index ff9aa8915d..76a1537cbd 100644 --- a/packages/blake3-wasm/vendor/src/main.rs +++ b/packages/blake3-wasm/vendor/src/main.rs @@ -1,20 +1,28 @@ use std::io::Write; fn main() { + 
println!("Starting BLAKE3 hash computation for empty input"); + // Create a new hasher let mut hasher = reference_impl::Hasher::new(); + println!("Created new hasher"); // Update with empty input - hasher.update(b""); + let input = &[0u8, 1u8]; + println!("Input length: {} bytes", input.len()); + hasher.update(input); + println!("Updated hasher with input"); // Create a buffer for the output let mut output = [0u8; 32]; // Get the hash hasher.finalize(&mut output); + println!("Finalized hash computation"); // Print the hash in hex format let mut stdout = std::io::stdout(); + print!("Final hash: "); for byte in output { write!(stdout, "{:02x}", byte).unwrap(); } From d87e9b057dd52dea6592eb31e9f7bf6769e73aea Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 17:59:43 +0200 Subject: [PATCH 21/44] improvements --- packages/README.md | 3 - packages/blake3-wasm/README.md | 25 ++++++ packages/blake3-wasm/assembly/blake3.ts | 14 ++++ packages/blake3-wasm/assembly/tsconfig.json | 2 +- packages/blake3-wasm/package.json | 2 +- packages/blake3-wasm/pnpm-lock.yaml | 10 +-- packages/gearhash-wasm/README.md | 5 +- packages/gearhash-wasm/package.json | 2 +- packages/gearhash-wasm/pnpm-lock.yaml | 2 +- packages/xetchunk-wasm/README.md | 27 +++++++ packages/xetchunk-wasm/assembly/index.ts | 2 +- packages/xetchunk-wasm/assembly/tsconfig.json | 2 +- .../xetchunk-wasm/assembly/xet-chunker.ts | 76 ++++++++++--------- packages/xetchunk-wasm/package.json | 2 +- packages/xetchunk-wasm/pnpm-lock.yaml | 10 +-- packages/xetchunk-wasm/tests/index.js | 17 +++++ 16 files changed, 144 insertions(+), 57 deletions(-) delete mode 100644 packages/README.md create mode 100644 packages/blake3-wasm/README.md create mode 100644 packages/xetchunk-wasm/README.md create mode 100644 packages/xetchunk-wasm/tests/index.js diff --git a/packages/README.md b/packages/README.md deleted file mode 100644 index 020157a990..0000000000 --- a/packages/README.md +++ /dev/null @@ -1,3 +0,0 @@ -A WASM 
implementation of blake3 using assemblyscript. - -Implementation based on https://github.com/BLAKE3-team/BLAKE3/blob/master/reference_impl/reference_impl.rs \ No newline at end of file diff --git a/packages/blake3-wasm/README.md b/packages/blake3-wasm/README.md new file mode 100644 index 0000000000..944cca112e --- /dev/null +++ b/packages/blake3-wasm/README.md @@ -0,0 +1,25 @@ +JS and WASM implementations of https://github.com/BLAKE3-team/BLAKE3 + +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. + +## Usage + +```javascript +import { blake3, blake3Hex, createHasher, update, finalize } from '@huggingface/gearhash-wasm'; + +// Create a Uint8Array of data to search through +const data = new Uint8Array(1_000_000); // Example: 1MB of data +// ... fill data with your content ... + +const hashUint8 = blake3(data); +const hashHex = blake3Hex(data); + +// Or streaming fashion +const hasher = createHasher(); + +for (const chunk of dataSource) { + hasher.update(chunk); +} + +const hash = hasher.finalize(); +``` \ No newline at end of file diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 880abace75..e3c346b11a 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -371,3 +371,17 @@ export function blake3Hex(input: Uint8Array): string { } return hex.join(""); } + +export function createHasher(): Blake3Hasher { + return new Blake3Hasher(); +} + +export function update(hasher: Blake3Hasher, input: Uint8Array): void { + hasher.update(input); +} + +export function finalize(hasher: Blake3Hasher): Uint8Array { + const output = new Uint8Array(32); + hasher.finalize(output); + return output; +} diff --git a/packages/blake3-wasm/assembly/tsconfig.json b/packages/blake3-wasm/assembly/tsconfig.json index 33daff5dac..8131d68a0a 100644 --- a/packages/blake3-wasm/assembly/tsconfig.json +++ b/packages/blake3-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - 
"extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 4085980507..f7350c6941 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -28,6 +28,6 @@ } }, "devDependencies": { - "assemblyscript": "^0.27.36" + "assemblyscript": "0.27.36" } } diff --git a/packages/blake3-wasm/pnpm-lock.yaml b/packages/blake3-wasm/pnpm-lock.yaml index f96d25e4f8..9d7ac0a92a 100644 --- a/packages/blake3-wasm/pnpm-lock.yaml +++ b/packages/blake3-wasm/pnpm-lock.yaml @@ -9,13 +9,13 @@ importers: .: devDependencies: assemblyscript: - specifier: ^0.27.36 - version: 0.27.37 + specifier: 0.27.36 + version: 0.27.36 packages: - assemblyscript@0.27.37: - resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + assemblyscript@0.27.36: + resolution: {integrity: sha512-1qX2zf6p7l/mNYv8r21jC/Yft7kX7XKR3xUHw41zvV4xad5lyC8w7jZiwZBGoy64VKZLc+bTDJDWi8Kb70YrHA==} engines: {node: '>=18', npm: '>=10'} hasBin: true @@ -28,7 +28,7 @@ packages: snapshots: - assemblyscript@0.27.37: + assemblyscript@0.27.36: dependencies: binaryen: 116.0.0-nightly.20240114 long: 5.3.2 diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index cf72dafbd3..8f14fa8da3 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -11,15 +11,14 @@ import { nextMatch } from '@huggingface/gearhash-wasm'; const data = new Uint8Array(1000000); // Example: 1MB of data // ... fill data with your content ... 
-// Search for a pattern with a specific mask -const mask = 0x0000d90003530000n; // Example mask as a BigInt +const mask = 0x0000d90003530000n; // Example mask as a BigInt, more 0s => bigger chunks const match = nextMatch(data, mask); const allMatches = nextMatches(data, mask).matches; ``` The `nextMatch` function takes two parameters: - `data`: A Uint8Array containing the data to search through -- `mask`: A BigInt representing the pattern mask to search for +- `mask`: A BigInt, the bigger it is the bigger the chunks are The function returns an object with the `position` (i32) and `hash` (u64) properties diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index ecc3ef79ff..60d0ae0cd9 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -28,6 +28,6 @@ } }, "devDependencies": { - "assemblyscript": "^0.27.36" + "assemblyscript": "0.27.36" } } diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index cf8533a3b9..9d7ac0a92a 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -9,7 +9,7 @@ importers: .: devDependencies: assemblyscript: - specifier: ^0.27.36 + specifier: 0.27.36 version: 0.27.36 packages: diff --git a/packages/xetchunk-wasm/README.md b/packages/xetchunk-wasm/README.md new file mode 100644 index 0000000000..3ab3d656a1 --- /dev/null +++ b/packages/xetchunk-wasm/README.md @@ -0,0 +1,27 @@ +JS and WASM implementations of https://github.com/huggingface/xet-core/blob/main/deduplication/src/chunking.rs + +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. + +## Usage + +```javascript +import { createChunker, getChunks, nextBlock, finalize } from '@huggingface/xetchunk-wasm'; + +const TARGET_CHUNK_SIZE = Math.pow(2, 12); + +// Create a Uint8Array of data to search through +const data = new Uint8Array(1000000); // Example: 1MB of data +// ... fill data with your content ... 
+ +const chunks = getChunks(data, TARGET_CHUNK_SIZE); + +// Alternative, in case your data is streaming +const chunker = createChunker(TARGET_CHUNK_SIZE); + +for await (const data of source) { + const chunks = nextBlock(chunker, data); + console.log(chunks); +} + +console.log("last chunk", finalize(chunker)); +``` diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index d889a1ba0a..b8b8b62702 100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1 +1 @@ -export * from "./xet-chunker"; +export { createChunker, finalize, nextBlock } from "./xet-chunker"; diff --git a/packages/xetchunk-wasm/assembly/tsconfig.json b/packages/xetchunk-wasm/assembly/tsconfig.json index 33daff5dac..8131d68a0a 100644 --- a/packages/xetchunk-wasm/assembly/tsconfig.json +++ b/packages/xetchunk-wasm/assembly/tsconfig.json @@ -1,4 +1,4 @@ { - "extends": "../node_modules/.pnpm/assemblyscript@0.27.37/node_modules/assemblyscript/std/assembly.json", + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", "include": ["./**/*.ts"] } diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 2145684eab..41757ec929 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,55 +1,53 @@ import { nextMatch } from "@huggingface/gearhash-wasm/assembly"; -import { Blake3Hasher } from "@huggingface/blake3-wasm/assembly"; +import { blake3 } from "@huggingface/blake3-wasm/assembly"; // Constants -const TARGET_CHUNK_SIZE: usize = 64 * 1024; // 64KB -const MINIMUM_CHUNK_DIVISOR: usize = 8; -const MAXIMUM_CHUNK_MULTIPLIER: usize = 2; -const HASH_WINDOW_SIZE: usize = 64; +const TARGET_CHUNK_SIZE: i32 = 64 * 1024; // 64KB +const MINIMUM_CHUNK_DIVISOR: i32 = 8; +const MAXIMUM_CHUNK_MULTIPLIER: i32 = 2; +const HASH_WINDOW_SIZE: i32 = 64; export class Chunk { 
hash: Uint8Array; data: Uint8Array; - - constructor(hash: Uint8Array, data: Uint8Array) { - this.hash = hash; - this.data = data; - } } // Type for the next() method return value -export class NextResult { +class NextResult { chunk: Chunk | null; - bytesConsumed: usize; + bytesConsumed: i32; - constructor(chunk: Chunk | null, bytesConsumed: usize) { + constructor(chunk: Chunk | null, bytesConsumed: i32) { this.chunk = chunk; this.bytesConsumed = bytesConsumed; } } -export class XetChunker { - private minimumChunk: usize; - private maximumChunk: usize; +class XetChunker { + private minimumChunk: i32; + private maximumChunk: i32; private mask: u64; private chunkBuf: Uint8Array; - private curChunkLen: usize; + private curChunkLen: i32; private hash: u64; - constructor(targetChunkSize: usize = TARGET_CHUNK_SIZE) { + constructor(targetChunkSize: i32 = TARGET_CHUNK_SIZE) { // Validate target chunk size is a power of 2 + assert(targetChunkSize > 0, "Target chunk size must be greater than 0"); assert((targetChunkSize & (targetChunkSize - 1)) == 0, "Target chunk size must be a power of 2"); assert(targetChunkSize > HASH_WINDOW_SIZE, "Target chunk size must be greater than hash window size"); - assert(targetChunkSize < u32.MAX_VALUE, "Target chunk size must be less than u32.MAX_VALUE"); + assert(targetChunkSize < i32.MAX_VALUE, "Target chunk size must be less than i32.MAX_VALUE"); let mask = (targetChunkSize - 1) as u64; // Shift mask left by leading zeros count mask = mask << (64 - clz(mask)); + const maximumChunk = targetChunkSize * MAXIMUM_CHUNK_MULTIPLIER; + this.minimumChunk = targetChunkSize / MINIMUM_CHUNK_DIVISOR; - this.maximumChunk = targetChunkSize * MAXIMUM_CHUNK_MULTIPLIER; + this.maximumChunk = maximumChunk; this.mask = mask; - this.chunkBuf = new Uint8Array(this.maximumChunk); + this.chunkBuf = new Uint8Array(maximumChunk); this.curChunkLen = 0; this.hash = 0; } @@ -57,7 +55,7 @@ export class XetChunker { next(data: Uint8Array, isFinal: boolean): NextResult { 
const nBytes = data.length; let createChunk = false; - let consumeLen: usize = 0; + let consumeLen: i32 = 0; if (nBytes != 0) { // Skip minimum chunk size @@ -70,7 +68,7 @@ export class XetChunker { // Calculate read end const readEnd = min(nBytes, consumeLen + this.maximumChunk - this.curChunkLen); - let bytesToNextBoundary: usize; + let bytesToNextBoundary: i32; const matchResult = nextMatch(data.subarray(consumeLen, readEnd), this.mask, this.hash); if (matchResult.position != -1) { @@ -97,7 +95,10 @@ export class XetChunker { if (createChunk || (isFinal && this.curChunkLen > 0)) { const chunkData = this.chunkBuf.subarray(0, this.curChunkLen); - const chunk = new Chunk(computeDataHash(chunkData), chunkData); + const chunk: Chunk = { + data: chunkData, + hash: blake3(chunkData), + }; this.curChunkLen = 0; this.hash = 0; return new NextResult(chunk, consumeLen); @@ -108,12 +109,12 @@ export class XetChunker { nextBlock(data: Uint8Array, isFinal: boolean): Chunk[] { const chunks: Chunk[] = []; - let pos: usize = 0; + let pos: i32 = 0; while (pos < data.length) { const result = this.next(data.subarray(pos), isFinal); if (result.chunk) { - chunks.push(result.chunk); + chunks.push(result.chunk!); } pos += result.bytesConsumed; } @@ -126,14 +127,21 @@ export class XetChunker { } } -function computeDataHash(data: Uint8Array): Uint8Array { - const hasher = new Blake3Hasher(); - hasher.update(data); - const hash = new Uint8Array(32); - hasher.finalize(hash); - return hash; +export function createChunker(targetChunkSize: i32 = TARGET_CHUNK_SIZE): XetChunker { + const chunker = new XetChunker(targetChunkSize); + + return chunker; +} + +export function nextBlock(chunker: XetChunker, data: Uint8Array): Chunk[] { + return chunker.nextBlock(data, false); +} + +export function finalize(chunker: XetChunker): Chunk | null { + return chunker.finish(); } -export function createXetChunker(targetChunkSize: usize = TARGET_CHUNK_SIZE): XetChunker { - return new 
XetChunker(targetChunkSize); +export function getChunks(data: Uint8Array, targetChunkSize: i32 = TARGET_CHUNK_SIZE): Chunk[] { + const chunker = createChunker(targetChunkSize); + return chunker.nextBlock(data, true); } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 1842aa1c47..3bc5540ec6 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -34,6 +34,6 @@ } }, "devDependencies": { - "assemblyscript": "^0.27.36" + "assemblyscript": "0.27.36" } } diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml index 32bdab0b41..4e5f34eb68 100644 --- a/packages/xetchunk-wasm/pnpm-lock.yaml +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -16,13 +16,13 @@ importers: version: link:../gearhash-wasm devDependencies: assemblyscript: - specifier: ^0.27.36 - version: 0.27.37 + specifier: 0.27.36 + version: 0.27.36 packages: - assemblyscript@0.27.37: - resolution: {integrity: sha512-YtY5k3PiV3SyUQ6gRlR2OCn8dcVRwkpiG/k2T5buoL2ymH/Z/YbaYWbk/f9mO2HTgEtGWjPiAQrIuvA7G/63Gg==} + assemblyscript@0.27.36: + resolution: {integrity: sha512-1qX2zf6p7l/mNYv8r21jC/Yft7kX7XKR3xUHw41zvV4xad5lyC8w7jZiwZBGoy64VKZLc+bTDJDWi8Kb70YrHA==} engines: {node: '>=18', npm: '>=10'} hasBin: true @@ -35,7 +35,7 @@ packages: snapshots: - assemblyscript@0.27.37: + assemblyscript@0.27.36: dependencies: binaryen: 116.0.0-nightly.20240114 long: 5.3.2 diff --git a/packages/xetchunk-wasm/tests/index.js b/packages/xetchunk-wasm/tests/index.js new file mode 100644 index 0000000000..f1a485f785 --- /dev/null +++ b/packages/xetchunk-wasm/tests/index.js @@ -0,0 +1,17 @@ +import { createChunker, finalize, nextBlock } from "../build/debug.js"; + +const chunker = createChunker(Math.pow(2, 12)); + +const data = new Uint8Array(100_000); + +for (let i = 0; i < data.length; i++) { + data[i] = i; +} + +const chunks = nextBlock(chunker, data); + +console.log("chunks", chunks); + +const lastChunk = finalize(chunker); + 
+console.log("lastChunk", lastChunk); From f47f07d29474008ffa0f1a843d223252f31057c1 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 20:58:15 +0200 Subject: [PATCH 22/44] only output length --- packages/xetchunk-wasm/assembly/xet-chunker.ts | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 41757ec929..0abfebbfec 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -9,7 +9,7 @@ const HASH_WINDOW_SIZE: i32 = 64; export class Chunk { hash: Uint8Array; - data: Uint8Array; + length: i32; } // Type for the next() method return value @@ -96,7 +96,7 @@ class XetChunker { if (createChunk || (isFinal && this.curChunkLen > 0)) { const chunkData = this.chunkBuf.subarray(0, this.curChunkLen); const chunk: Chunk = { - data: chunkData, + length: chunkData.length, hash: blake3(chunkData), }; this.curChunkLen = 0; @@ -114,6 +114,7 @@ class XetChunker { while (pos < data.length) { const result = this.next(data.subarray(pos), isFinal); if (result.chunk) { + // eslint-disable-next-line @typescript-eslint/no-non-null-assertion chunks.push(result.chunk!); } pos += result.bytesConsumed; From aa44db2a7fad8c7e8a1f4fc53a8d0a4d99f130d9 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 21:24:34 +0200 Subject: [PATCH 23/44] add vendor test for gearhash --- packages/blake3-wasm/LICENSE_A2 | 202 ++++++++++ packages/blake3-wasm/LICENSE_A2LLVM | 202 ++++++++++ packages/blake3-wasm/LICENSE_CC0 | 121 ++++++ packages/gearhash-wasm/tests/index.js | 360 ++++++++++++------ packages/gearhash-wasm/vendor/.gitignore | 4 + packages/gearhash-wasm/vendor/Cargo.toml | 25 ++ packages/gearhash-wasm/vendor/LICENSE-APACHE | 202 ++++++++++ packages/gearhash-wasm/vendor/LICENSE-MIT | 21 + packages/gearhash-wasm/vendor/README.md | 60 +++ packages/gearhash-wasm/vendor/src/lib.rs | 103 +++++ 
packages/gearhash-wasm/vendor/src/scalar.rs | 14 + packages/gearhash-wasm/vendor/src/table.rs | 262 +++++++++++++ .../gearhash-wasm/vendor/test_gearhash.rs | 106 ++++++ 13 files changed, 1569 insertions(+), 113 deletions(-) create mode 100644 packages/blake3-wasm/LICENSE_A2 create mode 100644 packages/blake3-wasm/LICENSE_A2LLVM create mode 100644 packages/blake3-wasm/LICENSE_CC0 create mode 100644 packages/gearhash-wasm/vendor/.gitignore create mode 100644 packages/gearhash-wasm/vendor/Cargo.toml create mode 100644 packages/gearhash-wasm/vendor/LICENSE-APACHE create mode 100644 packages/gearhash-wasm/vendor/LICENSE-MIT create mode 100644 packages/gearhash-wasm/vendor/README.md create mode 100644 packages/gearhash-wasm/vendor/src/lib.rs create mode 100644 packages/gearhash-wasm/vendor/src/scalar.rs create mode 100644 packages/gearhash-wasm/vendor/src/table.rs create mode 100644 packages/gearhash-wasm/vendor/test_gearhash.rs diff --git a/packages/blake3-wasm/LICENSE_A2 b/packages/blake3-wasm/LICENSE_A2 new file mode 100644 index 0000000000..2cdf43fa3e --- /dev/null +++ b/packages/blake3-wasm/LICENSE_A2 @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. 
For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2019 Jack O'Connor and Samuel Neves + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/packages/blake3-wasm/LICENSE_A2LLVM b/packages/blake3-wasm/LICENSE_A2LLVM new file mode 100644 index 0000000000..2cdf43fa3e --- /dev/null +++ b/packages/blake3-wasm/LICENSE_A2LLVM @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. 
+ + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. + + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. 
For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. 
If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. 
You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. 
Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. + + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. 
+ + Copyright 2019 Jack O'Connor and Samuel Neves + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. \ No newline at end of file diff --git a/packages/blake3-wasm/LICENSE_CC0 b/packages/blake3-wasm/LICENSE_CC0 new file mode 100644 index 0000000000..1625c17936 --- /dev/null +++ b/packages/blake3-wasm/LICENSE_CC0 @@ -0,0 +1,121 @@ +Creative Commons Legal Code + +CC0 1.0 Universal + + CREATIVE COMMONS CORPORATION IS NOT A LAW FIRM AND DOES NOT PROVIDE + LEGAL SERVICES. DISTRIBUTION OF THIS DOCUMENT DOES NOT CREATE AN + ATTORNEY-CLIENT RELATIONSHIP. CREATIVE COMMONS PROVIDES THIS + INFORMATION ON AN "AS-IS" BASIS. CREATIVE COMMONS MAKES NO WARRANTIES + REGARDING THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS + PROVIDED HEREUNDER, AND DISCLAIMS LIABILITY FOR DAMAGES RESULTING FROM + THE USE OF THIS DOCUMENT OR THE INFORMATION OR WORKS PROVIDED + HEREUNDER. + +Statement of Purpose + +The laws of most jurisdictions throughout the world automatically confer +exclusive Copyright and Related Rights (defined below) upon the creator +and subsequent owner(s) (each and all, an "owner") of an original work of +authorship and/or a database (each, a "Work"). 
+ +Certain owners wish to permanently relinquish those rights to a Work for +the purpose of contributing to a commons of creative, cultural and +scientific works ("Commons") that the public can reliably and without fear +of later claims of infringement build upon, modify, incorporate in other +works, reuse and redistribute as freely as possible in any form whatsoever +and for any purposes, including without limitation commercial purposes. +These owners may contribute to the Commons to promote the ideal of a free +culture and the further production of creative, cultural and scientific +works, or to gain reputation or greater distribution for their Work in +part through the use and efforts of others. + +For these and/or other purposes and motivations, and without any +expectation of additional consideration or compensation, the person +associating CC0 with a Work (the "Affirmer"), to the extent that he or she +is an owner of Copyright and Related Rights in the Work, voluntarily +elects to apply CC0 to the Work and publicly distribute the Work under its +terms, with knowledge of his or her Copyright and Related Rights in the +Work and the meaning and intended legal effect of CC0 on those rights. + +1. Copyright and Related Rights. A Work made available under CC0 may be +protected by copyright and related or neighboring rights ("Copyright and +Related Rights"). Copyright and Related Rights include, but are not +limited to, the following: + + i. the right to reproduce, adapt, distribute, perform, display, + communicate, and translate a Work; + ii. moral rights retained by the original author(s) and/or performer(s); +iii. publicity and privacy rights pertaining to a person's image or + likeness depicted in a Work; + iv. rights protecting against unfair competition in regards to a Work, + subject to the limitations in paragraph 4(a), below; + v. rights protecting the extraction, dissemination, use and reuse of data + in a Work; + vi. 
database rights (such as those arising under Directive 96/9/EC of the + European Parliament and of the Council of 11 March 1996 on the legal + protection of databases, and under any national implementation + thereof, including any amended or successor version of such + directive); and +vii. other similar, equivalent or corresponding rights throughout the + world based on applicable law or treaty, and any national + implementations thereof. + +2. Waiver. To the greatest extent permitted by, but not in contravention +of, applicable law, Affirmer hereby overtly, fully, permanently, +irrevocably and unconditionally waives, abandons, and surrenders all of +Affirmer's Copyright and Related Rights and associated claims and causes +of action, whether now known or unknown (including existing as well as +future claims and causes of action), in the Work (i) in all territories +worldwide, (ii) for the maximum duration provided by applicable law or +treaty (including future time extensions), (iii) in any current or future +medium and for any number of copies, and (iv) for any purpose whatsoever, +including without limitation commercial, advertising or promotional +purposes (the "Waiver"). Affirmer makes the Waiver for the benefit of each +member of the public at large and to the detriment of Affirmer's heirs and +successors, fully intending that such Waiver shall not be subject to +revocation, rescission, cancellation, termination, or any other legal or +equitable action to disrupt the quiet enjoyment of the Work by the public +as contemplated by Affirmer's express Statement of Purpose. + +3. Public License Fallback. Should any part of the Waiver for any reason +be judged legally invalid or ineffective under applicable law, then the +Waiver shall be preserved to the maximum extent permitted taking into +account Affirmer's express Statement of Purpose. 
In addition, to the +extent the Waiver is so judged Affirmer hereby grants to each affected +person a royalty-free, non transferable, non sublicensable, non exclusive, +irrevocable and unconditional license to exercise Affirmer's Copyright and +Related Rights in the Work (i) in all territories worldwide, (ii) for the +maximum duration provided by applicable law or treaty (including future +time extensions), (iii) in any current or future medium and for any number +of copies, and (iv) for any purpose whatsoever, including without +limitation commercial, advertising or promotional purposes (the +"License"). The License shall be deemed effective as of the date CC0 was +applied by Affirmer to the Work. Should any part of the License for any +reason be judged legally invalid or ineffective under applicable law, such +partial invalidity or ineffectiveness shall not invalidate the remainder +of the License, and in such case Affirmer hereby affirms that he or she +will not (i) exercise any of his or her remaining Copyright and Related +Rights in the Work or (ii) assert any associated claims and causes of +action with respect to the Work, in either case contrary to Affirmer's +express Statement of Purpose. + +4. Limitations and Disclaimers. + + a. No trademark or patent rights held by Affirmer are waived, abandoned, + surrendered, licensed or otherwise affected by this document. + b. Affirmer offers the Work as-is and makes no representations or + warranties of any kind concerning the Work, express, implied, + statutory or otherwise, including without limitation warranties of + title, merchantability, fitness for a particular purpose, non + infringement, or the absence of latent or other defects, accuracy, or + the present or absence of errors, whether or not discoverable, all to + the greatest extent permissible under applicable law. + c. 
Affirmer disclaims responsibility for clearing rights of other persons + that may apply to the Work or any use thereof, including without + limitation any person's Copyright and Related Rights in the Work. + Further, Affirmer disclaims responsibility for obtaining any necessary + consents, permissions or other rights required for any use of the + Work. + d. Affirmer understands and acknowledges that Creative Commons is not a + party to this document and has no duty or obligation with respect to + this CC0 or use of the Work. \ No newline at end of file diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index d3d220da00..a5ec712bac 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,117 +1,251 @@ -import assert from "assert"; -import { nextMatch, nextMatches } from "../build/debug.js"; - -// Simple seeded random number generator -function seededRandom(seed) { - return function () { - seed = (seed * 16807) % 2147483647; - return (seed - 1) / 2147483646; - }; +import { nextMatch } from "../build/debug.js"; + +// Simple deterministic RNG for reproducible results (same as Rust version) +class SimpleRng { + constructor(seed) { + this.state = BigInt(seed); + } + + nextU64() { + // Simple xorshift algorithm (same as Rust version) + this.state ^= this.state << 13n; + this.state ^= this.state >> 7n; + this.state ^= this.state << 17n; + return this.state; + } + + fillBytes(dest) { + for (let i = 0; i < dest.length; i += 8) { + const value = this.nextU64(); + for (let j = 0; j < 8 && i + j < dest.length; j++) { + dest[i + j] = Number((value >> BigInt(j * 8)) & 0xffn); + } + } + } +} + +const BENCH_INPUT_SEED = 0xbecd17f; +const BENCH_MASK = 0x0000d90003530000n; +const INPUT_SIZE = 100_000; + +function generateTestInput() { + const bytes = new Uint8Array(INPUT_SIZE); + const rng = new SimpleRng(BENCH_INPUT_SEED); + rng.fillBytes(bytes); + return bytes; +} + +function testGearhash() { + 
console.log(`Generating test input with seed: 0x${BENCH_INPUT_SEED.toString(16)}`); + const inputBuf = generateTestInput(); + console.log(`Input size: ${inputBuf.length} bytes`); + console.log(`Mask: 0x${BENCH_MASK.toString(16)}`); + + let offset = 0; + let chunkCount = 0; + let totalProcessed = 0; + let hash = 0n; + + console.log("\nProcessing chunks:"); + console.log("Chunk | Offset | Size | Hash"); + console.log("------|--------|------|------------------"); + + while (offset < inputBuf.length) { + const chunkStart = offset; + + const result = nextMatch(inputBuf.subarray(offset), BENCH_MASK, hash); + if (result.matchSize > 0) { + offset += result.matchSize; + totalProcessed += result.matchSize; + chunkCount += 1; + hash = result.hash; + + console.log( + `${chunkCount.toString().padStart(5)} | ${chunkStart.toString().padStart(6)} | ${result.matchSize + .toString() + .padStart(4)} | 0x${hash.toString(16).padStart(16, "0")}` + ); + } else { + // No more matches, process remaining bytes + const remaining = inputBuf.length - offset; + // Update hash for remaining bytes + for (let i = 0; i < remaining; i++) { + hash = ((hash << 1n) + BigInt(inputBuf[offset + i])) & 0xffffffffffffffffn; + } + totalProcessed += remaining; + chunkCount += 1; + + console.log( + `${chunkCount.toString().padStart(5)} | ${offset.toString().padStart(6)} | ${remaining + .toString() + .padStart(4)} | 0x${hash.toString(16).padStart(16, "0")} (final)` + ); + break; + } + } + + console.log("\nSummary:"); + console.log(`Total chunks: ${chunkCount}`); + console.log(`Total bytes processed: ${totalProcessed}`); + console.log(`Average chunk size: ${(totalProcessed / chunkCount).toFixed(1)} bytes`); + + // Print first few bytes of each chunk for verification + console.log("\nFirst 16 bytes of each chunk:"); + offset = 0; + chunkCount = 0; + hash = 0n; + + while (offset < inputBuf.length) { + const result = nextMatch(inputBuf.subarray(offset), BENCH_MASK, hash); + if (result.matchSize > 0) { + const chunk 
= inputBuf.subarray(offset, offset + result.matchSize); + const hexBytes = Array.from(chunk.slice(0, Math.min(16, chunk.length))) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + console.log(`Chunk ${chunkCount + 1}: ${hexBytes}`); + offset += result.matchSize; + chunkCount += 1; + hash = result.hash; + } else { + const chunk = inputBuf.subarray(offset); + const hexBytes = Array.from(chunk.slice(0, Math.min(16, chunk.length))) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + console.log(`Chunk ${chunkCount + 1}: ${hexBytes} (final)`); + break; + } + } + + return { chunkCount, totalProcessed, averageChunkSize: totalProcessed / chunkCount }; +} + +// Parse the expected results from Rust +function parseExpectedResults(resultData) { + const lines = resultData.trim().split("\n"); + const results = []; + + for (const line of lines) { + const match = line.match(/\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(0x[a-f0-9]+)/); + if (match) { + results.push({ + chunk: parseInt(match[1]), + offset: parseInt(match[2]), + size: parseInt(match[3]), + hash: match[4], + }); + } + } + + return results; } -// Create seeded random data -const seed = 12345; // Fixed seed for deterministic results -const random = seededRandom(seed); -const randomData = new Uint8Array(150_000).map(() => Math.floor(random() * 256)); - -// Test with a known mask -assert.deepStrictEqual(nextMatch(randomData, 0x0000d90003530000n), { position: 459, hash: 9546224108073667431n }); -assert.deepStrictEqual(nextMatch(randomData.subarray(459), 0x0000d90003530000n), { - position: 3658, - hash: 4043712133052525799n, -}); - -assert.deepStrictEqual(nextMatches(randomData, 0x0000d90003530000n), { - remaining: 1206, - hash: 18262966296195680063n, - matches: [ - { position: 459, hash: 9546224108073667431n }, - { position: 3658, hash: 4043712133052525799n }, - { position: 2013, hash: 6111702085179831561n }, - { position: 1593, hash: 12901166541873917249n }, - { position: 1566, hash: 
7692186462913612151n }, - { position: 211, hash: 16543980755458487441n }, - { position: 1778, hash: 15644384556715661587n }, - { position: 566, hash: 9793366463237592247n }, - { position: 2079, hash: 11221321116171663064n }, - { position: 2940, hash: 1564726223525919786n }, - { position: 809, hash: 15395839328876515337n }, - { position: 946, hash: 10585747199093122759n }, - { position: 854, hash: 4479393852251501569n }, - { position: 436, hash: 15702966577303948694n }, - { position: 2165, hash: 17148900940125069205n }, - { position: 273, hash: 11505890591385615424n }, - { position: 1459, hash: 10774060112464860369n }, - { position: 158, hash: 2233823235057951370n }, - { position: 7, hash: 1983310208686139647n }, - { position: 1926, hash: 4499661659570185271n }, - { position: 1529, hash: 16090517590946392505n }, - { position: 1751, hash: 12536054222087023458n }, - { position: 1222, hash: 334146166487300408n }, - { position: 2230, hash: 6981431015531396608n }, - { position: 826, hash: 11877997991061156988n }, - { position: 33, hash: 8454422284689001989n }, - { position: 1731, hash: 15095819886766624527n }, - { position: 8842, hash: 6362744947164356842n }, - { position: 928, hash: 3627691864743766239n }, - { position: 684, hash: 1137480049753900759n }, - { position: 5301, hash: 10541554813326859395n }, - { position: 2546, hash: 14704288147532701373n }, - { position: 11856, hash: 9653226176528805511n }, - { position: 650, hash: 12714262162290274678n }, - { position: 1346, hash: 2525679969999819421n }, - { position: 353, hash: 2532749299807420736n }, - { position: 1091, hash: 693561665209300041n }, - { position: 729, hash: 11014435606385442344n }, - { position: 1204, hash: 10083883741570968570n }, - { position: 1671, hash: 12308901096302322810n }, - { position: 1362, hash: 13399339535394154305n }, - { position: 1858, hash: 792389713896955383n }, - { position: 2248, hash: 15568664728418446816n }, - { position: 1790, hash: 4328805983976714464n }, - { position: 634, hash: 
722305044694988273n }, - { position: 741, hash: 17978970776495983968n }, - { position: 901, hash: 5911861036065769110n }, - { position: 302, hash: 1334790489764850513n }, - { position: 1435, hash: 16174119877357924758n }, - { position: 61, hash: 12103430617785210167n }, - { position: 1, hash: 35334639850667n }, - { position: 2074, hash: 7449519750512442798n }, - { position: 2061, hash: 1805950971475184864n }, - { position: 1612, hash: 5837797879339327135n }, - { position: 3281, hash: 6649572008787195357n }, - { position: 39, hash: 16137242368496690753n }, - { position: 263, hash: 8133543763164586431n }, - { position: 2333, hash: 17019949823094703325n }, - { position: 1160, hash: 8949503946391874147n }, - { position: 641, hash: 18344573417262448121n }, - { position: 2588, hash: 13345294745157777411n }, - { position: 3116, hash: 7832639641689314418n }, - { position: 4671, hash: 13762161036402935807n }, - { position: 276, hash: 10924644382434953404n }, - { position: 4430, hash: 9045519457622973922n }, - { position: 32, hash: 4188636638659752674n }, - { position: 2470, hash: 1184167847892138852n }, - { position: 694, hash: 11699508361075635892n }, - { position: 1703, hash: 9012268790677532920n }, - { position: 47, hash: 6528251874505412319n }, - { position: 2672, hash: 8484789019946020371n }, - { position: 202, hash: 1365160724288031760n }, - { position: 467, hash: 10426152000837661087n }, - { position: 496, hash: 3605417399306471847n }, - { position: 3777, hash: 8410473338876477323n }, - { position: 80, hash: 3693273711429567121n }, - { position: 813, hash: 9224216742837123228n }, - { position: 3115, hash: 5150752707627454542n }, - { position: 806, hash: 8797260981186887018n }, - { position: 4915, hash: 1483374079741560715n }, - { position: 2118, hash: 1742900153494554703n }, - { position: 1515, hash: 4635371751468227093n }, - { position: 2393, hash: 15282968615371427111n }, - { position: 4331, hash: 4659818917792066036n }, - { position: 1188, hash: 
3862441883651577693n }, - { position: 2663, hash: 8524789558855117254n }, - ], -}); +const resultData = ` + 1 | 0 | 5919 | 0x17c402cb182c5718 + 2 | 5919 | 265 | 0xe739063654888081 + 3 | 6184 | 4855 | 0x38a82261e80810f9 + 4 | 11039 | 1029 | 0x803f24c9ac20ddd5 + 5 | 12068 | 583 | 0xb4b724e26824ace3 + 6 | 12651 | 358 | 0x11bd22180c0c5ac5 + 7 | 13009 | 3078 | 0x810a04be24846ffc + 8 | 16087 | 1207 | 0x5f940641d088dada + 9 | 17294 | 251 | 0xf09502d5f4acfb4e + 10 | 17545 | 3053 | 0xf0b120d014ace72d + 11 | 20598 | 9120 | 0xa458064aa82403e5 + 12 | 29718 | 3288 | 0x9ccf04ecc000996b + 13 | 33006 | 590 | 0xd4ba00dd9408b6b5 + 14 | 33596 | 1401 | 0xd42a2000a4a46d11 + 15 | 34997 | 2573 | 0xc914022f9c28e722 + 16 | 37570 | 1300 | 0xd63b0401a484c0bc + 17 | 38870 | 98 | 0x996f0499402c1e96 + 18 | 38968 | 2802 | 0xf43406dfb42c9324 + 19 | 41770 | 3237 | 0x1bd026252c0ccbe3 + 20 | 45007 | 7368 | 0x7da400e8e0aca934 + 21 | 52375 | 439 | 0xcd9b208f38201fa7 + 22 | 52814 | 1477 | 0x9497226484a0a015 + 23 | 54291 | 7158 | 0x5a3100fa9888dfe5 + 24 | 61449 | 2168 | 0x21ed20bbf008a4ef + 25 | 63617 | 2475 | 0x7b0522392480392d + 26 | 66092 | 26 | 0xdfe6048a9c0c125f + 27 | 66118 | 7548 | 0xf8a72278802c1523 + 28 | 73666 | 7826 | 0x5997242ba00cb3fd + 29 | 81492 | 215 | 0x489e26bd7c08ec4c + 30 | 81707 | 760 | 0x84d526f1542066b2 + 31 | 82467 | 1929 | 0x085d02a31024d324 + 32 | 84396 | 3947 | 0x8cc4240eb8a8b8e3 + 33 | 88343 | 1511 | 0x98b1204ccc001231 + 34 | 89854 | 2895 | 0x35402430a8a8d1f1 + 35 | 92749 | 7025 | 0x52bd0269e8084b97 + 36 | 99774 | 226 | 0xd86ff8f143fe10b4 `; console.log("ok"); + +// Run the test and capture output for comparison +console.log("\n" + "=".repeat(50)); +console.log("RUNNING GEARHASH TEST"); +console.log("=".repeat(50)); + +// Capture console output for comparison +const originalLog = console.log; +let capturedOutput = []; + +console.log = function (...args) { + capturedOutput.push(args.join(" ")); + originalLog.apply(console, args); +}; + +// Run the test +const testResults = 
testGearhash(); + +// Restore console.log +console.log = originalLog; + +// Extract the chunk data from captured output +const chunkLines = capturedOutput.filter((line) => line.match(/^\s*\d+\s*\|\s*\d+\s*\|\s*\d+\s*\|\s*0x[a-f0-9]+/)); + +// Format the captured results for comparison +const capturedResultData = chunkLines.join("\n"); + +console.log("\n" + "=".repeat(50)); +console.log("COMPARISON RESULTS"); +console.log("=".repeat(50)); + +// Compare with expected results +const expectedResults = parseExpectedResults(resultData); +const actualResults = parseExpectedResults(capturedResultData); + +let matches = 0; +let totalChunks = Math.min(actualResults.length, expectedResults.length); + +console.log(`Comparing ${totalChunks} chunks...`); + +for (let i = 0; i < totalChunks; i++) { + const actual = actualResults[i]; + const expected = expectedResults[i]; + + if (actual.offset === expected.offset && actual.size === expected.size && actual.hash === expected.hash) { + matches++; + } else { + console.log(`❌ Mismatch at chunk ${i + 1}:`); + console.log(` Expected: offset=${expected.offset}, size=${expected.size}, hash=${expected.hash}`); + console.log(` Actual: offset=${actual.offset}, size=${actual.size}, hash=${actual.hash}`); + } +} + +console.log(`\n✅ Results: ${matches}/${totalChunks} chunks match exactly`); +console.log(`📊 Accuracy: ${((matches / totalChunks) * 100).toFixed(2)}%`); + +if (matches === totalChunks) { + console.log("🎉 All chunks match! AssemblyScript implementation is correct."); +} else { + console.log("⚠ïļ Some chunks don't match. 
Check the implementation."); +} + +// Test summary +console.log("\n" + "=".repeat(50)); +console.log("TEST SUMMARY"); +console.log("=".repeat(50)); +console.log(`Total chunks processed: ${testResults.chunkCount}`); +console.log(`Total bytes processed: ${testResults.totalProcessed}`); +console.log(`Average chunk size: ${testResults.averageChunkSize.toFixed(1)} bytes`); +console.log(`Matching chunks: ${matches}/${totalChunks}`); +console.log(`Accuracy: ${((matches / totalChunks) * 100).toFixed(2)}%`); diff --git a/packages/gearhash-wasm/vendor/.gitignore b/packages/gearhash-wasm/vendor/.gitignore new file mode 100644 index 0000000000..293dd90a84 --- /dev/null +++ b/packages/gearhash-wasm/vendor/.gitignore @@ -0,0 +1,4 @@ +/target +**/*.rs.bk +Cargo.lock +.idea \ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/Cargo.toml b/packages/gearhash-wasm/vendor/Cargo.toml new file mode 100644 index 0000000000..e425f8932a --- /dev/null +++ b/packages/gearhash-wasm/vendor/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "gearhash" +version = "0.1.3" +edition = "2018" +license = "MIT OR Apache-2.0" +authors = ["Sam Rijs "] +description = "Fast, SIMD-accelerated hash function for content-defined chunking" +repository = "https://github.com/srijs/rust-gearhash" +readme = "README.md" +keywords = ["hash", "gear", "fast", "cdc", "chunking"] + +[features] +bench = [] + +[dependencies] +cfg-if = "0.1.10" + +[dev-dependencies] +lazy_static = "1.4.0" +quickcheck = "0.9.0" +rand = "0.7.2" + +[[bin]] +name = "test_gearhash" +path = "test_gearhash.rs" \ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/LICENSE-APACHE b/packages/gearhash-wasm/vendor/LICENSE-APACHE new file mode 100644 index 0000000000..2cdf43fa3e --- /dev/null +++ b/packages/gearhash-wasm/vendor/LICENSE-APACHE @@ -0,0 +1,202 @@ + + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. 
Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. + + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright 2019 Jack O'Connor and Samuel Neves + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. 
\ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/LICENSE-MIT b/packages/gearhash-wasm/vendor/LICENSE-MIT new file mode 100644 index 0000000000..487d7160eb --- /dev/null +++ b/packages/gearhash-wasm/vendor/LICENSE-MIT @@ -0,0 +1,21 @@ +MIT License + +Copyright (c) 2019 Sam Rijs and contributors + +Permission is hereby granted, free of charge, to any person obtaining a copy +of this software and associated documentation files (the "Software"), to deal +in the Software without restriction, including without limitation the rights +to use, copy, modify, merge, publish, distribute, sublicense, and/or sell +copies of the Software, and to permit persons to whom the Software is +furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all +copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +SOFTWARE. \ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/README.md b/packages/gearhash-wasm/vendor/README.md new file mode 100644 index 0000000000..34310148b7 --- /dev/null +++ b/packages/gearhash-wasm/vendor/README.md @@ -0,0 +1,60 @@ +# GearHash Test + +This directory contains the GearHash library for content-defined chunking. 
+1. Generates a 100KB (100,000 byte) deterministic input using a simple xorshift RNG with seed `0xbecd17f`
scalar variant, as well as optimized versions for the SSE4.2 and AVX2 +//! instruction sets. +//! +//! ## Example +//! +//! ``` +//! fn find_all_chunks(buf: &[u8], mask: u64) -> Vec<&[u8]> { +//! // set up initial state +//! let mut chunks = vec![]; +//! let mut offset = 0; +//! +//! // create new hasher +//! let mut hasher = gearhash::Hasher::default(); +//! +//! // loop through all matches, and push the corresponding chunks +//! while let Some(boundary) = hasher.next_match(&buf[offset..], mask) { +//! chunks.push(&buf[offset..offset + boundary]); +//! offset += boundary; +//! } +//! +//! // push final chunk +//! chunks.push(&buf[offset..]); +//! chunks +//! } +//! ``` + +#![cfg_attr(feature = "bench", feature(test))] + +#[cfg(feature = "bench")] +extern crate test; +#[cfg(feature = "bench")] +mod bench; + +mod scalar; +mod table; + +pub use table::{Table, DEFAULT_TABLE}; + +/// Gear hash state. Processes bytes to find chunk boundaries. +#[derive(Clone)] +pub struct Hasher<'t> { + table: &'t Table, + hash: u64, +} + +impl<'t> Hasher<'t> { + /// Create a new hasher with the given table. + pub fn new(table: &'t Table) -> Self { + Self { table, hash: 0 } + } + + /// Update the hash state by processing all the bytes in the given slice. + pub fn update(&mut self, buf: &[u8]) { + for b in buf.iter() { + self.hash = (self.hash << 1).wrapping_add(self.table[*b as usize]); + } + } + + /// Match the current hash state against the given mask. + /// + /// Returns true if `hash & mask == 0`, false otherwise. + pub fn is_match(&self, mask: u64) -> bool { + self.hash & mask == 0 + } + + /// Processes the given byte slice until a match is found for the given mask. + /// + /// If a match is found before the end of the byte slice, it returns the number + /// of bytes processed. If no match has been found, it returns `None`. 
+ pub fn next_match(&mut self, buf: &[u8], mask: u64) -> Option { + crate::scalar::next_match(&mut self.hash, self.table, buf, mask) + } + + /// Retrieve the current hash value. + pub fn get_hash(&self) -> u64 { + self.hash + } + + /// Set the hash value to the given integer. + pub fn set_hash(&mut self, hash: u64) { + self.hash = hash + } +} + +impl Default for Hasher<'static> { + fn default() -> Self { + Hasher::new(&DEFAULT_TABLE) + } +} + +impl<'t> std::fmt::Debug for Hasher<'t> { + fn fmt(&self, f: &mut std::fmt::Formatter) -> std::fmt::Result { + f.debug_struct("Hasher").field("hash", &self.hash).finish() + } +} \ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/src/scalar.rs b/packages/gearhash-wasm/vendor/src/scalar.rs new file mode 100644 index 0000000000..f06ab449f6 --- /dev/null +++ b/packages/gearhash-wasm/vendor/src/scalar.rs @@ -0,0 +1,14 @@ +use crate::Table; + +#[inline] +pub(crate) fn next_match(hash: &mut u64, table: &Table, buf: &[u8], mask: u64) -> Option { + for (i, b) in buf.iter().enumerate() { + *hash = (*hash << 1).wrapping_add(table[*b as usize]); + + if *hash & mask == 0 { + return Some(i + 1); + } + } + + None +} diff --git a/packages/gearhash-wasm/vendor/src/table.rs b/packages/gearhash-wasm/vendor/src/table.rs new file mode 100644 index 0000000000..898e603422 --- /dev/null +++ b/packages/gearhash-wasm/vendor/src/table.rs @@ -0,0 +1,262 @@ +/// Gear hash table. +pub type Table = [u64; 256]; + +/// Default hash table, using random (but static) integers. 
+pub static DEFAULT_TABLE: Table = [ + 0xb088d3a9e840f559, + 0x5652c7f739ed20d6, + 0x45b28969898972ab, + 0x6b0a89d5b68ec777, + 0x368f573e8b7a31b7, + 0x1dc636dce936d94b, + 0x207a4c4e5554d5b6, + 0xa474b34628239acb, + 0x3b06a83e1ca3b912, + 0x90e78d6c2f02baf7, + 0xe1c92df7150d9a8a, + 0x8e95053a1086d3ad, + 0x5a2ef4f1b83a0722, + 0xa50fac949f807fae, + 0x0e7303eb80d8d681, + 0x99b07edc1570ad0f, + 0x689d2fb555fd3076, + 0x00005082119ea468, + 0xc4b08306a88fcc28, + 0x3eb0678af6374afd, + 0xf19f87ab86ad7436, + 0xf2129fbfbe6bc736, + 0x481149575c98a4ed, + 0x0000010695477bc5, + 0x1fba37801a9ceacc, + 0x3bf06fd663a49b6d, + 0x99687e9782e3874b, + 0x79a10673aa50d8e3, + 0xe4accf9e6211f420, + 0x2520e71f87579071, + 0x2bd5d3fd781a8a9b, + 0x00de4dcddd11c873, + 0xeaa9311c5a87392f, + 0xdb748eb617bc40ff, + 0xaf579a8df620bf6f, + 0x86a6e5da1b09c2b1, + 0xcc2fc30ac322a12e, + 0x355e2afec1f74267, + 0x2d99c8f4c021a47b, + 0xbade4b4a9404cfc3, + 0xf7b518721d707d69, + 0x3286b6587bf32c20, + 0x0000b68886af270c, + 0xa115d6e4db8a9079, + 0x484f7e9c97b2e199, + 0xccca7bb75713e301, + 0xbf2584a62bb0f160, + 0xade7e813625dbcc8, + 0x000070940d87955a, + 0x8ae69108139e626f, + 0xbd776ad72fde38a2, + 0xfb6b001fc2fcc0cf, + 0xc7a474b8e67bc427, + 0xbaf6f11610eb5d58, + 0x09cb1f5b6de770d1, + 0xb0b219e6977d4c47, + 0x00ccbc386ea7ad4a, + 0xcc849d0adf973f01, + 0x73a3ef7d016af770, + 0xc807d2d386bdbdfe, + 0x7f2ac9966c791730, + 0xd037a86bc6c504da, + 0xf3f17c661eaa609d, + 0xaca626b04daae687, + 0x755a99374f4a5b07, + 0x90837ee65b2caede, + 0x6ee8ad93fd560785, + 0x0000d9e11053edd8, + 0x9e063bb2d21cdbd7, + 0x07ab77f12a01d2b2, + 0xec550255e6641b44, + 0x78fb94a8449c14c6, + 0xc7510e1bc6c0f5f5, + 0x0000320b36e4cae3, + 0x827c33262c8b1a2d, + 0x14675f0b48ea4144, + 0x267bd3a6498deceb, + 0xf1916ff982f5035e, + 0x86221b7ff434fb88, + 0x9dbecee7386f49d8, + 0xea58f8cac80f8f4a, + 0x008d198692fc64d8, + 0x6d38704fbabf9a36, + 0xe032cb07d1e7be4c, + 0x228d21f6ad450890, + 0x635cb1bfc02589a5, + 0x4620a1739ca2ce71, + 0xa7e7dfe3aae5fb58, + 0x0c10ca932b3c0deb, + 
0x2727fee884afed7b, + 0xa2df1c6df9e2ab1f, + 0x4dcdd1ac0774f523, + 0x000070ffad33e24e, + 0xa2ace87bc5977816, + 0x9892275ab4286049, + 0xc2861181ddf18959, + 0xbb9972a042483e19, + 0xef70cd3766513078, + 0x00000513abfc9864, + 0xc058b61858c94083, + 0x09e850859725e0de, + 0x9197fb3bf83e7d94, + 0x7e1e626d12b64bce, + 0x520c54507f7b57d1, + 0xbee1797174e22416, + 0x6fd9ac3222e95587, + 0x0023957c9adfbf3e, + 0xa01c7d7e234bbe15, + 0xaba2c758b8a38cbb, + 0x0d1fa0ceec3e2b30, + 0x0bb6a58b7e60b991, + 0x4333dd5b9fa26635, + 0xc2fd3b7d4001c1a3, + 0xfb41802454731127, + 0x65a56185a50d18cb, + 0xf67a02bd8784b54f, + 0x696f11dd67e65063, + 0x00002022fca814ab, + 0x8cd6be912db9d852, + 0x695189b6e9ae8a57, + 0xee9453b50ada0c28, + 0xd8fc5ea91a78845e, + 0xab86bf191a4aa767, + 0x0000c6b5c86415e5, + 0x267310178e08a22e, + 0xed2d101b078bca25, + 0x3b41ed84b226a8fb, + 0x13e622120f28dc06, + 0xa315f5ebfb706d26, + 0x8816c34e3301bace, + 0xe9395b9cbb71fdae, + 0x002ce9202e721648, + 0x4283db1d2bb3c91c, + 0xd77d461ad2b1a6a5, + 0xe2ec17e46eeb866b, + 0xb8e0be4039fbc47c, + 0xdea160c4d5299d04, + 0x7eec86c8d28c3634, + 0x2119ad129f98a399, + 0xa6ccf46b61a283ef, + 0x2c52cedef658c617, + 0x2db4871169acdd83, + 0x0000f0d6f39ecbe9, + 0x3dd5d8c98d2f9489, + 0x8a1872a22b01f584, + 0xf282a4c40e7b3cf2, + 0x8020ec2ccb1ba196, + 0x6693b6e09e59e313, + 0x0000ce19cc7c83eb, + 0x20cb5735f6479c3b, + 0x762ebf3759d75a5b, + 0x207bfe823d693975, + 0xd77dc112339cd9d5, + 0x9ba7834284627d03, + 0x217dc513e95f51e9, + 0xb27b1a29fc5e7816, + 0x00d5cd9831bb662d, + 0x71e39b806d75734c, + 0x7e572af006fb1a23, + 0xa2734f2f6ae91f85, + 0xbf82c6b5022cddf2, + 0x5c3beac60761a0de, + 0xcdc893bb47416998, + 0x6d1085615c187e01, + 0x77f8ae30ac277c5d, + 0x917c6b81122a2c91, + 0x5b75b699add16967, + 0x0000cf6ae79a069b, + 0xf3c40afa60de1104, + 0x2063127aa59167c3, + 0x621de62269d1894d, + 0xd188ac1de62b4726, + 0x107036e2154b673c, + 0x0000b85f28553a1d, + 0xf2ef4e4c18236f3d, + 0xd9d6de6611b9f602, + 0xa1fc7955fb47911c, + 0xeb85fd032f298dbd, + 0xbe27502fb3befae1, + 0xe3034251c4cd661e, 
+ 0x441364d354071836, + 0x0082b36c75f2983e, + 0xb145910316fa66f0, + 0x021c069c9847caf7, + 0x2910dfc75a4b5221, + 0x735b353e1c57a8b5, + 0xce44312ce98ed96c, + 0xbc942e4506bdfa65, + 0xf05086a71257941b, + 0xfec3b215d351cead, + 0x00ae1055e0144202, + 0xf54b40846f42e454, + 0x00007fd9c8bcbcc8, + 0xbfbd9ef317de9bfe, + 0xa804302ff2854e12, + 0x39ce4957a5e5d8d4, + 0xffb9e2a45637ba84, + 0x55b9ad1d9ea0818b, + 0x00008acbf319178a, + 0x48e2bfc8d0fbfb38, + 0x8be39841e848b5e8, + 0x0e2712160696a08b, + 0xd51096e84b44242a, + 0x1101ba176792e13a, + 0xc22e770f4531689d, + 0x1689eff272bbc56c, + 0x00a92a197f5650ec, + 0xbc765990bda1784e, + 0xc61441e392fcb8ae, + 0x07e13a2ced31e4a0, + 0x92cbe984234e9d4d, + 0x8f4ff572bb7d8ac5, + 0x0b9670c00b963bd0, + 0x62955a581a03eb01, + 0x645f83e5ea000254, + 0x41fce516cd88f299, + 0xbbda9748da7a98cf, + 0x0000aab2fe4845fa, + 0x19761b069bf56555, + 0x8b8f5e8343b6ad56, + 0x3e5d1cfd144821d9, + 0xec5c1e2ca2b0cd8f, + 0xfaf7e0fea7fbb57f, + 0x000000d3ba12961b, + 0xda3f90178401b18e, + 0x70ff906de33a5feb, + 0x0527d5a7c06970e7, + 0x22d8e773607c13e9, + 0xc9ab70df643c3bac, + 0xeda4c6dc8abe12e3, + 0xecef1f410033e78a, + 0x0024c2b274ac72cb, + 0x06740d954fa900b4, + 0x1d7a299b323d6304, + 0xb3c37cb298cbead5, + 0xc986e3c76178739b, + 0x9fabea364b46f58a, + 0x6da214c5af85cc56, + 0x17a43ed8b7a38f84, + 0x6eccec511d9adbeb, + 0xf9cab30913335afb, + 0x4a5e60c5f415eed2, + 0x00006967503672b4, + 0x9da51d121454bb87, + 0x84321e13b9bbc816, + 0xfb3d6fb6ab2fdd8d, + 0x60305eed8e160a8d, + 0xcbbf4b14e9946ce8, + 0x00004f63381b10c3, + 0x07d5b7816fcc4e10, + 0xe5a536726a6a8155, + 0x57afb23447a07fdd, + 0x18f346f7abc9d394, + 0x636dc655d61ad33d, + 0xcc8bab4939f7f3f6, + 0x63c7a906c1dd187b, +]; \ No newline at end of file diff --git a/packages/gearhash-wasm/vendor/test_gearhash.rs b/packages/gearhash-wasm/vendor/test_gearhash.rs new file mode 100644 index 0000000000..961ba3bdd8 --- /dev/null +++ b/packages/gearhash-wasm/vendor/test_gearhash.rs @@ -0,0 +1,106 @@ +use gearhash::{Hasher, DEFAULT_TABLE}; + +// 
Simple deterministic RNG for reproducible results +struct SimpleRng { + state: u64, +} + +impl SimpleRng { + fn new(seed: u64) -> Self { + Self { state: seed } + } + + fn next_u64(&mut self) -> u64 { + // Simple xorshift algorithm + self.state ^= self.state << 13; + self.state ^= self.state >> 7; + self.state ^= self.state << 17; + self.state + } + + fn fill_bytes(&mut self, dest: &mut [u8]) { + for chunk in dest.chunks_mut(8) { + let value = self.next_u64(); + for (i, byte) in chunk.iter_mut().enumerate() { + *byte = (value >> (i * 8)) as u8; + } + } + } +} + +const BENCH_INPUT_SEED: u64 = 0xbecd17f; +const BENCH_MASK: u64 = 0x0000d90003530000; +const INPUT_SIZE: usize = 100_000; + +fn generate_test_input() -> Vec { + let mut bytes = vec![0u8; INPUT_SIZE]; + let mut rng = SimpleRng::new(BENCH_INPUT_SEED); + rng.fill_bytes(&mut bytes); + bytes +} + +fn test_gearhash() { + println!("Generating test input with seed: 0x{:x}", BENCH_INPUT_SEED); + let input_buf = generate_test_input(); + println!("Input size: {} bytes", input_buf.len()); + println!("Mask: 0x{:x}", BENCH_MASK); + + let mut hasher = Hasher::new(&DEFAULT_TABLE); + let mut offset = 0; + let mut chunk_count = 0; + let mut total_processed = 0; + + println!("\nProcessing chunks:"); + println!("Chunk | Offset | Size | Hash"); + println!("------|--------|------|------------------"); + + while offset < input_buf.len() { + let chunk_start = offset; + + if let Some(match_size) = hasher.next_match(&input_buf[offset..], BENCH_MASK) { + offset += match_size; + total_processed += match_size; + chunk_count += 1; + + println!("{:5} | {:6} | {:4} | 0x{:016x}", + chunk_count, chunk_start, match_size, hasher.get_hash()); + } else { + // No more matches, process remaining bytes + let remaining = input_buf.len() - offset; + hasher.update(&input_buf[offset..]); + total_processed += remaining; + chunk_count += 1; + + println!("{:5} | {:6} | {:4} | 0x{:016x} (final)", + chunk_count, offset, remaining, hasher.get_hash()); + 
break; + } + } + + println!("\nSummary:"); + println!("Total chunks: {}", chunk_count); + println!("Total bytes processed: {}", total_processed); + println!("Average chunk size: {:.1} bytes", total_processed as f64 / chunk_count as f64); + + // Print first few bytes of each chunk for verification + println!("\nFirst 16 bytes of each chunk:"); + offset = 0; + chunk_count = 0; + + while offset < input_buf.len() { + if let Some(match_size) = hasher.next_match(&input_buf[offset..], BENCH_MASK) { + let chunk = &input_buf[offset..offset + match_size]; + println!("Chunk {}: {:02x?}", chunk_count + 1, &chunk[..chunk.len().min(16)]); + offset += match_size; + chunk_count += 1; + } else { + let chunk = &input_buf[offset..]; + println!("Chunk {}: {:02x?} (final)", chunk_count + 1, &chunk[..chunk.len().min(16)]); + break; + } + } +} + +fn main() { + test_gearhash(); +} \ No newline at end of file From e3d8cdca3f8dd5345c823f1523a4e63dcc879f15 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 21:29:28 +0200 Subject: [PATCH 24/44] comment on mask param --- packages/gearhash-wasm/README.md | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/packages/gearhash-wasm/README.md b/packages/gearhash-wasm/README.md index 8f14fa8da3..2c0de81843 100644 --- a/packages/gearhash-wasm/README.md +++ b/packages/gearhash-wasm/README.md @@ -11,14 +11,15 @@ import { nextMatch } from '@huggingface/gearhash-wasm'; const data = new Uint8Array(1000000); // Example: 1MB of data // ... fill data with your content ... 
-const mask = 0x0000d90003530000n; // Example mask as a BigInt, more 0s => bigger chunks +const mask = 0x0000d90003530000n; // Example mask as a BigInt, more 1s in binary repr => bigger chunks +//^ it has 11 1s in binary, so chunks will be ~2048 long const match = nextMatch(data, mask); const allMatches = nextMatches(data, mask).matches; ``` The `nextMatch` function takes two parameters: - `data`: A Uint8Array containing the data to search through -- `mask`: A BigInt, the bigger it is the bigger the chunks are +- `mask`: A BigInt, the more 1s it has in its binary representation, the bigger the chunk The function returns an object with the `position` (i32) and `hash` (u64) properties @@ -81,3 +82,4 @@ console.log(length, "bytes without a match, ending hash: ", hash); ## Possible improvements +SIMD \ No newline at end of file From 13c607aaa22a985a897cd748df282b8127736025 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 22:49:44 +0200 Subject: [PATCH 25/44] wip try to match rust & assembly --- packages/gearhash-wasm/assembly/next-match.ts | 17 ++++- packages/gearhash-wasm/tests/index.js | 65 ++++++++----------- .../gearhash-wasm/vendor/test_gearhash.rs | 26 +++++--- 3 files changed, 60 insertions(+), 48 deletions(-) diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 1093f77a80..8d13fe777c 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -12,9 +12,24 @@ export class MatchResult { export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResult { for (let i = 0; i < buf.length; i++) { const b = buf[i]; - hash = (hash << 1) + DEFAULT_TABLE[b]; + // Use proper unsigned operations to match Rust's wrapping_add behavior + hash = ((hash << 1) as u64) + (DEFAULT_TABLE[b] as u64); + + // console.log( + // "hash " + + // hash.toString(16) + + // " " + + // (hash << 1).toString(16) + + // " " + + // b.toString(16) + + // " 
" + + // (DEFAULT_TABLE[b] as u64).toString(16) + // ); + // console.log("mask " + mask.toString(16)); + // console.log("hash & mask " + (hash & mask).toString(16)); if ((hash & mask) == 0) { + // console.log("match found at position " + (i + 1).toString()); return { position: i + 1, hash }; } } diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index a5ec712bac..3572aad775 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,19 +1,26 @@ -import { nextMatch } from "../build/debug.js"; +import { nextMatch, nextMatches } from "../build/debug.js"; -// Simple deterministic RNG for reproducible results (same as Rust version) +// Simple deterministic RNG for reproducible results (32-bit version) class SimpleRng { constructor(seed) { - this.state = BigInt(seed); + this.state = seed; } - nextU64() { - // Simple xorshift algorithm (same as Rust version) - this.state ^= this.state << 13n; - this.state ^= this.state >> 7n; - this.state ^= this.state << 17n; + nextU32() { + // Simple 32-bit xorshift algorithm (same as Rust version) + this.state ^= this.state << 13; + this.state ^= this.state >> 17; + this.state ^= this.state << 5; return this.state; } + nextU64() { + // Generate two 32-bit values and combine them + const low = this.nextU32(); + const high = this.nextU32(); + return (BigInt(high) << 32n) | BigInt(low); + } + fillBytes(dest) { for (let i = 0; i < dest.length; i += 8) { const value = this.nextU64(); @@ -50,38 +57,20 @@ function testGearhash() { console.log("Chunk | Offset | Size | Hash"); console.log("------|--------|------|------------------"); - while (offset < inputBuf.length) { - const chunkStart = offset; - - const result = nextMatch(inputBuf.subarray(offset), BENCH_MASK, hash); - if (result.matchSize > 0) { - offset += result.matchSize; - totalProcessed += result.matchSize; - chunkCount += 1; - hash = result.hash; + const result = nextMatches(inputBuf, BENCH_MASK, 0); + 
const matches = [...result.matches, { position: result.remaining, hash: result.hash }]; - console.log( - `${chunkCount.toString().padStart(5)} | ${chunkStart.toString().padStart(6)} | ${result.matchSize - .toString() - .padStart(4)} | 0x${hash.toString(16).padStart(16, "0")}` - ); - } else { - // No more matches, process remaining bytes - const remaining = inputBuf.length - offset; - // Update hash for remaining bytes - for (let i = 0; i < remaining; i++) { - hash = ((hash << 1n) + BigInt(inputBuf[offset + i])) & 0xffffffffffffffffn; - } - totalProcessed += remaining; - chunkCount += 1; + for (const match of matches) { + offset += match.position; + totalProcessed += match.position; + chunkCount += 1; + hash = match.hash; - console.log( - `${chunkCount.toString().padStart(5)} | ${offset.toString().padStart(6)} | ${remaining - .toString() - .padStart(4)} | 0x${hash.toString(16).padStart(16, "0")} (final)` - ); - break; - } + console.log( + `${chunkCount.toString().padStart(5)} | ${offset.toString().padStart(6)} | ${match.position + .toString() + .padStart(4)} | 0x${match.hash.toString(16).padStart(16, "0")}` + ); } console.log("\nSummary:"); diff --git a/packages/gearhash-wasm/vendor/test_gearhash.rs b/packages/gearhash-wasm/vendor/test_gearhash.rs index 961ba3bdd8..dfb859ebca 100644 --- a/packages/gearhash-wasm/vendor/test_gearhash.rs +++ b/packages/gearhash-wasm/vendor/test_gearhash.rs @@ -1,23 +1,30 @@ use gearhash::{Hasher, DEFAULT_TABLE}; -// Simple deterministic RNG for reproducible results +// Simple deterministic RNG for reproducible results (32-bit version) struct SimpleRng { - state: u64, + state: u32, } impl SimpleRng { - fn new(seed: u64) -> Self { + fn new(seed: u32) -> Self { Self { state: seed } } - fn next_u64(&mut self) -> u64 { - // Simple xorshift algorithm + fn next_u32(&mut self) -> u32 { + // Simple 32-bit xorshift algorithm self.state ^= self.state << 13; - self.state ^= self.state >> 7; - self.state ^= self.state << 17; + self.state ^= 
self.state >> 17; + self.state ^= self.state << 5; self.state } + fn next_u64(&mut self) -> u64 { + // Generate two 32-bit values and combine them + let low = self.next_u32() as u64; + let high = self.next_u32() as u64; + (high << 32) | low + } + fn fill_bytes(&mut self, dest: &mut [u8]) { for chunk in dest.chunks_mut(8) { let value = self.next_u64(); @@ -28,7 +35,7 @@ impl SimpleRng { } } -const BENCH_INPUT_SEED: u64 = 0xbecd17f; +const BENCH_INPUT_SEED: u32 = 0xbecd17f; const BENCH_MASK: u64 = 0x0000d90003530000; const INPUT_SIZE: usize = 100_000; @@ -64,10 +71,11 @@ fn test_gearhash() { println!("{:5} | {:6} | {:4} | 0x{:016x}", chunk_count, chunk_start, match_size, hasher.get_hash()); + + hasher.set_hash(0); } else { // No more matches, process remaining bytes let remaining = input_buf.len() - offset; - hasher.update(&input_buf[offset..]); total_processed += remaining; chunk_count += 1; From ea05edee63ee15a103472271a8607653b36884e7 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 18 Jun 2025 23:01:33 +0200 Subject: [PATCH 26/44] fix gearhash test --- packages/gearhash-wasm/assembly/next-match.ts | 17 +-- packages/gearhash-wasm/tests/index.js | 124 ++++++++++-------- .../gearhash-wasm/vendor/test_gearhash.rs | 29 ++-- 3 files changed, 81 insertions(+), 89 deletions(-) diff --git a/packages/gearhash-wasm/assembly/next-match.ts b/packages/gearhash-wasm/assembly/next-match.ts index 8d13fe777c..1093f77a80 100644 --- a/packages/gearhash-wasm/assembly/next-match.ts +++ b/packages/gearhash-wasm/assembly/next-match.ts @@ -12,24 +12,9 @@ export class MatchResult { export function nextMatch(buf: Uint8Array, mask: u64, hash: u64 = 0): MatchResult { for (let i = 0; i < buf.length; i++) { const b = buf[i]; - // Use proper unsigned operations to match Rust's wrapping_add behavior - hash = ((hash << 1) as u64) + (DEFAULT_TABLE[b] as u64); - - // console.log( - // "hash " + - // hash.toString(16) + - // " " + - // (hash << 1).toString(16) + - // " " + - // b.toString(16) + 
- // " " + - // (DEFAULT_TABLE[b] as u64).toString(16) - // ); - // console.log("mask " + mask.toString(16)); - // console.log("hash & mask " + (hash & mask).toString(16)); + hash = (hash << 1) + DEFAULT_TABLE[b]; if ((hash & mask) == 0) { - // console.log("match found at position " + (i + 1).toString()); return { position: i + 1, hash }; } } diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js index 3572aad775..dfed8e01ae 100644 --- a/packages/gearhash-wasm/tests/index.js +++ b/packages/gearhash-wasm/tests/index.js @@ -1,31 +1,23 @@ import { nextMatch, nextMatches } from "../build/debug.js"; -// Simple deterministic RNG for reproducible results (32-bit version) +// Simple deterministic RNG for reproducible results (24-bit version) class SimpleRng { constructor(seed) { - this.state = seed; + this.state = seed & 0xffffff; // Keep only 24 bits } - nextU32() { - // Simple 32-bit xorshift algorithm (same as Rust version) - this.state ^= this.state << 13; - this.state ^= this.state >> 17; - this.state ^= this.state << 5; + nextU24() { + // Simple 24-bit linear congruential generator + // Using 24-bit arithmetic to avoid overflow + this.state = (this.state * 1111 + 12345) & 0xffffff; return this.state; } - nextU64() { - // Generate two 32-bit values and combine them - const low = this.nextU32(); - const high = this.nextU32(); - return (BigInt(high) << 32n) | BigInt(low); - } - fillBytes(dest) { - for (let i = 0; i < dest.length; i += 8) { - const value = this.nextU64(); - for (let j = 0; j < 8 && i + j < dest.length; j++) { - dest[i + j] = Number((value >> BigInt(j * 8)) & 0xffn); + for (let i = 0; i < dest.length; i += 3) { + const value = this.nextU24(); + for (let j = 0; j < 3 && i + j < dest.length; j++) { + dest[i + j] = (value >> (j * 8)) & 0xff; } } } @@ -61,7 +53,6 @@ function testGearhash() { const matches = [...result.matches, { position: result.remaining, hash: result.hash }]; for (const match of matches) { - offset += 
match.position; totalProcessed += match.position; chunkCount += 1; hash = match.hash; @@ -71,6 +62,7 @@ function testGearhash() { .toString() .padStart(4)} | 0x${match.hash.toString(16).padStart(16, "0")}` ); + offset += match.position; } console.log("\nSummary:"); @@ -128,43 +120,53 @@ function parseExpectedResults(resultData) { return results; } -const resultData = ` - 1 | 0 | 5919 | 0x17c402cb182c5718 - 2 | 5919 | 265 | 0xe739063654888081 - 3 | 6184 | 4855 | 0x38a82261e80810f9 - 4 | 11039 | 1029 | 0x803f24c9ac20ddd5 - 5 | 12068 | 583 | 0xb4b724e26824ace3 - 6 | 12651 | 358 | 0x11bd22180c0c5ac5 - 7 | 13009 | 3078 | 0x810a04be24846ffc - 8 | 16087 | 1207 | 0x5f940641d088dada - 9 | 17294 | 251 | 0xf09502d5f4acfb4e - 10 | 17545 | 3053 | 0xf0b120d014ace72d - 11 | 20598 | 9120 | 0xa458064aa82403e5 - 12 | 29718 | 3288 | 0x9ccf04ecc000996b - 13 | 33006 | 590 | 0xd4ba00dd9408b6b5 - 14 | 33596 | 1401 | 0xd42a2000a4a46d11 - 15 | 34997 | 2573 | 0xc914022f9c28e722 - 16 | 37570 | 1300 | 0xd63b0401a484c0bc - 17 | 38870 | 98 | 0x996f0499402c1e96 - 18 | 38968 | 2802 | 0xf43406dfb42c9324 - 19 | 41770 | 3237 | 0x1bd026252c0ccbe3 - 20 | 45007 | 7368 | 0x7da400e8e0aca934 - 21 | 52375 | 439 | 0xcd9b208f38201fa7 - 22 | 52814 | 1477 | 0x9497226484a0a015 - 23 | 54291 | 7158 | 0x5a3100fa9888dfe5 - 24 | 61449 | 2168 | 0x21ed20bbf008a4ef - 25 | 63617 | 2475 | 0x7b0522392480392d - 26 | 66092 | 26 | 0xdfe6048a9c0c125f - 27 | 66118 | 7548 | 0xf8a72278802c1523 - 28 | 73666 | 7826 | 0x5997242ba00cb3fd - 29 | 81492 | 215 | 0x489e26bd7c08ec4c - 30 | 81707 | 760 | 0x84d526f1542066b2 - 31 | 82467 | 1929 | 0x085d02a31024d324 - 32 | 84396 | 3947 | 0x8cc4240eb8a8b8e3 - 33 | 88343 | 1511 | 0x98b1204ccc001231 - 34 | 89854 | 2895 | 0x35402430a8a8d1f1 - 35 | 92749 | 7025 | 0x52bd0269e8084b97 - 36 | 99774 | 226 | 0xd86ff8f143fe10b4 `; +const resultData = `Chunk | Offset | Size | Hash +------|--------|------|------------------ + 1 | 0 | 3598 | 0x033220f080ac5f77 + 2 | 3598 | 3995 | 0xd06b22f324ac5f28 + 3 | 
7593 | 4708 | 0xa3a324f81808429c + 4 | 12301 | 484 | 0x12a5006aa4a4425b + 5 | 12785 | 1484 | 0x0b240413a4a4d5a2 + 6 | 14269 | 563 | 0xc646022fbc848bc6 + 7 | 14832 | 6663 | 0x7c7a2296e4a4c325 + 8 | 21495 | 1220 | 0xbe1f2468f0841b68 + 9 | 22715 | 1175 | 0xf87e2299e00c57d9 + 10 | 23890 | 779 | 0x79ca2634d00cd6b9 + 11 | 24669 | 2069 | 0xcb7a063594081a74 + 12 | 26738 | 2623 | 0xdccc26b6c0acb733 + 13 | 29361 | 596 | 0x4fb6201a1c20143e + 14 | 29957 | 622 | 0x81e726272020706f + 15 | 30579 | 3834 | 0x630622fca084a60a + 16 | 34413 | 2379 | 0x177b2240080810b1 + 17 | 36792 | 3527 | 0x663b261bbc2451ed + 18 | 40319 | 1665 | 0xf94f06db94003e2f + 19 | 41984 | 1240 | 0xc5ca208c0c24cefc + 20 | 43224 | 1274 | 0x8139244f740cba39 + 21 | 44498 | 3680 | 0x4440044520045a9d + 22 | 48178 | 1487 | 0xe00f2049a0a43a58 + 23 | 49665 | 4293 | 0x366a26940408279d + 24 | 53958 | 1184 | 0x3a582683902cb3fe + 25 | 55142 | 383 | 0x002d0499e080702e + 26 | 55525 | 1206 | 0x34ba041aa4084fbd + 27 | 56731 | 506 | 0x0c53045c00a0a228 + 28 | 57237 | 8019 | 0xf85b202d9c0813a5 + 29 | 65256 | 1070 | 0x1c862295ac8863ba + 30 | 66326 | 3359 | 0x4e4804d7b82805c7 + 31 | 69685 | 1744 | 0x75b7224cc8209457 + 32 | 71429 | 152 | 0xb01e26b40c0cf7c0 + 33 | 71581 | 11 | 0xc66002b7f48c0472 + 34 | 71592 | 1209 | 0x0a33021dc4007363 + 35 | 72801 | 1795 | 0xd0cc22ea708c921f + 36 | 74596 | 856 | 0x49e3007c9c2c5727 + 37 | 75452 | 97 | 0xe0b422e3c40c89dc + 38 | 75549 | 1299 | 0xbd1806074024536a + 39 | 76848 | 131 | 0xd61104147c28928d + 40 | 76979 | 1987 | 0x31930627a080ebb0 + 41 | 78966 | 11254 | 0x4c4400e65c24beff + 42 | 90220 | 868 | 0xa92400ca5ca02488 + 43 | 91088 | 6279 | 0x5a3d0443f0a0d81a + 44 | 97367 | 969 | 0x7770042d140c7472 + 45 | 98336 | 1664 | 0xe508202f55c46d2d`; console.log("ok"); @@ -217,6 +219,7 @@ for (let i = 0; i < totalChunks; i++) { console.log(`❌ Mismatch at chunk ${i + 1}:`); console.log(` Expected: offset=${expected.offset}, size=${expected.size}, hash=${expected.hash}`); console.log(` Actual: 
offset=${actual.offset}, size=${actual.size}, hash=${actual.hash}`); + process.exitCode = 1; } } @@ -238,3 +241,12 @@ console.log(`Total bytes processed: ${testResults.totalProcessed}`); console.log(`Average chunk size: ${testResults.averageChunkSize.toFixed(1)} bytes`); console.log(`Matching chunks: ${matches}/${totalChunks}`); console.log(`Accuracy: ${((matches / totalChunks) * 100).toFixed(2)}%`); + +const input = generateTestInput().slice(0, 100); + +let output = ""; +for (let i = 0; i < input.length; i++) { + output += input[i].toString(16).padStart(2, "0") + " "; +} + +console.log("First 100 bytes", output); diff --git a/packages/gearhash-wasm/vendor/test_gearhash.rs b/packages/gearhash-wasm/vendor/test_gearhash.rs index dfb859ebca..99606f1752 100644 --- a/packages/gearhash-wasm/vendor/test_gearhash.rs +++ b/packages/gearhash-wasm/vendor/test_gearhash.rs @@ -1,35 +1,27 @@ use gearhash::{Hasher, DEFAULT_TABLE}; -// Simple deterministic RNG for reproducible results (32-bit version) +// Simple deterministic RNG for reproducible results (24-bit version) struct SimpleRng { state: u32, } impl SimpleRng { fn new(seed: u32) -> Self { - Self { state: seed } + Self { state: seed & 0xFFFFFF } // Keep only 24 bits } - fn next_u32(&mut self) -> u32 { - // Simple 32-bit xorshift algorithm - self.state ^= self.state << 13; - self.state ^= self.state >> 17; - self.state ^= self.state << 5; + fn next_u24(&mut self) -> u32 { + // Simple 24-bit linear congruential generator + // Using 24-bit arithmetic to avoid overflow + self.state = (self.state.wrapping_mul(1111) + 12345) & 0xFFFFFF; self.state } - fn next_u64(&mut self) -> u64 { - // Generate two 32-bit values and combine them - let low = self.next_u32() as u64; - let high = self.next_u32() as u64; - (high << 32) | low - } - fn fill_bytes(&mut self, dest: &mut [u8]) { - for chunk in dest.chunks_mut(8) { - let value = self.next_u64(); + for chunk in dest.chunks_mut(3) { + let value = self.next_u24(); for (i, byte) in 
chunk.iter_mut().enumerate() { - *byte = (value >> (i * 8)) as u8; + *byte = ((value >> (i * 8)) & 0xFF) as u8; } } } @@ -111,4 +103,7 @@ fn test_gearhash() { fn main() { test_gearhash(); + + let input_buf = generate_test_input(); + println!("First 100 bytes: {:02x?}", &input_buf[..100]); } \ No newline at end of file From fb8e5aac98e95ef7f9a15d77887a17719b99d3a1 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Mon, 7 Jul 2025 16:34:23 +0200 Subject: [PATCH 27/44] add splitmix64-wasm module --- packages/splitmix64-wasm/.npmignore | 1 + packages/splitmix64-wasm/README.md | 16 +++++++ packages/splitmix64-wasm/asconfig.json | 22 ++++++++++ packages/splitmix64-wasm/assembly/index.ts | 40 +++++++++++++++++ .../splitmix64-wasm/assembly/tsconfig.json | 4 ++ packages/splitmix64-wasm/build/.gitignore | 2 + packages/splitmix64-wasm/package.json | 33 ++++++++++++++ packages/splitmix64-wasm/pnpm-lock.yaml | 38 ++++++++++++++++ packages/splitmix64-wasm/tests/index.js | 29 +++++++++++++ packages/xetchunk-wasm/asconfig.json | 43 ++++++++++--------- packages/xetchunk-wasm/package.json | 1 + packages/xetchunk-wasm/pnpm-lock.yaml | 3 ++ packages/xetchunk-wasm/tests/index.js | 38 ++++++++++++---- pnpm-workspace.yaml | 1 + 14 files changed, 241 insertions(+), 30 deletions(-) create mode 100644 packages/splitmix64-wasm/.npmignore create mode 100644 packages/splitmix64-wasm/README.md create mode 100644 packages/splitmix64-wasm/asconfig.json create mode 100644 packages/splitmix64-wasm/assembly/index.ts create mode 100644 packages/splitmix64-wasm/assembly/tsconfig.json create mode 100644 packages/splitmix64-wasm/build/.gitignore create mode 100644 packages/splitmix64-wasm/package.json create mode 100644 packages/splitmix64-wasm/pnpm-lock.yaml create mode 100644 packages/splitmix64-wasm/tests/index.js diff --git a/packages/splitmix64-wasm/.npmignore b/packages/splitmix64-wasm/.npmignore new file mode 100644 index 0000000000..5657f6ea7d --- /dev/null +++ b/packages/splitmix64-wasm/.npmignore 
@@ -0,0 +1 @@ +vendor \ No newline at end of file diff --git a/packages/splitmix64-wasm/README.md b/packages/splitmix64-wasm/README.md new file mode 100644 index 0000000000..90f0cf2413 --- /dev/null +++ b/packages/splitmix64-wasm/README.md @@ -0,0 +1,16 @@ +JS and WASM implementations of splitmix-64 + +Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. + +Used internally to reproduce rust tests + +Let us know if you want us to expose more functions. + +## Usage + +```javascript +import { createRandomArray } from '@huggingface/splitmix64-wasm'; + +// Create an ArrayBuffer of data, with u64s converted to le u8s +const data = new createRandomArray(256_000, 1); // Example: 256kB of data +``` \ No newline at end of file diff --git a/packages/splitmix64-wasm/asconfig.json b/packages/splitmix64-wasm/asconfig.json new file mode 100644 index 0000000000..8776597856 --- /dev/null +++ b/packages/splitmix64-wasm/asconfig.json @@ -0,0 +1,22 @@ +{ + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + } +} \ No newline at end of file diff --git a/packages/splitmix64-wasm/assembly/index.ts b/packages/splitmix64-wasm/assembly/index.ts new file mode 100644 index 0000000000..f40a276c7a --- /dev/null +++ b/packages/splitmix64-wasm/assembly/index.ts @@ -0,0 +1,40 @@ +// fn splitmix64_next(state: &mut u64) -> u64 { +// *state = state.wrapping_add(0x9E3779B97F4A7C15); +// let mut z = *state; +// z = (z ^ (z >> 30)).wrapping_mul(0xBF58476D1CE4E5B9); +// z = (z ^ (z >> 27)).wrapping_mul(0x94D049BB133111EB); +// z ^ (z >> 31) +// } + +// fn create_random_data(n: usize, seed: u64) -> Vec { +// // This test will actually need to be run in different 
environments, so to generate +// // the table below, create random data using a simple SplitMix rng that can be ported here +// // as is without dependening on other packages. +// let mut ret = Vec::with_capacity(n + 7); + +// let mut state = seed; + +// while ret.len() < n { +// let next_u64 = splitmix64_next(&mut state); +// ret.extend_from_slice(&next_u64.to_le_bytes()); +// } + +// // Has extra bits on there since we're adding in blocks of 8. +// ret.resize(n, 0); + +// ret +// } + +export function createRandomArray(size: u32, seed: u64): ArrayBuffer { + const array = new ArrayBuffer(size + 7); + const view = new DataView(array); + let state = seed; + for (let i: u32 = 0; i < size; i += 8) { + state = state + 0x9e3779b97f4a7c15; + let z = state; + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + view.setUint64(i, z ^ (z >> 31), true); + } + return array; +} diff --git a/packages/splitmix64-wasm/assembly/tsconfig.json b/packages/splitmix64-wasm/assembly/tsconfig.json new file mode 100644 index 0000000000..8131d68a0a --- /dev/null +++ b/packages/splitmix64-wasm/assembly/tsconfig.json @@ -0,0 +1,4 @@ +{ + "extends": "../node_modules/.pnpm/assemblyscript@0.27.36/node_modules/assemblyscript/std/assembly.json", + "include": ["./**/*.ts"] +} diff --git a/packages/splitmix64-wasm/build/.gitignore b/packages/splitmix64-wasm/build/.gitignore new file mode 100644 index 0000000000..d6b7ef32c8 --- /dev/null +++ b/packages/splitmix64-wasm/build/.gitignore @@ -0,0 +1,2 @@ +* +!.gitignore diff --git a/packages/splitmix64-wasm/package.json b/packages/splitmix64-wasm/package.json new file mode 100644 index 0000000000..6e98c4639f --- /dev/null +++ b/packages/splitmix64-wasm/package.json @@ -0,0 +1,33 @@ +{ + "name": "@huggingface/splitmix64-wasm", + "version": "0.0.1", + "scripts": { + "build:debug": "asc assembly/index.ts --target debug", + "build:release": "asc assembly/index.ts --target release", + "build": "pnpm run build:debug && npm 
run build:release", + "test": "node tests", + "prepare": "pnpm run build" + }, + "keywords": [ + "blake3", + "assemblyscript", + "assembly", + "wasm" + ], + "type": "module", + "exports": { + ".": { + "import": "./build/release.js", + "types": "./build/release.d.ts" + }, + "./assembly": { + "import": "./assembly/index.ts" + }, + "./wasm": { + "import": "./build/release.wasm" + } + }, + "devDependencies": { + "assemblyscript": "0.27.36" + } +} diff --git a/packages/splitmix64-wasm/pnpm-lock.yaml b/packages/splitmix64-wasm/pnpm-lock.yaml new file mode 100644 index 0000000000..9d7ac0a92a --- /dev/null +++ b/packages/splitmix64-wasm/pnpm-lock.yaml @@ -0,0 +1,38 @@ +lockfileVersion: '9.0' + +settings: + autoInstallPeers: true + excludeLinksFromLockfile: false + +importers: + + .: + devDependencies: + assemblyscript: + specifier: 0.27.36 + version: 0.27.36 + +packages: + + assemblyscript@0.27.36: + resolution: {integrity: sha512-1qX2zf6p7l/mNYv8r21jC/Yft7kX7XKR3xUHw41zvV4xad5lyC8w7jZiwZBGoy64VKZLc+bTDJDWi8Kb70YrHA==} + engines: {node: '>=18', npm: '>=10'} + hasBin: true + + binaryen@116.0.0-nightly.20240114: + resolution: {integrity: sha512-0GZrojJnuhoe+hiwji7QFaL3tBlJoA+KFUN7ouYSDGZLSo9CKM8swQX8n/UcbR0d1VuZKU+nhogNzv423JEu5A==} + hasBin: true + + long@5.3.2: + resolution: {integrity: sha512-mNAgZ1GmyNhD7AuqnTG3/VQ26o760+ZYBPKjPvugO8+nLbYfX6TVpJPseBvopbdY+qpZ/lKUnmEc1LeZYS3QAA==} + +snapshots: + + assemblyscript@0.27.36: + dependencies: + binaryen: 116.0.0-nightly.20240114 + long: 5.3.2 + + binaryen@116.0.0-nightly.20240114: {} + + long@5.3.2: {} diff --git a/packages/splitmix64-wasm/tests/index.js b/packages/splitmix64-wasm/tests/index.js new file mode 100644 index 0000000000..707d841ccd --- /dev/null +++ b/packages/splitmix64-wasm/tests/index.js @@ -0,0 +1,29 @@ +// #[test] +// fn test_correctness_1mb_random_data() { +// // Test this data. 
+// let data = create_random_data(1000000, 0); + +// // Uncomment these to create the lines below: +// // eprintln!("(data[0], {});", data[0] as usize); +// // eprintln!("(data[127], {});", data[127] as usize); +// // eprintln!("(data[111111], {});", data[111111] as usize); + +// assert_eq!(data[0], 175); +// assert_eq!(data[127], 132); +// assert_eq!(data[111111], 118); + +// } + +import assert from "assert"; +import { createRandomArray } from "../build/debug.js"; + +const data = createRandomArray(1000000, 0); +const array = new Uint8Array(data); + +console.log(array[0]); +console.log(array[127]); +console.log(array[111111]); + +assert.strictEqual(array[0], 175); +assert.strictEqual(array[127], 132); +assert.strictEqual(array[111111], 118); diff --git a/packages/xetchunk-wasm/asconfig.json b/packages/xetchunk-wasm/asconfig.json index 8776597856..6b1186547a 100644 --- a/packages/xetchunk-wasm/asconfig.json +++ b/packages/xetchunk-wasm/asconfig.json @@ -1,22 +1,23 @@ { - "targets": { - "debug": { - "outFile": "build/debug.wasm", - "textFile": "build/debug.wat", - "sourceMap": true, - "debug": true - }, - "release": { - "outFile": "build/release.wasm", - "textFile": "build/release.wat", - "sourceMap": true, - "optimizeLevel": 3, - "shrinkLevel": 0, - "converge": false, - "noAssert": false - } - }, - "options": { - "bindings": "esm" - } -} \ No newline at end of file + "targets": { + "debug": { + "outFile": "build/debug.wasm", + "textFile": "build/debug.wat", + "sourceMap": true, + "debug": true + }, + "release": { + "outFile": "build/release.wasm", + "textFile": "build/release.wat", + "sourceMap": true, + "optimizeLevel": 3, + "shrinkLevel": 0, + "converge": false, + "noAssert": false + } + }, + "options": { + "bindings": "esm" + }, + "entries": ["assembly/index.ts", "assembly/splitmix64.ts"] +} diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 3bc5540ec6..8e36512249 100644 --- a/packages/xetchunk-wasm/package.json +++ 
b/packages/xetchunk-wasm/package.json @@ -34,6 +34,7 @@ } }, "devDependencies": { + "@huggingface/splitmix64-wasm": "workspace:*", "assemblyscript": "0.27.36" } } diff --git a/packages/xetchunk-wasm/pnpm-lock.yaml b/packages/xetchunk-wasm/pnpm-lock.yaml index 4e5f34eb68..f6fba643b3 100644 --- a/packages/xetchunk-wasm/pnpm-lock.yaml +++ b/packages/xetchunk-wasm/pnpm-lock.yaml @@ -15,6 +15,9 @@ importers: specifier: workspace:* version: link:../gearhash-wasm devDependencies: + '@huggingface/splitmix64-wasm': + specifier: workspace:* + version: link:../splitmix64-wasm assemblyscript: specifier: 0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/tests/index.js b/packages/xetchunk-wasm/tests/index.js index f1a485f785..ce33606b5e 100644 --- a/packages/xetchunk-wasm/tests/index.js +++ b/packages/xetchunk-wasm/tests/index.js @@ -1,17 +1,37 @@ -import { createChunker, finalize, nextBlock } from "../build/debug.js"; +import { createChunker, finalize, nextBlock, generateRandomArray } from "../build/debug.js"; +import assert from "assert"; -const chunker = createChunker(Math.pow(2, 12)); +const data1 = generateRandomArray(100_000, 0); -const data = new Uint8Array(100_000); +// log first 8 bytes +console.log(data1.slice(0, 8)); -for (let i = 0; i < data.length; i++) { - data[i] = i; +// Do same with nextUint64 instead of nextUint8 +const prng64 = splitmix64([0, 0]); + +for (let i = 0; i < 8; i++) { + const value = nextUint64(prng64); + console.log(value); + + // Log all 8 bytes + console.log(value[0].toString(16)); + console.log(value[1].toString(16)); } -const chunks = nextBlock(chunker, data); +assert.strictEqual(data1[0], 175); + +// const chunker = createChunker(Math.pow(2, 12)); + +// const data = new Uint8Array(100_000); + +// for (let i = 0; i < data.length; i++) { +// data[i] = Math.floor(Math.random() * 256); +// } + +// const chunks = nextBlock(chunker, data); -console.log("chunks", chunks); +// console.log("chunks", chunks); -const lastChunk = 
finalize(chunker); +// const lastChunk = finalize(chunker); -console.log("lastChunk", lastChunk); +// console.log("lastChunk", lastChunk); diff --git a/pnpm-workspace.yaml b/pnpm-workspace.yaml index bc118ffdff..9285ebfb63 100644 --- a/pnpm-workspace.yaml +++ b/pnpm-workspace.yaml @@ -17,3 +17,4 @@ packages: - "packages/gearhash-wasm" - "packages/blake3-wasm" - "packages/xetchunk-wasm" + - "packages/splitmix64-wasm" From 3ec13eb3b789bb9b375337086ff2eb2d84d40bff Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Mon, 7 Jul 2025 16:51:57 +0200 Subject: [PATCH 28/44] const data test passes --- packages/splitmix64-wasm/README.md | 2 + packages/splitmix64-wasm/assembly/index.ts | 2 +- packages/xetchunk-wasm/asconfig.json | 3 +- packages/xetchunk-wasm/assembly/index.ts | 2 +- packages/xetchunk-wasm/tests/index.js | 212 +++++++++-- packages/xetchunk-wasm/tests/reference.rs | 406 +++++++++++++++++++++ 6 files changed, 601 insertions(+), 26 deletions(-) create mode 100644 packages/xetchunk-wasm/tests/reference.rs diff --git a/packages/splitmix64-wasm/README.md b/packages/splitmix64-wasm/README.md index 90f0cf2413..28b3f6c79c 100644 --- a/packages/splitmix64-wasm/README.md +++ b/packages/splitmix64-wasm/README.md @@ -2,6 +2,8 @@ JS and WASM implementations of splitmix-64 Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. +The use of WASM is more for 64 bit arithmetic than for performance. + Used internally to reproduce rust tests Let us know if you want us to expose more functions. 
diff --git a/packages/splitmix64-wasm/assembly/index.ts b/packages/splitmix64-wasm/assembly/index.ts index f40a276c7a..8ac90cf7d9 100644 --- a/packages/splitmix64-wasm/assembly/index.ts +++ b/packages/splitmix64-wasm/assembly/index.ts @@ -36,5 +36,5 @@ export function createRandomArray(size: u32, seed: u64): ArrayBuffer { z = (z ^ (z >> 27)) * 0x94d049bb133111eb; view.setUint64(i, z ^ (z >> 31), true); } - return array; + return array.slice(0, size); } diff --git a/packages/xetchunk-wasm/asconfig.json b/packages/xetchunk-wasm/asconfig.json index 6b1186547a..b0711e8472 100644 --- a/packages/xetchunk-wasm/asconfig.json +++ b/packages/xetchunk-wasm/asconfig.json @@ -18,6 +18,5 @@ }, "options": { "bindings": "esm" - }, - "entries": ["assembly/index.ts", "assembly/splitmix64.ts"] + } } diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index b8b8b62702..b621ce6173 100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1 +1 @@ -export { createChunker, finalize, nextBlock } from "./xet-chunker"; +export { createChunker, finalize, nextBlock, getChunks } from "./xet-chunker"; diff --git a/packages/xetchunk-wasm/tests/index.js b/packages/xetchunk-wasm/tests/index.js index ce33606b5e..c4c4c60f66 100644 --- a/packages/xetchunk-wasm/tests/index.js +++ b/packages/xetchunk-wasm/tests/index.js @@ -1,37 +1,205 @@ -import { createChunker, finalize, nextBlock, generateRandomArray } from "../build/debug.js"; +import { createChunker, finalize, nextBlock, getChunks } from "../build/debug.js"; +import { createRandomArray } from "@huggingface/splitmix64-wasm"; + import assert from "assert"; -const data1 = generateRandomArray(100_000, 0); +// Helper function to get chunk boundaries from chunks +function getChunkBoundaries(chunks) { + let pos = 0; + return chunks.map((chunk) => { + pos += chunk.length; + return pos; + }); +} + +// Test 1: Basic functionality with 1MB random data +function 
testCorrectness1mbRandomData() { + console.log("Testing 1MB random data..."); + + // Create 1MB of random data with seed 0 + const dataBuffer = createRandomArray(1000000, 0); + const data = new Uint8Array(dataBuffer); + + // Verify specific byte values (from Rust reference) + assert.strictEqual(data[0], 175); + assert.strictEqual(data[127], 132); + assert.strictEqual(data[111111], 118); + + // Get chunks using the default chunker + const chunks = getChunks(data); + + // Get chunk boundaries + const chunkBoundaries = getChunkBoundaries(chunks); + + // Expected boundaries from Rust reference + const expectedBoundaries = [ + 84493, 134421, 144853, 243318, 271793, 336457, 467529, 494581, 582000, 596735, 616815, 653164, 678202, 724510, + 815591, 827760, 958832, 991092, 1000000, + ]; + + assert.deepStrictEqual(chunkBoundaries, expectedBoundaries); + console.log("✓ 1MB random data test passed"); +} + +// Test 2: Constant data test +function testCorrectness1mbConstData() { + console.log("Testing 1MB constant data..."); + + // Create 1MB of constant data (value 59) + const data = new Uint8Array(1000000); + data.fill(59); + + // Get chunks using the default chunker + const chunks = getChunks(data); + + // Get chunk boundaries + const chunkBoundaries = getChunkBoundaries(chunks); + + // Expected boundaries from Rust reference + const expectedBoundaries = [131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000]; + + assert.deepStrictEqual(chunkBoundaries, expectedBoundaries); + console.log("✓ 1MB constant data test passed"); +} + +// Test 3: Chunk boundary consistency test +function testChunkBoundaries() { + console.log("Testing chunk boundary consistency..."); + + // Create 256KB of random data with seed 1 + const dataBuffer = createRandomArray(256000, 1n); + const data = new Uint8Array(dataBuffer); + + // Get reference chunks using the default chunker + const refChunks = getChunks(data); + const refChunkBoundaries = getChunkBoundaries(refChunks); -// log first 8 
bytes -console.log(data1.slice(0, 8)); + // Test with different block sizes + for (const addSize of [1, 37, 255]) { + const chunker = createChunker(); + const altChunks = []; -// Do same with nextUint64 instead of nextUint8 -const prng64 = splitmix64([0, 0]); + let pos = 0; + while (pos < data.length) { + const nextPos = Math.min(pos + addSize, data.length); + const nextChunk = nextBlock(chunker, data.subarray(pos, nextPos)); + altChunks.push(...nextChunk); + pos = nextPos; + } -for (let i = 0; i < 8; i++) { - const value = nextUint64(prng64); - console.log(value); + // Finalize to get any remaining chunk + const finalChunk = finalize(chunker); + if (finalChunk) { + altChunks.push(finalChunk); + } - // Log all 8 bytes - console.log(value[0].toString(16)); - console.log(value[1].toString(16)); + const altBoundaries = getChunkBoundaries(altChunks); + assert.deepStrictEqual(altBoundaries, refChunkBoundaries); + } + + console.log("✓ Chunk boundary consistency test passed"); } -assert.strictEqual(data1[0], 175); +// Test 4: Triggering data test (simplified version) +function testTriggeringData() { + console.log("Testing triggering data..."); + + // Create a pattern that triggers boundary detection + // This is a simplified version of the Rust test + const pattern = new Uint8Array([ + 154, 52, 42, 34, 159, 75, 126, 224, 70, 236, 12, 196, 79, 236, 178, 124, 127, 50, 99, 178, 44, 176, 174, 126, 250, + 235, 205, 174, 252, 122, 35, 10, 20, 101, 214, 69, 193, 8, 115, 105, 158, 228, 120, 111, 136, 162, 198, 251, 211, + 183, 253, 252, 164, 147, 63, 16, 186, 162, 117, 23, 170, 36, 205, 187, 174, 76, 210, 174, 211, 175, 12, 173, 145, + 59, 2, 70, 222, 181, 159, 227, 182, 156, 189, 51, 226, 106, 24, 50, 183, 157, 140, 10, 8, 23, 212, 70, 10, 234, 23, + 33, 219, 254, 39, 236, 70, 49, 191, 116, 9, 115, 15, 101, 26, 159, 165, 220, 15, 170, 56, 125, 92, 163, 94, 235, 38, + 40, 49, 81, + ]); -// const chunker = createChunker(Math.pow(2, 12)); + // Create 64KB of data by repeating the 
pattern + const data = new Uint8Array(65536); + let pos = 0; + while (pos < data.length) { + const remaining = data.length - pos; + const copySize = Math.min(pattern.length, remaining); + data.set(pattern.subarray(0, copySize), pos); + pos += copySize; + } -// const data = new Uint8Array(100_000); + // Test with different padding values + const testCases = [ + { padding: 0, expectedBoundaries: [8256, 16448, 24640, 32832, 41024, 49216, 57408, 65536] }, + { padding: 1, expectedBoundaries: [8191, 16447, 24703, 32959, 41215, 49471, 57727, 65536] }, + { padding: 2, expectedBoundaries: [8254, 16574, 24894, 33214, 41534, 49854, 58174, 65536] }, + ]; -// for (let i = 0; i < data.length; i++) { -// data[i] = Math.floor(Math.random() * 256); -// } + for (const testCase of testCases) { + // Create data with specific padding + const paddedData = new Uint8Array(65536 + testCase.padding); + paddedData.set(data); + paddedData.fill(0, 65536); // Add padding -// const chunks = nextBlock(chunker, data); + // Verify the specific byte at position 11111 + if (testCase.padding === 0) { + assert.strictEqual(paddedData[11111], 236); + } else if (testCase.padding === 1) { + assert.strictEqual(paddedData[11111], 50); + } else if (testCase.padding === 2) { + assert.strictEqual(paddedData[11111], 36); + } + + // Get chunks + const chunks = getChunks(paddedData); + const chunkBoundaries = getChunkBoundaries(chunks); + + assert.deepStrictEqual(chunkBoundaries, testCase.expectedBoundaries); + } + + console.log("✓ Triggering data test passed"); +} -// console.log("chunks", chunks); +// Test 5: Basic chunker functionality +function testBasicChunkerFunctionality() { + console.log("Testing basic chunker functionality..."); -// const lastChunk = finalize(chunker); + // Create a small test data + const data = new Uint8Array(100000); + for (let i = 0; i < data.length; i++) { + data[i] = Math.floor(Math.random() * 256); + } -// console.log("lastChunk", lastChunk); + // Test chunker creation and usage + 
const chunker = createChunker(); + const chunks = nextBlock(chunker, data); + const finalChunk = finalize(chunker); + + // Verify chunks have the expected structure + for (const chunk of chunks) { + assert.strictEqual(typeof chunk.length, "number"); + assert.strictEqual(typeof chunk.hash, "object"); + assert.strictEqual(chunk.hash instanceof Uint8Array, true); + } + + if (finalChunk) { + assert.strictEqual(typeof finalChunk.length, "number"); + assert.strictEqual(typeof finalChunk.hash, "object"); + assert.strictEqual(finalChunk.hash instanceof Uint8Array, true); + } + + console.log("✓ Basic chunker functionality test passed"); +} + +// Run all tests +console.log("Running xetchunk-wasm tests...\n"); + +try { + testChunkBoundaries(); + testCorrectness1mbConstData(); + testCorrectness1mbRandomData(); + testTriggeringData(); + testBasicChunkerFunctionality(); + + console.log("\n🎉 All tests passed!"); +} catch (error) { + console.error("❌ Test failed:", error.message); + process.exit(1); +} diff --git a/packages/xetchunk-wasm/tests/reference.rs b/packages/xetchunk-wasm/tests/reference.rs new file mode 100644 index 0000000000..8fbee5a6b3 --- /dev/null +++ b/packages/xetchunk-wasm/tests/reference.rs @@ -0,0 +1,406 @@ + +fn get_chunk_boundaries(chunks: &[Chunk]) -> Vec { + chunks + .iter() + .scan(0, |state, chunk| { + *state += chunk.data.len(); + Some(*state) + }) + .collect() +} + +#[test] +fn test_chunk_boundaries() { + let data = create_random_data(256000, 1); + + // Now, run the chunks through the default chunker. + let chunks = Chunker::default().next_block(&data, true); + + // Get the boundaries indices as determined by the size of the chunks above. + let ref_chunk_boundaries: Vec = get_chunk_boundaries(&chunks); + + // Test that it's correct across different chunk varieties. 
+ for add_size in [1, 37, 255] { + let mut chunker = Chunker::default(); + + // Add repeatedly in blocks of add_size, appending to alt_chunks + let mut alt_chunks = Vec::with_capacity(chunks.len()); + + let mut pos = 0; + while pos < data.len() { + let next_pos = (pos + add_size).min(data.len()); + let next_chunk = chunker.next_block(&data[pos..next_pos], next_pos == data.len()); + alt_chunks.extend(next_chunk); + pos = next_pos; + } + + let alt_boundaries = get_chunk_boundaries(&alt_chunks); + + assert_eq!(alt_boundaries, ref_chunk_boundaries); + } +} + +#[test] +fn test_correctness_1mb_random_data() { + // Test this data. + let data = create_random_data(1000000, 0); + + // Uncomment these to create the lines below: + // eprintln!("(data[0], {});", data[0] as usize); + // eprintln!("(data[127], {});", data[127] as usize); + // eprintln!("(data[111111], {});", data[111111] as usize); + + assert_eq!(data[0], 175); + assert_eq!(data[127], 132); + assert_eq!(data[111111], 118); + + // Now, run the chunks through the default chunker. + let chunks = Chunker::default().next_block(&data, true); + + // Get the boundaries indices as determined by the size of the chunks above. + let chunk_boundaries: Vec = get_chunk_boundaries(&chunks); + + // Uncomment this to create the line below. + // eprintln!("assert_eq!(chunk_boundaries, vec!{chunk_boundaries:?})"); + assert_eq!( + chunk_boundaries, + vec![ + 84493, 134421, 144853, 243318, 271793, 336457, 467529, 494581, 582000, 596735, 616815, 653164, 678202, + 724510, 815591, 827760, 958832, 991092, 1000000 + ] + ); +} + +#[test] +fn test_correctness_1mb_const_data() { + // Test this data. + let data = vec![59u8; 1000000]; + + // Now, run the chunks through the default chunker. + let chunks = Chunker::default().next_block(&data, true); + + // Get the boundaries indices as determined by the size of the chunks above. + let chunk_boundaries: Vec = get_chunk_boundaries(&chunks); + + // Uncomment this to create the line below. 
+ // eprintln!("assert_eq!(chunk_boundaries, vec!{chunk_boundaries:?})"); + assert_eq!(chunk_boundaries, vec![131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000]) +} + +fn get_triggering_base_data(n: usize, padding: usize) -> Vec { + // This pattern is known to trigger the boundary detection in the chunker, so repeat it to test the + // correctness of the minimum chunk size processing. + let mut data = vec![ + 154, 52, 42, 34, 159, 75, 126, 224, 70, 236, 12, 196, 79, 236, 178, 124, 127, 50, 99, 178, 44, 176, 174, + 126, 250, 235, 205, 174, 252, 122, 35, 10, 20, 101, 214, 69, 193, 8, 115, 105, 158, 228, 120, 111, 136, + 162, 198, 251, 211, 183, 253, 252, 164, 147, 63, 16, 186, 162, 117, 23, 170, 36, 205, 187, 174, 76, 210, + 174, 211, 175, 12, 173, 145, 59, 2, 70, 222, 181, 159, 227, 182, 156, 189, 51, 226, 106, 24, 50, 183, 157, + 140, 10, 8, 23, 212, 70, 10, 234, 23, 33, 219, 254, 39, 236, 70, 49, 191, 116, 9, 115, 15, 101, 26, 159, + 165, 220, 15, 170, 56, 125, 92, 163, 94, 235, 38, 40, 49, 81, + ]; + + // Add padding so we can comprehensively test the nuances of boundaries. + data.resize(data.len() + padding, 0u8); + + // Repeat the above pattern until we've filled out n bytes. + while data.len() < n { + let n_take = (n - data.len()).min(data.len()); + data.extend_from_within(0..n_take); + } + + data +} + +#[test] +fn test_correctness_100kb_hitting_data() { + // To ensure we've checked all the nuances of dealing with minimum chunk boundaries, + // and with the correct chunks as well, run through all the different options with the padding, + // checking each one. With this, then, we have a pattern that hits once per pattern with varying + // bits between the widths. 
+ + let mut data_sample_at_11111 = [0u8; 128]; + let mut ref_cb = vec![Vec::new(); 128]; + + data_sample_at_11111[0] = 236; + ref_cb[0] = vec![8256, 16448, 24640, 32832, 41024, 49216, 57408, 65536]; + data_sample_at_11111[1] = 50; + ref_cb[1] = vec![8191, 16447, 24703, 32959, 41215, 49471, 57727, 65536]; + data_sample_at_11111[2] = 36; + ref_cb[2] = vec![8254, 16574, 24894, 33214, 41534, 49854, 58174, 65536]; + data_sample_at_11111[3] = 116; + ref_cb[3] = vec![8317, 16570, 24823, 33076, 41329, 49582, 57835, 65536]; + data_sample_at_11111[4] = 126; + ref_cb[4] = vec![8248, 16564, 24880, 33196, 41512, 49828, 58144, 65536]; + data_sample_at_11111[5] = 145; + ref_cb[5] = vec![8310, 16556, 24802, 33048, 41294, 49540, 57786, 65536]; + data_sample_at_11111[6] = 235; + ref_cb[6] = vec![8238, 16546, 24854, 33162, 41470, 49778, 58086, 65536]; + data_sample_at_11111[7] = 228; + ref_cb[7] = vec![8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536]; + data_sample_at_11111[8] = 70; + ref_cb[8] = vec![8224, 16520, 24816, 33112, 41408, 49704, 58000, 65536]; + data_sample_at_11111[9] = 178; + ref_cb[9] = vec![8284, 16504, 24724, 32944, 41164, 49384, 57604, 65536]; + data_sample_at_11111[10] = 173; + ref_cb[10] = vec![8206, 16486, 24766, 33046, 41326, 49606, 57886, 65536]; + data_sample_at_11111[11] = 0; + ref_cb[11] = vec![8265, 16466, 24667, 32868, 41069, 49270, 57471, 65536]; + data_sample_at_11111[12] = 252; + ref_cb[12] = vec![8324, 16452, 24704, 32832, 41084, 49212, 57464, 65536]; + data_sample_at_11111[13] = 159; + ref_cb[13] = vec![8242, 16561, 24880, 33199, 41518, 49837, 58156, 65536]; + data_sample_at_11111[14] = 69; + ref_cb[14] = vec![8300, 16536, 24772, 33008, 41244, 49480, 57716, 65536]; + data_sample_at_11111[15] = 219; + ref_cb[15] = vec![8215, 16509, 24803, 33097, 41391, 49685, 57979, 65536]; + data_sample_at_11111[16] = 126; + ref_cb[16] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[17] = 10; + ref_cb[17] = vec![8329, 
16457, 24714, 32842, 41099, 49227, 57484, 65536]; + data_sample_at_11111[18] = 124; + ref_cb[18] = vec![8240, 16562, 24884, 33206, 41528, 49850, 58172, 65536]; + data_sample_at_11111[19] = 24; + ref_cb[19] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[20] = 196; + ref_cb[20] = vec![8204, 16492, 24780, 33068, 41356, 49644, 57932, 65536]; + data_sample_at_11111[21] = 106; + ref_cb[21] = vec![8259, 16454, 24649, 32844, 41039, 49234, 57429, 65536]; + data_sample_at_11111[22] = 196; + ref_cb[22] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[23] = 183; + ref_cb[23] = vec![8218, 16523, 24828, 33133, 41438, 49743, 58048, 65536]; + data_sample_at_11111[24] = 124; + ref_cb[24] = vec![8128, 16328, 24536, 32744, 40952, 49160, 57368, 65536]; + data_sample_at_11111[25] = 70; + ref_cb[25] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[26] = 126; + ref_cb[26] = vec![8226, 16542, 24858, 33174, 41490, 49806, 58122, 65536]; + data_sample_at_11111[27] = 191; + ref_cb[27] = vec![8279, 16494, 24709, 32924, 41139, 49354, 57569, 65536]; + data_sample_at_11111[28] = 69; + ref_cb[28] = vec![8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536]; + data_sample_at_11111[29] = 163; + ref_cb[29] = vec![8128, 16392, 24713, 33034, 41355, 49676, 57997, 65536]; + data_sample_at_11111[30] = 252; + ref_cb[30] = vec![8280, 16496, 24712, 32928, 41144, 49360, 57576, 65536]; + data_sample_at_11111[31] = 0; + ref_cb[31] = vec![8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536]; + data_sample_at_11111[32] = 173; + ref_cb[32] = vec![8224, 16544, 24864, 33184, 41504, 49824, 58144, 65536]; + data_sample_at_11111[33] = 42; + ref_cb[33] = vec![8275, 16486, 24697, 32908, 41119, 49330, 57541, 65536]; + data_sample_at_11111[34] = 70; + ref_cb[34] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[35] = 174; + ref_cb[35] = vec![8214, 16527, 24840, 33153, 
41466, 49779, 58092, 65536]; + data_sample_at_11111[36] = 235; + ref_cb[36] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[37] = 186; + ref_cb[37] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[38] = 0; + ref_cb[38] = vec![8198, 16498, 24798, 33098, 41398, 49698, 57998, 65536]; + data_sample_at_11111[39] = 157; + ref_cb[39] = vec![8247, 16597, 24947, 33297, 41647, 49997, 58347, 65536]; + data_sample_at_11111[40] = 126; + ref_cb[40] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[41] = 49; + ref_cb[41] = vec![8345, 16626, 24907, 33188, 41469, 49750, 58031, 65536]; + data_sample_at_11111[42] = 36; + ref_cb[42] = vec![8224, 16554, 24884, 33214, 41544, 49874, 58204, 65536]; + data_sample_at_11111[43] = 0; + ref_cb[43] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[44] = 236; + ref_cb[44] = vec![8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536]; + data_sample_at_11111[45] = 105; + ref_cb[45] = vec![8195, 16499, 24803, 33107, 41411, 49715, 58019, 65536]; + data_sample_at_11111[46] = 0; + ref_cb[46] = vec![8242, 16594, 24946, 33298, 41650, 50002, 58354, 65536]; + data_sample_at_11111[47] = 24; + ref_cb[47] = vec![8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536]; + data_sample_at_11111[48] = 126; + ref_cb[48] = vec![8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536]; + data_sample_at_11111[49] = 0; + ref_cb[49] = vec![8206, 16525, 24844, 33163, 41482, 49801, 58120, 65536]; + data_sample_at_11111[50] = 70; + ref_cb[50] = vec![8252, 16618, 24984, 33350, 41716, 50082, 58448, 65536]; + data_sample_at_11111[51] = 236; + ref_cb[51] = vec![8298, 16532, 24766, 33000, 41234, 49468, 57702, 65536]; + data_sample_at_11111[52] = 0; + ref_cb[52] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[53] = 12; + ref_cb[53] = vec![8209, 16337, 24680, 32808, 41151, 49279, 57622, 65536]; + 
data_sample_at_11111[54] = 236; + ref_cb[54] = vec![8254, 16626, 24998, 33370, 41742, 50114, 58486, 65536]; + data_sample_at_11111[55] = 0; + ref_cb[55] = vec![8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536]; + data_sample_at_11111[56] = 173; + ref_cb[56] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[57] = 196; + ref_cb[57] = vec![8204, 16529, 24854, 33179, 41504, 49829, 58154, 65536]; + data_sample_at_11111[58] = 0; + ref_cb[58] = vec![8248, 16618, 24988, 33358, 41728, 50098, 58468, 65536]; + data_sample_at_11111[59] = 159; + ref_cb[59] = vec![8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536]; + data_sample_at_11111[60] = 178; + ref_cb[60] = vec![8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536]; + data_sample_at_11111[61] = 0; + ref_cb[61] = vec![8191, 16507, 24823, 33139, 41455, 49771, 58087, 65536]; + data_sample_at_11111[62] = 10; + ref_cb[62] = vec![8234, 16594, 24954, 33314, 41674, 50034, 58394, 65536]; + data_sample_at_11111[63] = 101; + ref_cb[63] = vec![8277, 16490, 24703, 32916, 41129, 49342, 57555, 65536]; + data_sample_at_11111[64] = 0; + ref_cb[64] = vec![8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536]; + data_sample_at_11111[65] = 15; + ref_cb[65] = vec![8363, 16662, 24961, 33260, 41559, 49858, 58157, 65536]; + data_sample_at_11111[66] = 147; + ref_cb[66] = vec![8212, 16554, 24896, 33238, 41580, 49922, 58264, 65536]; + data_sample_at_11111[67] = 0; + ref_cb[67] = vec![8254, 16639, 25024, 33409, 41794, 50179, 58564, 65536]; + data_sample_at_11111[68] = 0; + ref_cb[68] = vec![8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[69] = 227; + ref_cb[69] = vec![8338, 16612, 24886, 33160, 41434, 49708, 57982, 65536]; + data_sample_at_11111[70] = 126; + ref_cb[70] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[71] = 0; + ref_cb[71] = vec![8223, 16581, 24939, 33297, 41655, 50013, 58371, 65536]; + data_sample_at_11111[72] = 101; + 
ref_cb[72] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[73] = 186; + ref_cb[73] = vec![8305, 16546, 24787, 33028, 41269, 49510, 57751, 65536]; + data_sample_at_11111[74] = 52; + ref_cb[74] = vec![8346, 16628, 24910, 33192, 41474, 49756, 58038, 65536]; + data_sample_at_11111[75] = 0; + ref_cb[75] = vec![8387, 16515, 24830, 32958, 41273, 49401, 57716, 65536]; + data_sample_at_11111[76] = 70; + ref_cb[76] = vec![8224, 16588, 24952, 33316, 41680, 50044, 58408, 65536]; + data_sample_at_11111[77] = 228; + ref_cb[77] = vec![8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[78] = 0; + ref_cb[78] = vec![8128, 16338, 24578, 32818, 41058, 49298, 57538, 65536]; + data_sample_at_11111[79] = 0; + ref_cb[79] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[80] = 50; + ref_cb[80] = vec![8384, 16704, 25024, 33344, 41664, 49984, 58304, 65536]; + data_sample_at_11111[81] = 214; + ref_cb[81] = vec![8215, 16575, 24935, 33295, 41655, 50015, 58375, 65536]; + data_sample_at_11111[82] = 0; + ref_cb[82] = vec![8254, 16654, 25054, 33454, 41854, 50254, 58654, 65536]; + data_sample_at_11111[83] = 0; + ref_cb[83] = vec![8293, 16522, 24751, 32980, 41209, 49438, 57667, 65536]; + data_sample_at_11111[84] = 50; + ref_cb[84] = vec![8128, 16388, 24656, 32924, 41192, 49460, 57728, 65536]; + data_sample_at_11111[85] = 69; + ref_cb[85] = vec![8371, 16678, 24985, 33292, 41599, 49906, 58213, 65536]; + data_sample_at_11111[86] = 0; + ref_cb[86] = vec![8196, 16324, 24674, 32802, 41152, 49280, 57630, 65536]; + data_sample_at_11111[87] = 0; + ref_cb[87] = vec![8234, 16619, 25004, 33389, 41774, 50159, 58544, 65536]; + data_sample_at_11111[88] = 70; + ref_cb[88] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[89] = 136; + ref_cb[89] = vec![8128, 16339, 24585, 32831, 41077, 49323, 57569, 65536]; + data_sample_at_11111[90] = 0; + ref_cb[90] = vec![8348, 16632, 24916, 
33200, 41484, 49768, 58052, 65536]; + data_sample_at_11111[91] = 0; + ref_cb[91] = vec![8386, 16708, 25030, 33352, 41674, 49996, 58318, 65536]; + data_sample_at_11111[92] = 101; + ref_cb[92] = vec![8204, 16564, 24924, 33284, 41644, 50004, 58364, 65536]; + data_sample_at_11111[93] = 36; + ref_cb[93] = vec![8241, 16639, 25037, 33435, 41833, 50231, 58629, 65536]; + data_sample_at_11111[94] = 196; + ref_cb[94] = vec![8278, 16492, 24706, 32920, 41134, 49348, 57562, 65536]; + data_sample_at_11111[95] = 0; + ref_cb[95] = vec![8315, 16566, 24817, 33068, 41319, 49570, 57821, 65536]; + data_sample_at_11111[96] = 0; + ref_cb[96] = vec![8352, 16640, 24928, 33216, 41504, 49792, 58080, 65536]; + data_sample_at_11111[97] = 24; + ref_cb[97] = vec![8389, 16714, 25039, 33364, 41689, 50014, 58339, 65536]; + data_sample_at_11111[98] = 8; + ref_cb[98] = vec![8200, 16562, 24924, 33286, 41648, 50010, 58372, 65536]; + data_sample_at_11111[99] = 0; + ref_cb[99] = vec![8236, 16635, 25034, 33433, 41832, 50231, 58630, 65536]; + data_sample_at_11111[100] = 0; + ref_cb[100] = vec![8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[101] = 125; + ref_cb[101] = vec![8308, 16552, 24796, 33040, 41284, 49528, 57772, 65536]; + data_sample_at_11111[102] = 173; + ref_cb[102] = vec![8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[103] = 126; + ref_cb[103] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[104] = 0; + ref_cb[104] = vec![8416, 16544, 24888, 33016, 41360, 49488, 57832, 65536]; + data_sample_at_11111[105] = 0; + ref_cb[105] = vec![8219, 16607, 24995, 33383, 41771, 50159, 58547, 65536]; + data_sample_at_11111[106] = 159; + ref_cb[106] = vec![8254, 16678, 25102, 33526, 41950, 50374, 58798, 65536]; + data_sample_at_11111[107] = 210; + ref_cb[107] = vec![8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536]; + data_sample_at_11111[108] = 178; + ref_cb[108] = vec![8324, 16584, 24844, 33104, 41364, 
49624, 57884, 65536]; + data_sample_at_11111[109] = 0; + ref_cb[109] = vec![8359, 16654, 24949, 33244, 41539, 49834, 58129, 65536]; + data_sample_at_11111[110] = 0; + ref_cb[110] = vec![8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536]; + data_sample_at_11111[111] = 170; + ref_cb[111] = vec![8429, 16794, 25159, 33524, 41889, 50254, 58619, 65536]; + data_sample_at_11111[112] = 173; + ref_cb[112] = vec![8224, 16624, 25024, 33424, 41824, 50224, 58624, 65536]; + data_sample_at_11111[113] = 235; + ref_cb[113] = vec![8258, 16452, 24646, 32840, 41034, 49228, 57422, 65536]; + data_sample_at_11111[114] = 0; + ref_cb[114] = vec![8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536]; + data_sample_at_11111[115] = 0; + ref_cb[115] = vec![8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[116] = 0; + ref_cb[116] = vec![8360, 16656, 24952, 33248, 41544, 49840, 58136, 65536]; + data_sample_at_11111[117] = 24; + ref_cb[117] = vec![8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536]; + data_sample_at_11111[118] = 228; + ref_cb[118] = vec![8428, 16792, 25156, 33520, 41884, 50248, 58612, 65536]; + data_sample_at_11111[119] = 0; + ref_cb[119] = vec![8215, 16613, 25011, 33409, 41807, 50205, 58603, 65536]; + data_sample_at_11111[120] = 0; + ref_cb[120] = vec![8248, 16680, 25112, 33544, 41976, 50408, 58840, 65536]; + data_sample_at_11111[121] = 0; + ref_cb[121] = vec![8281, 16498, 24715, 32932, 41149, 49366, 57583, 65536]; + data_sample_at_11111[122] = 101; + ref_cb[122] = vec![8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[123] = 174; + ref_cb[123] = vec![8347, 16630, 24913, 33196, 41479, 49762, 58045, 65536]; + data_sample_at_11111[124] = 126; + ref_cb[124] = vec![8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[125] = 0; + ref_cb[125] = vec![8413, 16762, 25111, 33460, 41809, 50158, 58507, 65536]; + data_sample_at_11111[126] = 0; + ref_cb[126] = vec![8192, 16574, 24956, 33338, 41720, 
50102, 58484, 65536]; + data_sample_at_11111[127] = 0; + ref_cb[127] = vec![8224, 16639, 25054, 33469, 41884, 50299, 58714, 65536]; + + // Now run the loop with this reference data. + for i in 0..128 { + let data = get_triggering_base_data(65536, i); + + // This check is here so that the tests written against this chunker + // can verify that the test data input is correct. + assert_eq!(data[11111], data_sample_at_11111[i]); + + // Uncomment to create the line above. + // eprintln!("data_sample_at_11111[{i}]={};", data[11111]); + + // Now, run the chunks through the default chunker. + let chunks = Chunker::default().next_block(&data, true); + + // Get the boundaries indices as determined by the size of the chunks above. + let chunk_boundaries: Vec = get_chunk_boundaries(&chunks); + + // Uncomment this to generate the table above. + // eprintln!("ref_cb[{i}]=vec!{chunk_boundaries:?};"); + + assert_eq!(chunk_boundaries, ref_cb[i]); + } + + // eprintln!("assert_eq!(chunk_boundaries, vec!{chunk_boundaries:?})"); + // assert_eq!(chunk_boundaries, vec![131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000]) +} \ No newline at end of file From 265c26eae0bf842139eb715404dd7db9c411aab2 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Mon, 7 Jul 2025 17:08:44 +0200 Subject: [PATCH 29/44] fully test random data --- packages/xetchunk-wasm/assembly/xet-chunker.ts | 1 + packages/xetchunk-wasm/tests/index.js | 14 +++++++++++--- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 0abfebbfec..d3975c5868 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -143,6 +143,7 @@ export function finalize(chunker: XetChunker): Chunk | null { } export function getChunks(data: Uint8Array, targetChunkSize: i32 = TARGET_CHUNK_SIZE): Chunk[] { + // console.log(`getChunks: ${targetChunkSize} ${data.length}`); const 
chunker = createChunker(targetChunkSize); return chunker.nextBlock(data, true); } diff --git a/packages/xetchunk-wasm/tests/index.js b/packages/xetchunk-wasm/tests/index.js index c4c4c60f66..0bcdb4a609 100644 --- a/packages/xetchunk-wasm/tests/index.js +++ b/packages/xetchunk-wasm/tests/index.js @@ -13,7 +13,7 @@ function getChunkBoundaries(chunks) { } // Test 1: Basic functionality with 1MB random data -function testCorrectness1mbRandomData() { +async function testCorrectness1mbRandomData() { console.log("Testing 1MB random data..."); // Create 1MB of random data with seed 0 @@ -25,6 +25,14 @@ function testCorrectness1mbRandomData() { assert.strictEqual(data[127], 132); assert.strictEqual(data[111111], 118); + const referenceSha256 = "b3d0a1f7938cd4d8413a4dcffd4313e2e8ac0cb61cb1090eb140ea8e9154befb"; + const sha256 = await crypto.subtle.digest("SHA-256", data); + const sha256Hex = Array.from(new Uint8Array(sha256)) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + assert.strictEqual(sha256Hex, referenceSha256); + console.log("✓ 1MB random data sha256 test passed"); + // Get chunks using the default chunker const chunks = getChunks(data); @@ -194,9 +202,9 @@ console.log("Running xetchunk-wasm tests...\n"); try { testChunkBoundaries(); testCorrectness1mbConstData(); - testCorrectness1mbRandomData(); - testTriggeringData(); testBasicChunkerFunctionality(); + await testCorrectness1mbRandomData(); + testTriggeringData(); console.log("\n🎉 All tests passed!"); } catch (error) { From 275eb601cb250aa9d767eb454c9d009584c5c668 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Mon, 7 Jul 2025 17:44:54 +0200 Subject: [PATCH 30/44] fix mask in wasm xet chunking --- packages/gearhash-wasm/package.json | 5 +++-- packages/gearhash-wasm/pnpm-lock.yaml | 3 +++ packages/xetchunk-wasm/assembly/xet-chunker.ts | 2 +- 3 files changed, 7 insertions(+), 3 deletions(-) diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index 
60d0ae0cd9..b470831cde 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -5,7 +5,7 @@ "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", - "test": "node tests", + "test": "node tests && node tests/xet-data.js", "prepare": "pnpm run build" }, "keywords": [ @@ -28,6 +28,7 @@ } }, "devDependencies": { - "assemblyscript": "0.27.36" + "assemblyscript": "0.27.36", + "@huggingface/splitmix64-wasm": "workspace:*" } } diff --git a/packages/gearhash-wasm/pnpm-lock.yaml b/packages/gearhash-wasm/pnpm-lock.yaml index 9d7ac0a92a..b8bd0747a6 100644 --- a/packages/gearhash-wasm/pnpm-lock.yaml +++ b/packages/gearhash-wasm/pnpm-lock.yaml @@ -8,6 +8,9 @@ importers: .: devDependencies: + '@huggingface/splitmix64-wasm': + specifier: workspace:* + version: link:../splitmix64-wasm assemblyscript: specifier: 0.27.36 version: 0.27.36 diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index d3975c5868..32afa6e2e6 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -40,7 +40,7 @@ class XetChunker { let mask = (targetChunkSize - 1) as u64; // Shift mask left by leading zeros count - mask = mask << (64 - clz(mask)); + mask = mask << clz(mask); const maximumChunk = targetChunkSize * MAXIMUM_CHUNK_MULTIPLIER; From dd1bd542a9183c4400d73754cd3b7fbc45461285 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 11:04:58 +0200 Subject: [PATCH 31/44] fix test --- packages/xetchunk-wasm/package.json | 2 +- packages/xetchunk-wasm/tests/index.js | 213 ---------- packages/xetchunk-wasm/tests/index.test.ts | 430 +++++++++++++++++++++ packages/xetchunk-wasm/vitest.config.ts | 13 + 4 files changed, 444 insertions(+), 214 deletions(-) delete mode 100644 packages/xetchunk-wasm/tests/index.js create mode 100644 
packages/xetchunk-wasm/tests/index.test.ts create mode 100644 packages/xetchunk-wasm/vitest.config.ts diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 8e36512249..2989bef841 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -5,7 +5,7 @@ "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", - "test": "node tests", + "test": "vitest run", "prepare": "pnpm run build" }, "keywords": [ diff --git a/packages/xetchunk-wasm/tests/index.js b/packages/xetchunk-wasm/tests/index.js deleted file mode 100644 index 0bcdb4a609..0000000000 --- a/packages/xetchunk-wasm/tests/index.js +++ /dev/null @@ -1,213 +0,0 @@ -import { createChunker, finalize, nextBlock, getChunks } from "../build/debug.js"; -import { createRandomArray } from "@huggingface/splitmix64-wasm"; - -import assert from "assert"; - -// Helper function to get chunk boundaries from chunks -function getChunkBoundaries(chunks) { - let pos = 0; - return chunks.map((chunk) => { - pos += chunk.length; - return pos; - }); -} - -// Test 1: Basic functionality with 1MB random data -async function testCorrectness1mbRandomData() { - console.log("Testing 1MB random data..."); - - // Create 1MB of random data with seed 0 - const dataBuffer = createRandomArray(1000000, 0); - const data = new Uint8Array(dataBuffer); - - // Verify specific byte values (from Rust reference) - assert.strictEqual(data[0], 175); - assert.strictEqual(data[127], 132); - assert.strictEqual(data[111111], 118); - - const referenceSha256 = "b3d0a1f7938cd4d8413a4dcffd4313e2e8ac0cb61cb1090eb140ea8e9154befb"; - const sha256 = await crypto.subtle.digest("SHA-256", data); - const sha256Hex = Array.from(new Uint8Array(sha256)) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - assert.strictEqual(sha256Hex, referenceSha256); - console.log("✓ 1MB random 
data sha256 test passed"); - - // Get chunks using the default chunker - const chunks = getChunks(data); - - // Get chunk boundaries - const chunkBoundaries = getChunkBoundaries(chunks); - - // Expected boundaries from Rust reference - const expectedBoundaries = [ - 84493, 134421, 144853, 243318, 271793, 336457, 467529, 494581, 582000, 596735, 616815, 653164, 678202, 724510, - 815591, 827760, 958832, 991092, 1000000, - ]; - - assert.deepStrictEqual(chunkBoundaries, expectedBoundaries); - console.log("✓ 1MB random data test passed"); -} - -// Test 2: Constant data test -function testCorrectness1mbConstData() { - console.log("Testing 1MB constant data..."); - - // Create 1MB of constant data (value 59) - const data = new Uint8Array(1000000); - data.fill(59); - - // Get chunks using the default chunker - const chunks = getChunks(data); - - // Get chunk boundaries - const chunkBoundaries = getChunkBoundaries(chunks); - - // Expected boundaries from Rust reference - const expectedBoundaries = [131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000]; - - assert.deepStrictEqual(chunkBoundaries, expectedBoundaries); - console.log("✓ 1MB constant data test passed"); -} - -// Test 3: Chunk boundary consistency test -function testChunkBoundaries() { - console.log("Testing chunk boundary consistency..."); - - // Create 256KB of random data with seed 1 - const dataBuffer = createRandomArray(256000, 1n); - const data = new Uint8Array(dataBuffer); - - // Get reference chunks using the default chunker - const refChunks = getChunks(data); - const refChunkBoundaries = getChunkBoundaries(refChunks); - - // Test with different block sizes - for (const addSize of [1, 37, 255]) { - const chunker = createChunker(); - const altChunks = []; - - let pos = 0; - while (pos < data.length) { - const nextPos = Math.min(pos + addSize, data.length); - const nextChunk = nextBlock(chunker, data.subarray(pos, nextPos)); - altChunks.push(...nextChunk); - pos = nextPos; - } - - // Finalize to 
get any remaining chunk - const finalChunk = finalize(chunker); - if (finalChunk) { - altChunks.push(finalChunk); - } - - const altBoundaries = getChunkBoundaries(altChunks); - assert.deepStrictEqual(altBoundaries, refChunkBoundaries); - } - - console.log("✓ Chunk boundary consistency test passed"); -} - -// Test 4: Triggering data test (simplified version) -function testTriggeringData() { - console.log("Testing triggering data..."); - - // Create a pattern that triggers boundary detection - // This is a simplified version of the Rust test - const pattern = new Uint8Array([ - 154, 52, 42, 34, 159, 75, 126, 224, 70, 236, 12, 196, 79, 236, 178, 124, 127, 50, 99, 178, 44, 176, 174, 126, 250, - 235, 205, 174, 252, 122, 35, 10, 20, 101, 214, 69, 193, 8, 115, 105, 158, 228, 120, 111, 136, 162, 198, 251, 211, - 183, 253, 252, 164, 147, 63, 16, 186, 162, 117, 23, 170, 36, 205, 187, 174, 76, 210, 174, 211, 175, 12, 173, 145, - 59, 2, 70, 222, 181, 159, 227, 182, 156, 189, 51, 226, 106, 24, 50, 183, 157, 140, 10, 8, 23, 212, 70, 10, 234, 23, - 33, 219, 254, 39, 236, 70, 49, 191, 116, 9, 115, 15, 101, 26, 159, 165, 220, 15, 170, 56, 125, 92, 163, 94, 235, 38, - 40, 49, 81, - ]); - - // Create 64KB of data by repeating the pattern - const data = new Uint8Array(65536); - let pos = 0; - while (pos < data.length) { - const remaining = data.length - pos; - const copySize = Math.min(pattern.length, remaining); - data.set(pattern.subarray(0, copySize), pos); - pos += copySize; - } - - // Test with different padding values - const testCases = [ - { padding: 0, expectedBoundaries: [8256, 16448, 24640, 32832, 41024, 49216, 57408, 65536] }, - { padding: 1, expectedBoundaries: [8191, 16447, 24703, 32959, 41215, 49471, 57727, 65536] }, - { padding: 2, expectedBoundaries: [8254, 16574, 24894, 33214, 41534, 49854, 58174, 65536] }, - ]; - - for (const testCase of testCases) { - // Create data with specific padding - const paddedData = new Uint8Array(65536 + testCase.padding); - 
paddedData.set(data); - paddedData.fill(0, 65536); // Add padding - - // Verify the specific byte at position 11111 - if (testCase.padding === 0) { - assert.strictEqual(paddedData[11111], 236); - } else if (testCase.padding === 1) { - assert.strictEqual(paddedData[11111], 50); - } else if (testCase.padding === 2) { - assert.strictEqual(paddedData[11111], 36); - } - - // Get chunks - const chunks = getChunks(paddedData); - const chunkBoundaries = getChunkBoundaries(chunks); - - assert.deepStrictEqual(chunkBoundaries, testCase.expectedBoundaries); - } - - console.log("✓ Triggering data test passed"); -} - -// Test 5: Basic chunker functionality -function testBasicChunkerFunctionality() { - console.log("Testing basic chunker functionality..."); - - // Create a small test data - const data = new Uint8Array(100000); - for (let i = 0; i < data.length; i++) { - data[i] = Math.floor(Math.random() * 256); - } - - // Test chunker creation and usage - const chunker = createChunker(); - const chunks = nextBlock(chunker, data); - const finalChunk = finalize(chunker); - - // Verify chunks have the expected structure - for (const chunk of chunks) { - assert.strictEqual(typeof chunk.length, "number"); - assert.strictEqual(typeof chunk.hash, "object"); - assert.strictEqual(chunk.hash instanceof Uint8Array, true); - } - - if (finalChunk) { - assert.strictEqual(typeof finalChunk.length, "number"); - assert.strictEqual(typeof finalChunk.hash, "object"); - assert.strictEqual(finalChunk.hash instanceof Uint8Array, true); - } - - console.log("✓ Basic chunker functionality test passed"); -} - -// Run all tests -console.log("Running xetchunk-wasm tests...\n"); - -try { - testChunkBoundaries(); - testCorrectness1mbConstData(); - testBasicChunkerFunctionality(); - await testCorrectness1mbRandomData(); - testTriggeringData(); - - console.log("\n🎉 All tests passed!"); -} catch (error) { - console.error("❌ Test failed:", error.message); - process.exit(1); -} diff --git 
a/packages/xetchunk-wasm/tests/index.test.ts b/packages/xetchunk-wasm/tests/index.test.ts new file mode 100644 index 0000000000..830e37d887 --- /dev/null +++ b/packages/xetchunk-wasm/tests/index.test.ts @@ -0,0 +1,430 @@ +import { describe, it, expect } from "vitest"; +import { createChunker, finalize, nextBlock, getChunks } from "../build/debug.js"; +import { createRandomArray } from "@huggingface/splitmix64-wasm"; + +// Helper function to get chunk boundaries from chunks +function getChunkBoundaries(chunks: Array<{ length: number; hash: Uint8Array }>): number[] { + let pos = 0; + return chunks.map((chunk) => { + pos += chunk.length; + return pos; + }); +} + +describe("xetchunk-wasm", () => { + describe("Basic functionality with 1MB random data", () => { + it("should pass 1MB random data test", async () => { + // Create 1MB of random data with seed 0 + const dataBuffer = createRandomArray(1000000, 0); + const data = new Uint8Array(dataBuffer); + + // Verify specific byte values (from Rust reference) + expect(data[0]).toBe(175); + expect(data[127]).toBe(132); + expect(data[111111]).toBe(118); + + const referenceSha256 = "b3d0a1f7938cd4d8413a4dcffd4313e2e8ac0cb61cb1090eb140ea8e9154befb"; + const sha256 = await crypto.subtle.digest("SHA-256", data); + const sha256Hex = Array.from(new Uint8Array(sha256)) + .map((b) => b.toString(16).padStart(2, "0")) + .join(""); + expect(sha256Hex).toBe(referenceSha256); + + // Get chunks using the default chunker + const chunks = getChunks(data); + + // Get chunk boundaries + const chunkBoundaries = getChunkBoundaries(chunks); + + // Expected boundaries from Rust reference + const expectedBoundaries = [ + 84493, 134421, 144853, 243318, 271793, 336457, 467529, 494581, 582000, 596735, 616815, 653164, 678202, 724510, + 815591, 827760, 958832, 991092, 1000000, + ]; + + expect(chunkBoundaries).toEqual(expectedBoundaries); + }); + }); + + describe("Constant data test", () => { + it("should pass 1MB constant data test", () => { + // Create 
1MB of constant data (value 59) + const data = new Uint8Array(1000000); + data.fill(59); + + // Get chunks using the default chunker + const chunks = getChunks(data); + + // Get chunk boundaries + const chunkBoundaries = getChunkBoundaries(chunks); + + // Expected boundaries from Rust reference + const expectedBoundaries = [131072, 262144, 393216, 524288, 655360, 786432, 917504, 1000000]; + + expect(chunkBoundaries).toEqual(expectedBoundaries); + }); + }); + + describe("Chunk boundary consistency test", () => { + it("should maintain consistent chunk boundaries with different block sizes", () => { + // Create 256KB of random data with seed 1 + const dataBuffer = createRandomArray(256000, 1n); + const data = new Uint8Array(dataBuffer); + + // Get reference chunks using the default chunker + const refChunks = getChunks(data); + const refChunkBoundaries = getChunkBoundaries(refChunks); + + // Test with different block sizes + for (const addSize of [1, 37, 255]) { + const chunker = createChunker(); + const altChunks: Array<{ length: number; hash: Uint8Array }> = []; + + let pos = 0; + while (pos < data.length) { + const nextPos = Math.min(pos + addSize, data.length); + const nextChunk = nextBlock(chunker, data.subarray(pos, nextPos)); + altChunks.push(...nextChunk); + pos = nextPos; + } + + // Finalize to get any remaining chunk + const finalChunk = finalize(chunker); + if (finalChunk) { + altChunks.push(finalChunk); + } + + const altBoundaries = getChunkBoundaries(altChunks); + expect(altBoundaries).toEqual(refChunkBoundaries); + } + }); + }); + + describe("Triggering data test", () => { + it("should handle triggering data patterns correctly", () => { + // Create a pattern that triggers boundary detection + // This is a simplified version of the Rust test + function get_triggering_base_data(n: number, padding: number) { + const pattern = new Uint8Array([ + 154, 52, 42, 34, 159, 75, 126, 224, 70, 236, 12, 196, 79, 236, 178, 124, 127, 50, 99, 178, 44, 176, 174, 126, + 
250, 235, 205, 174, 252, 122, 35, 10, 20, 101, 214, 69, 193, 8, 115, 105, 158, 228, 120, 111, 136, 162, 198, + 251, 211, 183, 253, 252, 164, 147, 63, 16, 186, 162, 117, 23, 170, 36, 205, 187, 174, 76, 210, 174, 211, 175, + 12, 173, 145, 59, 2, 70, 222, 181, 159, 227, 182, 156, 189, 51, 226, 106, 24, 50, 183, 157, 140, 10, 8, 23, + 212, 70, 10, 234, 23, 33, 219, 254, 39, 236, 70, 49, 191, 116, 9, 115, 15, 101, 26, 159, 165, 220, 15, 170, + 56, 125, 92, 163, 94, 235, 38, 40, 49, 81, + ]); + + // Create 64KB of data by repeating the pattern + const data = new Uint8Array(n); + + for (let i = 0; i < n; i += pattern.length + padding) { + data.set(pattern.slice(0, Math.min(pattern.length, n - i)), i); + } + + return data; + } + + const data_sample_at_11111 = new Uint8Array(128); + const ref_cb = new Array(128); + + data_sample_at_11111[0] = 236; + ref_cb[0] = [8256, 16448, 24640, 32832, 41024, 49216, 57408, 65536]; + data_sample_at_11111[1] = 50; + ref_cb[1] = [8191, 16447, 24703, 32959, 41215, 49471, 57727, 65536]; + data_sample_at_11111[2] = 36; + ref_cb[2] = [8254, 16574, 24894, 33214, 41534, 49854, 58174, 65536]; + data_sample_at_11111[3] = 116; + ref_cb[3] = [8317, 16570, 24823, 33076, 41329, 49582, 57835, 65536]; + data_sample_at_11111[4] = 126; + ref_cb[4] = [8248, 16564, 24880, 33196, 41512, 49828, 58144, 65536]; + data_sample_at_11111[5] = 145; + ref_cb[5] = [8310, 16556, 24802, 33048, 41294, 49540, 57786, 65536]; + data_sample_at_11111[6] = 235; + ref_cb[6] = [8238, 16546, 24854, 33162, 41470, 49778, 58086, 65536]; + data_sample_at_11111[7] = 228; + ref_cb[7] = [8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536]; + data_sample_at_11111[8] = 70; + ref_cb[8] = [8224, 16520, 24816, 33112, 41408, 49704, 58000, 65536]; + data_sample_at_11111[9] = 178; + ref_cb[9] = [8284, 16504, 24724, 32944, 41164, 49384, 57604, 65536]; + data_sample_at_11111[10] = 173; + ref_cb[10] = [8206, 16486, 24766, 33046, 41326, 49606, 57886, 65536]; + data_sample_at_11111[11] = 0; + 
ref_cb[11] = [8265, 16466, 24667, 32868, 41069, 49270, 57471, 65536]; + data_sample_at_11111[12] = 252; + ref_cb[12] = [8324, 16452, 24704, 32832, 41084, 49212, 57464, 65536]; + data_sample_at_11111[13] = 159; + ref_cb[13] = [8242, 16561, 24880, 33199, 41518, 49837, 58156, 65536]; + data_sample_at_11111[14] = 69; + ref_cb[14] = [8300, 16536, 24772, 33008, 41244, 49480, 57716, 65536]; + data_sample_at_11111[15] = 219; + ref_cb[15] = [8215, 16509, 24803, 33097, 41391, 49685, 57979, 65536]; + data_sample_at_11111[16] = 126; + ref_cb[16] = [8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[17] = 10; + ref_cb[17] = [8329, 16457, 24714, 32842, 41099, 49227, 57484, 65536]; + data_sample_at_11111[18] = 124; + ref_cb[18] = [8240, 16562, 24884, 33206, 41528, 49850, 58172, 65536]; + data_sample_at_11111[19] = 24; + ref_cb[19] = [8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[20] = 196; + ref_cb[20] = [8204, 16492, 24780, 33068, 41356, 49644, 57932, 65536]; + data_sample_at_11111[21] = 106; + ref_cb[21] = [8259, 16454, 24649, 32844, 41039, 49234, 57429, 65536]; + data_sample_at_11111[22] = 196; + ref_cb[22] = [8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[23] = 183; + ref_cb[23] = [8218, 16523, 24828, 33133, 41438, 49743, 58048, 65536]; + data_sample_at_11111[24] = 124; + ref_cb[24] = [8128, 16328, 24536, 32744, 40952, 49160, 57368, 65536]; + data_sample_at_11111[25] = 70; + ref_cb[25] = [8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[26] = 126; + ref_cb[26] = [8226, 16542, 24858, 33174, 41490, 49806, 58122, 65536]; + data_sample_at_11111[27] = 191; + ref_cb[27] = [8279, 16494, 24709, 32924, 41139, 49354, 57569, 65536]; + data_sample_at_11111[28] = 69; + ref_cb[28] = [8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536]; + data_sample_at_11111[29] = 163; + ref_cb[29] = [8128, 16392, 24713, 33034, 41355, 49676, 57997, 65536]; + data_sample_at_11111[30] 
= 252; + ref_cb[30] = [8280, 16496, 24712, 32928, 41144, 49360, 57576, 65536]; + data_sample_at_11111[31] = 0; + ref_cb[31] = [8332, 16600, 24868, 33136, 41404, 49672, 57940, 65536]; + data_sample_at_11111[32] = 173; + ref_cb[32] = [8224, 16544, 24864, 33184, 41504, 49824, 58144, 65536]; + data_sample_at_11111[33] = 42; + ref_cb[33] = [8275, 16486, 24697, 32908, 41119, 49330, 57541, 65536]; + data_sample_at_11111[34] = 70; + ref_cb[34] = [8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[35] = 174; + ref_cb[35] = [8214, 16527, 24840, 33153, 41466, 49779, 58092, 65536]; + data_sample_at_11111[36] = 235; + ref_cb[36] = [8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[37] = 186; + ref_cb[37] = [8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[38] = 0; + ref_cb[38] = [8198, 16498, 24798, 33098, 41398, 49698, 57998, 65536]; + data_sample_at_11111[39] = 157; + ref_cb[39] = [8247, 16597, 24947, 33297, 41647, 49997, 58347, 65536]; + data_sample_at_11111[40] = 126; + ref_cb[40] = [8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[41] = 49; + ref_cb[41] = [8345, 16626, 24907, 33188, 41469, 49750, 58031, 65536]; + data_sample_at_11111[42] = 36; + ref_cb[42] = [8224, 16554, 24884, 33214, 41544, 49874, 58204, 65536]; + data_sample_at_11111[43] = 0; + ref_cb[43] = [8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[44] = 236; + ref_cb[44] = [8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536]; + data_sample_at_11111[45] = 105; + ref_cb[45] = [8195, 16499, 24803, 33107, 41411, 49715, 58019, 65536]; + data_sample_at_11111[46] = 0; + ref_cb[46] = [8242, 16594, 24946, 33298, 41650, 50002, 58354, 65536]; + data_sample_at_11111[47] = 24; + ref_cb[47] = [8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536]; + data_sample_at_11111[48] = 126; + ref_cb[48] = [8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536]; + 
data_sample_at_11111[49] = 0; + ref_cb[49] = [8206, 16525, 24844, 33163, 41482, 49801, 58120, 65536]; + data_sample_at_11111[50] = 70; + ref_cb[50] = [8252, 16618, 24984, 33350, 41716, 50082, 58448, 65536]; + data_sample_at_11111[51] = 236; + ref_cb[51] = [8298, 16532, 24766, 33000, 41234, 49468, 57702, 65536]; + data_sample_at_11111[52] = 0; + ref_cb[52] = [8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[53] = 12; + ref_cb[53] = [8209, 16337, 24680, 32808, 41151, 49279, 57622, 65536]; + data_sample_at_11111[54] = 236; + ref_cb[54] = [8254, 16626, 24998, 33370, 41742, 50114, 58486, 65536]; + data_sample_at_11111[55] = 0; + ref_cb[55] = [8299, 16534, 24769, 33004, 41239, 49474, 57709, 65536]; + data_sample_at_11111[56] = 173; + ref_cb[56] = [8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[57] = 196; + ref_cb[57] = [8204, 16529, 24854, 33179, 41504, 49829, 58154, 65536]; + data_sample_at_11111[58] = 0; + ref_cb[58] = [8248, 16618, 24988, 33358, 41728, 50098, 58468, 65536]; + data_sample_at_11111[59] = 159; + ref_cb[59] = [8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536]; + data_sample_at_11111[60] = 178; + ref_cb[60] = [8336, 16608, 24880, 33152, 41424, 49696, 57968, 65536]; + data_sample_at_11111[61] = 0; + ref_cb[61] = [8191, 16507, 24823, 33139, 41455, 49771, 58087, 65536]; + data_sample_at_11111[62] = 10; + ref_cb[62] = [8234, 16594, 24954, 33314, 41674, 50034, 58394, 65536]; + data_sample_at_11111[63] = 101; + ref_cb[63] = [8277, 16490, 24703, 32916, 41129, 49342, 57555, 65536]; + data_sample_at_11111[64] = 0; + ref_cb[64] = [8320, 16576, 24832, 33088, 41344, 49600, 57856, 65536]; + data_sample_at_11111[65] = 15; + ref_cb[65] = [8363, 16662, 24961, 33260, 41559, 49858, 58157, 65536]; + data_sample_at_11111[66] = 147; + ref_cb[66] = [8212, 16554, 24896, 33238, 41580, 49922, 58264, 65536]; + data_sample_at_11111[67] = 0; + ref_cb[67] = [8254, 16639, 25024, 33409, 41794, 50179, 58564, 65536]; + 
data_sample_at_11111[68] = 0; + ref_cb[68] = [8296, 16528, 24760, 32992, 41224, 49456, 57688, 65536]; + data_sample_at_11111[69] = 227; + ref_cb[69] = [8338, 16612, 24886, 33160, 41434, 49708, 57982, 65536]; + data_sample_at_11111[70] = 126; + ref_cb[70] = [8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[71] = 0; + ref_cb[71] = [8223, 16581, 24939, 33297, 41655, 50013, 58371, 65536]; + data_sample_at_11111[72] = 101; + ref_cb[72] = [8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[73] = 186; + ref_cb[73] = [8305, 16546, 24787, 33028, 41269, 49510, 57751, 65536]; + data_sample_at_11111[74] = 52; + ref_cb[74] = [8346, 16628, 24910, 33192, 41474, 49756, 58038, 65536]; + data_sample_at_11111[75] = 0; + ref_cb[75] = [8387, 16515, 24830, 32958, 41273, 49401, 57716, 65536]; + data_sample_at_11111[76] = 70; + ref_cb[76] = [8224, 16588, 24952, 33316, 41680, 50044, 58408, 65536]; + data_sample_at_11111[77] = 228; + ref_cb[77] = [8264, 16464, 24664, 32864, 41064, 49264, 57464, 65536]; + data_sample_at_11111[78] = 0; + ref_cb[78] = [8128, 16338, 24578, 32818, 41058, 49298, 57538, 65536]; + data_sample_at_11111[79] = 0; + ref_cb[79] = [8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[80] = 50; + ref_cb[80] = [8384, 16704, 25024, 33344, 41664, 49984, 58304, 65536]; + data_sample_at_11111[81] = 214; + ref_cb[81] = [8215, 16575, 24935, 33295, 41655, 50015, 58375, 65536]; + data_sample_at_11111[82] = 0; + ref_cb[82] = [8254, 16654, 25054, 33454, 41854, 50254, 58654, 65536]; + data_sample_at_11111[83] = 0; + ref_cb[83] = [8293, 16522, 24751, 32980, 41209, 49438, 57667, 65536]; + data_sample_at_11111[84] = 50; + ref_cb[84] = [8128, 16388, 24656, 32924, 41192, 49460, 57728, 65536]; + data_sample_at_11111[85] = 69; + ref_cb[85] = [8371, 16678, 24985, 33292, 41599, 49906, 58213, 65536]; + data_sample_at_11111[86] = 0; + ref_cb[86] = [8196, 16324, 24674, 32802, 41152, 49280, 57630, 65536]; + 
data_sample_at_11111[87] = 0; + ref_cb[87] = [8234, 16619, 25004, 33389, 41774, 50159, 58544, 65536]; + data_sample_at_11111[88] = 70; + ref_cb[88] = [8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[89] = 136; + ref_cb[89] = [8128, 16339, 24585, 32831, 41077, 49323, 57569, 65536]; + data_sample_at_11111[90] = 0; + ref_cb[90] = [8348, 16632, 24916, 33200, 41484, 49768, 58052, 65536]; + data_sample_at_11111[91] = 0; + ref_cb[91] = [8386, 16708, 25030, 33352, 41674, 49996, 58318, 65536]; + data_sample_at_11111[92] = 101; + ref_cb[92] = [8204, 16564, 24924, 33284, 41644, 50004, 58364, 65536]; + data_sample_at_11111[93] = 36; + ref_cb[93] = [8241, 16639, 25037, 33435, 41833, 50231, 58629, 65536]; + data_sample_at_11111[94] = 196; + ref_cb[94] = [8278, 16492, 24706, 32920, 41134, 49348, 57562, 65536]; + data_sample_at_11111[95] = 0; + ref_cb[95] = [8315, 16566, 24817, 33068, 41319, 49570, 57821, 65536]; + data_sample_at_11111[96] = 0; + ref_cb[96] = [8352, 16640, 24928, 33216, 41504, 49792, 58080, 65536]; + data_sample_at_11111[97] = 24; + ref_cb[97] = [8389, 16714, 25039, 33364, 41689, 50014, 58339, 65536]; + data_sample_at_11111[98] = 8; + ref_cb[98] = [8200, 16562, 24924, 33286, 41648, 50010, 58372, 65536]; + data_sample_at_11111[99] = 0; + ref_cb[99] = [8236, 16635, 25034, 33433, 41832, 50231, 58630, 65536]; + data_sample_at_11111[100] = 0; + ref_cb[100] = [8272, 16480, 24688, 32896, 41104, 49312, 57520, 65536]; + data_sample_at_11111[101] = 125; + ref_cb[101] = [8308, 16552, 24796, 33040, 41284, 49528, 57772, 65536]; + data_sample_at_11111[102] = 173; + ref_cb[102] = [8344, 16624, 24904, 33184, 41464, 49744, 58024, 65536]; + data_sample_at_11111[103] = 126; + ref_cb[103] = [8380, 16696, 25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[104] = 0; + ref_cb[104] = [8416, 16544, 24888, 33016, 41360, 49488, 57832, 65536]; + data_sample_at_11111[105] = 0; + ref_cb[105] = [8219, 16607, 24995, 33383, 41771, 50159, 58547, 
65536]; + data_sample_at_11111[106] = 159; + ref_cb[106] = [8254, 16678, 25102, 33526, 41950, 50374, 58798, 65536]; + data_sample_at_11111[107] = 210; + ref_cb[107] = [8289, 16514, 24739, 32964, 41189, 49414, 57639, 65536]; + data_sample_at_11111[108] = 178; + ref_cb[108] = [8324, 16584, 24844, 33104, 41364, 49624, 57884, 65536]; + data_sample_at_11111[109] = 0; + ref_cb[109] = [8359, 16654, 24949, 33244, 41539, 49834, 58129, 65536]; + data_sample_at_11111[110] = 0; + ref_cb[110] = [8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536]; + data_sample_at_11111[111] = 170; + ref_cb[111] = [8429, 16794, 25159, 33524, 41889, 50254, 58619, 65536]; + data_sample_at_11111[112] = 173; + ref_cb[112] = [8224, 16624, 25024, 33424, 41824, 50224, 58624, 65536]; + data_sample_at_11111[113] = 235; + ref_cb[113] = [8258, 16452, 24646, 32840, 41034, 49228, 57422, 65536]; + data_sample_at_11111[114] = 0; + ref_cb[114] = [8292, 16520, 24748, 32976, 41204, 49432, 57660, 65536]; + data_sample_at_11111[115] = 0; + ref_cb[115] = [8326, 16588, 24850, 33112, 41374, 49636, 57898, 65536]; + data_sample_at_11111[116] = 0; + ref_cb[116] = [8360, 16656, 24952, 33248, 41544, 49840, 58136, 65536]; + data_sample_at_11111[117] = 24; + ref_cb[117] = [8394, 16724, 25054, 33384, 41714, 50044, 58374, 65536]; + data_sample_at_11111[118] = 228; + ref_cb[118] = [8428, 16792, 25156, 33520, 41884, 50248, 58612, 65536]; + data_sample_at_11111[119] = 0; + ref_cb[119] = [8215, 16613, 25011, 33409, 41807, 50205, 58603, 65536]; + data_sample_at_11111[120] = 0; + ref_cb[120] = [8248, 16680, 25112, 33544, 41976, 50408, 58840, 65536]; + data_sample_at_11111[121] = 0; + ref_cb[121] = [8281, 16498, 24715, 32932, 41149, 49366, 57583, 65536]; + data_sample_at_11111[122] = 101; + ref_cb[122] = [8314, 16564, 24814, 33064, 41314, 49564, 57814, 65536]; + data_sample_at_11111[123] = 174; + ref_cb[123] = [8347, 16630, 24913, 33196, 41479, 49762, 58045, 65536]; + data_sample_at_11111[124] = 126; + ref_cb[124] = [8380, 16696, 
25012, 33328, 41644, 49960, 58276, 65536]; + data_sample_at_11111[125] = 0; + ref_cb[125] = [8413, 16762, 25111, 33460, 41809, 50158, 58507, 65536]; + data_sample_at_11111[126] = 0; + ref_cb[126] = [8192, 16574, 24956, 33338, 41720, 50102, 58484, 65536]; + data_sample_at_11111[127] = 0; + ref_cb[127] = [8224, 16639, 25054, 33469, 41884, 50299, 58714, 65536]; + + // Now run the loop with this reference data. + + for (let i = 0; i < 128; i++) { + console.log(`Running test case ${i}`); + const data = get_triggering_base_data(65536, i); + + expect(data[11111]).toBe(data_sample_at_11111[i]); + + const chunks = getChunks(data); + const chunkBoundaries = getChunkBoundaries(chunks); + + expect(chunkBoundaries).toEqual(ref_cb[i]); + } + }); + }); + + describe("Basic chunker functionality", () => { + it("should create and use chunker correctly", () => { + // Create a small test data + const data = new Uint8Array(100000); + for (let i = 0; i < data.length; i++) { + data[i] = Math.floor(Math.random() * 256); + } + + // Test chunker creation and usage + const chunker = createChunker(); + const chunks = nextBlock(chunker, data); + const finalChunk = finalize(chunker); + + // Verify chunks have the expected structure + for (const chunk of chunks) { + expect(typeof chunk.length).toBe("number"); + expect(typeof chunk.hash).toBe("object"); + expect(chunk.hash instanceof Uint8Array).toBe(true); + } + + if (finalChunk) { + expect(typeof finalChunk.length).toBe("number"); + expect(typeof finalChunk.hash).toBe("object"); + expect(finalChunk.hash instanceof Uint8Array).toBe(true); + } + }); + }); +}); diff --git a/packages/xetchunk-wasm/vitest.config.ts b/packages/xetchunk-wasm/vitest.config.ts new file mode 100644 index 0000000000..b6d61dc045 --- /dev/null +++ b/packages/xetchunk-wasm/vitest.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + globals: true, + environment: "node", + }, + resolve: { + alias: { + 
"@huggingface/splitmix64-wasm": "./node_modules/@huggingface/splitmix64-wasm/build/release.js", + }, + }, +}); From 3be827a843241aa70eb5006e51efe03613138770 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 11:12:26 +0200 Subject: [PATCH 32/44] change gearhash tests to wasm --- packages/gearhash-wasm/package.json | 2 +- packages/gearhash-wasm/tests/index.js | 252 ------------------- packages/gearhash-wasm/tests/index.test.ts | 274 +++++++++++++++++++++ packages/gearhash-wasm/vitest.config.ts | 13 + 4 files changed, 288 insertions(+), 253 deletions(-) delete mode 100644 packages/gearhash-wasm/tests/index.js create mode 100644 packages/gearhash-wasm/tests/index.test.ts create mode 100644 packages/gearhash-wasm/vitest.config.ts diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index b470831cde..d027f8ab77 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -5,7 +5,7 @@ "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", - "test": "node tests && node tests/xet-data.js", + "test": "vitest run", "prepare": "pnpm run build" }, "keywords": [ diff --git a/packages/gearhash-wasm/tests/index.js b/packages/gearhash-wasm/tests/index.js deleted file mode 100644 index dfed8e01ae..0000000000 --- a/packages/gearhash-wasm/tests/index.js +++ /dev/null @@ -1,252 +0,0 @@ -import { nextMatch, nextMatches } from "../build/debug.js"; - -// Simple deterministic RNG for reproducible results (24-bit version) -class SimpleRng { - constructor(seed) { - this.state = seed & 0xffffff; // Keep only 24 bits - } - - nextU24() { - // Simple 24-bit linear congruential generator - // Using 24-bit arithmetic to avoid overflow - this.state = (this.state * 1111 + 12345) & 0xffffff; - return this.state; - } - - fillBytes(dest) { - for (let i = 0; i < dest.length; i += 3) { - const value = 
this.nextU24(); - for (let j = 0; j < 3 && i + j < dest.length; j++) { - dest[i + j] = (value >> (j * 8)) & 0xff; - } - } - } -} - -const BENCH_INPUT_SEED = 0xbecd17f; -const BENCH_MASK = 0x0000d90003530000n; -const INPUT_SIZE = 100_000; - -function generateTestInput() { - const bytes = new Uint8Array(INPUT_SIZE); - const rng = new SimpleRng(BENCH_INPUT_SEED); - rng.fillBytes(bytes); - return bytes; -} - -function testGearhash() { - console.log(`Generating test input with seed: 0x${BENCH_INPUT_SEED.toString(16)}`); - const inputBuf = generateTestInput(); - console.log(`Input size: ${inputBuf.length} bytes`); - console.log(`Mask: 0x${BENCH_MASK.toString(16)}`); - - let offset = 0; - let chunkCount = 0; - let totalProcessed = 0; - let hash = 0n; - - console.log("\nProcessing chunks:"); - console.log("Chunk | Offset | Size | Hash"); - console.log("------|--------|------|------------------"); - - const result = nextMatches(inputBuf, BENCH_MASK, 0); - const matches = [...result.matches, { position: result.remaining, hash: result.hash }]; - - for (const match of matches) { - totalProcessed += match.position; - chunkCount += 1; - hash = match.hash; - - console.log( - `${chunkCount.toString().padStart(5)} | ${offset.toString().padStart(6)} | ${match.position - .toString() - .padStart(4)} | 0x${match.hash.toString(16).padStart(16, "0")}` - ); - offset += match.position; - } - - console.log("\nSummary:"); - console.log(`Total chunks: ${chunkCount}`); - console.log(`Total bytes processed: ${totalProcessed}`); - console.log(`Average chunk size: ${(totalProcessed / chunkCount).toFixed(1)} bytes`); - - // Print first few bytes of each chunk for verification - console.log("\nFirst 16 bytes of each chunk:"); - offset = 0; - chunkCount = 0; - hash = 0n; - - while (offset < inputBuf.length) { - const result = nextMatch(inputBuf.subarray(offset), BENCH_MASK, hash); - if (result.matchSize > 0) { - const chunk = inputBuf.subarray(offset, offset + result.matchSize); - const hexBytes = 
Array.from(chunk.slice(0, Math.min(16, chunk.length))) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - console.log(`Chunk ${chunkCount + 1}: ${hexBytes}`); - offset += result.matchSize; - chunkCount += 1; - hash = result.hash; - } else { - const chunk = inputBuf.subarray(offset); - const hexBytes = Array.from(chunk.slice(0, Math.min(16, chunk.length))) - .map((b) => b.toString(16).padStart(2, "0")) - .join(""); - console.log(`Chunk ${chunkCount + 1}: ${hexBytes} (final)`); - break; - } - } - - return { chunkCount, totalProcessed, averageChunkSize: totalProcessed / chunkCount }; -} - -// Parse the expected results from Rust -function parseExpectedResults(resultData) { - const lines = resultData.trim().split("\n"); - const results = []; - - for (const line of lines) { - const match = line.match(/\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(0x[a-f0-9]+)/); - if (match) { - results.push({ - chunk: parseInt(match[1]), - offset: parseInt(match[2]), - size: parseInt(match[3]), - hash: match[4], - }); - } - } - - return results; -} - -const resultData = `Chunk | Offset | Size | Hash -------|--------|------|------------------ - 1 | 0 | 3598 | 0x033220f080ac5f77 - 2 | 3598 | 3995 | 0xd06b22f324ac5f28 - 3 | 7593 | 4708 | 0xa3a324f81808429c - 4 | 12301 | 484 | 0x12a5006aa4a4425b - 5 | 12785 | 1484 | 0x0b240413a4a4d5a2 - 6 | 14269 | 563 | 0xc646022fbc848bc6 - 7 | 14832 | 6663 | 0x7c7a2296e4a4c325 - 8 | 21495 | 1220 | 0xbe1f2468f0841b68 - 9 | 22715 | 1175 | 0xf87e2299e00c57d9 - 10 | 23890 | 779 | 0x79ca2634d00cd6b9 - 11 | 24669 | 2069 | 0xcb7a063594081a74 - 12 | 26738 | 2623 | 0xdccc26b6c0acb733 - 13 | 29361 | 596 | 0x4fb6201a1c20143e - 14 | 29957 | 622 | 0x81e726272020706f - 15 | 30579 | 3834 | 0x630622fca084a60a - 16 | 34413 | 2379 | 0x177b2240080810b1 - 17 | 36792 | 3527 | 0x663b261bbc2451ed - 18 | 40319 | 1665 | 0xf94f06db94003e2f - 19 | 41984 | 1240 | 0xc5ca208c0c24cefc - 20 | 43224 | 1274 | 0x8139244f740cba39 - 21 | 44498 | 3680 | 0x4440044520045a9d - 22 | 
48178 | 1487 | 0xe00f2049a0a43a58 - 23 | 49665 | 4293 | 0x366a26940408279d - 24 | 53958 | 1184 | 0x3a582683902cb3fe - 25 | 55142 | 383 | 0x002d0499e080702e - 26 | 55525 | 1206 | 0x34ba041aa4084fbd - 27 | 56731 | 506 | 0x0c53045c00a0a228 - 28 | 57237 | 8019 | 0xf85b202d9c0813a5 - 29 | 65256 | 1070 | 0x1c862295ac8863ba - 30 | 66326 | 3359 | 0x4e4804d7b82805c7 - 31 | 69685 | 1744 | 0x75b7224cc8209457 - 32 | 71429 | 152 | 0xb01e26b40c0cf7c0 - 33 | 71581 | 11 | 0xc66002b7f48c0472 - 34 | 71592 | 1209 | 0x0a33021dc4007363 - 35 | 72801 | 1795 | 0xd0cc22ea708c921f - 36 | 74596 | 856 | 0x49e3007c9c2c5727 - 37 | 75452 | 97 | 0xe0b422e3c40c89dc - 38 | 75549 | 1299 | 0xbd1806074024536a - 39 | 76848 | 131 | 0xd61104147c28928d - 40 | 76979 | 1987 | 0x31930627a080ebb0 - 41 | 78966 | 11254 | 0x4c4400e65c24beff - 42 | 90220 | 868 | 0xa92400ca5ca02488 - 43 | 91088 | 6279 | 0x5a3d0443f0a0d81a - 44 | 97367 | 969 | 0x7770042d140c7472 - 45 | 98336 | 1664 | 0xe508202f55c46d2d`; - -console.log("ok"); - -// Run the test and capture output for comparison -console.log("\n" + "=".repeat(50)); -console.log("RUNNING GEARHASH TEST"); -console.log("=".repeat(50)); - -// Capture console output for comparison -const originalLog = console.log; -let capturedOutput = []; - -console.log = function (...args) { - capturedOutput.push(args.join(" ")); - originalLog.apply(console, args); -}; - -// Run the test -const testResults = testGearhash(); - -// Restore console.log -console.log = originalLog; - -// Extract the chunk data from captured output -const chunkLines = capturedOutput.filter((line) => line.match(/^\s*\d+\s*\|\s*\d+\s*\|\s*\d+\s*\|\s*0x[a-f0-9]+/)); - -// Format the captured results for comparison -const capturedResultData = chunkLines.join("\n"); - -console.log("\n" + "=".repeat(50)); -console.log("COMPARISON RESULTS"); -console.log("=".repeat(50)); - -// Compare with expected results -const expectedResults = parseExpectedResults(resultData); -const actualResults = 
parseExpectedResults(capturedResultData); - -let matches = 0; -let totalChunks = Math.min(actualResults.length, expectedResults.length); - -console.log(`Comparing ${totalChunks} chunks...`); - -for (let i = 0; i < totalChunks; i++) { - const actual = actualResults[i]; - const expected = expectedResults[i]; - - if (actual.offset === expected.offset && actual.size === expected.size && actual.hash === expected.hash) { - matches++; - } else { - console.log(`❌ Mismatch at chunk ${i + 1}:`); - console.log(` Expected: offset=${expected.offset}, size=${expected.size}, hash=${expected.hash}`); - console.log(` Actual: offset=${actual.offset}, size=${actual.size}, hash=${actual.hash}`); - process.exitCode = 1; - } -} - -console.log(`\n✅ Results: ${matches}/${totalChunks} chunks match exactly`); -console.log(`📊 Accuracy: ${((matches / totalChunks) * 100).toFixed(2)}%`); - -if (matches === totalChunks) { - console.log("🎉 All chunks match! AssemblyScript implementation is correct."); -} else { - console.log("⚠ïļ Some chunks don't match. 
Check the implementation."); -} - -// Test summary -console.log("\n" + "=".repeat(50)); -console.log("TEST SUMMARY"); -console.log("=".repeat(50)); -console.log(`Total chunks processed: ${testResults.chunkCount}`); -console.log(`Total bytes processed: ${testResults.totalProcessed}`); -console.log(`Average chunk size: ${testResults.averageChunkSize.toFixed(1)} bytes`); -console.log(`Matching chunks: ${matches}/${totalChunks}`); -console.log(`Accuracy: ${((matches / totalChunks) * 100).toFixed(2)}%`); - -const input = generateTestInput().slice(0, 100); - -let output = ""; -for (let i = 0; i < input.length; i++) { - output += input[i].toString(16).padStart(2, "0") + " "; -} - -console.log("First 100 bytes", output); diff --git a/packages/gearhash-wasm/tests/index.test.ts b/packages/gearhash-wasm/tests/index.test.ts new file mode 100644 index 0000000000..35e28eba8a --- /dev/null +++ b/packages/gearhash-wasm/tests/index.test.ts @@ -0,0 +1,274 @@ +import { describe, it, expect } from "vitest"; +import { nextMatch, nextMatches } from "../build/debug.js"; + +// Simple deterministic RNG for reproducible results (24-bit version) +// Alternatively, could have used WASM for 64-bit arithmetic. 
+class SimpleRng { + private state: number; + + constructor(seed: number) { + this.state = seed & 0xffffff; // Keep only 24 bits + } + + nextU24(): number { + // Simple 24-bit linear congruential generator + // Using 24-bit arithmetic to avoid overflow + this.state = (this.state * 1111 + 12345) & 0xffffff; + return this.state; + } + + fillBytes(dest: Uint8Array): void { + for (let i = 0; i < dest.length; i += 3) { + const value = this.nextU24(); + for (let j = 0; j < 3 && i + j < dest.length; j++) { + dest[i + j] = (value >> (j * 8)) & 0xff; + } + } + } +} + +const BENCH_INPUT_SEED = 0xbecd17f; +const BENCH_MASK = 0x0000d90003530000n; +const INPUT_SIZE = 100_000; + +function generateTestInput(): Uint8Array { + const bytes = new Uint8Array(INPUT_SIZE); + const rng = new SimpleRng(BENCH_INPUT_SEED); + rng.fillBytes(bytes); + return bytes; +} + +interface TestResults { + chunkCount: number; + totalProcessed: number; + averageChunkSize: number; +} + +interface ExpectedResult { + chunk: number; + offset: number; + size: number; + hash: string; +} + +function testGearhash(): TestResults { + const inputBuf = generateTestInput(); + + let chunkCount = 0; + let totalProcessed = 0; + + const result = nextMatches(inputBuf, BENCH_MASK, 0n); + const matches = [...result.matches, { position: result.remaining, hash: result.hash }]; + + for (const match of matches) { + totalProcessed += match.position; + chunkCount += 1; + } + + return { chunkCount, totalProcessed, averageChunkSize: totalProcessed / chunkCount }; +} + +// Parse the expected results from Rust +function parseExpectedResults(resultData: string): ExpectedResult[] { + const lines = resultData.trim().split("\n"); + const results: ExpectedResult[] = []; + + for (const line of lines) { + const match = line.match(/\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(\d+)\s*\|\s*(0x[a-f0-9]+)/); + if (match) { + results.push({ + chunk: parseInt(match[1]), + offset: parseInt(match[2]), + size: parseInt(match[3]), + hash: match[4], + }); + } + } + + 
return results; +} + +const resultData = `Chunk | Offset | Size | Hash +------|--------|------|------------------ + 1 | 0 | 3598 | 0x033220f080ac5f77 + 2 | 3598 | 3995 | 0xd06b22f324ac5f28 + 3 | 7593 | 4708 | 0xa3a324f81808429c + 4 | 12301 | 484 | 0x12a5006aa4a4425b + 5 | 12785 | 1484 | 0x0b240413a4a4d5a2 + 6 | 14269 | 563 | 0xc646022fbc848bc6 + 7 | 14832 | 6663 | 0x7c7a2296e4a4c325 + 8 | 21495 | 1220 | 0xbe1f2468f0841b68 + 9 | 22715 | 1175 | 0xf87e2299e00c57d9 + 10 | 23890 | 779 | 0x79ca2634d00cd6b9 + 11 | 24669 | 2069 | 0xcb7a063594081a74 + 12 | 26738 | 2623 | 0xdccc26b6c0acb733 + 13 | 29361 | 596 | 0x4fb6201a1c20143e + 14 | 29957 | 622 | 0x81e726272020706f + 15 | 30579 | 3834 | 0x630622fca084a60a + 16 | 34413 | 2379 | 0x177b2240080810b1 + 17 | 36792 | 3527 | 0x663b261bbc2451ed + 18 | 40319 | 1665 | 0xf94f06db94003e2f + 19 | 41984 | 1240 | 0xc5ca208c0c24cefc + 20 | 43224 | 1274 | 0x8139244f740cba39 + 21 | 44498 | 3680 | 0x4440044520045a9d + 22 | 48178 | 1487 | 0xe00f2049a0a43a58 + 23 | 49665 | 4293 | 0x366a26940408279d + 24 | 53958 | 1184 | 0x3a582683902cb3fe + 25 | 55142 | 383 | 0x002d0499e080702e + 26 | 55525 | 1206 | 0x34ba041aa4084fbd + 27 | 56731 | 506 | 0x0c53045c00a0a228 + 28 | 57237 | 8019 | 0xf85b202d9c0813a5 + 29 | 65256 | 1070 | 0x1c862295ac8863ba + 30 | 66326 | 3359 | 0x4e4804d7b82805c7 + 31 | 69685 | 1744 | 0x75b7224cc8209457 + 32 | 71429 | 152 | 0xb01e26b40c0cf7c0 + 33 | 71581 | 11 | 0xc66002b7f48c0472 + 34 | 71592 | 1209 | 0x0a33021dc4007363 + 35 | 72801 | 1795 | 0xd0cc22ea708c921f + 36 | 74596 | 856 | 0x49e3007c9c2c5727 + 37 | 75452 | 97 | 0xe0b422e3c40c89dc + 38 | 75549 | 1299 | 0xbd1806074024536a + 39 | 76848 | 131 | 0xd61104147c28928d + 40 | 76979 | 1987 | 0x31930627a080ebb0 + 41 | 78966 | 11254 | 0x4c4400e65c24beff + 42 | 90220 | 868 | 0xa92400ca5ca02488 + 43 | 91088 | 6279 | 0x5a3d0443f0a0d81a + 44 | 97367 | 969 | 0x7770042d140c7472 + 45 | 98336 | 1664 | 0xe508202f55c46d2d`; + +describe("gearhash-wasm", () => { + describe("Basic 
functionality", () => { + it("should generate test input correctly", () => { + const input = generateTestInput(); + expect(input.length).toBe(INPUT_SIZE); + + // Verify specific byte values for reproducibility + // These values may vary depending on the RNG implementation + expect(typeof input[0]).toBe("number"); + expect(input[0]).toBeGreaterThanOrEqual(0); + expect(input[0]).toBeLessThanOrEqual(255); + expect(typeof input[100]).toBe("number"); + expect(typeof input[1000]).toBe("number"); + }); + + it("should process chunks correctly", () => { + const testResults = testGearhash(); + + expect(testResults.chunkCount).toBeGreaterThan(0); + expect(testResults.totalProcessed).toBe(INPUT_SIZE); + expect(testResults.averageChunkSize).toBeGreaterThan(0); + }); + }); + + describe("Chunk matching accuracy", () => { + it("should match expected results from Rust implementation", () => { + const inputBuf = generateTestInput(); + const result = nextMatches(inputBuf, BENCH_MASK, 0n); + const allMatches = [...result.matches, { position: result.remaining, hash: result.hash }]; + + // Generate actual results in the same format as expected + const actualResults: ExpectedResult[] = []; + let offset = 0; + let chunkCount = 0; + + for (const match of allMatches) { + chunkCount += 1; + actualResults.push({ + chunk: chunkCount, + offset: offset, + size: match.position, + hash: `0x${match.hash.toString(16).padStart(16, "0")}`, + }); + offset += match.position; + } + + // Compare with expected results + const expectedResults = parseExpectedResults(resultData); + const totalChunks = Math.min(actualResults.length, expectedResults.length); + + expect(totalChunks).toBe(expectedResults.length); + expect(totalChunks).toBe(45); + + let matchCount = 0; + for (let i = 0; i < totalChunks; i++) { + const actual = actualResults[i]; + const expected = expectedResults[i]; + + if (actual.offset === expected.offset && actual.size === expected.size && actual.hash === expected.hash) { + matchCount++; + } + 
} + + // We expect every chunk to match the Rust reference implementation exactly (100%) + const accuracy = (matchCount / totalChunks) * 100; + expect(accuracy).toBeGreaterThanOrEqual(100); + }); + }); + + describe("Individual chunk processing", () => { + it("should process individual chunks correctly", () => { + const input = generateTestInput(); + let offset = 0; + let hash = 0n; + + while (offset < input.length) { + const result = nextMatch(input.subarray(offset), BENCH_MASK, hash); + + // Position can be -1 to indicate no match found + expect(result.position).toBeGreaterThanOrEqual(-1); + expect(typeof result.hash).toBe("bigint"); + + if (result.position > 0) { + offset += result.position; + hash = result.hash; + } else { + // No more matches, break + break; + } + } + }); + }); + + describe("Edge cases", () => { + it("should handle empty input", () => { + const emptyInput = new Uint8Array(0); + const result = nextMatches(emptyInput, BENCH_MASK, 0n); + + expect(result.matches.length).toBe(0); + expect(result.remaining).toBe(0); + }); + + it("should handle small input", () => { + const smallInput = new Uint8Array([1, 2, 3, 4, 5]); + const result = nextMatches(smallInput, BENCH_MASK, 0n); + + expect(result.matches.length).toBeGreaterThanOrEqual(0); + expect(result.remaining).toBeGreaterThanOrEqual(0); + }); + + it("should handle different masks", () => { + const input = generateTestInput().slice(0, 1000); + const differentMasks = [0x0000ff0000000000n, 0x00000000ff000000n, 0x000000000000ff00n]; + + for (const mask of differentMasks) { + const result = nextMatches(input, mask, 0n); + expect(result.matches.length).toBeGreaterThanOrEqual(0); + } + }); + }); + + describe("Performance characteristics", () => { + it("should maintain reasonable chunk sizes", () => { + const testResults = testGearhash(); + + // Average chunk size should be reasonable (not too small, not too large) + expect(testResults.averageChunkSize).toBeGreaterThan(100); + 
expect(testResults.averageChunkSize).toBeLessThan(10000); + }); + + it("should process all input data", () => { + const testResults = testGearhash(); + expect(testResults.totalProcessed).toBe(INPUT_SIZE); + }); + }); +}); diff --git a/packages/gearhash-wasm/vitest.config.ts b/packages/gearhash-wasm/vitest.config.ts new file mode 100644 index 0000000000..b6d61dc045 --- /dev/null +++ b/packages/gearhash-wasm/vitest.config.ts @@ -0,0 +1,13 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + globals: true, + environment: "node", + }, + resolve: { + alias: { + "@huggingface/splitmix64-wasm": "./node_modules/@huggingface/splitmix64-wasm/build/release.js", + }, + }, +}); From c4f4053ee33d5a11d8e9c00c0b2c24a11b39ac4b Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 12:20:36 +0200 Subject: [PATCH 33/44] small opt --- packages/blake3-wasm/assembly/blake3.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index e3c346b11a..2e8201853b 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -103,7 +103,7 @@ function compress( return state; } -function words_from_little_endian_bytes(bytes: Uint8Array, words: StaticArray): void { +function words_from_little_endian_bytes(bytes: StaticArray, words: StaticArray): void { for (let i = 0; i < words.length; i++) { const offset = i * 4; words[i] = @@ -199,7 +199,7 @@ class Blake3Hasher { class ChunkState { chaining_value: StaticArray; chunk_counter: u64; - block: Uint8Array; + block: StaticArray; block_len: u8; blocks_compressed: u8; flags: u32; @@ -207,7 +207,7 @@ class ChunkState { constructor(key_words: StaticArray, chunk_counter: u64, flags: u32) { this.chaining_value = new StaticArray(8); this.chunk_counter = chunk_counter; - this.block = new Uint8Array(BLOCK_LEN); + this.block = new StaticArray(BLOCK_LEN); this.block_len = 
0; this.blocks_compressed = 0; this.flags = flags; @@ -242,7 +242,7 @@ class ChunkState { this.chaining_value[i] = compressed[i]; } this.blocks_compressed++; - this.block = new Uint8Array(BLOCK_LEN); + this.block = new StaticArray(BLOCK_LEN); this.block_len = 0; } From 322819414844b8360efd56efca6d9a5bee4eddf7 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 14:13:05 +0200 Subject: [PATCH 34/44] publish wasm packages --- .github/workflows/blake3-wasm-publish.yml | 64 +++++++++++++++++++ .github/workflows/gearhash-wasm-publish.yml | 64 +++++++++++++++++++ .github/workflows/splitmix64-wasm-publish.yml | 64 +++++++++++++++++++ .github/workflows/xetchunk-wasm-publish.yml | 64 +++++++++++++++++++ packages/blake3-wasm/.npmignore | 1 - packages/blake3-wasm/package.json | 17 ++++- packages/gearhash-wasm/package.json | 14 +++- packages/splitmix64-wasm/package.json | 16 ++++- packages/xetchunk-wasm/package.json | 14 +++- 9 files changed, 312 insertions(+), 6 deletions(-) create mode 100644 .github/workflows/blake3-wasm-publish.yml create mode 100644 .github/workflows/gearhash-wasm-publish.yml create mode 100644 .github/workflows/splitmix64-wasm-publish.yml create mode 100644 .github/workflows/xetchunk-wasm-publish.yml delete mode 100644 packages/blake3-wasm/.npmignore diff --git a/.github/workflows/blake3-wasm-publish.yml b/.github/workflows/blake3-wasm-publish.yml new file mode 100644 index 0000000000..30018ffda8 --- /dev/null +++ b/.github/workflows/blake3-wasm-publish.yml @@ -0,0 +1,64 @@ +name: Blake3 WASM - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/blake3-wasm + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, 
otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. + # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: npm install -g corepack@latest && corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "20" + cache: "pnpm" + cache-dependency-path: | + packages/blake3-wasm/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/blake3-wasm $BUMPED_VERSION" + git tag "blake3-wasm-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: (git pull --rebase && git push --follow-tags) || (git pull --rebase && git push --follow-tags) + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . 
+ # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/gearhash-wasm-publish.yml b/.github/workflows/gearhash-wasm-publish.yml new file mode 100644 index 0000000000..4070718785 --- /dev/null +++ b/.github/workflows/gearhash-wasm-publish.yml @@ -0,0 +1,64 @@ +name: Gearhash WASM - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/gearhash-wasm + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. + # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: npm install -g corepack@latest && corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "20" + cache: "pnpm" + cache-dependency-path: | + packages/gearhash-wasm/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. 
REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/gearhash-wasm $BUMPED_VERSION" + git tag "gearhash-wasm-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: (git pull --rebase && git push --follow-tags) || (git pull --rebase && git push --follow-tags) + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . 
+ # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/splitmix64-wasm-publish.yml b/.github/workflows/splitmix64-wasm-publish.yml new file mode 100644 index 0000000000..f35f7c0a5b --- /dev/null +++ b/.github/workflows/splitmix64-wasm-publish.yml @@ -0,0 +1,64 @@ +name: Splitmix64 WASM - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/splitmix64-wasm + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. + # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: npm install -g corepack@latest && corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "20" + cache: "pnpm" + cache-dependency-path: | + packages/splitmix64-wasm/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. 
REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/splitmix64-wasm $BUMPED_VERSION" + git tag "splitmix64-wasm-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: (git pull --rebase && git push --follow-tags) || (git pull --rebase && git push --follow-tags) + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . 
+ # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/.github/workflows/xetchunk-wasm-publish.yml b/.github/workflows/xetchunk-wasm-publish.yml new file mode 100644 index 0000000000..d56da18b9c --- /dev/null +++ b/.github/workflows/xetchunk-wasm-publish.yml @@ -0,0 +1,64 @@ +name: Xetchunk WASM - Version and Release + +on: + workflow_dispatch: + inputs: + newversion: + type: choice + description: "Semantic Version Bump Type" + default: patch + options: + - patch + - minor + - major + +concurrency: + group: "push-to-main" + +defaults: + run: + working-directory: packages/xetchunk-wasm + +jobs: + version_and_release: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + with: + # Needed to push the tag and the commit on the main branch, otherwise we get: + # > Run git push --follow-tags + # remote: error: GH006: Protected branch update failed for refs/heads/main. + # remote: error: Changes must be made through a pull request. Required status check "lint" is expected. + token: ${{ secrets.BOT_ACCESS_TOKEN }} + - run: npm install -g corepack@latest && corepack enable + - uses: actions/setup-node@v3 + with: + node-version: "20" + cache: "pnpm" + cache-dependency-path: | + packages/xetchunk-wasm/pnpm-lock.yaml + # setting a registry enables the NODE_AUTH_TOKEN env variable where we can set an npm token. 
REQUIRED + registry-url: "https://registry.npmjs.org" + - run: pnpm install + - run: git config --global user.name machineuser + - run: git config --global user.email infra+machineuser@huggingface.co + - run: | + PACKAGE_VERSION=$(node -p "require('./package.json').version") + BUMPED_VERSION=$(node -p "require('semver').inc('$PACKAGE_VERSION', '${{ github.event.inputs.newversion }}')") + # Update package.json with the new version + node -e "const fs = require('fs'); const package = JSON.parse(fs.readFileSync('./package.json')); package.version = '$BUMPED_VERSION'; fs.writeFileSync('./package.json', JSON.stringify(package, null, '\t') + '\n');" + git commit . -m "🔖 @huggingface/xetchunk-wasm $BUMPED_VERSION" + git tag "xetchunk-wasm-v$BUMPED_VERSION" + - run: pnpm publish --no-git-checks . + env: + NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} + - run: (git pull --rebase && git push --follow-tags) || (git pull --rebase && git push --follow-tags) + # hack - reuse actions/setup-node@v3 just to set a new registry + - uses: actions/setup-node@v3 + with: + node-version: "20" + registry-url: "https://npm.pkg.github.com" + # Disable for now, until github supports PATs for writing github packages (https://github.com/github/roadmap/issues/558) + # - run: pnpm publish --no-git-checks . 
+ # env: + # NODE_AUTH_TOKEN: ${{ secrets.GITHUB_TOKEN }} diff --git a/packages/blake3-wasm/.npmignore b/packages/blake3-wasm/.npmignore deleted file mode 100644 index 5657f6ea7d..0000000000 --- a/packages/blake3-wasm/.npmignore +++ /dev/null @@ -1 +0,0 @@ -vendor \ No newline at end of file diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index f7350c6941..d4d5b37331 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/blake3-wasm", - "version": "0.0.1", + "version": "0.0.2", "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", @@ -27,6 +27,21 @@ "import": "./build/release.wasm" } }, + "main": "./build/release.js", + "types": "./build/release.d.ts", + "files": [ + "build/release.js", + "build/release.d.ts", + "build/release.wasm", + "build/release.wat", + "build/release.wasm.map", + "LICENSE_A2", + "LICENSE_A2LLVM", + "LICENSE_C0", + "README.md", + "asconfig.json", + "assembly" + ], "devDependencies": { "assemblyscript": "0.27.36" } diff --git a/packages/gearhash-wasm/package.json b/packages/gearhash-wasm/package.json index d027f8ab77..42b67a87c0 100644 --- a/packages/gearhash-wasm/package.json +++ b/packages/gearhash-wasm/package.json @@ -15,6 +15,8 @@ "wasm" ], "type": "module", + "main": "./build/release.js", + "types": "./build/release.d.ts", "exports": { ".": { "import": "./build/release.js", @@ -30,5 +32,15 @@ "devDependencies": { "assemblyscript": "0.27.36", "@huggingface/splitmix64-wasm": "workspace:*" - } + }, + "files": [ + "build/release.js", + "build/release.d.ts", + "build/release.wasm", + "build/release.wat", + "build/release.wasm.map", + "README.md", + "asconfig.json", + "assembly" + ] } diff --git a/packages/splitmix64-wasm/package.json b/packages/splitmix64-wasm/package.json index 6e98c4639f..69d91cfae8 100644 --- a/packages/splitmix64-wasm/package.json +++ 
b/packages/splitmix64-wasm/package.json @@ -9,7 +9,7 @@ "prepare": "pnpm run build" }, "keywords": [ - "blake3", + "splitmix64", "assemblyscript", "assembly", "wasm" @@ -27,7 +27,19 @@ "import": "./build/release.wasm" } }, + "main": "./build/release.js", + "types": "./build/release.d.ts", "devDependencies": { "assemblyscript": "0.27.36" - } + }, + "files": [ + "build/release.js", + "build/release.d.ts", + "build/release.wasm", + "build/release.wat", + "build/release.wasm.map", + "README.md", + "asconfig.json", + "assembly" + ] } diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 2989bef841..44cc7371ea 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -33,8 +33,20 @@ "import": "./build/release.wasm" } }, + "main": "./build/release.js", + "types": "./build/release.d.ts", "devDependencies": { "@huggingface/splitmix64-wasm": "workspace:*", "assemblyscript": "0.27.36" - } + }, + "files": [ + "build/release.js", + "build/release.d.ts", + "build/release.wasm", + "build/release.wat", + "build/release.wasm.map", + "README.md", + "asconfig.json", + "assembly" + ] } From 6386e73005c9d25f2ba9e4252480353183354b40 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 14:55:32 +0200 Subject: [PATCH 35/44] add test for chunk hashes --- packages/xetchunk-wasm/tests/index.test.ts | 29 ++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/packages/xetchunk-wasm/tests/index.test.ts b/packages/xetchunk-wasm/tests/index.test.ts index 830e37d887..0d6af22ca0 100644 --- a/packages/xetchunk-wasm/tests/index.test.ts +++ b/packages/xetchunk-wasm/tests/index.test.ts @@ -42,7 +42,36 @@ describe("xetchunk-wasm", () => { 815591, 827760, 958832, 991092, 1000000, ]; + const expectedChunkHashes = [ + "6eca1e7dadaf08cca5d82d318c800f07c2ddcec115a7e8627e5edd9605b94b8d", + "624ea34d72a06e5d43a1b8dd10763f0d22f165992c397ed97563899b27fdd88a", + 
"4411d2ec847c6e3f451a7451ff3933adb4f1c31587421b9f730b698a78313b47", + "6342bde97433e29e0779ad33eb8040d986679040361b3cc3a06230fe60dd6c9b", + "405253fcf15bba751adc4f507d3453273daff81ed4d8acd71c521aa1cbddc0b5", + "4482374af7f8bebfdb5c5df0299f80128d6c58886ad7b218c562b1d74064e4cb", + "80acc8d39c853b4b8a8c6ad7b63bf2ea68f62c2226b92f06349f92cc84d213cc", + "a7076d7d343f711fb20fe6cd023248d8d051e8fe7d44172596cd5c7ea7edaf65", + "44755217bbb4dadc81ea7695765230a34a2e6cb3b55f373f1de35aeba79ae92c", + "001adc1d302d5f039278325dcbd5ec3b194f4794f1629d6f962f9f4bb78a7bff", + "f8460a337c186f07c2e225bb287a1d3d3d686dc69d0828e99640f7d8852c5b90", + "c9bc3da29025dc1ba562d303d815151d9a937367abb766ae842a165e8493d9fe", + "5044339dfd65e8163bdfe642614a6be604b04d6aeacf222cf219ad287bfc5cf1", + "163622db0fe0da93f2ef964eed4c485f3c7a9c312f8e8e8a312ab4bb8141f13e", + "e1730534a858aa0258ad8904ef12b829b8a123a1c611250275c9ca9471e4c650", + "1fbc6854f9185caba1e1f55393f41f83b895b18f9c99245c029025ca48f1e14b", + "8a27b66ccf05b864b6bef6fb5c970fe894f73d2330e8e98fe7841dcdbd9e9576", + "bc70a33e7a9ec820cac24b87023469a57bdae1bf91cc3961b95806c64a525221", + "03e5b5f5a088269ec4b329f1e04debfac4cb54b9c0facf038f7e8e0f054be7e2", + ]; + expect(chunkBoundaries).toEqual(expectedBoundaries); + expect( + chunks.map((chunk) => + Array.from(chunk.hash) + .map((b) => b.toString(16).padStart(2, "0")) + .join("") + ) + ).toEqual(expectedChunkHashes); }); }); From a02c55ce88f361d8aef781120c6fdc4f253b0602 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 16:39:39 +0200 Subject: [PATCH 36/44] support for blake3 keyed --- packages/blake3-wasm/README.md | 8 +++ packages/blake3-wasm/assembly/blake3.ts | 54 ++++++++++++++++--- packages/blake3-wasm/package.json | 2 +- packages/blake3-wasm/tests/index.js | 50 ++++++++++++++++- .../xetchunk-wasm/assembly/xet-chunker.ts | 13 ++++- packages/xetchunk-wasm/package.json | 2 +- 6 files changed, 116 insertions(+), 13 deletions(-) diff --git a/packages/blake3-wasm/README.md 
b/packages/blake3-wasm/README.md index 944cca112e..b971608b36 100644 --- a/packages/blake3-wasm/README.md +++ b/packages/blake3-wasm/README.md @@ -22,4 +22,12 @@ for (const chunk of dataSource) { } const hash = hasher.finalize(); + +// When passing custom key +const hashKeyed = blake3Keyed(data, new Uint8Array([ + 0,1,2,3,4,5,6,7,8, + 9,10,11,12,13,14,15, + 16,17,18,19,20,21,22,23, + 24,25,26,27,28,29,30,31 +])); ``` \ No newline at end of file diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 2e8201853b..5091750dc7 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -8,7 +8,7 @@ const CHUNK_START: u32 = 1 << 0; const CHUNK_END: u32 = 1 << 1; const PARENT: u32 = 1 << 2; const ROOT: u32 = 1 << 3; -//const KEYED_HASH: u32 = 1 << 4; +const KEYED_HASH: u32 = 1 << 4; //const DERIVE_KEY_CONTEXT: u32 = 1 << 5; // const DERIVE_KEY_MATERIAL: u32 = 1 << 6; @@ -121,22 +121,38 @@ class Blake3Hasher { private cv_stack_len: u8; private flags: u32; - constructor() { - const key_words = new StaticArray(8); - for (let i = 0; i < 8; i++) { - key_words[i] = IV[i]; - } + constructor(key_words: StaticArray = [IV[0], IV[1], IV[2], IV[3], IV[4], IV[5], IV[6], IV[7]], flags: u32 = 0) { this.key_words = key_words; - this.chunk_state = new ChunkState(key_words, 0, 0); + this.chunk_state = new ChunkState(key_words, 0, flags); this.cv_stack = new StaticArray>(54); this.cv_stack_len = 0; - this.flags = 0; + this.flags = flags; for (let i = 0; i < 54; i++) { this.cv_stack[i] = new StaticArray(8); } } + // Constructor for keyed hash + static newKeyed(key: Uint8Array): Blake3Hasher { + if (key.length != 32) { + throw new Error("Key must be exactly 32 bytes"); + } + + const key_words = new StaticArray(8); + // const key_static = new StaticArray(32); + // for (let i = 0; i < 32; i++) { + // key_static[i] = key[i]; + // } + // words_from_little_endian_bytes(key_static, key_words); + const dataView = 
new DataView(key.buffer); + for (let i = 0; i < 8; i++) { + key_words[i] = dataView.getUint32(i * 4, true); + } + + return new Blake3Hasher(key_words, KEYED_HASH); + } + update(input: Uint8Array): void { let inputPos = 0; while (inputPos < input.length) { @@ -372,10 +388,32 @@ export function blake3Hex(input: Uint8Array): string { return hex.join(""); } +export function blake3Keyed(input: Uint8Array, key: Uint8Array): Uint8Array { + const hasher = Blake3Hasher.newKeyed(key); + hasher.update(input); + const output = new Uint8Array(32); + hasher.finalize(output); + return output; +} + +export function blake3KeyedHex(input: Uint8Array, key: Uint8Array): string { + const hash = blake3Keyed(input, key); + const hex = new Array(64); + for (let i = 0; i < 32; i++) { + hex[i * 2] = (hash[i] >> 4).toString(16); + hex[i * 2 + 1] = (hash[i] & 0x0f).toString(16); + } + return hex.join(""); +} + export function createHasher(): Blake3Hasher { return new Blake3Hasher(); } +export function createKeyedHasher(key: Uint8Array): Blake3Hasher { + return Blake3Hasher.newKeyed(key); +} + export function update(hasher: Blake3Hasher, input: Uint8Array): void { hasher.update(input); } diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index d4d5b37331..1cadbffd2d 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/blake3-wasm", - "version": "0.0.2", + "version": "0.0.3", "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", diff --git a/packages/blake3-wasm/tests/index.js b/packages/blake3-wasm/tests/index.js index 55463e9495..9436c23d27 100644 --- a/packages/blake3-wasm/tests/index.js +++ b/packages/blake3-wasm/tests/index.js @@ -1,6 +1,6 @@ // Adapted from https://github.com/mcmilk/BLAKE3-tests/blob/11a8abeceac93b5eba664eae3679efb4ffa5bc0a/blake3_test.c -import { blake3Hex } from "../build/debug.js"; +import { 
blake3Hex, blake3KeyedHex } from "../build/debug.js"; const buffer = new Uint8Array(102400); let i = 0; @@ -13,138 +13,176 @@ for (i = 0, j = 0; i < buffer.length; i++, j++) { buffer[i] = j; } +const key = new Uint8Array(32); +for (let i = 0; i < 32; i++) { + key[i] = "whats the Elvish word for friend".charCodeAt(i); +} + const testCases = [ { buf: buffer.slice(0, 0), expected: "af1349b9f5f9a1a6a0404dea36dcc9499bcb25c9adc112b7cc9a93cae41f3262", + keyed: "92b2b75604ed3c761f9d6f62392c8a9227ad0ea3f09573e783f1498a4ed60d26", }, { buf: buffer.slice(0, 1), expected: "2d3adedff11b61f14c886e35afa036736dcd87a74d27b5c1510225d0f592e213", + keyed: "6d7878dfff2f485635d39013278ae14f1454b8c0a3a2d34bc1ab38228a80c95b", }, { buf: buffer.slice(0, 2), expected: "7b7015bb92cf0b318037702a6cdd81dee41224f734684c2c122cd6359cb1ee63", + keyed: "5392ddae0e0a69d5f40160462cbd9bd889375082ff224ac9c758802b7a6fd20a", }, { buf: buffer.slice(0, 3), expected: "e1be4d7a8ab5560aa4199eea339849ba8e293d55ca0a81006726d184519e647f", + keyed: "39e67b76b5a007d4921969779fe666da67b5213b096084ab674742f0d5ec62b9", }, { buf: buffer.slice(0, 4), expected: "f30f5ab28fe047904037f77b6da4fea1e27241c5d132638d8bedce9d40494f32", + keyed: "7671dde590c95d5ac9616651ff5aa0a27bee5913a348e053b8aa9108917fe070", }, { buf: buffer.slice(0, 5), expected: "b40b44dfd97e7a84a996a91af8b85188c66c126940ba7aad2e7ae6b385402aa2", + keyed: "73ac69eecf286894d8102018a6fc729f4b1f4247d3703f69bdc6a5fe3e0c8461", }, { buf: buffer.slice(0, 6), expected: "06c4e8ffb6872fad96f9aaca5eee1553eb62aed0ad7198cef42e87f6a616c844", + keyed: "82d3199d0013035682cc7f2a399d4c212544376a839aa863a0f4c91220ca7a6d", }, { buf: buffer.slice(0, 7), expected: "3f8770f387faad08faa9d8414e9f449ac68e6ff0417f673f602a646a891419fe", + keyed: "af0a7ec382aedc0cfd626e49e7628bc7a353a4cb108855541a5651bf64fbb28a", }, { buf: buffer.slice(0, 8), expected: "2351207d04fc16ade43ccab08600939c7c1fa70a5c0aaca76063d04c3228eaeb", + keyed: 
"be2f5495c61cba1bb348a34948c004045e3bd4dae8f0fe82bf44d0da245a0600", }, { buf: buffer.slice(0, 63), expected: "e9bc37a594daad83be9470df7f7b3798297c3d834ce80ba85d6e207627b7db7b", + keyed: "bb1eb5d4afa793c1ebdd9fb08def6c36d10096986ae0cfe148cd101170ce37ae", }, { buf: buffer.slice(0, 64), expected: "4eed7141ea4a5cd4b788606bd23f46e212af9cacebacdc7d1f4c6dc7f2511b98", + keyed: "ba8ced36f327700d213f120b1a207a3b8c04330528586f414d09f2f7d9ccb7e6", }, { buf: buffer.slice(0, 65), expected: "de1e5fa0be70df6d2be8fffd0e99ceaa8eb6e8c93a63f2d8d1c30ecb6b263dee", + keyed: "c0a4edefa2d2accb9277c371ac12fcdbb52988a86edc54f0716e1591b4326e72", }, { buf: buffer.slice(0, 127), expected: "d81293fda863f008c09e92fc382a81f5a0b4a1251cba1634016a0f86a6bd640d", + keyed: "c64200ae7dfaf35577ac5a9521c47863fb71514a3bcad18819218b818de85818", }, { buf: buffer.slice(0, 128), expected: "f17e570564b26578c33bb7f44643f539624b05df1a76c81f30acd548c44b45ef", + keyed: "b04fe15577457267ff3b6f3c947d93be581e7e3a4b018679125eaf86f6a628ec", }, { buf: buffer.slice(0, 129), expected: "683aaae9f3c5ba37eaaf072aed0f9e30bac0865137bae68b1fde4ca2aebdcb12", + keyed: "d4a64dae6cdccbac1e5287f54f17c5f985105457c1a2ec1878ebd4b57e20d38f", }, { buf: buffer.slice(0, 1023), expected: "10108970eeda3eb932baac1428c7a2163b0e924c9a9e25b35bba72b28f70bd11", + keyed: "c951ecdf03288d0fcc96ee3413563d8a6d3589547f2c2fb36d9786470f1b9d6e", }, { buf: buffer.slice(0, 1024), expected: "42214739f095a406f3fc83deb889744ac00df831c10daa55189b5d121c855af7", + keyed: "75c46f6f3d9eb4f55ecaaee480db732e6c2105546f1e675003687c31719c7ba4", }, { buf: buffer.slice(0, 1025), expected: "d00278ae47eb27b34faecf67b4fe263f82d5412916c1ffd97c8cb7fb814b8444", + keyed: "357dc55de0c7e382c900fd6e320acc04146be01db6a8ce7210b7189bd664ea69", }, { buf: buffer.slice(0, 2048), expected: "e776b6028c7cd22a4d0ba182a8bf62205d2ef576467e838ed6f2529b85fba24a", + keyed: "879cf1fa2ea0e79126cb1063617a05b6ad9d0b696d0d757cf053439f60a99dd1", }, { buf: buffer.slice(0, 2049), expected: 
"5f4d72f40d7a5f82b15ca2b2e44b1de3c2ef86c426c95c1af0b6879522563030", + keyed: "9f29700902f7c86e514ddc4df1e3049f258b2472b6dd5267f61bf13983b78dd5", }, { buf: buffer.slice(0, 3072), expected: "b98cb0ff3623be03326b373de6b9095218513e64f1ee2edd2525c7ad1e5cffd2", + keyed: "044a0e7b172a312dc02a4c9a818c036ffa2776368d7f528268d2e6b5df191770", }, { buf: buffer.slice(0, 3073), expected: "7124b49501012f81cc7f11ca069ec9226cecb8a2c850cfe644e327d22d3e1cd3", + keyed: "68dede9bef00ba89e43f31a6825f4cf433389fedae75c04ee9f0cf16a427c95a", }, { buf: buffer.slice(0, 4096), expected: "015094013f57a5277b59d8475c0501042c0b642e531b0a1c8f58d2163229e969", + keyed: "befc660aea2f1718884cd8deb9902811d332f4fc4a38cf7c7300d597a081bfc0", }, { buf: buffer.slice(0, 4097), expected: "9b4052b38f1c5fc8b1f9ff7ac7b27cd242487b3d890d15c96a1c25b8aa0fb995", + keyed: "00df940cd36bb9fa7cbbc3556744e0dbc8191401afe70520ba292ee3ca80abbc", }, { buf: buffer.slice(0, 5120), expected: "9cadc15fed8b5d854562b26a9536d9707cadeda9b143978f319ab34230535833", + keyed: "2c493e48e9b9bf31e0553a22b23503c0a3388f035cece68eb438d22fa1943e20", }, { buf: buffer.slice(0, 5121), expected: "628bd2cb2004694adaab7bbd778a25df25c47b9d4155a55f8fbd79f2fe154cff", + keyed: "6ccf1c34753e7a044db80798ecd0782a8f76f33563accaddbfbb2e0ea4b2d024", }, { buf: buffer.slice(0, 6144), expected: "3e2e5b74e048f3add6d21faab3f83aa44d3b2278afb83b80b3c35164ebeca205", + keyed: "3d6b6d21281d0ade5b2b016ae4034c5dec10ca7e475f90f76eac7138e9bc8f1d", }, { buf: buffer.slice(0, 6145), expected: "f1323a8631446cc50536a9f705ee5cb619424d46887f3c376c695b70e0f0507f", + keyed: "9ac301e9e39e45e3250a7e3b3df701aa0fb6889fbd80eeecf28dbc6300fbc539", }, { buf: buffer.slice(0, 7168), expected: "61da957ec2499a95d6b8023e2b0e604ec7f6b50e80a9678b89d2628e99ada77a", + keyed: "b42835e40e9d4a7f42ad8cc04f85a963a76e18198377ed84adddeaecacc6f3fc", }, { buf: buffer.slice(0, 7169), expected: "a003fc7a51754a9b3c7fae0367ab3d782dccf28855a03d435f8cfe74605e7817", + keyed: 
"ed9b1a922c046fdb3d423ae34e143b05ca1bf28b710432857bf738bcedbfa511", }, { buf: buffer.slice(0, 8192), expected: "aae792484c8efe4f19e2ca7d371d8c467ffb10748d8a5a1ae579948f718a2a63", + keyed: "dc9637c8845a770b4cbf76b8daec0eebf7dc2eac11498517f08d44c8fc00d58a", }, { buf: buffer.slice(0, 8193), expected: "bab6c09cb8ce8cf459261398d2e7aef35700bf488116ceb94a36d0f5f1b7bc3b", + keyed: "954a2a75420c8d6547e3ba5b98d963e6fa6491addc8c023189cc519821b4a1f5", }, { buf: buffer.slice(0, 102400), expected: "bc3e3d41a1146b069abffad3c0d44860cf664390afce4d9661f7902e7943e085", + keyed: "1c35d1a5811083fd7119f5d5d1ba027b4d01c0c6c49fb6ff2cf75393ea5db4a7", }, ]; @@ -158,6 +196,16 @@ for (const testCase of testCases) { console.error(`Actual: ${result}`); process.exit(1); } + + const resultKeyed = blake3KeyedHex(testCase.buf, key); + console.log(resultKeyed); + + if (resultKeyed !== testCase.keyed) { + console.error(`Test case failed: ${testCase.buf.length} bytes (keyed)`); + console.error(`Expected: ${testCase.keyed}`); + console.error(`Actual: ${resultKeyed}`); + process.exit(1); + } } console.log("All test cases passed"); diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 32afa6e2e6..9dcc0049bf 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -1,5 +1,5 @@ import { nextMatch } from "@huggingface/gearhash-wasm/assembly"; -import { blake3 } from "@huggingface/blake3-wasm/assembly"; +import { blake3Keyed } from "@huggingface/blake3-wasm/assembly"; // Constants const TARGET_CHUNK_SIZE: i32 = 64 * 1024; // 64KB @@ -7,6 +7,15 @@ const MINIMUM_CHUNK_DIVISOR: i32 = 8; const MAXIMUM_CHUNK_MULTIPLIER: i32 = 2; const HASH_WINDOW_SIZE: i32 = 64; +const BLAKE3_DATA_KEY = new Uint8Array(32); +const STATIC_KEY: StaticArray = [ + 102, 151, 245, 119, 91, 149, 80, 222, 49, 53, 203, 172, 165, 151, 24, 28, 157, 228, 33, 16, 155, 235, 43, 88, 180, + 208, 176, 75, 147, 173, 242, 41, +]; 
+for (let i = 0; i < 32; i++) { + BLAKE3_DATA_KEY[i] = STATIC_KEY[i]; +} + export class Chunk { hash: Uint8Array; length: i32; @@ -97,7 +106,7 @@ class XetChunker { const chunkData = this.chunkBuf.subarray(0, this.curChunkLen); const chunk: Chunk = { length: chunkData.length, - hash: blake3(chunkData), + hash: blake3Keyed(chunkData, BLAKE3_DATA_KEY), }; this.curChunkLen = 0; this.hash = 0; diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 44cc7371ea..108c0a5103 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -1,6 +1,6 @@ { "name": "@huggingface/xetchunk-wasm", - "version": "0.0.1", + "version": "0.0.2", "scripts": { "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", From d6a860923d54d831147afff203b7e8e1d1a91e80 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 17:10:57 +0200 Subject: [PATCH 37/44] add tests with xet data key --- packages/blake3-wasm/package.json | 2 +- .../tests/{index.js => index.test.ts} | 121 ++++++++++++++---- packages/blake3-wasm/vitest.config.ts | 8 ++ 3 files changed, 108 insertions(+), 23 deletions(-) rename packages/blake3-wasm/tests/{index.js => index.test.ts} (65%) create mode 100644 packages/blake3-wasm/vitest.config.ts diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 1cadbffd2d..2fd61e316f 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -5,7 +5,7 @@ "build:debug": "asc assembly/index.ts --target debug", "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", - "test": "node tests", + "test": "vitest run", "prepare": "pnpm run build" }, "keywords": [ diff --git a/packages/blake3-wasm/tests/index.js b/packages/blake3-wasm/tests/index.test.ts similarity index 65% rename from packages/blake3-wasm/tests/index.js rename to 
packages/blake3-wasm/tests/index.test.ts index 9436c23d27..52aea9858f 100644 --- a/packages/blake3-wasm/tests/index.js +++ b/packages/blake3-wasm/tests/index.test.ts @@ -1,6 +1,8 @@ // Adapted from https://github.com/mcmilk/BLAKE3-tests/blob/11a8abeceac93b5eba664eae3679efb4ffa5bc0a/blake3_test.c +import { describe, expect } from "vitest"; import { blake3Hex, blake3KeyedHex } from "../build/debug.js"; +import { it } from "vitest"; const buffer = new Uint8Array(102400); let i = 0; @@ -13,11 +15,24 @@ for (i = 0, j = 0; i < buffer.length; i++, j++) { buffer[i] = j; } -const key = new Uint8Array(32); -for (let i = 0; i < 32; i++) { - key[i] = "whats the Elvish word for friend".charCodeAt(i); +function uint8ArrayFromString(str: string) { + const arr = new Uint8Array(str.length); + for (let i = 0; i < str.length; i++) { + arr[i] = str.charCodeAt(i); + } + return arr; +} + +function uint8ArrayFromBytes(bytes: number[]) { + const arr = new Uint8Array(bytes.length); + for (let i = 0; i < bytes.length; i++) { + arr[i] = bytes[i]; + } + return arr; } +const key = uint8ArrayFromString("whats the Elvish word for friend"); + const testCases = [ { buf: buffer.slice(0, 0), @@ -186,26 +201,88 @@ const testCases = [ }, ]; -for (const testCase of testCases) { - const result = blake3Hex(testCase.buf); - console.log(result); +describe("blake3", () => { + describe("BLAKE3_TESTS", () => { + for (const testCase of testCases) { + it(`should pass ${testCase.buf.length} bytes`, () => { + const result = blake3Hex(testCase.buf); + expect(result).toBe(testCase.expected); - if (result !== testCase.expected) { - console.error(`Test case failed: ${testCase.buf.length} bytes`); - console.error(`Expected: ${testCase.expected}`); - console.error(`Actual: ${result}`); - process.exit(1); - } + const resultKeyed = blake3KeyedHex(testCase.buf, key); + expect(resultKeyed).toBe(testCase.keyed); + }); + } + }); - const resultKeyed = blake3KeyedHex(testCase.buf, key); - console.log(resultKeyed); + 
describe("compute_data_hash with xet key", () => { + const DATA_KEY = new Uint8Array(32); + const arr = [ + 102, 151, 245, 119, 91, 149, 80, 222, 49, 53, 203, 172, 165, 151, 24, 28, 157, 228, 33, 16, 155, 235, 43, 88, 180, + 208, 176, 75, 147, 173, 242, 41, + ]; + for (let i = 0; i < 32; i++) { + DATA_KEY[i] = arr[i]; + } + it("should pass empty string", () => { + const result = blake3KeyedHex(uint8ArrayFromString(""), key); + expect(result).toBe("e0f2cf784e7e5f10c34f84af150e9a5ff9664216debad915364d741049870f67"); + }); - if (resultKeyed !== testCase.keyed) { - console.error(`Test case failed: ${testCase.buf.length} bytes (keyed)`); - console.error(`Expected: ${testCase.keyed}`); - console.error(`Actual: ${resultKeyed}`); - process.exit(1); - } -} + it("should pass 'hello world'", () => { + const result = blake3KeyedHex(uint8ArrayFromString("hello world"), key); + expect(result).toBe("4e39378b9d359f118190557a1f44130219a54e5fcfa07bd96cf50b466fe651b1"); + }); + + it("should pass 'test'", () => { + const result = blake3KeyedHex(uint8ArrayFromString("test"), key); + expect(result).toBe("86858d1210748f161707a68afd4cc4c46097aac5c76d1091cfe5bab82bee2af6"); + }); + + it("should pass '123456789'", () => { + const result = blake3KeyedHex(buffer, key); + expect(result).toBe("1b58809b9645a598741bf62b29f411d9212c3de9c8c3d0f08afaa7237c32fdd6"); + }); + + it("should pass '!@#$%^&*()'", () => { + const result = blake3KeyedHex(uint8ArrayFromString("!@#$%^&*()"), key); + expect(result).toBe("b0085e856c550c11de350c6154c58f3c560fee691bc790b300fbf5e3fcb45ddb"); + }); + + it("should pass a longer string", () => { + const result = blake3KeyedHex( + uint8ArrayFromString( + "This is a much longer string that will test how the hash function handles larger inputs. It contains multiple sentences and various characters." 
+ ), + key + ); + expect(result).toBe("2324ec7d18249c682cfcc8a1262e51832592941a9f59adb5daeaa201e761f794"); + }); + + it("should pass some binary data", () => { + const result = blake3KeyedHex( + uint8ArrayFromBytes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 254, 253, 252, 251, 250]), + key + ); + expect(result).toBe("8d6d58c2e16d79eb8a6880728cbcf20bb8a67eedc6f9a5caa48b2225abdbf082"); + }); + + it("should pass 32 zeros", () => { + const result = blake3KeyedHex(uint8ArrayFromBytes(new Array(32).fill(0)), key); + expect(result).toBe("db07bd1bc239b73a2320450d008594d74ded925a9fb25c250dca159923551511"); + }); + + it("should pass 32 ones", () => { + const result = blake3KeyedHex(uint8ArrayFromBytes(new Array(32).fill(255)), key); + expect(result).toBe("7bdd5091548666844324a85f5383d110285b70a458fa0ba2f001a866a352cd48"); + }); -console.log("All test cases passed"); + it("should pass sequence of 0-63", () => { + const uint8Array = new Uint8Array(64); + for (let i = 0; i < 64; i++) { + uint8Array[i] = i; + } + const result = blake3KeyedHex(uint8Array, key); + expect(result).toBe("82b0d040d5890fb35cfbbcb7f8e3f06ef86b2ece33d09463d9626536c4833a7e"); + }); + }); +}); diff --git a/packages/blake3-wasm/vitest.config.ts b/packages/blake3-wasm/vitest.config.ts new file mode 100644 index 0000000000..2fb5c48d93 --- /dev/null +++ b/packages/blake3-wasm/vitest.config.ts @@ -0,0 +1,8 @@ +import { defineConfig } from "vitest/config"; + +export default defineConfig({ + test: { + globals: true, + environment: "node", + }, +}); From d8f4348607b7977f39598a64bd7f0814fd9e34cf Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Tue, 8 Jul 2025 18:21:12 +0200 Subject: [PATCH 38/44] fix hash => datahash conversion --- packages/blake3-wasm/tests/index.test.ts | 80 ------------------- packages/xetchunk-wasm/assembly/index.ts | 2 +- .../xetchunk-wasm/assembly/xet-chunker.ts | 15 ++++ packages/xetchunk-wasm/tests/index.test.ts | 10 +-- 4 files changed, 18 insertions(+), 89 deletions(-) diff --git 
a/packages/blake3-wasm/tests/index.test.ts b/packages/blake3-wasm/tests/index.test.ts index 52aea9858f..6f4550d893 100644 --- a/packages/blake3-wasm/tests/index.test.ts +++ b/packages/blake3-wasm/tests/index.test.ts @@ -23,14 +23,6 @@ function uint8ArrayFromString(str: string) { return arr; } -function uint8ArrayFromBytes(bytes: number[]) { - const arr = new Uint8Array(bytes.length); - for (let i = 0; i < bytes.length; i++) { - arr[i] = bytes[i]; - } - return arr; -} - const key = uint8ArrayFromString("whats the Elvish word for friend"); const testCases = [ @@ -213,76 +205,4 @@ describe("blake3", () => { }); } }); - - describe("compute_data_hash with xet key", () => { - const DATA_KEY = new Uint8Array(32); - const arr = [ - 102, 151, 245, 119, 91, 149, 80, 222, 49, 53, 203, 172, 165, 151, 24, 28, 157, 228, 33, 16, 155, 235, 43, 88, 180, - 208, 176, 75, 147, 173, 242, 41, - ]; - for (let i = 0; i < 32; i++) { - DATA_KEY[i] = arr[i]; - } - it("should pass empty string", () => { - const result = blake3KeyedHex(uint8ArrayFromString(""), key); - expect(result).toBe("e0f2cf784e7e5f10c34f84af150e9a5ff9664216debad915364d741049870f67"); - }); - - it("should pass 'hello world'", () => { - const result = blake3KeyedHex(uint8ArrayFromString("hello world"), key); - expect(result).toBe("4e39378b9d359f118190557a1f44130219a54e5fcfa07bd96cf50b466fe651b1"); - }); - - it("should pass 'test'", () => { - const result = blake3KeyedHex(uint8ArrayFromString("test"), key); - expect(result).toBe("86858d1210748f161707a68afd4cc4c46097aac5c76d1091cfe5bab82bee2af6"); - }); - - it("should pass '123456789'", () => { - const result = blake3KeyedHex(buffer, key); - expect(result).toBe("1b58809b9645a598741bf62b29f411d9212c3de9c8c3d0f08afaa7237c32fdd6"); - }); - - it("should pass '!@#$%^&*()'", () => { - const result = blake3KeyedHex(uint8ArrayFromString("!@#$%^&*()"), key); - expect(result).toBe("b0085e856c550c11de350c6154c58f3c560fee691bc790b300fbf5e3fcb45ddb"); - }); - - it("should pass a longer 
string", () => { - const result = blake3KeyedHex( - uint8ArrayFromString( - "This is a much longer string that will test how the hash function handles larger inputs. It contains multiple sentences and various characters." - ), - key - ); - expect(result).toBe("2324ec7d18249c682cfcc8a1262e51832592941a9f59adb5daeaa201e761f794"); - }); - - it("should pass some binary data", () => { - const result = blake3KeyedHex( - uint8ArrayFromBytes([0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 255, 254, 253, 252, 251, 250]), - key - ); - expect(result).toBe("8d6d58c2e16d79eb8a6880728cbcf20bb8a67eedc6f9a5caa48b2225abdbf082"); - }); - - it("should pass 32 zeros", () => { - const result = blake3KeyedHex(uint8ArrayFromBytes(new Array(32).fill(0)), key); - expect(result).toBe("db07bd1bc239b73a2320450d008594d74ded925a9fb25c250dca159923551511"); - }); - - it("should pass 32 ones", () => { - const result = blake3KeyedHex(uint8ArrayFromBytes(new Array(32).fill(255)), key); - expect(result).toBe("7bdd5091548666844324a85f5383d110285b70a458fa0ba2f001a866a352cd48"); - }); - - it("should pass sequence of 0-63", () => { - const uint8Array = new Uint8Array(64); - for (let i = 0; i < 64; i++) { - uint8Array[i] = i; - } - const result = blake3KeyedHex(uint8Array, key); - expect(result).toBe("82b0d040d5890fb35cfbbcb7f8e3f06ef86b2ece33d09463d9626536c4833a7e"); - }); - }); }); diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index b621ce6173..3faa02ec20 100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1 +1 @@ -export { createChunker, finalize, nextBlock, getChunks } from "./xet-chunker"; +export { createChunker, finalize, nextBlock, getChunks, hashToHex } from "./xet-chunker"; diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 9dcc0049bf..76b95619dd 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ 
b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -156,3 +156,18 @@ export function getChunks(data: Uint8Array, targetChunkSize: i32 = TARGET_CHUNK_ const chunker = createChunker(targetChunkSize); return chunker.nextBlock(data, true); } + +export function hashToHex(hash: Uint8Array): string { + const view = new DataView(hash.buffer); + const u64 = view.getUint64(0, true); + const u64_2 = view.getUint64(8, true); + const u64_3 = view.getUint64(16, true); + const u64_4 = view.getUint64(24, true); + + const hex = + u64.toString(16).padStart(16, "0") + + u64_2.toString(16).padStart(16, "0") + + u64_3.toString(16).padStart(16, "0") + + u64_4.toString(16).padStart(16, "0"); + return hex; +} diff --git a/packages/xetchunk-wasm/tests/index.test.ts b/packages/xetchunk-wasm/tests/index.test.ts index 0d6af22ca0..bd97e85844 100644 --- a/packages/xetchunk-wasm/tests/index.test.ts +++ b/packages/xetchunk-wasm/tests/index.test.ts @@ -1,5 +1,5 @@ import { describe, it, expect } from "vitest"; -import { createChunker, finalize, nextBlock, getChunks } from "../build/debug.js"; +import { createChunker, finalize, nextBlock, getChunks, hashToHex } from "../build/debug.js"; import { createRandomArray } from "@huggingface/splitmix64-wasm"; // Helper function to get chunk boundaries from chunks @@ -65,13 +65,7 @@ describe("xetchunk-wasm", () => { ]; expect(chunkBoundaries).toEqual(expectedBoundaries); - expect( - chunks.map((chunk) => - Array.from(chunk.hash) - .map((b) => b.toString(16).padStart(2, "0")) - .join("") - ) - ).toEqual(expectedChunkHashes); + expect(chunks.map((chunk) => hashToHex(chunk.hash))).toEqual(expectedChunkHashes); }); }); From 1fa349e172432e72e7d46bf8901dcc3c1730dbd2 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 9 Jul 2025 11:59:57 +0200 Subject: [PATCH 39/44] add xorb hash, my bad, thought it was already done --- packages/xetchunk-wasm/README.md | 3 +- packages/xetchunk-wasm/assembly/index.ts | 1 + packages/xetchunk-wasm/assembly/xorb-hash.ts | 74 
++++++++++++++++++ 3 files changed, 77 insertions(+), 1 deletion(-) create mode 100644 packages/xetchunk-wasm/assembly/xorb-hash.ts diff --git a/packages/xetchunk-wasm/README.md b/packages/xetchunk-wasm/README.md index 3ab3d656a1..1cc1e6ed92 100644 --- a/packages/xetchunk-wasm/README.md +++ b/packages/xetchunk-wasm/README.md @@ -5,7 +5,7 @@ Using [AssemblyScript](https://www.assemblyscript.org/) to generate a lean WASM. ## Usage ```javascript -import { createChunker, getChunks, nextBlock, finalize } from '@huggingface/xetchunk-wasm'; +import { createChunker, getChunks, nextBlock, finalize, xorbHash } from '@huggingface/xetchunk-wasm'; const TARGET_CHUNK_SIZE = Math.pow(2, 12); @@ -14,6 +14,7 @@ const data = new Uint8Array(1000000); // Example: 1MB of data // ... fill data with your content ... const chunks = getChunks(data, TARGET_CHUNK_SIZE); +console.log("xorbHash", xorbHash(chunks)); // Alternative, in case your data is streaming const chunker = createChunker(TARGET_CHUNK_SIZE); diff --git a/packages/xetchunk-wasm/assembly/index.ts b/packages/xetchunk-wasm/assembly/index.ts index 3faa02ec20..044729e593 100644 --- a/packages/xetchunk-wasm/assembly/index.ts +++ b/packages/xetchunk-wasm/assembly/index.ts @@ -1 +1,2 @@ export { createChunker, finalize, nextBlock, getChunks, hashToHex } from "./xet-chunker"; +export { xorbHash } from "./xorb-hash"; diff --git a/packages/xetchunk-wasm/assembly/xorb-hash.ts b/packages/xetchunk-wasm/assembly/xorb-hash.ts new file mode 100644 index 0000000000..f10ec5fb21 --- /dev/null +++ b/packages/xetchunk-wasm/assembly/xorb-hash.ts @@ -0,0 +1,74 @@ +import { blake3Keyed } from "@huggingface/blake3-wasm/assembly"; +// eslint-disable-next-line @typescript-eslint/consistent-type-imports +import { Chunk } from "./xet-chunker"; + +const MEAN_CHUNK_PER_NODE = 4; + +if (MEAN_CHUNK_PER_NODE % 256 !== 0) { + throw new Error("MEAN_CHUNK_PER_NODE must be a multiple of 256"); + // ^ So we only need to check the last byte of the last u64 in the
chunk hash +} + +const BLAKE3_NODE_KEY = new Uint8Array(32); +const STATIC_KEY: StaticArray = [ + 1, 126, 197, 199, 165, 71, 41, 150, 253, 148, 102, 102, 180, 138, 2, 230, 93, 221, 83, 111, 55, 199, 109, 210, 248, + 99, 82, 230, 74, 83, 113, 63, +]; +for (let i = 0; i < 32; i++) { + BLAKE3_NODE_KEY[i] = STATIC_KEY[i]; +} + +const INDEX_OF_LAST_BYTE_OF_LAST_U64_IN_CHUNK_HASH = 3 * 8; +// ^ 32 bytes, 8 bytes per u64, take the first byte of the last u64 due to little endianness +// ^ Assumes that MEAN_CHUNK_PER_NODE is a power of 2 and less than 256 + +export function xorbHash(chunks: Chunk[]): Uint8Array { + // Split chunks in groups of 2 - 2 * MEAN_CHUNK_PER_NODE with mean of MEAN_CHUNK_PER_NODE + // to form a tree of nodes + // Then recursively hash the groups + + if (chunks.length === 0) { + // Return empty hash for empty chunks array + return new Uint8Array(32); + } + + while (chunks.length > 1) { + const nodes: Chunk[] = []; + let currentIndex = 0; + let numOfChildrenSoFar = 0; + // ^ It's 1 less than it should be, propagating because of error in reference implementation + for (let i = 0; i < chunks.length; i++) { + if ( + i === chunks.length - 1 || + numOfChildrenSoFar === 2 * MEAN_CHUNK_PER_NODE || + (numOfChildrenSoFar >= 2 && + chunks[i].hash[INDEX_OF_LAST_BYTE_OF_LAST_U64_IN_CHUNK_HASH] % MEAN_CHUNK_PER_NODE === 0) + ) { + nodes.push(nodeHash(chunks.slice(currentIndex, i + 1))); + currentIndex = i + 1; + numOfChildrenSoFar = 0; + } else { + numOfChildrenSoFar++; + } + } + chunks = nodes; + } + + return nodeHash(chunks).hash; +} + +function nodeHash(chunks: Chunk[]): Chunk { + const array = new Uint8Array((32 + 8) * chunks.length); + const view = new DataView(array.buffer); + let totalLength = 0; + for (let i = 0; i < chunks.length; i++) { + array.set(chunks[i].hash, i * (32 + 8)); + view.setUint64(i * (32 + 8) + 32, chunks[i].length, true); + totalLength += chunks[i].length; + } + const hash = blake3Keyed(array, BLAKE3_NODE_KEY); + return { + hash: hash, + 
length: totalLength, + }; +} From d0ba0b25f5c14a5c6dd00beb264e9a5496dc328b Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 9 Jul 2025 16:25:53 +0200 Subject: [PATCH 40/44] add bench command --- packages/xetchunk-wasm/README.md | 8 +++ packages/xetchunk-wasm/assembly/xorb-hash.ts | 8 +-- packages/xetchunk-wasm/package.json | 1 + packages/xetchunk-wasm/tests/bench.js | 75 ++++++++++++++++++++ 4 files changed, 88 insertions(+), 4 deletions(-) create mode 100644 packages/xetchunk-wasm/tests/bench.js diff --git a/packages/xetchunk-wasm/README.md b/packages/xetchunk-wasm/README.md index 1cc1e6ed92..9bebca391e 100644 --- a/packages/xetchunk-wasm/README.md +++ b/packages/xetchunk-wasm/README.md @@ -26,3 +26,11 @@ for await (const data of source) { console.log("last chunk", finalize(chunker)); ``` + +## Benchmarking chunking + +```shell +pnpm install +pnpm --filter xetchunk-wasm build +pnpm --filter xetchunk-wasm bench path/to/a-big-file +``` \ No newline at end of file diff --git a/packages/xetchunk-wasm/assembly/xorb-hash.ts b/packages/xetchunk-wasm/assembly/xorb-hash.ts index f10ec5fb21..06a0c1069e 100644 --- a/packages/xetchunk-wasm/assembly/xorb-hash.ts +++ b/packages/xetchunk-wasm/assembly/xorb-hash.ts @@ -4,10 +4,10 @@ import { Chunk } from "./xet-chunker"; const MEAN_CHUNK_PER_NODE = 4; -if (MEAN_CHUNK_PER_NODE % 256 !== 0) { - throw new Error("MEAN_CHUNK_PER_NODE must be a multiple of 256"); - // ^ So we only need to check the last byte of the last u64 in the chunk hash -} +// if (MEAN_CHUNK_PER_NODE % 256 !== 0) { +// throw new Error("MEAN_CHUNK_PER_NODE must be a multiple of 256"); +// // ^ So we only need to check the last byte of the last u64 in the chunk hash +// } const BLAKE3_NODE_KEY = new Uint8Array(32); const STATIC_KEY: StaticArray = [ diff --git a/packages/xetchunk-wasm/package.json b/packages/xetchunk-wasm/package.json index 108c0a5103..16b19f8e6a 100644 --- a/packages/xetchunk-wasm/package.json +++ b/packages/xetchunk-wasm/package.json @@ -6,6 
+6,7 @@ "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", "test": "vitest run", + "bench": "node tests/bench.js", "prepare": "pnpm run build" }, "keywords": [ diff --git a/packages/xetchunk-wasm/tests/bench.js b/packages/xetchunk-wasm/tests/bench.js new file mode 100644 index 0000000000..4731ec870a --- /dev/null +++ b/packages/xetchunk-wasm/tests/bench.js @@ -0,0 +1,75 @@ +import { parseArgs } from "node:util"; +import { createChunker, finalize, nextBlock } from "../build/release.js"; +import { createReadStream } from "node:fs"; + +const { positionals } = parseArgs({ + args: process.argv.slice(2), + allowPositionals: true, +}); + +if (!positionals[0]) { + console.error("Usage: node tests/bench.js "); + process.exit(1); +} + +const GB = 1_000_000_000; + +console.log("loading first GB of data in memory"); +const data = new Uint8Array(GB); + +const stream = createReadStream(positionals[0]); +let totalRead = 0; + +for await (const chunk of stream) { + data.set(chunk.slice(0, data.length - totalRead), totalRead); + totalRead += chunk.length; + + if (totalRead >= data.length) { + stream.close(); + break; + } +} + +if (totalRead < data.length) { + console.log("not enough data, repeating in memory"); + + while (totalRead < data.length) { + data.set(data.slice(0, GB), totalRead); + totalRead += GB; + } +} + +console.log("data loaded in memory, starting to chunk in 64MB chunks (for a max of 30 seconds)"); + +const start = performance.now(); +const chunker = createChunker(); + +let totalProcessed = 0; +let totalChunks = 0; +let stoppedEarly = false; + +for (let i = 0; i < data.length; i += 64_000_000) { + const chunks = nextBlock(chunker, data.slice(i, i + 64_000_000)); + totalProcessed += 64_000_000; + totalChunks += chunks.length; + + if (performance.now() - start > 30_000) { + console.log("30 seconds elapsed, stopping"); + stoppedEarly = true; + break; + } +} + +if (!stoppedEarly) { + const chunks = 
finalize(chunker); + totalChunks += chunks.length; + totalProcessed += chunks.length * chunks[0].length; +} + +console.log( + `chunked ${totalChunks} chunks in ${performance.now() - start}ms, ${( + totalProcessed / + 1_000_000 / + ((performance.now() - start) / 1000) + ).toFixed(3)} MB/s` +); From 698582d9eeff689df5b0282b117fae0c311d87af Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 9 Jul 2025 18:02:39 +0200 Subject: [PATCH 41/44] add benchmark --- packages/xetchunk-wasm/tests/bench.js | 122 +++-- packages/xetchunk-wasm/vendor/README.md | 1 + .../xetchunk-wasm/vendor/chunker_wasm.d.ts | 9 + packages/xetchunk-wasm/vendor/chunker_wasm.js | 30 ++ .../xetchunk-wasm/vendor/chunker_wasm_bg.js | 490 ++++++++++++++++++ .../xetchunk-wasm/vendor/chunker_wasm_bg.wasm | Bin 0 -> 134833 bytes .../vendor/chunker_wasm_bg.wasm.d.ts | 16 + packages/xetchunk-wasm/vendor/package.json | 17 + 8 files changed, 654 insertions(+), 31 deletions(-) create mode 100644 packages/xetchunk-wasm/vendor/README.md create mode 100644 packages/xetchunk-wasm/vendor/chunker_wasm.d.ts create mode 100644 packages/xetchunk-wasm/vendor/chunker_wasm.js create mode 100644 packages/xetchunk-wasm/vendor/chunker_wasm_bg.js create mode 100644 packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm create mode 100644 packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm.d.ts create mode 100644 packages/xetchunk-wasm/vendor/package.json diff --git a/packages/xetchunk-wasm/tests/bench.js b/packages/xetchunk-wasm/tests/bench.js index 4731ec870a..736742138a 100644 --- a/packages/xetchunk-wasm/tests/bench.js +++ b/packages/xetchunk-wasm/tests/bench.js @@ -1,6 +1,7 @@ import { parseArgs } from "node:util"; import { createChunker, finalize, nextBlock } from "../build/release.js"; import { createReadStream } from "node:fs"; +import { Chunker } from "../vendor/chunker_wasm.js"; const { positionals } = parseArgs({ args: process.argv.slice(2), @@ -12,10 +13,11 @@ if (!positionals[0]) { process.exit(1); } -const GB = 
1_000_000_000; +const BYTES = 100_000_000; +const CHUNK_SIZE = 10_000_000; -console.log("loading first GB of data in memory"); -const data = new Uint8Array(GB); +console.log(`loading first ${BYTES.toLocaleString("en-US")} bytes of data in memory`); +const data = new Uint8Array(BYTES); const stream = createReadStream(positionals[0]); let totalRead = 0; @@ -34,42 +36,100 @@ if (totalRead < data.length) { console.log("not enough data, repeating in memory"); while (totalRead < data.length) { - data.set(data.slice(0, GB), totalRead); - totalRead += GB; + data.set(data.slice(0, BYTES), totalRead); + totalRead += BYTES; } } -console.log("data loaded in memory, starting to chunk in 64MB chunks (for a max of 30 seconds)"); +console.log( + `data loaded in memory, starting to process data ${CHUNK_SIZE.toLocaleString( + "en-US" + )} bytes at a time (for a max of 30 seconds)` +); -const start = performance.now(); -const chunker = createChunker(); +function testAssemblyChunker() { + const start = performance.now(); + const chunker = createChunker(64 * 1024); + + let totalProcessed = 0; + let totalChunks = 0; + let stoppedEarly = false; + + for (let i = 0; i < data.length; i += CHUNK_SIZE) { + const chunks = nextBlock(chunker, data.subarray(i, i + CHUNK_SIZE)); + console.log("chunks", chunks.length); + totalProcessed += CHUNK_SIZE; + totalChunks += chunks.length; + + if (performance.now() - start > 30_000) { + console.log("30 seconds elapsed, stopping"); + stoppedEarly = true; + break; + } + } -let totalProcessed = 0; -let totalChunks = 0; -let stoppedEarly = false; + if (!stoppedEarly) { + const lastChunk = finalize(chunker); + if (lastChunk) { + totalChunks += 1; + totalProcessed = data.length; + } + } -for (let i = 0; i < data.length; i += 64_000_000) { - const chunks = nextBlock(chunker, data.slice(i, i + 64_000_000)); - totalProcessed += 64_000_000; - totalChunks += chunks.length; + console.log( + `chunked ${totalChunks} chunks in ${performance.now() - start}ms, ${( + 
totalProcessed / + 1_000_000 / + ((performance.now() - start) / 1000) + ).toFixed(3)} MB/s` + ); +} - if (performance.now() - start > 30_000) { - console.log("30 seconds elapsed, stopping"); - stoppedEarly = true; - break; +testAssemblyChunker(); + +console.log("testing rust Chunker"); + +function testRustChunker() { + const start = performance.now(); + const chunker = new Chunker(64 * 1024); + + let totalProcessed = 0; + let totalChunks = 0; + let stoppedEarly = false; + + let chunks = []; + for (let i = 0; i < data.length; i += CHUNK_SIZE) { + chunks = chunker.add_data(data.subarray(i, i + CHUNK_SIZE)); + console.log("chunks", chunks.length); + totalProcessed += CHUNK_SIZE; + totalChunks += chunks.length; + + if (performance.now() - start > 30_000) { + console.log("30 seconds elapsed, stopping"); + stoppedEarly = true; + break; + } + } + + if (!stoppedEarly) { + chunks = chunker.finish(); + if (chunks.length > 0) { + totalChunks += chunks.length; + totalProcessed += chunks.length * chunks[0].length; + } } -} -if (!stoppedEarly) { - const chunks = finalize(chunker); - totalChunks += chunks.length; - totalProcessed += chunks.length * chunks[0].length; + console.log( + `chunked ${totalChunks} chunks in ${performance.now() - start}ms, ${( + totalProcessed / + 1_000_000 / + ((performance.now() - start) / 1000) + ).toFixed(3)} MB/s` + ); } -console.log( - `chunked ${totalChunks} chunks in ${performance.now() - start}ms, ${( - totalProcessed / - 1_000_000 / - ((performance.now() - start) / 1000) - ).toFixed(3)} MB/s` -); +testRustChunker(); + +console.log("testing assembly Chunker again"); + +testAssemblyChunker(); diff --git a/packages/xetchunk-wasm/vendor/README.md b/packages/xetchunk-wasm/vendor/README.md new file mode 100644 index 0000000000..a1f2556344 --- /dev/null +++ b/packages/xetchunk-wasm/vendor/README.md @@ -0,0 +1 @@ +This is the WASM generated from the rust client \ No newline at end of file diff --git a/packages/xetchunk-wasm/vendor/chunker_wasm.d.ts 
b/packages/xetchunk-wasm/vendor/chunker_wasm.d.ts new file mode 100644 index 0000000000..33c9c7efcc --- /dev/null +++ b/packages/xetchunk-wasm/vendor/chunker_wasm.d.ts @@ -0,0 +1,9 @@ +/* tslint:disable */ +/* eslint-disable */ +export function compute_xorb_hash(chunks_array: any): string; +export class Chunker { + free(): void; + constructor(target_chunk_size: number); + add_data(data: Uint8Array): any; + finish(): any; +} diff --git a/packages/xetchunk-wasm/vendor/chunker_wasm.js b/packages/xetchunk-wasm/vendor/chunker_wasm.js new file mode 100644 index 0000000000..d689c94c90 --- /dev/null +++ b/packages/xetchunk-wasm/vendor/chunker_wasm.js @@ -0,0 +1,30 @@ +// export * from "./chunker_wasm_bg.js"; +import * as __glue_imports from "./chunker_wasm_bg.js"; + +const wasmUrl = new URL("./chunker_wasm_bg.wasm", import.meta.url); +const binary = await (await import("node:fs/promises")).readFile(wasmUrl); +// console.log("binary", binary); + +const wasmModule = await WebAssembly.compile(binary); +const imports = Object.entries( + WebAssembly.Module.imports(wasmModule).reduce( + (result, item) => ({ + ...result, + [item.module]: [...(result[item.module] || []), item.name], + }), + {} + ) +).map(([from, names]) => ({ from, names })); + +// const exports = WebAssembly.Module.exports(wasmModule).map((item) => item.name); + +// console.log("imports", imports); + +const wasm = await WebAssembly.instantiate(wasmModule, { + "./chunker_wasm_bg.js": Object.fromEntries(imports[0].names.map((name) => [name, __glue_imports[name]])), +}); +export * from "./chunker_wasm_bg.js"; +import { __wbg_set_wasm } from "./chunker_wasm_bg.js"; +__wbg_set_wasm(wasm.exports); +// console.log("exports", exports); +wasm.exports.__wbindgen_start(); diff --git a/packages/xetchunk-wasm/vendor/chunker_wasm_bg.js b/packages/xetchunk-wasm/vendor/chunker_wasm_bg.js new file mode 100644 index 0000000000..d8bfe1c7d9 --- /dev/null +++ b/packages/xetchunk-wasm/vendor/chunker_wasm_bg.js @@ -0,0 +1,490 @@ +let 
wasm; +export function __wbg_set_wasm(val) { + wasm = val; +} + + +let WASM_VECTOR_LEN = 0; + +let cachedUint8ArrayMemory0 = null; + +function getUint8ArrayMemory0() { + if (cachedUint8ArrayMemory0 === null || cachedUint8ArrayMemory0.byteLength === 0) { + cachedUint8ArrayMemory0 = new Uint8Array(wasm.memory.buffer); + } + return cachedUint8ArrayMemory0; +} + +const lTextEncoder = typeof TextEncoder === 'undefined' ? (0, module.require)('util').TextEncoder : TextEncoder; + +let cachedTextEncoder = new lTextEncoder('utf-8'); + +const encodeString = (typeof cachedTextEncoder.encodeInto === 'function' + ? function (arg, view) { + return cachedTextEncoder.encodeInto(arg, view); +} + : function (arg, view) { + const buf = cachedTextEncoder.encode(arg); + view.set(buf); + return { + read: arg.length, + written: buf.length + }; +}); + +function passStringToWasm0(arg, malloc, realloc) { + + if (realloc === undefined) { + const buf = cachedTextEncoder.encode(arg); + const ptr = malloc(buf.length, 1) >>> 0; + getUint8ArrayMemory0().subarray(ptr, ptr + buf.length).set(buf); + WASM_VECTOR_LEN = buf.length; + return ptr; + } + + let len = arg.length; + let ptr = malloc(len, 1) >>> 0; + + const mem = getUint8ArrayMemory0(); + + let offset = 0; + + for (; offset < len; offset++) { + const code = arg.charCodeAt(offset); + if (code > 0x7F) break; + mem[ptr + offset] = code; + } + + if (offset !== len) { + if (offset !== 0) { + arg = arg.slice(offset); + } + ptr = realloc(ptr, len, len = offset + arg.length * 3, 1) >>> 0; + const view = getUint8ArrayMemory0().subarray(ptr + offset, ptr + len); + const ret = encodeString(arg, view); + + offset += ret.written; + ptr = realloc(ptr, len, offset, 1) >>> 0; + } + + WASM_VECTOR_LEN = offset; + return ptr; +} + +let cachedDataViewMemory0 = null; + +function getDataViewMemory0() { + if (cachedDataViewMemory0 === null || cachedDataViewMemory0.buffer.detached === true || (cachedDataViewMemory0.buffer.detached === undefined && 
cachedDataViewMemory0.buffer !== wasm.memory.buffer)) { + cachedDataViewMemory0 = new DataView(wasm.memory.buffer); + } + return cachedDataViewMemory0; +} + +function addToExternrefTable0(obj) { + const idx = wasm.__externref_table_alloc(); + wasm.__wbindgen_export_4.set(idx, obj); + return idx; +} + +function handleError(f, args) { + try { + return f.apply(this, args); + } catch (e) { + const idx = addToExternrefTable0(e); + wasm.__wbindgen_exn_store(idx); + } +} + +const lTextDecoder = typeof TextDecoder === 'undefined' ? (0, module.require)('util').TextDecoder : TextDecoder; + +let cachedTextDecoder = new lTextDecoder('utf-8', { ignoreBOM: true, fatal: true }); + +cachedTextDecoder.decode(); + +function getStringFromWasm0(ptr, len) { + ptr = ptr >>> 0; + return cachedTextDecoder.decode(getUint8ArrayMemory0().subarray(ptr, ptr + len)); +} + +function debugString(val) { + // primitive types + const type = typeof val; + if (type == 'number' || type == 'boolean' || val == null) { + return `${val}`; + } + if (type == 'string') { + return `"${val}"`; + } + if (type == 'symbol') { + const description = val.description; + if (description == null) { + return 'Symbol'; + } else { + return `Symbol(${description})`; + } + } + if (type == 'function') { + const name = val.name; + if (typeof name == 'string' && name.length > 0) { + return `Function(${name})`; + } else { + return 'Function'; + } + } + // objects + if (Array.isArray(val)) { + const length = val.length; + let debug = '['; + if (length > 0) { + debug += debugString(val[0]); + } + for(let i = 1; i < length; i++) { + debug += ', ' + debugString(val[i]); + } + debug += ']'; + return debug; + } + // Test for built-in + const builtInMatches = /\[object ([^\]]+)\]/.exec(toString.call(val)); + let className; + if (builtInMatches && builtInMatches.length > 1) { + className = builtInMatches[1]; + } else { + // Failed to match the standard '[object ClassName]' + return toString.call(val); + } + if (className == 'Object') { 
+ // we're a user defined class or Object + // JSON.stringify avoids problems with cycles, and is generally much + // easier than looping through ownProperties of `val`. + try { + return 'Object(' + JSON.stringify(val) + ')'; + } catch (_) { + return 'Object'; + } + } + // errors + if (val instanceof Error) { + return `${val.name}: ${val.message}\n${val.stack}`; + } + // TODO we could test for more things here, like `Set`s and `Map`s. + return className; +} + +function isLikeNone(x) { + return x === undefined || x === null; +} + +function passArray8ToWasm0(arg, malloc) { + const ptr = malloc(arg.length * 1, 1) >>> 0; + getUint8ArrayMemory0().set(arg, ptr / 1); + WASM_VECTOR_LEN = arg.length; + return ptr; +} + +function takeFromExternrefTable0(idx) { + const value = wasm.__wbindgen_export_4.get(idx); + wasm.__externref_table_dealloc(idx); + return value; +} +/** + * @param {any} chunks_array + * @returns {string} + */ +export function compute_xorb_hash(chunks_array) { + let deferred2_0; + let deferred2_1; + try { + const ret = wasm.compute_xorb_hash(chunks_array); + var ptr1 = ret[0]; + var len1 = ret[1]; + if (ret[3]) { + ptr1 = 0; len1 = 0; + throw takeFromExternrefTable0(ret[2]); + } + deferred2_0 = ptr1; + deferred2_1 = len1; + return getStringFromWasm0(ptr1, len1); + } finally { + wasm.__wbindgen_free(deferred2_0, deferred2_1, 1); + } +} + +const ChunkerFinalization = (typeof FinalizationRegistry === 'undefined') + ? 
{ register: () => {}, unregister: () => {} } + : new FinalizationRegistry(ptr => wasm.__wbg_chunker_free(ptr >>> 0, 1)); + +export class Chunker { + + __destroy_into_raw() { + const ptr = this.__wbg_ptr; + this.__wbg_ptr = 0; + ChunkerFinalization.unregister(this); + return ptr; + } + + free() { + const ptr = this.__destroy_into_raw(); + wasm.__wbg_chunker_free(ptr, 0); + } + /** + * @param {number} target_chunk_size + */ + constructor(target_chunk_size) { + const ret = wasm.chunker_new(target_chunk_size); + this.__wbg_ptr = ret >>> 0; + ChunkerFinalization.register(this, this.__wbg_ptr, this); + return this; + } + /** + * @param {Uint8Array} data + * @returns {any} + */ + add_data(data) { + const ptr0 = passArray8ToWasm0(data, wasm.__wbindgen_malloc); + const len0 = WASM_VECTOR_LEN; + const ret = wasm.chunker_add_data(this.__wbg_ptr, ptr0, len0); + if (ret[2]) { + throw takeFromExternrefTable0(ret[1]); + } + return takeFromExternrefTable0(ret[0]); + } + /** + * @returns {any} + */ + finish() { + const ret = wasm.chunker_finish(this.__wbg_ptr); + if (ret[2]) { + throw takeFromExternrefTable0(ret[1]); + } + return takeFromExternrefTable0(ret[0]); + } +} + +export function __wbg_String_8f0eb39a4a4c2f66(arg0, arg1) { + const ret = String(arg1); + const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); + const len1 = WASM_VECTOR_LEN; + getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true); +}; + +export function __wbg_buffer_609cc3eee51ed158(arg0) { + const ret = arg0.buffer; + return ret; +}; + +export function __wbg_call_672a4d21634d4a24() { return handleError(function (arg0, arg1) { + const ret = arg0.call(arg1); + return ret; +}, arguments) }; + +export function __wbg_done_769e5ede4b31c67b(arg0) { + const ret = arg0.done; + return ret; +}; + +export function __wbg_get_67b2ba62fc30de12() { return handleError(function (arg0, arg1) { + const ret = Reflect.get(arg0, arg1); + 
return ret; +}, arguments) }; + +export function __wbg_get_b9b93047fe3cf45b(arg0, arg1) { + const ret = arg0[arg1 >>> 0]; + return ret; +}; + +export function __wbg_getwithrefkey_1dc361bd10053bfe(arg0, arg1) { + const ret = arg0[arg1]; + return ret; +}; + +export function __wbg_instanceof_ArrayBuffer_e14585432e3737fc(arg0) { + let result; + try { + result = arg0 instanceof ArrayBuffer; + } catch (_) { + result = false; + } + const ret = result; + return ret; +}; + +export function __wbg_instanceof_Uint8Array_17156bcf118086a9(arg0) { + let result; + try { + result = arg0 instanceof Uint8Array; + } catch (_) { + result = false; + } + const ret = result; + return ret; +}; + +export function __wbg_isArray_a1eab7e0d067391b(arg0) { + const ret = Array.isArray(arg0); + return ret; +}; + +export function __wbg_isSafeInteger_343e2beeeece1bb0(arg0) { + const ret = Number.isSafeInteger(arg0); + return ret; +}; + +export function __wbg_iterator_9a24c88df860dc65() { + const ret = Symbol.iterator; + return ret; +}; + +export function __wbg_length_a446193dc22c12f8(arg0) { + const ret = arg0.length; + return ret; +}; + +export function __wbg_length_e2d2a49132c1b256(arg0) { + const ret = arg0.length; + return ret; +}; + +export function __wbg_log_31c4454272417045(arg0, arg1) { + console.log(getStringFromWasm0(arg0, arg1)); +}; + +export function __wbg_new_405e22f390576ce2() { + const ret = new Object(); + return ret; +}; + +export function __wbg_new_78feb108b6472713() { + const ret = new Array(); + return ret; +}; + +export function __wbg_new_a12002a7f91c75be(arg0) { + const ret = new Uint8Array(arg0); + return ret; +}; + +export function __wbg_next_25feadfc0913fea9(arg0) { + const ret = arg0.next; + return ret; +}; + +export function __wbg_next_6574e1a8a62d1055() { return handleError(function (arg0) { + const ret = arg0.next(); + return ret; +}, arguments) }; + +export function __wbg_set_37837023f3d740e8(arg0, arg1, arg2) { + arg0[arg1 >>> 0] = arg2; +}; + +export function 
__wbg_set_3f1d0b984ed272ed(arg0, arg1, arg2) { + arg0[arg1] = arg2; +}; + +export function __wbg_set_65595bdd868b3009(arg0, arg1, arg2) { + arg0.set(arg1, arg2 >>> 0); +}; + +export function __wbg_value_cd1ffa7b1ab794f1(arg0) { + const ret = arg0.value; + return ret; +}; + +export function __wbindgen_as_number(arg0) { + const ret = +arg0; + return ret; +}; + +export function __wbindgen_boolean_get(arg0) { + const v = arg0; + const ret = typeof(v) === 'boolean' ? (v ? 1 : 0) : 2; + return ret; +}; + +export function __wbindgen_debug_string(arg0, arg1) { + const ret = debugString(arg1); + const ptr1 = passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); + const len1 = WASM_VECTOR_LEN; + getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true); +}; + +export function __wbindgen_error_new(arg0, arg1) { + const ret = new Error(getStringFromWasm0(arg0, arg1)); + return ret; +}; + +export function __wbindgen_in(arg0, arg1) { + const ret = arg0 in arg1; + return ret; +}; + +export function __wbindgen_init_externref_table() { + const table = wasm.__wbindgen_export_4; + const offset = table.grow(4); + table.set(0, undefined); + table.set(offset + 0, undefined); + table.set(offset + 1, null); + table.set(offset + 2, true); + table.set(offset + 3, false); + ; +}; + +export function __wbindgen_is_function(arg0) { + const ret = typeof(arg0) === 'function'; + return ret; +}; + +export function __wbindgen_is_object(arg0) { + const val = arg0; + const ret = typeof(val) === 'object' && val !== null; + return ret; +}; + +export function __wbindgen_is_undefined(arg0) { + const ret = arg0 === undefined; + return ret; +}; + +export function __wbindgen_jsval_loose_eq(arg0, arg1) { + const ret = arg0 == arg1; + return ret; +}; + +export function __wbindgen_memory() { + const ret = wasm.memory; + return ret; +}; + +export function __wbindgen_number_get(arg0, arg1) { + const obj = arg1; + const ret = typeof(obj) 
=== 'number' ? obj : undefined; + getDataViewMemory0().setFloat64(arg0 + 8 * 1, isLikeNone(ret) ? 0 : ret, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, !isLikeNone(ret), true); +}; + +export function __wbindgen_number_new(arg0) { + const ret = arg0; + return ret; +}; + +export function __wbindgen_string_get(arg0, arg1) { + const obj = arg1; + const ret = typeof(obj) === 'string' ? obj : undefined; + var ptr1 = isLikeNone(ret) ? 0 : passStringToWasm0(ret, wasm.__wbindgen_malloc, wasm.__wbindgen_realloc); + var len1 = WASM_VECTOR_LEN; + getDataViewMemory0().setInt32(arg0 + 4 * 1, len1, true); + getDataViewMemory0().setInt32(arg0 + 4 * 0, ptr1, true); +}; + +export function __wbindgen_string_new(arg0, arg1) { + const ret = getStringFromWasm0(arg0, arg1); + return ret; +}; + +export function __wbindgen_throw(arg0, arg1) { + throw new Error(getStringFromWasm0(arg0, arg1)); +}; + diff --git a/packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm b/packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm new file mode 100644 index 0000000000000000000000000000000000000000..bdcdb84fff2e8fd4a94075f43c6566552062b344 GIT binary patch literal 134833 zcmd443z%KkRp)tL_f_{+)h($cl~j71dn`NUi3O#%EHa>7U^~JXhsR`SdS+xjO15ta z$&xBc0*+M1az>#|$AF>DkOnv7jtmonzn;tx=uAhR4&ct@VY&?%2w=e7af1OjF@quQ zcz*x2&pG#0smgK4$M=nu)IEEj{akD9_1bIi6C6JKePIv;;crIU?}#p32rt|bU$7q% zfjtLzggk|(coph>jrV$&_#a-Jx+4hkY~Du~P6c-aWLg0-0=0NYbV>#5s$}iwPl5G% z%3f2{>Zr4O?+EXue0=H@twpCU*t1$Dht4&K^ZYjK*BVsS^M1(T`8!&qlyRP{fI-CP zt&qy=KXTgssYu4wo`!0}4Nv7m@k#o7pP#h-ki6F??S~}I3ncbK1N16ro{xTWbZY$T zzW(T4?>ad*d*;x)0r#OJcYa;}?3zP|-hJfmlgI9yJ$dNt{F%E?-g)TG+4-QeY~c+m zeCMGPr|vwobNkU>9z)RF$|(fOdZY`g0!?Nat#Cy&h@zx(9uu@#CBSBswv#+Pk#eWg(U?771yfXb<} zvxjC+uTWuarNWU@r%ucsK539%e#lk0oqX5#9RX)6qooSB!)Fiq^1<3=$Gu6zxbx6= zc~tK^K0SM6=Z%MV9o}_x$MHRTR>1o*qY-A-E!)FZ-vh&a_ucb%otZs8H~Zd0+m9XH zxo7*4W80^vckeuMd^YGVTQ?`Lv$OMub{^k;Z2HKJ`*zJ9gUCSE#2eM1w?jv6JaXgC z>0Nt|&+a^WeAn(H!TM!uPt{b=S+;MdKksZ7DtUwd_z?k)+ 
zz59;O9@##<@5r8Adw1;JzB3qJwwo%xA-d!5I(c;d?o%gMXn)cJ?d0rx<`3=IeSG%t zvExUl85K`A2CZc~s8-ptd+)BhV@uNGZkIio1u>!f|AUJyX#0jc^$74IT@7cNQ z*e*I;p-KgjyQX)~?$~jB=Z(|5_wG45yW@?AWi+{W-_E_$J9Zx5d2H{l>Dhh3hGj9c zIiLSocjnCD_Z~XDeHM7lP9K}zvv=o>+gF10w!GHKv-5{f9-TdP{E$_jdDrpdtp3^U zyLRu}y=&)=*`0fL?md2V1$5<$@9wkTb@=$~!ISf|cT#ufuAQ?xj?D7k(b?@sj!dsm zH(wY>e206cZ#;T*Cv|pjpFOsH_r4WyU4iG}?K`HYcO2e({KoA^_wGJ2`^Gf{&po?$ z-?;n8v19x8>^rh^dius-a#?sTUa^OF?b@^b#+}EG?qJF8IKFR%+1zjyIRCr5PtNc2 zDA~Su`|dqQjvn8>ec$xHJuI>1xo}l*9GgAzt~+xPxdNN)s>@uK!ud|Udj+95Rw?&= zv)^~>%zIZLJzGe5gLj|2d;SpYVfM^Pc{%& zaU%|cWV5ML;aC&~QMe`LZ!gGJh3)z^s9908DhTRPdn5@OjiHEi9EVXo3gRegL|cPo zRnUwm$A1l~2T9DkAPKJJ&rq0zw4IRMjA<%pQz{99>)sMIr$&OYAh^EWcX3C8mW6wM!_nN`9jw@er9$ym~R(J39dukB6axKu|vlW&mRtcAsi|4 z&<*ZBdspzU!m*>LzVDuQ&Cecsy<4&8N_%wG%_ArHoL>S*wDp>>%f>ddUq{oli? zT%LW8^s7^6W`oa%kkt}I9`aRgUtOV$+4tOY>dgG1U15l>v9$cLyp!)At<@$ljB72f=0_>cZN{C~rD{80FT@X_#JhQAX&7d{yMYWP>YQ;kd1xR3fz zg)fD_6rMTud*O$}lb;O#hOB=VJ{$g6_>u6VJU<)$Fcton@PGPJ`19evp|{@)e?9#D z@DIX23V%9$K71|wzru^*UGKU?%RdwTkN-9Na`<}qIp*-k;h%&H;eiWx&-_&QgVE5J ze!u=7!w*b`>4h{%gQ;jD+mwb=K{wl~RI}gDCY2t~*7Va&>0}zY*Td6iUH9Et&;HS4$y}DIHm%nC*^o+3(SAR>MvpgS>-*_wdW~A@W<6yEzL$U^ zO{J4;OF!+U%{2DS_S_dfefb&MYFeX>em0W!Y^cN5ZCh<^%xbDTK`*2_wAE31W7g}Z zwX~Ba<;)ZJYd?PFH*0fQ&6?}=DKOEpYgv{_F}xBtp*fnJx$Z0bhDL6`&r%K zGL(*}`7{$W>~U+hxlg%`yy)7nHZpl@Xr!NY(~VYtZ8{{7j942K7)dAlR)ry2UMofk;+Dw7UFo5W1!>J!W!?l+nZk-hwNgFAE0(Z@HOl|GUQrc?h@oiby z=XJ{pwo*V^S6MJciX7E*i~UF^PXJBVK3J9vvoqoNV>t^j-+EE zj0v0P1moOd^MrVZXxYz{`7~9+&vPOL-L%Vl<(Km`@O9D+jFK^cmqt3CZl<+bnhF~= zkpt6cFlTTJdcuXp=DeQwYCJGjg*ybkQy`k8upMHQZ|4X(wIFy0n%6r_bNW2m zfVX*e7SnAF8WVgW!J6t@t2Q4`x6Eg}YIbUm#Nr?$E}`6z`?=K1FE znSeS$3eH!>Cv^K2@Cn^+pm*>IAr9~fa)IAECa1+;6CYcGPfB=vu3G`06R8owLC^p!*E&EeA{1dLCJZkOP5K(|`=HBW+z@L8eTMeDPqTLa%>e6E4vXq2q3 z20ePJ`>OCcX8fYITW=W5n{&E^TGT7pl{#-Q8PE8^3L zpo-76tPtT7Zu$3KpQ5iEQUiQ4<`vhc$0u|&wHdUeTf zN6{13f{lh&*C^R*jDN3T&hXpOf@Mm;=oL)S7Z^qajoNrd(Tv0|jFXL)@^dsrEdkhG ze!@(wGVjS{GWG%qyINGaxx{FcLhYL+PFbN{6bNYwP}*p^#l|%%ePD}(AMNruQntHA 
z`MIuUI-dg@99 zCnY@GBZwqJ;id958o1Z`c~35|(|~w2Bps$8JSh;2fqp{`gR!jv09v8z#XHt8S3kNn z=xU%Gkgpdk!3BystLyOV+J1qJd6zhPbF$^DMyEa zGMR2=64zMW1UO5#fSwXYwm!v-p?+A*Y^w~LJyDYm*QA*z0tU>mnq((rhpNIrLz_6b z+7n?y(b`sZK)-vUjil>>8tFBpCrFQa!w8V?iN;B&|7( ztwD4lIxzT`2JWxJ+mi#1j@+kT|AAYF8yk2+R}0;HPWNi=($7DI@e*XMn}!p&rPrWf zZLY^?-7BxZ-ncC}kk%ZCaLsdfX2IM!7oO%-I6vpUvEOU7(}s)Y-Af;SD(I_`@Fw-t z3p!C-KU7^$iSXPUtv1}2TPEjwam?s245EboL2>Qiux5;HR8N!Jc@BF~TF;W^*dWib^lK3>cZ12;=<0Vqiti?-r40pmqq z*t+M_`nykqR)$Q)_)L!YerC3E;zG*-oc#{atGNgL5N{ifg|pNESfC`hX_zVBwFttI z2b{-I)C&!oWOlOv*zquK55Q}p*YHgWc%3Y`nNf#s98efAgGmEkG#us#Q?{Age$7OF zUaA5w0MuzPDdeSb52Fe2kPXT4M}0=TBdOM-qG4cAcY(p80dWBkde~UbCS$OHK-q9z zH=Z__LmJ+Ud{LE@xz~hv%zsM`$403IxdLZ3W}sFv1|fP98Hwxg|AQKnUvxZxj9%0U z+Ce)y;AnF`iyh=LcZN{~^QZ6fI1fvlhXb6`^qa=HD89fs;zh*Dh@i@n4n#IVf%PmA zB{6Q_Z!dHYdvfSS(S?$FYYu(^xz1ME#~=M9N^=!HUynB{J>$VA@!1Htj&u7G~N|qYbJFYsjY!#c1}- zRV$N`NhU$D1!myb%tRDpc+u(=+y|(!;Zjl1sS7&M8PC`vXs03GKz(SZbvlDiA^b&a zvyN#51{rS!1%-Qg+uErk=)o3^YOvQ|MsEN)r{s5Yq;{O>`V;S+^-8 zER<{`3vQzf8FROv_RGIm^^tf%zUm{tE{!+%#TgD(J;j;`B!d~`l*5cFNXR9to-r(5 z_2FREhegASS3SefKq{*q8e6*Rqw1=UksY8cU-jYARjIQ_nyWH**$k4=mlxhp?bjlv|{*SwtCN!xNtM9LwsAgu()|xn#h+b z&gB#!!N^lu)fmf9BTB~l+RCgo^44R4ziRVq_>3)=1@Ji6DnsQXthlWVK}Wj)#Y zQ%6?PT|%H4&r#8bl#qy1LciskN0fLlf1}^B*rQ53oWId;S?qBoK9s-FZ&~a~B_7M) z=(jBPF(p2dztL}5?1~Z}&EM#^EcUb#AJ5YHbjP>v?|>1yIRC6uE#&sG!9 zE1?{{`Fu6;q7ur{n-{8ymz7YC-n>*zysCt9^yZanV&Ug_qa3|?O>c_CCB0FO-aMql zMgAzE-}22PN<5gq(QjGoQ6(PE-{`k2_P7!s%HQa>EcT=lkL7RlTNeA65+BLm=(j9( zMTw8*Z}eLhds>N)=Wp~|7JEjCPv&p*TNZmxiBIQm^jj8tUWsS(H~KA$y{N?J^Edh} zi@mJG3;7%Umc?FG;-&nJe#>GDT0yVmZ}eLh(~5;7jP|9rA~#Vq3~ee?R1nia?oA=` zwV3Dn2oU225}1YRm&ScJ1$%Lisjr2F1+85P;V6v{QZ2gej0CN?H;RMUIzQ*q`pv_! 
z=KupXpia{8ptuT;Nxv88%tSmUK7cSWhSHFhl{1nMK4Oyds@9Q$1t*FQXu!sc#6B6A zQn1U$LByIhg<2lA+s4NhL&fx)0U%l$V+9bjR05*43?S&N;nF!N2Y^^?PErVkJ$B;B zHpJ_rpf?0mx_$S`<);FYdjUY44(*MaUxD$dJT;qrfh=|#`@H}-1rGIxz9bP)g|J8r z8#fO(Xf>9ia2}mrYGs^8j4Gc2bMe&{A`hmZdxvRL?$xkV=_-mGc%%<$G{HclVHisl zIUy6`k|CLEG`S*2cTJfM6tIpBE8yJ=DXy!SRH z-ukvcu7+YrM*9G}C1+$(o$s=|2fkw5&E~hLH9(eA zS`M;OS02D3w_pX{#bq@NOh=~7YSN_WryQv8aocdT;cLYi%3^N3pc$f1i_@u3Ikgxg z#tZr5Qrd(9UW^wnBFNjDE-Z{3k$#DgU(j-Hx{y=Qasn;WY4QfNe3h~GRnxK=152zH z+;2=l%*Y1kVcJHG?e@0@?Jw+$HeRU93v>XlGhUzr`Ui#fU`?>N7*)}~tZfAjk|@yZ zkig|#GSP)}$c1wmo973y(0IYqob0EX+sDd*TTbKYx;_r}#-dgda7Qt>lpVw61gbjQj(5?%7TIAF^>61Y!ay>a?3bu~VHt0k7-hCUsrVmh^zpfka>mh;dwHb^Ko7C~@Ez zv+tleq{JpPX&fU{cv!`#m4ri2Wo$x+by-td}&)PXAS<|jKPkl?-sl;Wgqn4x_y|X zVD!=Oa2?PB^dVjbq}~2Ki@_x=dftk(^O6u=1?1uMO_+5I~}sWHFscct2sf@ALe>73l-ik zg+uEYFJ|3cicd&3WAM4>LmpdJ=8DUz$2hMdKRde`twtD;;;yQZeal_2e!niSPR-z*m%)b@H$w-cV_t??{x zwP$(bp5+niq>@ouEM0aa^4g7(w2A!4^N6y@pNM$KpK9r_XL%qwa4!b^JYRr&zDx;Z z&y}e$QqPnrf$8ZoB>)|m%NrRMc=l=K!^R|=LP;+85VXIRyqAMMzL@|gTg^ppPcEbp zivvRFb>aiiMBSFZ%o{?WJTccrfbEi)J42p@@*c{Q+bwN*`e6iy2_U;c&%y){pVV`X ziH&*|Ccw}pJqr_HZnK`R6CuvAZl=+aMM+!86|fr=@tDE>>3iffK&;0PKJvatQ3IrU zEHylqw$S2q0bce5^Y%88ofg>}NlX-pNfO;6v4KRhNWdrZ+H5!H>+BZT31t9tJzZ;- zktWaN^gcm}2h-)ne=x(&Wtc?PBi-)ngW zrUu{Z43cC3P61iq6vRC^g$EB#trHJU;mm_mYs7=I=E1p{$(G<05T^?02#6&_1;i4f z0%8eKf{_LgC4gfEh>Bbdrnku26ijWLR!!w{Wo+{7*QUjwv2dGV@5Pu!0ho(}#3I58ycRsF038PlYU)v>&Iwg$O zE(0S}H@p~dJ0u!=n5gwq+94436cIQOnRqU<90)5C;=CZf0GkOg@n2?L0MGKK9y&E_gWX3RAg zWX4=`L1xS~7i1>9azQ4M1t!F^vL;hZkl#>{Vf&PVY~9s@3|OY?3PDCfqRh0@6=B*= zm%!AhwTDH{LdcZ^O}9`E1OzXT(dQD_Jvv_XXLPyaNR%&m*`xSAwfRQ&_<5@aC^x ze05Sun>XOExhiG+)p%-dC>T%8SG7@TZYUT}%?*WI2S709eY|=8`o`j`S;A-5oCJwIo5C41umo zj%1v!vWkq)Rald8y9#$Qo>yT|#`!8ds#sBq*C+@}j*N7aKRF8I%phGdIXN^)mmE+| z4bmlhlw*T*sj|waLAsbj8~b_ec6>RK0f4vFi*P&1_k+_|P>~lq))@`&c@ykdLpA(z zMHA$}t-R-tL1tzshFXT&ktqifYLUvvR}NJnM3}kq`sI^Tct!5>^Ermd-eql`5z8i_ zHX`N`lH4~}80)e82TUQBu^!1$9;YMBNeKHG(Sjo-{Wjf`q=_ub`)j(#Nv|%~bJIOa 
zdQC~4P4@`tZb_p}_Ymn-C8ZK3U@cu*ljc5CPf37B5NSvp`Vz9{0K9;g_ifglKv$+g zoWNG5L9BpQra`nZ*RGA0D!$u%IVfu>=QGy9?GE)gEvK#dUi zvY~-}i(2q{$R-*FuECD>a4q3x&7CX2Q#vHP;?f%N8%?FuyETte_0}Xx?OQV_6>tk0 zE7+sPUO<)=?3Z`s@IgoEm?Ve!Zc>!qIY8dhnlvKqx_#QP>IL$TT9)EaLsl`NnuW@3 zR4ONC3GbXrW!wO0+S_c3-#7ca8PONLR z&B_TQoT6R>j|!SyZ8RICt6g+6VQnR?uFgH&IG~MI*DRq`j`!sF_OwcXYnc|Uc1o0Z zS{*5|QlZtFBJ+9;Qy8(fv{3P;4@h-LsI8Fdkg!`J)gd9cLaIZ;afMWefEDQyU_-)m zl~mKNNRSa?_TOUFJJOC*XL*S&4L2pRB zg{5P}TbMXTyoGIJ#9J6OM!W^Ijd*jb)BN4qM9pcqB^qv}t3<=v7P6rb2@g{hl<`Dy zI17+yTMqBLp~3v@h#cvd`95o`1!@j=?}SXea>U9*Z--zM3nsqE`b6P^1!k>D+Yiy| zl$2sTCG|^6kuJH7q!j6r+ek{0F1d}Q6zLL$l2W7#ZUdp48x^)zju>z0R~oU9Q)kc` z?k#{rsPan~(Tn!eU=jNX>F>>1Z7?CD$;yO`CMy#%nygI7XtFXPqshvIj3x&YvIPKM z8t&~5C1a9EW=KlU8p$oBVM?;Z#^jrUy!yQmjE1LiC2B_Od#CZO)dBwlW5D-#n*mXD zV7?3F4m|p1-pK`UGm#=_MJXi51|R+aiye3Dcy>s=^Z|tr;r5tG_C_*Kn|=4#4?Puh zpJcZP`nU`3|8}|tg61qmZ`{~96hRoevuF;iyPH{9ja$#ak>D2-Gx7a1 z!FPJz&P#R*=#*~RAW@V&Cv2rzX}6|_!$wqdBIATgtEV#{jk(?^{jUOS2k8--CGHSDu?fbU=69%j&lZ!psbo=u3 z^Yx3p&CHVMSw)7_-IKp?1=zwTW}>}-xSnp>8&C7Id2c+$&+wjTpMHSFp6G2niGPS% zR=J0%$>z~F;&(Gw(r~)b%sdN!TqVCHg!KHcQ%FIO>8kD?{BxQE;8l+uahi&f+!3d# zD9Ighnu?Ol;Bt$pZj?vN03e$#h&rd{XP(u3M9q+iPDBLm>kC-4FYb*~%SstRPU^3H z1Tqw?QR^Bf~AS3&f$5_lC7TN^%`_36aUJ z+5OEh8zu;tLLKUcb1v52-0F1Q9Gh_`33u;te!5o&A1%*;4OdLN1(bo5Ax0QMAJ2VA zRsz7akYvrH~g^N>GML(-F&|1@1bE zf$Df+HkvOo^5!XIQ`&cuOlGu!L$@7KBGqhaO>NF-NIFn#Xl0MlCf6wj1RIqHrYo_~ z6nkx&%;eD1p6CRsD0FxiKctSaj_`fKX0@I;yIgZG1pV$le(`5E`MG416VS}D$kBAS zP|3nZft9#dgF<08*Wze8Q3^FW!qB}(#8S&W#jvI8o=-R3hVHf&?$yZzfX1bg3l@VS_?!x+Gd()Op2U@~x zNr$8bw|fl^H~``?SV}yf&oKOh;}abO&aeN^*{X&M{Ge z77X*sHeE)?B+h$WLo~d&V1FP?fsYxRcTGR{u%J|~i#(pdMJ#2vx0!?~BE2oX0_j2# z@r|Wh4DI4x_edLqP{hu=zZJfN{&-5|`sn^f2>EPEhq^yXFD=@D_)L_uCnMsk11YWO zX1d>s)?4mR6{lS7LWV5FrqY@p*d9wp44&Ho?0|}QM2;!7^iOi*`!7R(! 
zNU0@Vs_IgDNtdcTvUe9DK``tXE)t5K-Rt}p+zL!yIao!2*156BtmG z7?KVKRF!q)$lqe3uA{R%Nt=ug93}4VmvyP*1-tk`-I(SFDSZP!#MDmlV~mlGZpZ?_ z%L7FQC%iGb-VP3nkR`PrQnMc2d$;7*AJ;+pdyYh(<9L?aS~4Nx(@%)@aip>+IK(Q*%SI~|_OI#0~(JDkKZ2A!MRkOFiRDL~cK?D`aj8n$m zn~s5eTJf}rLEOsuHa4CXWz3j15a}fXZ4C%?Oaz>Wv}u&BBYR3?pRuc-S}^;V4B8=S zMq};<+)MrmU8n1xP;wfoA%O)5kjCQfqewW6aZ}CPX@_BMX!DlGJ<%l$xOB~)=%RjU zZ$Snf{p$zC0lpM=-zpTpUE8Ho(E-W*&F)1Z2%%5Tw;);|2lNWO=?dmU|6!CE`h||s zL7^jEn&BPfBk8HZdqC0lN}#U~DB1(inAX&;tZfN1l-RdJa;s!x2>+6~$Xe~NiqMC& zlVIc9$C+_s8V6{TDPb&7BFMS~3Zt6xoZuW~f3Jz3u+5?{d7CvKnEB$Te&heS{NV5W zt5<>t2tU*gAk_RvFZ}oyf9s{+`S?r01NX9Hk<4^IptrS|KmUUt`QlG}`t!f^0;MrQ zXa4$ApSl0HKk|FO^hFh96@1`c^pIp`%?BRvmK?@hlbY1WQpI$g8Kxm3O^U*Gyb~;K>!Aj>TmzN9i;fy)LvMBzSJ%|&(|FPNlLs{V+RT`}h^;`{ zxc32I!DQqu@Xhc@-sRJ*-gsvHrJ2`*`)BU^!Hbt3di2Udc%JoR>QBP59r6rHBDa5i z5CG1aTDw3CgBtI)(HwxIikWBd4>~D2-Chzk|bM^_lQ} zY26AVk=rwDq)2z13E#h{v3H=|rSKTm2WRqWq_-Av(4Gkis#*vcKNtAWs*B&HBl{f< zkz*1|Tk(UCZr7Zn4XJm_7ksZq+jn->JP@5?8$k6O6yu-C1V&CV;J^kjnz-n_0&&+_WE?ZH{8ZDfQL-Tg3~6^ zI8NwI3IJ8KRt0mv&f(Q5P2Xl#pQ8YG?PsO6_+&WCyEtj~=W#e8rT!zt_gkZRZ z6b>V_D)C%zUapfi%omZk{)%N^Lvs8ccc0^uTM~g%?cV#N7COixFDsGUD%Uu$r>*PPd zOM?<5Ka@WxO~=9%r;~O`Me_Uzcf~GvFUBis$=le#pm$9ylDj8>k0S?7G7&GboM;L) z2op`xYTV;h>BP1K(h1G{`uOdl{m4tFfeEb55IVVr3xpKbsy8Xzku3vu@%*DCVHg|(BWv83KzNt z2X)1TnC_!t7Q5>u=A@khH$e4hD19mhNk-hUeBvOUgBPb^X_4+2cz(g(*^ox(o?xYL z!h^8-RJQ=IwUD^iqzZHYg>oPXPATsO0j=|C6Y=$pJVtVaa3CtrZlKj) zM%A&RwXf7uoE8@r)}XF@$rn>(N24-DX}{%67zVSb&RB3Kd`&Yp$d&VhjYMy9eqkD} zFuxVvFu!HrtS~y-w%)(@70t(NZh;%<-Kly-ZuEdT|Uj!**>`e+`Q zppS;5ZBfEMbT8luvN13>?Ya;Rvmi~;l^IUxyi3V45|W@jPnJRvLbs9TWLPWFROFVV zIP*t;w(dM?#8t$YzgU(Wmw8tbWc8gvu_=<&zQ{_`t=i@H0NmHMn_l@SEs=lJUp*%10c^~=<@>VMk=G47q10+Ik49UIlQ|x${`)jI5$r16HPBiRq(0P>8 zosGh*6jx$I3r(rsm~v)BD1AB`QBF$EgE|+3y?cq(*h5$}+=I-LgG?BawcZexDjPCf z%7cfcb`FoCA~1CQjN}Q&ea@h2=qOOH3*tKn1J<~OiH(65Y7xZbS_6Xr4aIahi0urQ~mC0TrjrY2EUv$4fFOGG= zUnob0Yyg3^U2UENeMCPIrFJX&iWIsY+jsimc%Oba#XA)}9Ldz0;-Y5girL1@`2OPWr(cggB$ 
zJdt~t7IZCK1UPzNi2vTx3*{jY@)(vCQ!ELB6(3L6VU5r_$9c)RR?K9db_cHwyTiS1 z_jX(_-Rr1t1E`UU<}4&ax%*TaS1Or^b-%%_jIInBe#f(hyBHE=N$kh>qh#uszapcU z5!S6}y~d<82=v>LIEsUq?So#!)q73%r5_R`Y}~bNI0BH!`ms1QDetJ_UPFKy;kQj} zZ{1XZ8BJ^t_`!*4N`H+Z*uIhtL4Mje&xy%F|8mgZR1mnop+kPr9ybsg+Qtae6j8QjeBMI@ZAqXSEi444>B9Yi06V%jKEO&NSRiJh z#ZEP}J*B`vU@8cqj>>5iyN~{;U*54IBE~5aKVdCC84^A)WmSr3iYv-WRfH92l+L2- z-UDBc;b2>Xd_b3biGz;-^sS3%480IVLEBdDB~eM;s0996#T)x4NrBWe{)MjQsFN46 zD+g6u%MIV>nx6(M;$7?mwh2##=;;!@Ld{I$ZvE*5V%iV_*RWN>{k*|H4&EykQ2+P z5SuDVAJ&YyAK<%1eM+f0v)X|ZG!%}E17Z{^yw=N&It}gMf(>osX(nJJtd2~Yh*qa1A89G7SRvO~94RE%Nvc0V@X7c86rxbGz+4S1>Iq3k2%( zWKlHsMehL}^jLEpglIK+%?d~W`vu}z zyZ-?;FA9J;vC2^N=@pXFbgHmR;QQ!57CEBlpacrwj0$Q6KyY3GG48PvQr5NAP)f@s zUnbH@{2(ItP%#JtZ49`P7zS5C72AvjA}IDcA_ZO3lXlwoH^NpKcW?)zhILG3u~cxG zy_l8nm&02tAkqdDF-U?hO%n(n3*l6FCiE2_3A387iD!ZO`1V!ZKDo1`dX4HvFM)eJ zq@cJYeJJp{&PCTOWZesbU{kgY;AY!UAn@8z1EGi+ zOmb0brhpdT0Ztut(6S0uNP-tSXc@YxTQ)z4KDWaz6`w@tFjp%+ix5SSbm`Nug98vs_tArHA zIy#HzCY_tHzoh*kR3Zs1G;MSb$wjGzGVr9@Vdp;48=GN6IBm%{Pd5CFmLNgheZWaU z+mX6AjkRwxcK+V1s~wT9eED)EOQ)kS9f&1ccMA5)8fqJ6T9{4PpLSuJn$<~EZ8ULG zP;CvP1i}}k-9RXC4X)qmg?4wg2Exr0!?+pM1wAnb>e7#0u%?@sfG4`WuV|GtcJFzY zuYF0XrkyQhMglA4jSDjPb0Q4&Z$d9?qGL zJz;p!W2*pZ2c=bh0OA-->xeab$&&)8^)~=2N|BH!2?+AEg)B4SDxh+)zBP-Q0RpN( zMZoB#Sd{^l0h&mocm+Hh-5qRG0EcR7amfJf1d83S7|cguDO$r^1ou)cY+OLil~O4J z9!G!QhSO1WN`LOH2;@%&_6CO_te#7p!M{YPX;GXru*)e?&7^J+SSht1I_^$OXKPrd zRE}86YKv6>ouM5ec+E18UFWm{2z#MX3qdE4cP>1&bzD(eamy59Rc|UvtD*}9>8ySW zFb0J-gt{jIq)X8rRF0?0BLu<|Ld4fKPQ*0CnFC=X#7r{H0GiF^a>$6;JfyG`WZrd< z*`sYOYo>%Cl+s@}i$}WdRn^gqtUjTc97M6#T}TslV}}m51rq=vWPE|#6F76#3TCth z!Pm<2LEyiEc;_^Q&`6R-Ihm&hIc}`rgsy2}0dQlmIx1|j8yajr0dxv$UlF+|)Jv!2 zk)lq7UHx;6iQ#E^yBK$`y5!JK2z@rVfgYYD6tkkvF?ua5Jps;~t1yEuEI|`t9cWs; zV9C$}28!^n4&8%-$(Az@Smv4y1vMMf+&sxN5;Mzc-iqc8_G?`9QlDpr4lw!_GB6|TUpQ?#leh_UB?ne zV|I5)pCu71aZjE|Pm=)rPcRyIP>hs^G#A$@_3mg(KpbeOJ$2|u_coj#M1YUPQX z56~Yu8df`tbpbq#3jdi7|JhCx!%u7;QE}mZ)SdRWNUp@1eNaXOVD$NA8G}D9io>v|2E$a7@aqA1 
zfCT`TJw>oAhfh3JdYVieMB(CFblq=Bm313$U7(?rbH;={RdS~+<;0c&P8THblhPSK zP^E@KD>5DsD32Qv9iw@PFHKhIvRPn@j!@PA584U*Kbc4ie3NIH&{dkPSh?MuBLg-I>Te6=*}sz;4YRU~|=_-EZL)?_+x}XDgRI z01@Ja2r}P5?N~IZF?EYu>Iu3_LHR}d>i~aF?)l9 zP!$juW@0UFL8!P&#;&W7bySuGiL#!`G#IWNHBV$6$@(ha%Q_O7ml?8-Du>w<+sf7g zDM~O4+fTm3)|SSYD(mGCbu-Z}vUryEm-Y8=L)$x+dd}CMEH)30zdyL7G;;j|nk@)G z<+Yg=oe1m(YtUcjfgJ@>SMHD!NaH&^bPa@B+G#u^0&JdPe>q_PUgW9GU<0Ua1bdoq zl1KH2LQ%Z#Czkg2EA6l*Z(VB4rC5VYf75VesvhS5MM)UOMWXuKRH9)jJN`FCv_=xC zjBTNQ@XRnr2BHIwV+LgW;*$wZWAuK8EpLl!v(j0^#Noa4(>@N5EZyQxW}+fX-=%yU z!)L$YTR`{*b>HCalA(Z{=w`{`TY6VY@hy9~A}5Geq- z=Dw640VHJeukBG2Lk1FA^m$8*<$zp1u}~dPkP_de!Lk%I8_|4(XcWo+SL%)N9>yJi zeaI|>4sIPbz>;qWDG7555&Z7+FNS(WRQT1eUa(g(7OtxGd3&YL|6KLzWA+Lw`RZ5C z+c0dcEJWTixT^YRt&GVQoTI04DdH5>GD@Xcx{#;W15+}!$Ct_3s&020s=YXg_(OajVF2^1CX>tOB0#m&qIFY=mL#O$a^Ys`(oGE)CTdi{^ zs|W2ZId`(!pF4r$aossYyxJf8XrpDFmSQ(`g2m5>zmt)#mkev_X+lq3J@L6<2J{`n zET+{uowEA=j3!OfTa&I?dZvUoVJb5Ku7Wq9<{eA~X7S&9ICUs;aOz43 z+{zG(V;)Fu9kxJo6_7cVetn$wR-@1Bl6}|Y6O5vc>H$Ex5bdx9HY2;8ZFz4rYYtl= z6bSj^5CLa`bl8#;-#To`m2*05*+^HT&Ikx<;=#)(txrQ>68{QG+vVY;Ln7GJ`5`up zPK8g%Pl|0@>PhnblAbQ6ib|n$scGfsPD@oxzFOXwpt0ppIj4$=73gDWC`^Jo(K7zP zv1cGGkzIPhK^A+@j7c@+9^K8)YFftVKwy;ZH_Z=@DO?P&X`XTvkTSzo3Nay6e%`dg ztr{U3FRVH zq!nmeCUl>C3>=)orXLU+VK+JXFan_x<5;NV*rZU2ekoKYhDuli_KK#R6_*4^Op=4F zEUwnFOXD>0@>YFJadZZSZP4#{X5A~$idlC`<hTa6GF#F zg`LY#9e0KBFy8OJ&oFJ_W_s$dLip)wgQmj(%AWj76sL4V5hyrZ#9njOsd;kN+)jj> zQ6lw#&jaO(MYHd1pumR;vC#c_W!BC#OMw8F8~8D}!@*<`cs47ZV1!)UbZ%5$z}=QW zffm@3fqWbH_(M3GCfG8<^`v_QAS-^!NdYR%Z)uE8IZZ}AnjhwG8&=1oR72wy?47Ax~ zzPdT5Z)pLIL2;FXevDkvi>QZ#Rh~vL@6)l;@gPmeg7&q__k3O|akW9Zu5^8nZYbR- z(qc9?JlP>>?yr<3R@Wu`#Dwu$_8J=rI6|4WR8|V!24yF`kgXWEtxF_ywz!}Xcm!t9$a9ZfrKOf)`=Owy&-hb%IiZ5ziZg@6X zSX?Q?H1uFK$zatNV?Z6J5V*5~7Qs&rAhcdN&2xMJX*>ajkBG+sK8oUo57jB*3qIHl z9HhaA0*cD_;6s2Y)8Hdqf)6682JgWK=lP1XygfEN!$-co!$H7$6cRIya3Xw@IB6d! 
zRPf1_e&sZVr_YUyg;AW;>WyY=sNESJ8O6Gj(VnLk)O)ak6G`=yO%Hgr>=#E=}QQ*>amM z93eKuf8Ud5`yyTWJv|(8)-e70xfJ8)=0%iY0aa%<{73%mNn?G?10{=zf-b<7aiBS7 zx|W?SSxQ_dR`GRgs`@&(R6&rD$X+YCvd$Yf zmCxU4%hxW2!E7);8Y?2};HkjeA-Gc?&uW|?Nk~S<)eq;=8p@DS{RO)Uf~^q3VbTV# zaSqUeyC9i7?;0nexjWC%4r!T@1iXhExL6pRj(% zeNl`jF{m0!1Ho4K4GyAqn`So3Pkc^OZnzdvC}R=lHv~TM5x+@nJ|`|U)(Rs0WKIWJ z!2({O>U|?U&UMI(ZOY&z_pX8U;*K>Wq|K_=sMcWCgZoOp1ENT)84vUnB8m@b()*8E z+~VNA=Y(weY-1Jw8>{%wxUT~ZNY1KCOrP14lEyV6>(6D}EruNlE}L^yW{**6T=0q^ zmqX09d1~AvlBe5PM$Ex)zg}_{_Ki*u=|1fJBLC&^mGAnNHAaLLvA!&@^3e z3u>=CkT@6QL#oLQ{knpHfHODtXm?9nQ(zimXcI@-b^{tq3a|sd#iEZfNCn+u32Tw!Yi)iI` z#n&j=`8p){LW`Os+C@03P6WRI!jzc4F1zms&$eZ;J4N~vw1u^*!0~nWgl|*gj-V9L> z5p2UT(6hAcp(Z-dbcPRt856nJMBe&>|rR zQHND+3!>hb&tSX9s>EF3WhK~mMP&Haxj$OvlXOg_l5~yUQ9CKx=O{4VAr;1K-4?Ki z=Nln|J#gidu`Q?<`Stw)ohTn7-WD{9{Ko!(-Eie|ZREB9sQY%BvMVf~;G}IqtH^Kd z4}gTqThRPakJ=NI{-`xUFNd^>HRAQ|hI_yEP3kl$7Lu_7OMj`@7huq{|s ztS<6b?+;*f%3rPg@gjeGKS!4>e_Z)%ihK~Y#^;08ZNb_ipPQZ7 zBT_qSmA|gYU$;Nd4iWi$S4~lM`S>Rk^k(@J%3ojPuiqbNKZx?{mA|3L->{!pEX(I( zZQFv$B7bs!puHx_^8ul4!Nwwg<9-feTRt}}ZwodR`J46!n|wZ3-EIpu7x|m_2b+C9 z-z(Y{Y$@`$><_m1e7?uCEl7*}bbpZg{8ah9BEPpk==uDf^0Oj8+aF{;KU4lSMgBGW zgKK>LHOk*wq{#%Otx9ktz;`85v zuS4f?ZOOB|wB%7<{E}B%z7Ug3TuKD9sxR6b+bk7xqPImq-CnAnX0Iph zN1Vj8BFwai8;U%$O}kwFp)q9})6EZ|rj4I1T? 
zE-ceFSPxL9MIt3JQ6#pI=qm9YG6Zr31rgx)$Tn65LvPZB2&wg6JXgEpE{LnFaKiC2{W$P@t|Mtf6w+K-%{*~fQM+0)lgw!69y*8<3kWy8%!1Rhg$U)B<`6(c znDsi2pVg=yvm+=^fFvo;H~7%O+~8yAIuR>h8zsg9&S_B@Igdssv?2XO|5|l4KCWTN zx~k2i=N8?6)1Y@ruB!$SE*b=O|6maKFcjv=1%KKkUVF0v*cwRxLy6pzI@sprDL%0g&)J3vfC7`_>Y)n6MeWpqO zY&BlpjskUPM^QPx9o#zB&ICU!f6NzJ8rR_Hnc47g6=ZRJc*sfzBv7*HIg6nUBh>(d zFH2Qr{Xw*PLxxr+itiv2YLNzx%KJ=0HVmy1N?y$rkG zOejC-v@Es;EjuT*P^5w=b<Am#`xFz_Q)g40WE+ zR>xpj5QAg*iL?m;u!_47nnt5vA4ZVUiwzmkIPhuoq0a^fCj~F~f*Zp6<@BHvRlPvI zoMIhqELn6lo%u}{LS=xTEOJQKh8TPDpb>qzKnsqjcKv3Nrw+F@Cn#fI#w-L=j(LsZBK(h!!ohX^pTvjFS7YbbHVsv{QV9x$!0SFh?6 z*}6)~d_&cY2YTzIXRI^L!_5=g;~nF=j7B~^yNqU?JU8p^C$fn{O}aiNXiE-=Nx9X1 zxuroijt)Eij>R~~E4#+CE%xCq`8f-0dbH236u@HNFwiDkAoX!)^j|&^T4_@X3^&ViQ8bK&fZ>k#N`ZY&z~Caq)EB$X zsm3Bg3dbp%k0;cNF0+&4REJHxd_&)+kgRh4Z*c;-<5Z%*>Nv$OAPuQ-oT3VBghu$8 z;0#jTJ5Jg9mE$ydVy1H*n0m+Qy326A_uL8@G73X3{750a965r^$t9+wO{;R7XJpr2Km~0V!!8pt}YJGbv z?oIV=?6B~U;`qgq^N^K}BYPY*$NNX6?s{IX52`@n0*>^-0~Cp2x7o1jA`bvq-1NA;p*Y8XB8IcHCW z@Kq~UQlTxxtEdq5fDz1rts2hG7S2xl7NKyLJ3B{9XD5b%=(ANgJEcRyxRLsNJ`VK9 z%p6{U8pVUODq1$>N)FFFAGJI5`Ir~-d*=D(_q6grXj>2$`R4w#d`QWAI<0;4bXq>f zi1&0_J{@B5sb3oQo=(d*Pp9RBChzIAeDic#ena`P7cJjBotEEJzC4|lZ=O!eC*o^c zAWx^|o2S$A3C-GOo=(d*Pp9R}X4Th-ysHQ{m%}p8kXiw`l#3fM3`lTW2fbx-Z@?!CEE15wTaf!w9k4{giIK z&sRKxxe$d|OUu)qQLEHThAnEr05<(0Tx#|P-P=eS2>6dJBQiI!-`xeN>y3P2M5L`(010;(0&ycPOK!cM`qF77G z8u%D4BJ>W~MoRPo_6pqs_g2}}omtEM(8G^E6P#sM zsGt>SA6JR!&SLh8%3{BAW+pV?X~n*x?R4NR%2-8QA>(fn>d+ZI>En&4z-+M;P6FfX zbkh)TLw@F?66ui%^uQKp?(6I+!li&bx(k5iW!{8jg=|Pdeump$ju-hvE<}tOvhb>T z;Jqw(0et7kRP(7Qo739@kDMdIVF!jQ^$0IPD~5J*TG3G;h>gn}aYN9yqzvjJ$4;qS zxT)%(I|K#)=Ot!(1)D@i(3`T<51>MlgL3IzYXogDODhOy^ZJxVXJBAD1LWlx-QeK# zUNCzN#cLOp;Y6?jd}6jf6=rMxKbyHQCUPbVM0sDDGxlu$*G~=nul6BK>K2dmv@XSP%ZK%8oa<5#%bbS`rMVky_Gfy{C?yao;?VE%RJB1fV5l)>`L zbFhc?#I_m(q2VjROu#ovb*UuF)V>ODp8^ap{ZzufD{Kh>Y2oBb$Xe)1p>4rD<=6*U>^QHOnGsx}kH^`1SPtDnTD@M0DC z6LVnq7kM384qwt7ttirrOb*gK4H;sch;z!vnKp?3Ng3Y5>i0$Z#hk>q9`Gt~sXUh~ zYT*;=Pm* z#VlTdk5to_TuHrbQ?pil`Ske4mN 
ztHf@ZDNC-<7Ah}|w6hp9fVd~514IIKh*r?V0_YS*#DY(F67Y?Fc#4e)P}cGYJ%b>` zFxo$wL`@$-tgDvet-%1drfy`(wV}H&OjEHYcmUVI575pPWJ^#cYlk{UuhJ&b;`7MYNHd1FGQ|-$yB^3#XJTVP`Q8l zAvB^*KI3O4MA)SsyatpkSenfm5;w?})4kBT6y#8(&GLbJ3O2wAc7SrwoEIf9Yw4hZ zPzG=+JleGrFLjiyPgJodtT*tMqM z(NzwvjEz>0q5=TvW1|PdDXWRL7VMMdWOeVLIOnqw5wBfFlt}`*i$yfD1GM-p(H88A zFXc0q*icdl_GNgo^`KV$bby@QnoU_Ui%@A%{Nhk)h{E7UxngQXuyipYW7U>vPqd|I zaP@+djwkyg_jR z3IQC4_46j4wCD)HkdBPQGsh2eCU+Qy{fGe%@9mynb zhk{gQi<$hqx_{|EH_5jc8*0G6_6@L`mH(&jR+bGl)KI0Fv{t-awvpl`AGFMC+Seo# z4Y5*FoyYpzq+a;XP1Y-)o1|pSOdd$%2ijkbY^8`l=Kr*A&`jUSy5Z-2Y4dyh^}nR* z@B423WkZQ<{(i4*E5p6tiQq+QLw5&JA~`2RDe7i*J|_82z6k4Iz%62ZJSXfyrm+aM zr7w52B!jqjQ51>f_t!kT8__{@N_}V>>U7vQ(&d{XJf`vO++CteogEk8Q41N#VH}5o zfaKL(6x(w`7gL!wew;HC@g3EYq`4fM7%NKy_K4;G6|)u3pB z)#aYM@2TRlCsH3PQ`FX?Q<103OcttcS0URguQ|u|4Yq$uLaKnh!ZhxZhl&B}5-hYU z-~reoY(xnsTiUuLM;Bhf6)7%&;o!Z?1DB#ISG}`XW--I30T$97s)I7QEx7w8yFO?p z523>Ma2KVT{UW1VK24NQY56o&K24WT`-&%P9-!_I?)HA6h}pOIa{{LgQ0gH2)ivG#K}5R?d_+Zqpflhjv5Bccrq~OU2fu!I9P7+>$@wO{{YHKM zL!XXu8cHkG3QP|s^y7vG9BR%u8K1Smt(2Sg(myd5Ivn@!10 zB)SI`ZWVD~LeF*u!G{VkNQ@B|_Tqf(OP1ZkkW$8nZy4OE!1jr^N_-4a`)6W2 zC4Qry^R?YO-K+odDXGU;rLW{q4SRw`Kvpm1dH4(Rr`YoJIi$e7nCJN?W1bOE-V2-*q=@dVSNn)RDd}Qv7iS<`i05BGFG?zAP~0f zq!>;nxJz^xwWbYGzOEGZb`~LUf|TDr7Nks9H>e1414xAiAKN%7;lpKbsv-S0m8Ho* zXqF?SGF>Bx6CW9hMt_t5jZeg3C%aMiSF{iBZ2(_}fQ;4hyfI6*JT^^yH;s|vQW+A@_2CK)8z^;vTiC}Gw z^4=jq)%A1F7G2|t@Lf;jT|cjWq;`lLvcNt4CszVb?K;xU^+_)jX+Ic!BWt7J zq;+60W!>E$<)l)+2<;V4A7XHO0zcA9zD=wb0!!U1xSDMZ{%OPoACc~FBABK|EY>)z zhg)f9UTl0XxNq4WHjyC^oty{pob;a=wg46=!_WEmUB;-_i7JPKUe5k`+{MqnzqqhX zY=E7)ruz?zvcfr|))#^kia4e+)#^*CMU+`oEjPf2W?uiuA1CkB*o@b;W?Iq=Q8)XD zzO;22#Yw9%A5y}=R_p~ESb|U!#O7brTf!H3!SFk7N?Y$RImxOUlxX=33 z(1v4qcz`)VLI^Y@X|eqOTjUpuZpva4LCaZ|DvMq8i^(8-t=(N=?5~lqV-pD1T?X#qPVEcP{rGQCoQa!*w(~i%v&020@ovV2A1H&59{@?Y+=e&Z00of9?_lTbFjlg+6;vJ> zaBvqyMSLH@mr-PvrD_>6S6ImzFkt14YMQW8;}yp(xRCP;BqdJz1J1*^*&eN{HPXZi zH`PlG&(H)ygbg6sX0f9RD?Jwyz`2pK0#}@Hq!hBS!Q@z;wLG{lG~5r%8Kdz~HkRVL 
zNEa+xBZrdp9%o%b+?Mz&_oMq*;s<~r>~=RtNU)By;BA15$vTOO(8wt>V?}CwPz5Y| z!CbVUNg`XONLJbEr&al@`hqM&6$qVCTCmZINd^uKuvl1aanNck2kR9FjlPJrq1@e> zZBZBpt~$fE6Ca^?>4^!IFPi`quNiZb_33ga^a{T2~Ythr*Q8iR`*P6 z?ctP^#pt{y2i+>VWFNDZb)+xPOqz7r0lAVS#zFmnHA?|W571f3)1bSbxJEFE%db0V zz!vYmf|aniO)k-q7cL_r?~J+I!!W03loqBtSwcXQF+|BuVZkS8Q(w}mo1A%2D>uUY z%m+K!tW7t(lnB-26jxm>CxWi|vSnPTL1Oaa3-uZaT?oS1{@zzIg!Y&!!au!pA)Usj zY;TzW?2CT|Pw3uRXb4!J6;luqBq}KY2-*fO$3O(Ov-F_`{R5Wle~`kQ49NRX^BE`b~yoxnWnxf zM|@Is|DkFlhXQceI$=Dfw=)r#U|u!P2YO$VI>r}5CgRbbMRQ@+RtKmp=RvE#Oq@wt z!%I7RK`SlR4m^4+P!=ERfEeW3J24#M0D$DiQ|C@h!jwn?Mrsm>lvyWmLWw(zROx3{ z{fuDS1*(!NsyvNCkxN2V_>z4iEv9VGUJ(*CEuP#jb`G-H974cbkY5c5OTcUt*NaYLmH8J|cP6X}h~e}?V1 z58I`W{b$&|I^O>9cZ|13KleIe`!6rZF1~yGNM%4>6L%FGj3i1U!V&LWTID9O8}8)7 z&V{{UEOiSx4nYaF`#2RIhRicACZU|gPd;p>Ael+FXm+wPV}C)mJJFuZ zDF~K)(%FFu9ZT#yzG!t!jV$rCP6w?S(P`5w!N! z7VfE|V0XiPp}4OAybQ90hmGRu7ce9_LxN9Dk0Fubs1Na*H37*y?Rv;(teS;4{JDxB zsRwz)xT;X7ig~lX+P2^kZ(yw+@dmsgH(U);-$t97$@y99c-x=>Etkt|PsV&)%WRvR zRPrchhIVxDk>Y_MNWm~DPK)|p+_hilQh|DQbO|>tqJ>fyE3Z2d^Qy2S=KO`uU#xbb zcAd|V03-^OIAzL6RICguQmk;Dmj~vz^+8^7Q>|VzTLjY%Q-)gI0O9r4&A=G!k#DuI zLWQIgDNN@g)}}ojvYaTmNC$~hn&NmDM|zt+lPN1GNpd8Lh-5~0oab;ZZ6OhpC0L9$ z7oP`M6w+a=|MsHLN1Rg>ipNR)^>g7uA_D7ni9 zmM)@?W{6Hcb!eDNyL}<$WA%kOwo`r`=o&%QHb5*OgpY-_s@gs zQfxDo?~T`HD)K0#2A7SM9~Jlv7;H1ugceN%^Wow_soJ)g>b#7Vm#CkB;RXQP@<9v* z%@FEXC@gNnid~0=Dw-|Uf-AsKGO{T*g!!P3)}5rY2T7+Fao--DA45;J2eBARxUXQP zLj=#TR(9<&TvX2qfOIi1Gu!qhqg-;UGAHHsQ7*Y7h*1sL*$9^`!}d`wx!t(i))6kb zQ;cqB*bYY(mn_4MLsTLzx#PZar}&J&6q75%C1ERE@{+0?GV<~Eu!HpKbARbva{GPd z4i(_9y@0JFT(T)>hYA`hCTy>3S+$V*7-&7=;vVWJ~P7XyI9Cq5H87X-Q6|}%A&B(iPZLV`WC+g1Ch>w#vHoZn3a98Sll};e<&Z*%DyCq662vg zv9K;@MwY~oKr2P8JdBAw*I`VQ-`!inEaTZjX48sPx(k^8y08g11qe7BA9Ps28XXpp z(Nco4nQ)cd2+?c;&npvWy6GbKNeS^qAs=SQVeA$N=Moz3dDLS7l7a;9BzIbFiEDZ! 
zBy5m`^7Y^B!OhK_0yartOhRGK)Y7g^@dIdtf-)HK+=S9kfGp2UuIjxSqMY%{Vgt*} zuz}r1V3}OasQ=i{vD7(*0_0Dm-UzJlC#4a~=1>zhp!N*2bOj|POQVnKp;isB=JBQyqPzv3q8<*YnUt)BkW5SKb(*EbPnDk51v4n1 z0}Yk}4lvPUDP46q(W4kwN-ed7b%=s8GnmTk+RYTGnIs*Q8fER_k!Bu~r<#8FE*`PM zK6HS*P20()^0;^xs9Sz>^0%v<@%T=zc=nxp_{l&)iTyI=Q|mTePzf6iB2|DFS})Uv z6}_?7`_l|NNJf6Xk%lbpk*`0>DGnCN7y|zb`AG!sl;1G}MJDF89Kkm6#N?#O65orvnhQ2rEpd@mU zpI$2&JFq>8A(9r(c0E~RRTd2+SW8^ALi*g%2{nph)F?jW>#H(rglUXq?PIqXaHNgM zQdR`7t0L-VO8BNid{fBc6Tuq3HMUG6F9Af8aOx+YW*O_`$u80HlW^N{v$a~Qf3-XLH) z&&pBh)pVYxfAe+fj!t*ouqP`hK!olg>a+kV255Fbzy^{Zl}|1v&*4A%peCBJC9+<#?-4$SI(!9XDq`36Nav zzAV$(Did-U)oyHCw^$e??st&KPk1*&64DPq7mt5Oy{*3*^j0ZDz2(3iBn%`!S`2Y% zdJ73ltj_o!rnl;ydK;44%GA!#ltlvK?A_#iu>uPXN_iZSXcZJu1W>^aksNh8%fp{H zAnV*uNe3-;U>C2EFVQ(E^Hxs`JS7relM^BNkr-}2aliJxac>rhQ*-tI$Ep*H7^0-94{<}e;)7-?__==v58Xx-w686R=E}n2&cb^ry zozbf+A|k54VuhT><9d>G5>ojt!BbV?87ef}ICDlPa%RZOl5qfAMywho%nlO#>>)qv z$a3hEN!u4#f1-0JC4_hx4XmLT=twxOZjPzuf|~peqIL!L7bTD+0ufM{zXx4p-=r}< z$*$fjAh~GsEN4h!cctP4m{HAhLkaA{!3}IEVV4qK7)rQO311&dxLOG>RUR|yuN_xI z;obc`c$-8xMqU=i>R7r(ZP3^hF9vid_&Al2P@0fILGsQYDf0!QU>{&SQ=Gi7VNVwb z4rgw}D{V3g`03=e$YeHXFh|qE1k;UNDy$GNM&v8soiO8OLfoe`jhWRg92G^va@P-} z@d9d0?_5Ce$l}G1~CC4j3ld8mAJXr`UWOfa;bTDq}8!2=R665^tTB zZLIjys&yt}$jOpHw$_PE*#15eBnh~l1gm)Ee+j=?-cB#MslURp!f02ea>8B zRU4855~=Y5+bExN=b=7E$rv)Y882aZX5r^*O9HNy6`xnN5naM;GGfeUgXX|?FF}>Y z4ictV7JxiTt&3>#G{Wpi5mPJ;Si}y0Az{|?)WWRRNhIM-5Q&lUCOy(CqDblzSeZhA z$N@b;v-Hih_H}Y+MjuwpnrB$KTR3n+rTfs!MUkhDo0?ESgSYV> zl-69*Yd1G9J135m<0$;3F_Jf8wa=8&C|&9TD3ll$zoBL4%|W{ucC93H2)y|Si*V5r zQ8;rDhDLQ!Gdbnzs|;x@5Le`X{_-L-+r$M+P3gTU)aaM5Y|Z`f|F6A&`ZKSm_kUio zHw=AZx6H(u9_SnW^kuGWkaPVzKCO9szI(GtPf2tu;e?NS;9#k8cF=4$3F z7d`^vpf8(C!@wDgvLlTKowsigL`JW`@WrZCFXB5 z5*Av6J;P%F4%A2p2ph)v@%Tp6jL&>!2|h$```0KmigSHffQD3(Edotj62C-hbB1Rc zwgC!+gJO`d(z<>XgSr+ztBcxgXko1!mX;~b$uf(RRMsW!rL-%77BGk%QbT2&|n5DmcwZaJv z`W=^zI03g2b^fTF_teY&4=~T@D~wRC00O*-D_Zk-w{QikJfUhMjb!hJLe=JIZFnK? 
z0hWnaPw^QB3TC&BSGc_q@=K{LbLLY%#A!D8$8lHjX3ztQld2CrK$60!ykGMQ0Rj~3~98bL1e(0y|g7h z_yUvU_cc59bi980Vy1KD8G}DOUvNr@77_M~2z6H+xd|l4lm#C-FEB(uf|{l@C6lD<3?})jkM&vj=rwe{hTk zeXyu29~|=hcah~dd6>Q*9LoKE&-`A``?o#stz@9sn}_mU@A>Aqzz?+|Su%wBUx(Q}s-K~DKIPR~3|+?SM#jLA0HaIS^V zOJ}JlpenqpZ9pf=v6`cg`GyByQv&9;shb#P>6$7&9#fRH#>Yjsn4yZSV0>I6o%3<- z#K*ZzN@q!{;uDZA=i{CV!q9!Ffb;R(bQHZ$wX(6h(;o*tDvg;W+QK3_5RbFJrkEhe z8TPpCa%9BufTq||NoAoBF^Xg4gi6*kzeGy;*-|@3%lwqBhphoEspuJlJD6716XFh8 zWDr^osi6R@Io=ScRezIJO;Adz)*-;GEC^;^*QwPPc6H=SZZ;aF3~fL!e_rMu5VVul zjUOfD-TZ$(wuIEr8KPTTI|CHF`Jcd+uO_UR<=174kgEozdB&|z%E3FDVfGN@{Kx9!UC$gG2 z0;dEycF#$_Y*I$TR5kr31*t%>@OvJujMb?$D6FKB?4*Pfo{`%;z-30^ssC#vh)P;M zZIKi`txnR$;`-ASN&ixswxwS4LGzZtYcyoWas((XEm5uTFTs6rWSAJ`dNApuHp_ie zYRz}rK#2uOLDtN^P)-;cA2kuyYk5LMVqy8fjjW!o&vPGAcGy%l}Gj5OG~iY%s*;+*mA7->C4* zScOn|HId$i0)W;czxAb2z(b&n} zNmPGM_8jYMS+L4F5`m}XVuH)62%?f7Q8GgdSOzluwp!Hqvm(}Ht)HuPBr>IpI@LOI zW3`TCu_#etXdOAJb)y&U@@|hP_s?cNZ9t(mbo<3ePx_CFmCL6A+VHAVp$Z29 z<9-3J72fDpI`bqxsXP7HDCoP$@=wa&4ysm0mD2DAW?iec(M05hH5rPq_yTo$b;3n^ zRAwte@8TKH0CJ*~(ueE$kbB~JT*w7M9}fkI8m4Er#cjX-i0atH{dF(FzpN`{Nrnv- zT9kM<%=c9al7vm=C9V z`9S1PKWGJ9{#Jrbrd6olucnPkRj%#8ll|gObpxMa-QY_W%sTB3z9fppOqB?(;O|r~ zfw0PS(=AB&I#hRFl?CUyR5j0q?mY7Uz*3iotE;$z(tvh5(^EZvpewyhi-p@37 ziu?Ziw1=;G{eoVv^ViS$YZJbzuNNQXwdSNOr~cX)YxVj8y%yo+b@erUY6|~3y=JTj zua(y%qkkh7Yora2r4;jDNKXIw_LQ&@b+Cw56F`D66`wwt$YjXYd+q>lfq}8QTPF=ko+8ylJd# zEyGGrL|ZZpFx7es6KK}|vf11vKDo?RNpLQY;(YYgpJzzkXw;gIviK~|HjN&w+KWli zP)tAu20%UEM?TbeiV?Y$!KK|-i#uHD_xt1qm}%1NQUV(?TuwMEIU$e$0TKp1ZSn+c zSMr2w1zZ;Z;c9|7pcA-873?YnE^hYFF%QI;x!&#LY9}zu%>PmbJAl`kBSrEf%b(Vq z0fqKUKtdeB`fwy*_Y_4COy1bD~!Z2M{3(TS@N$FX{HCQP=Xrn?Zvx6QXq_R@O46?l#vpDBtti#i?1B@>o zP9YK}!=F|QU6!QGplvV<{QzvjA$$h0%}6#dnVr7Idos8+!ytp61xywmG6E>z8#6K~oFJ-M z$7g7?IW{yT3uX>6yRzhOD&YgU&N*d3u>Ft=K#N+}irT?bvGIeBxdw{GuQaAatRXtF z6sB23rIgSyS`hxUQB?c+Cd~ zJ+3v$)?2uw_cl3~dN_mVgCM-Wkso@@D2;U1itx^6if>U@dEncra#(5n9oP6_XT>f# z$Z{z((&FPTi9xBJnR=iQGQH-S@IlWyLZL(?ZDc>$35L85=)FmWpx!b{qB6{!cF3!C 
zcL_K;(^+l_#^@20v@9_}$(%wZk+kDO_n+i7l9M9SY~{5CrfLP`u`t|Y!ILaJ#vaSH z2LUaRTgD8ts{FkyJ0gAerYL*mucqgk1zir_b-rO zF`+Q_n`bIwy=Er!TOY!gR9NnmK95g5?E2r_qi?goa3CF5kl$IdS3!T5QsWR>fEXu` zI9(*HyDKqA^1qfkY$nQB!|}iR(?Hkig|?{|SeBz7MsmU7%uq*sR_bMa4gC>6O?5GM zS3XZxsusWpr`V~Qv5abinNGFQUBF$Tz@b(xw0KISTBrjs)%;3=ju2Vj9s?$oy4Y5! zz5PBu)dH#El8^rrhh65d1Gg*KdDwEz^vOCT%a}7*LvpDnm!m%~Y%TMn44Tb0W2!dI zYNiTpo#Z$}aX?XdeiGmWc9=hgt-{RnJ7fcohbqDN?T9}{ueZN8hBOEw#;Fk>L69>? zHMdYsR|w*I1x!d_N`)d?O%n-f6hTNsD9>zG{TT2w-J9CGJ87#6AW$koe?Sgo>Pwjp zq;Nr}mcqFYUW?{p@a;zG_-n{`RDkqHu>66{gQ5z~X3fT2pG) z5 z7@`a01ud@uLW9+KX=?F!POQnSg)bz%KRwoJwX&?0h}?-Vsy1^(V@6yXW*8FI{Z$Ye zo>EzFEu;zWuS3pI7#bIq7l12EB$hFeFgie8c-^v)uYgPLOT*w26_*a^uLmwDw7^9e zaDasmoq?;xuJQr6KpR4$&}n)WFp4W)3}yymFVRSap@k8|)3WrwDCJfKNM+y%szl3Z z6v66`?fK+%IO+X45>aCehu^C=agClC-ufA_KrWB7i9+XzW5GV+*`|_Ax`+!Q9E>wS zqt)J=8fF>{T6L!Wz(%rRjcgTTG&^Z0%R6nF48v1UAG4=$t7nXPC~G1>v5-E}wAZ=B zI9JP+?eB@HdbdGp`fNz;ck~`WDgx7treP=a9VFS#WV``M1{{RP1`;q{z8($&4Yq~5 zTO)e*}0O=MBKa;SzKJ2*oQhDh0}A<||9E5_Rg^J28&RqCZ!nH}JT%(62NdlLoVfks?Z za>Bqxet%&J`dsMu&>0Ao&o(w;`_43w%`fwnNWoX%p|7Y;Ffv9Q z+k-g}2T&Cvs8t#tr_))Q$S@}Ii2H+P8y{n>GnI#m0X!tZxF7+{VB12L0@f#mK<3Vn zkjrb&DY33uu;jz4Vu@BD-W0D%QJLUbQ>I+o8n_5b!WEoS6DrL3Pm8_5)@YpnSdeAv zdi#dZmcop&yEaTJnibPgA`W4&m>CSP(P-Lad+>{lucQIRV??dKwiJo2C{V{)gj-Cz zZ}S&24Ww9K`EHVW+A3l2N4=UY*&q^@9c6&jW0A00n&EoT&qmX6GHqHO3CqvZYH~7* zR?UecjIIAUS^-2a$Ocd`Loh~kQ7FOa1NYpN-b{=fS}wwkc&QSNfTi5m1T3VCp~2fE ztF#$}y0J`el!H*%<*nw0ZBI`XSc?6`74)^+U(k=qbq9++O<}B7Hb*SSngdx<5Ae;A z-O?ziFV54@_wl2(K7N^DkVa5gWrsBuDT!>s)gebdf&`$cM{s(01lPUIU-$@OlLn3W zmQfsxAJ0Q0*kP!4+Oy=Wl{~{wVm#lYJfaZNVix|iP5J( zaNgBMh)>QvLc$zla)A-Tyz313T+jt%%c0BCu!b)E9{g23w*60Sz*2QpoueD-F1o(Yr?docectTQb^+nV2&IF z9J?xe2QfY#`t%aTULVFKj^3gW22juwCet7HRm(WJB#z$9701k!3;S8h7Du}@)?2ws zqZG3xSmToi>S$?C?OOkX5ko$_ zD;Jz>0}KT112R0H(q!B()rUzH^t^)7+!M*WlI){@>ASxR8|W{+gH6N)6wqHB$N*W@`#vZz7N+(ku0- zZBz=?1d$cvqwNM_)zUm0qjuSajpx1ZONIex^aoei-w@>ZTB8pr=b!tHrNsw(ipi)V zwXttlF_WO*Wfau9cl%n$7yFeyNM;l3eH#D8FVU}7tqbBKS-xb^_=3Ig4nOp`;L&vW 
zoz3zoWsp-JUE(XV@57qm%d?a>SJW6PT~2^Pr8LSG(EuR$xWCy;I|s@f{ex9dhBQN& zw+Zr42*y%BsC1f*n^~k?b-+4tIjOz0eHDS%!tyh+0vc?79t_FmtQ4rX+jNzi)^b6^ znJ#F8auu^BlssNRW{OAwm&JhYy=Fsx*xF~#Eaz@We3kC3vJZ8YuG~adtgD4XMXsDy zvyV7aDX&k3M5@!uob_Q$O$VPM5=y3oqMCq;zPMM7v?EUiX=F#8<0Tw<@iQRLc>EJx z0C7B}n{Qeo;9{mc&wc+c?rox6tNv-Qqyf3IlH=du& z9A)dNlMO|)F`~B&RZ^h1aQfyJ9P%%UpwJ%;AiUb9P<+Ys2x7JrkHGe7V6V1Nzd8n8 zPYF!g0hs8!L+E)FO+e`X)P)M;)4KV$`l)EiDeeal5*{^OvwbaQ!9Zebh&BC?pbRpr zPoSmsDR8oSxPLxB3Ka*TtvSpnQy+>5iw+qqbCF)j**uw_g$T5CgCZdseAoMYw_u{7 zk{kk6V;1yn!(WDvwdf|v{clu=)4#XA`AFLPf|Qs3sg260dp4gsprqo-wVZ|$mB?-2 z$OJ+f<%$L?aCRM~noUr+j^smufMKvSco6*;QHPlo^=WH30&o(iEKV>Hfd7h8hJqby zd8a?&)fD0&yZVF0rIy0mJe6pBf01s{T3JYBFHlp9BqD9+bE@if+6-vA3rT$cQ4RMy)gt?9Lq=B=9Io6qjzVK%$f- zRcIs3=QH(Iv)xVm+-MWGPmEes-FxZw{F1vj9Z3%vCZL%Sak(r{innBQN|oC3;Vu90t%?0Rd-wZfCCtJOlVC7U;23o;VU1*| z;2%!}Z`#D`%7bd`-g0OCvU`;D^!|CpspH?wy*yrdjNmWWs?yB_`AhFDX2W)c5Nd^; zH*0m5#{U7wOSosTJszNBwQi6ezP{KL#hALB2`)w3Zl1xWrf&7>&|!{%UY~aM`9AzErfO4W;botnqvf3 zS}QVm9aNq*uaX~P=od~R+TVsSO;G+{ z@BbL>{!KqUNY0B{JoRTwZ0@GV+YRMs?k#uF_H7E5_?Bb$D{|$ra`p(5ARHsQ7!M%w zkwalSS$q9#-pA9bnbVdD{@Z3F;)!y{ zvByC5EPkbW!F*>RCzn9~tNLl>CZ(jk0w|yc!J@kh!0X3HOBpmV^yxN>)x|OmaM!Qzmd7cS~dRM|Fy)f`}hFOtfIki7q`ZxCZUQK zbGlC8>)*tn^jEOwkE54_0jm?of3j0mMhEpob;M&a`~`>6%Vi6Q&?ieM?8!2-P(2o- zhov4#3ny`Lg0K>pij#=iNH_&cDPt&5;|~xGkU)Yu^Mt3svCtZZzzGf$7#1X(Wr6c& z`92WLTznFmgdB@eUOyiwhVHCq`|3+KC`{!`a z1r2*~Db#UW&SVRhvQx8#8o*b10Gw9opet+3_W21AEr29dqbT`Eu?PtX2xL-pVQyAt-3z_CrH0Ws#}ai;oZTTeD!{CyVufoE7MU z)lzv0KDiNiPQZJXh+Nd(n$#mRdJRLgh1)I&4O!u?U+%aW$TD#bg>ARcW%_js5MOh7 zx{<+Xhi%s&`7&^@u7={Io3F~AVEqrIHBBEw=_`p#`&45RA5J;MjEASB500Oxp5VXi zHlm=bveU5BCp5zj|NX=BU-(BSUu+$jU;M={AAc&-JbkRBpEL9m0;^DHc92HWziyM3 zr|tM;wI%H&DUzUP%n#6V(VQ7SOQgTM$qXMNxa)!bZa$u+YVfihKY^CUi#l#`ZQ9aB zJi`_YyeCa6!%)w`rfE*~v%Eky+j-N}1l8GAVI783U=~EbAyT9$n|{OY3Giq>DYjLW zBsx_%B;ut~f6;dF@Ua6$u1pkf$0b5V-UKNT`93PrEmE;qZoh^!L>GGn3AE%RhgZqO znALBonOzYw=H$kIg_PPR0Zy3#v+~_-s;eraDrr~8*)<0Z;Um+v-Vx709c&{85-BEX 
zd=m!XKqg0j?yz8|S+Q*xAQo~%p}H~$2=m0C?f{`J14sZKIE3I$thC^sv9WQ52#DuJeeSkTDJ>^u* zzT{BD22PsbO}T*)G<&)9($K*h`03wxkS~<86E-NJgK-g(EO}Y`J|s4ySerl>TcYhn z3E62_!SO$ZvVTgH{aNvb3*(bb!pqJqmYRg;|AZV@#i!th{>g(%*c%^d5v3Yia85+6DI1?IKcj=FM03Ej;+;~uK!5S4?_0EdWSGYoP`$aGSzeqPI z3QMwyI&DKTt<&u^GX*)OWJ?AQ`AbCMr4DSZ>`W5K{!0e8x}^(-x%GHU}UraQB>=2A-nJaYBVEQ z&gc&5e0fux=tybUGO3F)AX`XtMiYFw($|KLG%`}v*uored$nEw9O$M7ig9}a#duOo z)vPGYAT@{ehzZx17itd9g!LvMFcK~xbflNp98wAKt6;HM1|!JK8=}-N=bFa->j%$PHODHF& zoUKj~RA0?YX0w;`VuPL^ZWpjU(MOBqyT*`lO)=@+r^P#k9@we&7i)MD3!$&zto|4W z&=12LHFClPenz4Ng-C^DoUk^e{H-+_1j3(3N0%dfxWqAgfcl(hWi#+PO}15Wod!r# zlF_o5T1@XLO14<0M#IF^g|%FO+c5n)&t^9V&iV| zi!mT`GL$;38BdxoT@BnA4O~_<12S!Rost7<#{bwP2P^_w#vYXjsAa7Dth8zUAtjh6 z(|0OP+=BdO;)DhQ2%cMSKXjPDBKGQ!!GM!=y$;q+l8+8xHh9VgwpI5g-Ip!l{_CuR5EZ zwSpC7>LWtHNIMlec4|3y@MAtUevF`hH>b1fY_&~+bJ|fiiCeRd^90MRHu{n5mDo&- ze31U8gHskBme)9iiuKcv_*Jc+Ov9sj@pV1f*qK;B&o_n{ow<%rn@kdTn2H6rQQTmY zkd{whz1)1&8XkY{D{QdxnJoj{h%f&`(w55n z;{$!WROO80x1&~nfUo1W_!JjkbEa0?>V<1@KCUIkb5tyu?v$=uu2Kia!v$BVJ8BXN zvi@7Rp6v?8k^_bzHvbt~U$Lj$sQ<(pYeIyru;Qt+HEixYZ{EuV*UJwx!7GPPCRSs*Nrp@}+gCH)Y>a3+-FSl?2mSQ0@g~mzIguGH z0&iuz*-8SImjDgtTN>Dk!H}iN=XRSqpTB2{DIdxXohe>U7hMKx57WQWcvelXDOEN{ zzBA|>CQI8K<5m@JyV7?;Z>0G|2P(}chjXAea^2?+={}nQmhr{>(ah|kxhwtXj>9ED zOqeK%TVkqJ@hxSxxnz6o0&;*fN+^Iys0)*o#NK0x(R(a0Bk#=;V;Qi-CSI;pmDurishFu z#CoXCnIhO;1E3$AnvnI0QdYC*i~$EMI(S1M37AFa(%M2JecaCZx7=H9yT7~)PFcR` zNV!YbH-t-d*%?T9BL zPUz?zooyQMy@LiEnokdvyTdM&V^H3=`hM8bvfi9f17p`qupT3Lf@t5t^8Dhro|?-E+=4wuVv1AmfqP>hqXFTD9s z`0hjD%0q|CD+T3Q<=HWyCw`aDG3_(it$UoDWg`VQ$una&)HlMojshABrik&L&CZ?y#V*;DSiD6c;CTaLL;EtJ|b|}LR z5bJVGlebDgw2eM|*I{NC`o!Wg&~*FZaOo}R{Kf)Fhe>A^=h97Ml!AbT%vv{l3sBLQ zOw068aVa0p-omuQ>d9xf9)$c2p&zJsMc8xbP`LU~v12|9-z}gG&5uK2pA59K>hNWU z4l(WKlkkRc)uAvnSr0M!ney8`#~W$+8#F846u##$=G`j}hdqbG?!(1SK&?{BE5fc@ zXqHonL+`P}iS==@4VIm$M>D7@kOT#OtDT|U5iUPeFj2x4Iv0L6>^uaB83n#l7Y`Tw zgx$BIZWKUv{~%ECGG!;MV2y(g3Tl}d75}mq9~KgR7RFVuHx>@dF 
zpbM-=VVAho&TuKf3)=v~rD2b7gH=*o!{6g-e!OqtulmGk-GTfxNaJ!VK5yy74`aS)Ig1V_Y`T)e&5#u|;UE_+W% zRKsPe4UT)O?o`8NBQ@{|!!&>E5e$-4!tMGaBlVDBP!E;pQ9e9qvzaU| zhZ5XzU}xr!OBS6#iHwWT8_ym+DHe{B&hF$97M~GaA$~tiM)ewyagif%P%iqEc6OyU7#aH2)>=C0ZeCi^pNec+X&?r6?`|(d7S2mR0Lm#t}Jf&=^cSxAI zDtl3ib|zKx@pHgylIe) zm(m}RCdO}1Wqxb=rTCI63durk`Nfey3m7$U%FVuI(%;>bpgS+XjKQhCXFAu$V04$q z@uDNsH=(#`Wi~*ccvbcZsQWB^FXe5Yhn^{iG5oC!GG^9PCu6Z%j8h^DM%fwAb%`rb zTA?wd>Y|&|J?WgKjGeJCfEd4siKvAiLKEU?ux;q;1Ad%ZIx7Wp*bN}{%nfL1%~FO2 z`39T@)}y5*^u4s?j=|K!X^PU4kD0A+;P&wp2E7p`{PiZGV!{uet#edN_(3f^KPA3h zGhs}AmJr6#{L1O2RKMt-43cJ|)J!)Jd(-JEO|ibI3oMDHdvEukp1{YI!TOc3vAr0h{K#zd1MPn1ywhmVxdZZvfr$ zH?IadhEg)`w++8eLu#cm>Ij6QkD_4gRI9-#B@wEgXEOk3a{=|`YhajBk5#d3@aEj$ zItM=e$AD?fJLGUp9nOAsXo-N}ky%3v;0y@Di^9J6pjm8fazhW?zKc&J4n-YipzOdF z=;r}8!jbza2&@Q3oe#S5Rh;C^4t$|g7t8lNo5z;rdxhn%*?BJ#Usv8&y5)LZkEc+i zF-R~m{^RUBjqN;uLG)rOe3BF40@tsyEwZR8n9=pg4u)Z_UmYtH@-Jt^@&b8f1uYA1 z;nMomoKjMwVYz6S%v&pI7^R{8LV3kxR%1F$$F#slp|IH3vPE>nkT5?0F?u@6ipGr< znY|R|G2lkb8Ie*Q`#CV%$_s8>o*M1O`NUi|@Yh>~Ax#c3Bz4t{E({iZF5zyC#U%*? zla}!fNDPz;Lk>S?GZfz#7x94y9+$Y1*1z^O0ZUtC%^Co<8alcfE)@1y8w{`}LI6_%wo@}?wEn3$?s4gFvBU!Cfh9H< z&vj0)44!7^h5L`dbL={rEMC15PY3WM?yoV|AwqAmm})y zgePqoh362WT2$;6@H9jng{N^z;ECi1JlO)y-;7ZV;JId%d0$~=Bej?eV{;#vy8QEM zFwNaj=G7-Z64C2}X@x_sB@nLf-6n<3#V0dZ-0Jy>!fGdkx zF%C?8GI3P4R+CF6!9&T)mDH%L5@5jA<%FRIJe=CVy$ax2^AG&O5F90=E?X+lu4bt) zVRcVXH+xcX0s-^KeX%eO-b0w3hR_Moka6LB=>--E*A=UUORCkv+WqC$X0>4Mmq~eE zO-2-5eQmhp27b1RIImUCww!E8zj5Vip&TL3v_&{SD%nF*#RA5>O~J-U_HYKM+9@+8 zLei}XNkU&>Z%8Nz*;VPFKx3C+qFMT%V_P8s8k0+eyK-b4fhu4lM2WpBm6fZRvW&YV}w^OKwcA0HIzVh&aPIcCN&$YWfcz+s|{Y}c37{oZ7DM0 zs0joG?dnaLEqsrvMw}`E!bP$>Ng6kWX{2E?NSC7I)EpL|iL8ch zY@*N$Vj&P#9U6z; z%C?HTcF6XO%aG1nZ6xo#QCo(2SkE}V&b`9BRTqT2|9Li3P#4nhJ&#)0w@sGJ=4U^| zfQ`7XRl!sB9^~Q8$a|Ze&?$OvRHNGMY+6}XEb$CZ`yQ~MIZ-xvV1Zr!U9B&7trYUk zZ!IOU*Z(J=WB-9P_-5hW=epU<-I)$2BL4*SOkhvXY@79jw37*Ms-vB8p-2UzEr>

rl*jd@Jb9R_aJD}37P>5EBnx8ANLI>8+1-^Sa7oiPn@f^uQ&Zca)7_oud~s4Z zGW`I)MM?ZSnkuc>xr1OrmNTn!X5>Wg*11FJN&KKDAX~oTq$Lpc%`&#)+B?9iv&n%# z>3Q>w7uEuk6Z%I$GLAU@NJ~*~)sfvvh{V06C){49pG%?4+7hlb!XZH$$fu2OJax2# zoC_yk3eQ=y?WdFRf4=)2M3W>N`$l1v!A^bQ(qtPO%7##;@1Sh?f<%*3Rjyao7Y^8^opXpA%A`~+DHStxHQtS%Gn4bxHQae)`yiR9KL6JxJ( z5<}#qnn4+85QLL$nrx>7%ja;hZ?`y0>tT^Hk^M~diNLvl5R6(S34N9wKv`hc(tRu* z`$=4teoNwxhC`ii1_WK|=zzT4!+Yw(PJHQs>O**q7@2!5hlFK5kuvpnjznTP`d2Zu^xc+{?$mv?|xgZ zO}3$-7k$yCVgn>%j?;f?YZ}v2^Br~g0t&bFFaj!fA9cuZD$1dQlyE&1QyCBIwcil9 z$*jjk^a^GskFWLni}Ne`)21qxT^5Gd0I;Z7GMUgc)08Dpk#rw34&@stUrA7x>9Apc zaUO%y<-e#Ijz-WqeQEHTW}BRic$I?IY$L)v7XoWhgeV6OoK3DRNVqJa*4b>BF1(RX z;>iRZye%1RLBm{7bl>_Llz9B1Qeh&1fy9}Yhz4clA+V!-EhVzYZ&K0X`legNg$v^& z`{*g6D?&*-nK-^B&366dVvS*~d0FOn$W13)fJfS)@&ep_Vp@ppGTw00u<73UznZ#@toWGz2;e+B>=sCg6k^rnn<wAW)jP;6Ip(Qt!Yc`Y7yG5&6J)(SKAknbS z68D&&T{tG)$oeynTUm<$=njA;gvXcz8cgK01fn^y8!;;UhZ)Miz&pxyJgsBJgFsfwj8A?-=NI z2@WU?b}`;0hb(@S`RUOw5Q}eh1#<%@4aw-Xz$D>mH&JOjG%s1bX(0|aE{xixi(YS0(~T+7A*00F+g|7;#C8X{gMY*jOWU^MMZ1vKVnXIDvUYWyISV z{wlCIfk)&ztwN|-A#x4|^4uUg`Vc+Wfz42NwS~Yw3()f09kGW5q#3+KnYmjubHzN+ zRnou`^0pvY6T(qki$`@qo+BNA+jmT6`RV!W!n?<+HO_e)Y3B^zZwvis_5RK%<#CcD@QXoPtSv zB_EarYO9I4m^eDnO-~Xk8VjSImepy+cyueA@~-lSKO`P@|D18+{iX|;PYbO-nVpTdHqznuSMU@mR( zM4ByP!udGN`m8YZZAD1d5AdQH`@A0e;h|xpw?-MQh*0J{(NW!O7#dnr#4&cjHDKfqK8ZEtX1Xf1o)gd*PzIo@NuD#w$+Fe61YqiuoDu2z%;T+Q z9v7Q=U1W(^&pZzMq4JR`bB@w+tm-zGrMk5lT|vbVixehulQOqJ0)(bc5W2jV6aV0@ zlp)<|eVCCp{H6hw4CX7*ZGeRgUc(9mgDi;5F$+=F-D#Bj+Yt8XEtEx7a7SS^=@tv? 
zFfEXjY@XB&)xt?Gbk$s8&^u5L;Mig5P-jwa_A&h$#Er)xS3+3@6QX-IISf&80Sn+8 zld4;NE#&j1w=#&<)Pb^%B6;QqIlX^^q^@a+nzdSco5T`WD$`Ilvi&&3(!WMN10y??I zM-Tu@+?EJt(8ZNqRH&&0=&EVUpeul{gszZoGW3MH9m!TKh9N4mgt+E~9W0LfoY)Gh zt#u8^rPL^bK^x~xku_#A(?C>IMBiwEk$cTzr7;3QcWgFCfGE)mt~d1*GLL#AzU1rZ zzaA!z(~oIZppERn5LapwllVK0+C;E(qKc-XE8IHvv-Q%UwHrDi*%&flk^s03-H@O_ zYpWH$z;8rDxwp+sq9O0ppfU0BINlHIN4535I_zx4M(_2%2^ zFl%`A*n3hkbGF27iks+dMWdmf&4*jAE6BorSzXGaS!PWy%_2GNXy~krnKJQ zSHnPKTg7rS2`%!4EPgx8U|-9MWU*Smqte?T{w!O|@+E+xJ{181?5kBgJiElojn=pu zs~1oZQuc@>CICDICR6Fe{A zg6V{qp9YV9VWT6#lvN9Cy$}kWK{6d!e8$91rd%eHj<~+-H+Pl9q{_xl%LC)#pOj`kcK#se@D5uCA+y`<>f^73GUcFGt9wZ&mLM%8fVIhPF)a?&3SqVjK(w;kKxu&px#vEtSm(`#x+m*1?QD^iKij#lD6b&TRjct zNZQk$R(Klxku(Bm=vyN}!gRlv&Qt$-B(Q!G2Y~cytijDqG_BJ$yvbS-1 zm?Uv1C8PSFN`Nv*o}lfbnEm&-I3n@k>S~S0U}H^F6~4}uEl!Kf!Kj@P$_vJ|>5Y}^@91PghA?tqzV47#Bd`2&uXWEqkq~=*Xcfk4pfqDhTsBug;TT+^Zng-Rz z1=g5mN|624}a)&MNEnNj2*W@G6_(jZGso;OQd@WV@30KO{9|5*SJ?I z?tiPgk!ol&Az9T<$hi{0uqJ_l&a{?l60D~K!Aty#93WPc!}W;>z4 z1o_X^A7A^*=#Otl!thyPlbSoQQ|SO8mS(!4YmD9#_t(OI0)~wXFE8Li-1sym{mY~w zek3ir&>-#}PfO;emOX3~Les*H4kHq&(1gd#q18O8hYlDKNr9;pj>23~NUr#p_|4;~ z-SqbJ&v5&UeT(2j<$l5ytu>Md-Ue)b%z?Zvu(#TLki}ve(uWCBA{+r?Il)0cSC@X~ z#x&;vGKMHhZB+F}qsn!kns^%$A20XaCUcq_>s%k6S6bXuMXtY`)l;htj6CupEF6Br2gf2?KdbcuqniO$pC=w#ni&rkbWUT}mSi zTAGr_CXC38YrP)W1K9l3FE zRy>X8eW*;hTy5{ifU9u^X1{S?W8scl4fhqvfv?c%S}nx)SNiuuz4jik^b)#dar`#U zfpgAEcBSNZEa2t97tUWveXF#HHqM4AF@*uSV|7uB4Pjc$fP6QH34 zT1vFzc@nc=jm$kMkmhjvv5`Otj9<)PWyv>CwfFk65Xd@l+Xu<%-%q?>OfLGB8%`QI zVl6X8Zpd}$T0~PxO{7Bc#N{OHP##T1j}Bnqx5!sGo~ux!W6-~aRmvrbR6O=mOp`xM zAwjG|>`b>Dxr;E5qHP}cmObxHuE+vOoTh1mhT%JGgY<&fJ(0s z)jJJnhpH6s7^+QxAF4=)zzx+g#v>)x#l!Z7OP&#d=|U8HRaKLUN^XpuJq*t^a5R1SOr3!u8lxIPZw3aqOS^!rN5q!6tOzzG%me5P@8v-; z3%=6>qYIe^bHeJWQIj3zhVq%v0!zzJ)~Jaqu#2lo^3j%g#nh zvC$eQrCFleSZN@dHM+bZN{t0lii3rVbpE zm?R$q#HKAh8o_hZ5@H=t(-eZf` z8gBN{)V~h5Q4wn3U`a>Hx5aRr)9}?>zvHGDvq4gzjG?yBU(>{H3L9{6U@iEJTL8FQ zO3BwS(+k0Xez>sKSZFng=kI@Vi(*?Y*Nb9x;&@gG#EU(K 
zEw{jJwB;;~@^YKR2aOdDj)TD*BFurS=&76wN_6$K3Z%QW{Zl5{+Pd9F0rcmLbgWT`_>n9;rzQxougz`M|hNfy~MqG z!%tUnA~B8cPR9FpB0R8$&{~i3?3?(_e|7H6yRDcl5@c0Wr3gYEWPZOn!3{i^f!Qif zdVevNPFDw{p)>;|=qTHgsch^MkRJyglp$IOB@$Y!1!gvq#U7OfTS(NXwl>dg;R5*Z zV$)}EWvM95LqnsSQT%bqqy|s&ffuIEFwX)&j6;)LD{&=hc@wmb+Eq@_iWUQ_r`ZxV zVlIY5PKBIMpauI@o147;sHuRuD3_VeljZcVU*R;KLGml?_9Xa%ivzzxC#-yrxIaam zlkzAwHDB%myVVlVdddd{=cCF##>Nr%E0o!dHZ<%<)<2#b_S6x9Zw;7f3!-M2%?fO~ zIuMT$qiS};CHwhpmPe^6TqJ~6EFrbUu-4*D0;??+VOovXNvHyHmO$5&_%h{XkUCJ} z9akzjwZxm%6bH)v?^DCjxmv3qO8ewsFCumiIwqBy zk1J_MQ_zI#u~d`aI;y^p*xoy&)VBB9MJ#8mHK}X0ltF?GHr21<9geky@WRYZa6jz7OOD~(~77T$ZCrB|OI=xc+hBVDf2vxlJ@0Ho@Qn)hXZ;uz> z8Pfju_qOpyME0#4%A#&_@8UE& z!p<7>iuiubzvZK4eqYEBbDR7S<-dzn-nq-S(l1S3JJ0m%$kTltd3L>yJeMcE*#>Nt zzU`p*UGh871kD`%1iVy4K4B*x`+b&vf5~<+1K@{$&u=;XwKoAN4T+J5;+)VhSmX>D zIVey{i8TKCv6D$`_us^Zur&UAJ+%67>Ro(}LaYdEo11xWV-Cv`;)s&#n)ct&+sUIU zP#-S8_Mm*3JTDVReKCHc9cjHy10XE#_8+I5Lm8iV^rSw8Hb2Q-yN*!CaKktChKe;O z!1F4lUsw&AHN3Z312hX04H9ICdnkv#W5?iKOp1gXAdR9{l)e|A;l0=`ryc9g3V)hA z>f&qN89|RvRcRbqtTdQdQn%KwqD;x#XsAW_qfI`XR;r09s`r)8R&q+Ox5fKYTBnrV z;L7wDoc;@5GFr}gC23f%H>yw<432(7^bZUeoXpHLebOhIyBw$j!KzOt$gT29zz|za zdK=R8$Ad;MD>5AJ=;LfL{ z669jcft;f>rUMyGQ4^^)?$8>YjY?CgeTSk+@1PL1^j7mxzvsKkM0 zcuLSxg%&3YeEndiz!uHV4zLf|dVz;J`#1v5b{G!WVDPuBo-&VYyOP?5|C9qp9*o&R zkbnfDeTZY`Jj%a|?;&KMC7KhZh`wGw$@YFVn1VQHG@G2%ViEc{T9$ome5(3b(N7=n z);8_RfJa@FHt(q6>4DIy8?>Sj=0VPX6V=rLWv??gt@pN8trx!26YIp019YNiF!PzC z_SlS27^-uY=g?pe@^U~Bef7bS9@&|Md4H#C1XITGP`t|^gNiOc04jkC24aEohRuCq z*!Hj1{yMN6E(5#*k%uI*88T))@$?r8}R4NKt?n%H*N+AD1q;F;?!S$ zWhsf5$0yi5+W!<&Dn6rqq?gB+nuOQPu{sohr3M^IFEp@DI{<}?l^CxT5)1S`Xv9bw zGD+I(UBIoZxxalc6dUDrBgFSMB%*wxv_=AhXb7#IA(<|-St?{`mwM|hn+hbiL|_q` z-{_`oN&4FFiiA(I8ED1uL+HcOG58D6;=q}4*&Jowqvc#gKG~HlW^C7FcOV;pcO~CS zZSN*3M)P7--;nf0Q#omZgCotkm3)7td~XNPhMJ$=*5&kF(;S zN&Nhil7OnuK6CWMFaOkMPyNL6WYcGleD1N&{QhU3`sEW8Q@6>4QdV_p6V#Ok+K*cK z256|coC^op3il`^R^pt%pUnU6ng8t<|NK)w_sJjhvQPi)qrdXg|N4)X_NwSo8UE;> zo%sBh@B8Zi{kv7|uYBYm{_~&t<1hZ||7f*)T}|z8@LnuC(;lz^o4QF7@WlXzQr=_< 
zM&_aXfbp8%`w*tHxoi$#R8BX>=7yPEC39tX`rAYk{lq6;wyRl#3zNfxZTfZ zY#iaWme4*y4e8kt%2~2DVeL6smH?Vb2$Z7U(Cl^9_00c=b)Eg2sY_D=ct^qd`+;y= zJPIye#*lrA$)&eU6?^-x-ZX%Z4}D}Qi4#H?y^N0>)o_3u_VVeE@v^O#p7-NCPV2Ep zULjlBKh!%pmbFleioKImB);fCKPud+|9nB3E==cX2HHah$XvOapwd%IzWg;x&lGF} zS1H|V*AUJVr@8On!u`%SC+p(N^_zA3Gn8Zz%))bt;7N6wa0k$${M^0=h$XA5Ds+M25pJDzoKOC?ZGg4a|9oAGil_JQ3{%7e1g(A3# zhPH;j|8;#2CmQ}l>wCI$J{L5Mnmp3GQH1&!?$XY44fe; zkq=m=XLI^2Db>S}`&vFN5|WYEEH)Y++EE1P!lkbzcLP;PECT6i)oJ&^>(p|enl!=! zU-cCL-4dUbP@QUUs=Orw%W{Im11|UQ3Z+6Y%n&L4Z-o|ZVS8TVw4rE)(-rcevmsyq z7Q;MM>hne$K-W{q(<-F1;`{h7tZ=I=KCS}pDm3Rzix)KZ5!GmX`801%K^A)dEK@L5 zn&|3ud|lBQ3@^;uXR#pia5*q%oHwku9h6195=~PjjIJJOM0)hw~?Nd>sBRI>`Tu#Qw?6qFM(T% zX!0W0`QyNS&))2e?llIBa6;Udfw1kDun(ToGasB%rsL$n#d%5h%C8TeRt7$JQdeqt zLRUU`T$zrO2k+tIx>tUEfI{3pz({EyJfJHd98)GrU(~hEe3$3_AQy=IJN$Arm$P47 zGCE-CZ>6(ZI)WwF+by<+tFk%Lc1hupV3$DU-T-7t00|^RJ14m;o>Ey7*U;gww*s8c zGn-k>G8f5&S@jxW_AXPRbF+t0a6LU$OK0wk@1f5vK}KrE_rQszit!m&JVs6Sa-0Tp zvrQkxho2+UX46hjb6S<2ej~9*`{)_80QS+OedM&#D|`MAhATV8(-4>avL1V2U~iLs z7@vRC6isCk{|yP7*JP}_>oQvsBAs*J5AjzACIDE_?VEc4bUYn%5icv-P)7(4F-PV? 
z%Cbkqh!rWzw9FJyQ9>4)jSg$oOoZ|f`nQnlaA~wV3nX*XNPDJlw1 z2(kurwD6{_ENFB3pmi;6Wr+(1ra#63LQ4UC)oV2*r6JW(pW+hEKa{S5X>JuShHn5j z`j3(|AWoRvQw{Mi$Y~WRbzD0eiEcV55|iZ}&VA7K-Rb%~`=%~kgczSs#zTv5AdvxNE9w*pwa~;07~*}rk_N)o+9Es#6%AJ$qa!2Pm5*)@ zfi~0gFzEF>G&I#A%*{}>^}anAnvR}3gXb|lkJZoS7q8~YVq3?`z%r!)YYCEyFh!S< z2nHiXm+d%28zZx_)x(3w9B?y5s8Gtr$Z`*lktno}5!nGHQ87lx$m++&Bp#-Ls)uKJ z>7Z6UMl=(Ahsh}hY@*h+;}}z;HD};O;4sugE>I!WXaHw=X`_LQm5oL^lHEo_-!#x= zG|+nuo(W0D(suC7Xvkfl$j`~${q{x-p_nGT`Q4gDe3)D_lh65{r@fQ zJK&?Ln!fMdo&woW(n#Qflt9X6(+ePhPy&G3hYYXCIBQ!RfNgtup*o%x7mC2Z=qWtu9 z*tL9^BJCP`u}WhyS+%ZcO;OG;y*ASBVCbi0Uc>*bz`lL){72iMwc7Q36O2yu&1u&d zoemX+HBPJD)t)l6XEfUJp`2kn@YHY){cDAKi&kG#%d{GMzEx${=NlbP^pQGkwCME1 zBefu@cSIVk5$dGaB&{J)8=n`K7Z;acP+^R9`UrJoRAjU|i7%qltBhuw$w4h>h{@HX z-nS(TEQm}cJ`Z>>@S0lj56ls|W3=R>K`YQ68|s}dWtawdvH))Yn!{kq7_mECL%RwL zs^NNP1V89zy?v-juXC*nqXk25(U@|~dS`)E$M?InNXfhmT0H@3fPfn*Tn!(JaD6}x z_5FQo^?$KW)}y`8tP=<3)oHX^ID^KBo+fjREmj>Z0Y{GBY;zWewyf;aXBb2bXC;0F zc-@-&^p|5O3}{Y)F~7j1AEr0u3{zviO2kSg4YX)a{ndK;PgZS_#$k@g!{W}@TOzP@ zBh^t+{NmMGEyK`rmBVDzqPw)IzeOADsJ|)tRU1zkKv*-_R*V85VN!pe46bv@lU9^# zr==MkHj}29qs^c<>n%o|>t)7$MkZuSBy=jzo)BtGY4=NGZ!OLa~DM}`F zHX)rU8?^^380Q$5n`t91oSa-}DY9#9IauM=qR^JP9F95-m)pZ?(dW9x24(2a)zYu0 zC&=9-aIful%9E;uow#l;^#*Vg)M_Xe7bP5YN zoxp_ums-i6m(-#FTtU$u71rB-yZ(94)i11fXR%HH+;V3Eyp{Zk-W*R_@3dCvS5uq+ z6Dh+b8$TyD;Qyw!bJWT0-60^J12zJerziHLvs-O8y)F}44YE6&8VX>aayGkun9*A3K#@GX-on|eMh7;yPF7pV zv_gLoj&K^yRLbZyf-R14iD+g>Z5FE&g*cnuZm`6YBMxX;D1Yf^^)1C+c zK!tx!8!>B!8_k7gSA)^VFfIK?eRIokU)ULI=wr{%fKNo2xULnlmrEWaSoUkcC1ds$^R;*Uetg#ex zWMZ4&#Os(C>D}tddIgrxnAu5t0t4o1MqUYSEJBDckdSt<&oj?|W z>NrUb6qk$o+TlrkXb+%p2LPvWp9u)lxYBPC@{&Z71`uYb2_@hs?6!^w_rzJkuO>QF zu+3^1>bStg3j&o)XQXuj5EoAekcH&G-S!n|NAt!%9T2$es=Q*lQ?WZTf?U!(?O;DZYWogp9XoaIlHRpjM)w|>JzwdS z)jPXS-+r(5*W_t+dP9DJamY}U*5MCYkpRzphyz2WQMZ1oF`uOuk%+T@{dq-aX*tz0Bzd^T^e!o20{}=-(2B7&xdH;)-LE-m%PE?znL^={vpp1$hD$mWB^56Asso%PG)^4JGs8#I}C zD&3lRoO5XK(LbeoZesUn{<}ds6nTCeiCpTiforuwc~NFERCZcYA5f=-{P0roH=dzXQwH 
zMTND!b8Km&TC+aM9W!<1Hm^9t@^=QDi|O$0XKcICDNjCc;p^z#v?R6sa6qk<6ErLJ znOn;y?GlZ6r0sb<|D3PYxUSxs$4xkf>2>Oph*`V8o7Q*A##`H7f5jN;ea3TZ*6g1L zmkkeEJNWUb{MB890{2W@cdfqe;MV<+l$R@J_$wXFn|(P|-ar}gQ~8?ao6CMmO_7}q z^{Lb3dfnz`TlwvW_1`_VosH@7h2P|jPhQ)xXh71fE`Ih%?p8PxeCWflkRQgkU%Bdo zZ-P3F{(k7h+m@^TcP@|GTb|Tz_r;GTi(8)Do4f3MZ2Yuu4oxRSb8UR`o=+gpa)<3}9{JO)`~QRhc-eGhX3`hWjw!k)-sg-OOX zPM%&AQ|GhM--}1*m9-zgdf}~Ca&Fb#H@dOk@WGE;294YtVc4Obcl0Ea;V3kh zEWGmCOF`wbd#(D8nb7Fg5LKpQ`TcX@qOJSK?K!_xBj5jH%D{QX)XqEJI(U>F6EILS zL1`W|s>S-KjlUPo3U5`Xbm*vA`Npp|%->cxV4-ZmjUmTz7W#<7hj1FkoP{JtDDw#-u*(;hvS%g zhCltpvpRn8+n1f^Es_O`8u#kf|G))h%dKnr2DgnH;4qz@aVuP%c1-{0E?LLO**hD| z?ptT|#Gdyy9DC(N{moxI6}7K-F3y}mwX9vYhX;4=Ja`&c4o?@J_y2Xtu03^n zOt`O#lC1gm)U}m;(~l*UZBOX4;_@dIQ%jdF3EA*T_7wlXUk^`Vn8fn(uMY0H+~wxh zE;m!>U70#{=^U9~offrQHBIYwbB^+4@})QP{kOBeiwo2peTpJKH|^VDzkm02zdJqq zj16qHmgzBk*{(NU9kz4h$B*Bo9)s%&Dgvcd7Rzcu?D!M>j1GZ#eMI*sbNOpI-d!&jxeqY(IhXZ^j2~ zng1IvpShire8v1mh3$sT`047CEu^>Vj>>i1IMrl*N^9K(+qy$n>g=9!cK?c>rre0Z z>r8j=9Ja4_ZvMO}vxQlk`F1U*OrmWQy|Cqokdr?ip0|0_7auNk4A&gEGV#!qhn>2` z|G4g*K1U|JzpvhMbJ@&yKRVTRh5zjz7KFTV=ZK}ee@tDE55N55o1g1H*q1xF>lS-_ z&wL}pOxn@AUY(GYi~SDn_@r*b=(6eORO_}`+muiD^qcY1;Hcc2rrEdZb=!0J3uS{u zZ6+)nJ&ssQKRPMTE*&z&9>4P1xv@(-$8LVV@9YzY*8SG$*MK%Bg1*b%Z`MBg^|z$- zbyJUw3^@0hZ0!MU4YJIOLOES!@HQEp#;M664AO{O=j96Y!(kRB^9E#WE)6-lAcG*s z{?#+!C6_g4GvvUAHhPi7koTc(Jv4?34+O*mlw=67wc>s_;6dl{O`w1d!GPBu}1B1zD_ z0oi~)0C>Cb8Y4+soW^d~6z9PNpvT2~ZfvT!A`YjA&0r6(iJt6G|?1st;i@p8l$!qn`#U~=6=xxyBG5nWJF zdvQZ9_~@85Hoc|L>@LC^frN~1n-H9tV3=|vuEGL_x`bJZjDpRztD_HHro(ZAiD;(e zy%yr|q&yig^ZoBE=aBAEhGw>g6&CrQ*dNKHouk#8Ot3yit0QBoT}v>dP?r{UDa0awR znl}XA7f=G7yb}hw8)yr-B5NToJ+WH&vV|6Skzo;;j2iw*)M(*|oEbN%ZIQ{sjBD;% zPk-0AvL=Wh+XPPO!0;Ek){dBEx3RiH8}o@#3Y7&2=%< zhU1@MbSaZ#g*R0J_7Ky%rIOytV%DHS4?O>>cH-m#AfDrrO902nTPb<#1@T1s zVTodK@>hWdYoSF4gB0%)0SlB)Z!zk1@D{UxXON$qU2lNp=-C(tIRwIssn7y98XOY1 zM6l?cMOORJ>QU8*XyDugBNX+<9jn)AbUHhF3YJGKOThRJ4}))Bb)q^rQ03WiU5cwt zqm7s)HD6;QC!S)L>B~`N4RJBJJUI6Z<*9KxBe)}ms~8qBO%+xd%?||c{YL~NtQOp> 
z@>aECxFeWpP+`*M2OIaL#zJ$oG?Uc|r!G4>;i5((W+H1j7pjlUkK`Q1W?VQ8U@f$3^;}+|qgX#2SJBSO za(tW)1409-wObvI2pz7Sk(uuiI*r>QYN%k6q-dNWuC7o$l!{sk$wv@{uweL#t(N?% z$>N<%?SK5DHzcCmUXcuwPY8Yj z-2#MNcOQDXJ>WvEFv{|wmX-3#7ZENQ>KL9>ui$ho7}3Zm3J%2FL^_>W$8mNw##6u_ z0S~W+{|>x)HC&3bs6{p0_c?qIa8M|0PjzuI z)Z~uS>r%nQ(kx&Fx74D``0pb*?ZcdSb!J#U^Ceq0R-NhWTtC&M}rCV`Fs{!rL zG<4@VLb|1=n<4O*#M#wZK^4Ps6!OQZ0iATe@*wO!GGW^^CKY-2s3Id()%X!i+gQqA zxHhBC@xnz7Ggs|e;|4tG?zRX3qa43hNme9zlGZIjT{lj9hGcy54WRu%x)p^9NG_x_ z%1is3?z%`OBc4EcX&;eHO!O#@@)8iudH~u_4FD8JdC2-sbg2#sQ$7k4P#w9D4{U%^ zz*~TK0UrYP15N?10e%H2!jw!sKqEjbARW*TU;q>Y-T{0MxCfAjE19~0U_f&~JfJ&Z z0KfQ0X75n0*(OA z0ImZb0u)%2bpZ_lVSso*J3vPOSw7&QPCg)l#=?l?$V+>|b+4X!=f_VOuL&%j!ovX?vId#qxld9w z|Hje3e!Zcn0P-kA4+va3z1;+fB%kaYdJP<9pgvoy^oG@QFI5@LPCDS3FVPx06{p|? zhqLeF+i18UG!fQ0SMSTfJB(Iok1+GVS8Au0Y?4ilXs+QCES8XmV<}wr#j5 zfl%cu1F+brbjIQw-fF}Xd>!-}gH8s3_K$$~aO0m@?@y$)PY@;E2 zbU!6peww@b%!5B(z9r5KVA!^AM|#Y^Abyfq-$!-T`O=+-$4@BEO7@=FbaZFn*qBw~ z=8bOkz(}#9BIga5bYi#toztf#sp{9+-}C%Xo2Bb*|^A8BES-9r-0=2|$c2 zR_P%2TP^79i$kT0#6`RlR+1>DAOSoI{iO9wJfOzRMoXcC&&^ckr?vlA`E4dwc7Zmn z(wg(;6&fOOgyO~@(+2;;oUHWDIce#g(|c#3j8J~0yS#vZ4xHvpz{?RP-X-87tP$dU zHTDG^kJJ|EwQyj~aTMl-Vl1e8E$KBX2i$&Oj1%aL0F9d4?Ha&JI?wxK4J$7%Ca$7{K96sas@M_kitcoA@#H-TmuaO$&w8-Q0I6Y`^LQZ3yFc=bAK zyK!V;E_4iPBR(E<{j>5TPYM5W|AGVlPi^bYmVe9RnP+$Ie*8gE#iR1GJnsG7(A2xL z`t0_7e3i#rKOXzWhDKja4}W}z$Hy{p?PkHzF*~-rkKMv;c z%z0fBtv}9r6`+&y!T9-p|h!IM}XzqY=W|2)swyKzri z^Z1O-ajnYp6F2vKlE&jHFYo{6!nX&@hCa#Q@$E-X1hq&zw{OCe-aJ0%P_rG!-WmGV z(kBCXyliFO=%%LiKWu-Z#d~Wu zY~AYlbQF(I?K;XN&B>e8>}eT~XGMJ$n_IE-(92J!@pyLg?X#v$=rO1N)7d<}YfMX9 z)1p5vIG--!@tGfA`}mz*7go)Dx`M}Nk9JNzp0oD%wNKac`0<5nX1--h`(*FaEj&K7 zzR#ZD?}H#e{_%7#kFOs)@~i!qU!GsD;s}q+Qo>s8RSvuwS@A88 zpLo+@oZqzRd+8NtdA#y?L{OWET46t>1ThI21gR z4;iM+WA^XElU^=u$e;0ekG8)2(8yzRKTYKK2ehk!KaM;WJ!WPWuWw>Ul-ydkBJrD( zX8wH)oa{{&_&0Bn`OakCzZzO+`QC5U%e6l(=RJU64cl;VTB82^rd|Bmz%W1P*9FZ` z92kF!zsF#h1tG7s|0873-uuizzCHbJG*}gG?EYpgR>$K%PPYZ=erC_KU`;&Uwdn4G 
z`R|&Sw`WO)fS26R(OX6i_qach9mV5C0q2UBUG2T4h%Mvs&-X1H@aDt;6W?T~@pzj- za}MsyiaWTToz3GPE}i@5z%8xbKEN*G@hdq?=kI!_)A>v63LbAZE$N5Y&lj$I%&zD0 zf$z+F*S=u?BR|m=9zVAGR`0AmXFpPlcJlb;)F$`+e|viZ+Sd9s?prDTbEv5C_2mIb3VK>Fzf5nYqp85@_6gC)hkYqT3G&- z=nju7(^jm`$QZWcw&($m2Q6y1?e2_qQ)S}EJRV-wca`#Pqpvuz1U!ggTB>K=y0${O zFh$G@70hDqMa#+;DsN7onU zEqmtN_K^_JtdipX=hHUdZgdp#lEtaEO5SUh-b5tLMZni0OdMCh-+K;U_Z+^_jsMjt z#)WaNGly%Me{6Z)?CIdf>dj|(2jHNG6H_yjpM;8Cw2X|ba$9q`wzcQq?d|SXg9O7h%t`zkzsVePEmg@&9)jfm?Av^T zxBoNm@xdsGt8BMTG>=SQj$~dPL95936ZTseT%n%0G9m_T3u{jwWMr6bc#@<;KZzSk z@odwkaT};XknO5TytyX5!O3xUyu$9xbsGRZIou8!oN&B##iI{XL6fxag?N%xVl$q! zH=Zp>CEP`kCY1TsUG}QGj8GP2E3K!>4ocn+7R4&H$Z-gd2b2Lg`i;b!pWH~#aL>`) z=+IEl@TlQYQBmrs=%|>e*r>Ru_^5=a#Hb{7lv=HhR>!Dg)p6>0b%HukofI7vt&Wb4 zj){(qj*E_uPKZv7PKt?&QO88b#KgqL#KpwNB*Y}fB*jL>s$-*LV`5`t<6`4u6Jir% zlj5S{)N#>qF>$eRadGi+32}*WN%2wf>iFpRnE2TExcK<^g!shxq=cvhbwYGPOhRlz zTta+8LPBCfQesr1Ix#viCNVZKE-^kaAu%yADG5zXLiPA41qdY}Q&Q3kTL!wMTk}R2 z|4%=I#so{Ky-?3INONh)YhgB_eJtRcfzul$xT!0tZGh`R*b<(RRh=-o5U0LwdX zDb`uxjSSrviFUen!Cgw_hXObaZUA7S#El6w=B21B12kXg7=J0La^T8H<$wnQUWs?m zg&jrE;qUTNtV-XGm42X=39F&KTpjbFuM$)}XCwKAyC%_uxtA2k&*_Vk*REXs$IJxr*Qu2H-;QNl&3~ z8XyUG;#emf7wWn?Xi2v>xy(h-A&c2cxv1%w*7MhWxa?;{gJWCC`r=BV$=HD-lWM za*;yeA@Y=ZiE2xftgon!v~F-cw!X+;6j&=*8lq^#Hf4uMhKk-7e<0c^IxhNJ^sVuK6 z?eptbUrXu*#3%IWJNCVe@9&DcSZC7fGnPnd)%LB^QXQMzu~T~29+^7*lxfpvZu{(u zz55S;H*niW4T9wgPp^9Z@kz;R)}A`;k?_W>HFD3Cwg%(OH+`)+dw#y#KkvcgimcuX z7DlRCgk~>Zy6oK*tJiGUzH_h4%e#I^^2;4xS+)Ae(WUahpeD`QzI@}(-HLttCEQER znuo?DCU@zY(K9Q%Prv>H2j^<_hM|t(qsLBJ@!tEJKK*Lr`xfiD^KT7qGEyp*M2HPy zRux$?E<~*M36?bVXebSnrb%kIDtS-VRMJ!us)+UKA&&JfP4MvZR0O1SOcHAq9#MYy z3(pOtY`a8BH>pbEDff`K<620(J>td5(jd9STi!E0A*NQ0JW}B)Yc38^h)TZdr4%QG zD_Z#lH4m=m@6iKQrPT_Qd&;^fT6h$CwQt)>mLm0(y&_|!O0m@IRfQ-duVEL3XUVF; zO*(pc%4*e3mU+tR1&2%gOFn6>%ku8x;n}HUgD#4!TIq7nl5I_Dd&)X_hKM_-Cx~mK z)+D)SX?&nOMI4gN`b5_nztB+VRkC+VhPGB&l+w?0=0fSX&I>+Cp16NplDw57+a_FYTQ)Eg79XoUKb9c{&uf`khA9^RDOJ*CViGQfX)Lg!Vr5%X&7F$x2R!N!vDNZ4u%i 
ziKw()Bj02xTl!V2lJCMKZ0W_4-&$r!JSC#>zG)e4Nz)V8#@cZk?inkWyJdB{ut zER!*;SSpptL~@xz?&0eh>=o!8R7+Xg+ee}l*Qrz2!;kfs1h9ePAbA5eSk%am6NiZ- zydv2siCPrRu9mD7y(d|x_)}CaeIj})uJG6}ym;#LP zI4I|G+4LE+-dy{^_8t57ANunA)oT@SU!@I@ket#cz3ZT|8A#l+eaHSUj~%~yjj7xo zDYQQZ>-1%_7A`(??07BTmdPm{)B6q>I5+~gMn|JQs zbMpIpzl@(Sb;YXPd-feTe&&ZR^L8EGf9!aAkIcTW4$gUf#>@}4eYEGZ{Rh7H_46Mv z@X>EiD@x3-oxf1Kk;NJuk~4bD#`nAL*y-oruu-SZJu+#Nj2XLi-?t~v-uvaD-7(Wy zIH!4}YW4dc?KyD#`wI)0c5~-N&1`h+_&47ktLTy0e}G)!qimu2`L4y9(B|d#9p9Lh zm0x)9&{todI(_451;gbu8FxW4E=|!uBJ(X>U%OX2$PPh@C{I?d?Un-eH#0eOp}aT;OA9;@@%O}nu3KE;8C)>iPO8} zyP#5~wB(Y<{UzcAkJ5qlNb9R?CIx`At{+6 zTerg7U!qHA8n=uu9G9*X@^9ruDLOZy z6yV!MTmj0G+&==(CV0AXk}Tjl(AOiL&Uzu;Ro<1x*8~q5r~;%~pb(ERM^J{w>@uFD z|7fi?ToTYnJ^0eeoZH95Ma$g~SgR`~MPA{6=aB=qFD`e3XS0GPlj)Kbn&U~b%wFhA zH0}&!!0{4}nI+9-2Gw24)bZyUd2_jq?uK(=t)iBOSy$zTi`HBWZ&-dYf_citFR93l zf5KjhXFVGyG_7?hVO?!ak}BX*QdIDzuD>?C)T3SOvR(}}m$L2+(e%!=Uh2JgrzV>@ zuIZ!ydRZUlOyj=H#by0AUT)m~&ecl;zPhOy$T2?;yvL3m1a|~E69H*cgnw)ouc-P; zR*%gl60wq(*oF-TcqMyyumKX*1EPd9Oq}A;DuCq@P(Y%R$hrznUO ziIO05NJQAgY(tTl^&(t~3~W7-A0!gw25ki^7ki2t;x9R}_~x$%8w&EEDwbk<%kif% zh$`B^f(VZXi;{o|wS=%;SP8y*%(4ph6;>qoR^+iF4=;JTr~&A)Y(i}oHA=nMrXH+8 z!pcBO6eyC2l@k2rB9V-R{aegPg2f@ChN5;NR*o+ydwH;6Eo`Bv2|G+I5qYpO@egPK zS}P}tB8ALT#6~qzOQJj_;^rRS_@gi)St1fp2s}h2nkQy!v2vxOr$V@Ar-=3ik0+&3o+x3B=Tj{>mu@s#Zq!B)`rk$DI=q}&`O_G4tjT7L{v;f zjJl<)2&R!jhYTOCi;0Z!uKt-7x%*>wOuCQzn;izd1|OW$J0xK~W|!MqM83(I2URvq j;Q*(O&<}Tp`M`YUr{jfPLux+yA*Kr literal 0 HcmV?d00001 diff --git a/packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm.d.ts b/packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm.d.ts new file mode 100644 index 0000000000..1535d5a6fb --- /dev/null +++ b/packages/xetchunk-wasm/vendor/chunker_wasm_bg.wasm.d.ts @@ -0,0 +1,16 @@ +/* tslint:disable */ +/* eslint-disable */ +export const memory: WebAssembly.Memory; +export const __wbg_chunker_free: (a: number, b: number) => void; +export const chunker_new: (a: number) => number; +export const chunker_add_data: (a: number, b: number, c: 
number) => [number, number, number]; +export const chunker_finish: (a: number) => [number, number, number]; +export const compute_xorb_hash: (a: any) => [number, number, number, number]; +export const __wbindgen_malloc: (a: number, b: number) => number; +export const __wbindgen_realloc: (a: number, b: number, c: number, d: number) => number; +export const __wbindgen_exn_store: (a: number) => void; +export const __externref_table_alloc: () => number; +export const __wbindgen_export_4: WebAssembly.Table; +export const __externref_table_dealloc: (a: number) => void; +export const __wbindgen_free: (a: number, b: number, c: number) => void; +export const __wbindgen_start: () => void; diff --git a/packages/xetchunk-wasm/vendor/package.json b/packages/xetchunk-wasm/vendor/package.json new file mode 100644 index 0000000000..47c13ae667 --- /dev/null +++ b/packages/xetchunk-wasm/vendor/package.json @@ -0,0 +1,17 @@ +{ + "name": "chunker-wasm", + "type": "module", + "version": "0.1.0", + "files": [ + "chunker_wasm_bg.wasm", + "chunker_wasm.js", + "chunker_wasm_bg.js", + "chunker_wasm.d.ts" + ], + "main": "chunker_wasm.js", + "types": "chunker_wasm.d.ts", + "sideEffects": [ + "./chunker_wasm.js", + "./snippets/*" + ] +} \ No newline at end of file From 21c82cc57b51c634283324e80091298dc4130039 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Wed, 9 Jul 2025 18:20:05 +0200 Subject: [PATCH 42/44] remove useless class in WASM --- packages/xetchunk-wasm/assembly/xet-chunker.ts | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/packages/xetchunk-wasm/assembly/xet-chunker.ts b/packages/xetchunk-wasm/assembly/xet-chunker.ts index 76b95619dd..4a0e957f63 100644 --- a/packages/xetchunk-wasm/assembly/xet-chunker.ts +++ b/packages/xetchunk-wasm/assembly/xet-chunker.ts @@ -25,11 +25,6 @@ export class Chunk { class NextResult { chunk: Chunk | null; bytesConsumed: i32; - - constructor(chunk: Chunk | null, bytesConsumed: i32) { - this.chunk = chunk; - 
this.bytesConsumed = bytesConsumed; - } } class XetChunker { @@ -110,10 +105,16 @@ class XetChunker { }; this.curChunkLen = 0; this.hash = 0; - return new NextResult(chunk, consumeLen); + return { + chunk, + bytesConsumed: consumeLen, + }; } - return new NextResult(null, consumeLen); + return { + chunk: null, + bytesConsumed: consumeLen, + }; } nextBlock(data: Uint8Array, isFinal: boolean): Chunk[] { From e62c9c2857cb2b7050de385657931915c7cef498 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Thu, 10 Jul 2025 14:05:22 +0200 Subject: [PATCH 43/44] add benchmark file --- packages/blake3-wasm/BENCHMARK.md | 104 +++++++++ packages/blake3-wasm/assembly/blake3.ts | 288 ++++++++++++++---------- packages/blake3-wasm/package.json | 1 + packages/blake3-wasm/tests/bench.js | 190 ++++++++++++++++ 4 files changed, 464 insertions(+), 119 deletions(-) create mode 100644 packages/blake3-wasm/BENCHMARK.md create mode 100644 packages/blake3-wasm/tests/bench.js diff --git a/packages/blake3-wasm/BENCHMARK.md b/packages/blake3-wasm/BENCHMARK.md new file mode 100644 index 0000000000..5f529a46bf --- /dev/null +++ b/packages/blake3-wasm/BENCHMARK.md @@ -0,0 +1,104 @@ +# BLAKE3 Performance Benchmark + +This benchmark measures the throughput (MB/s) of the BLAKE3 hashing implementation when processing random data of various sizes. 
+ +## Features + +- **Multiple data sizes**: Tests from 1 KB to 100 MB +- **Three hashing methods**: + - **Single-shot**: Direct `blake3(data)` calls + - **Streaming**: Using `createHasher()` with single update + - **Chunked**: Simulating large file processing with 64KB chunks +- **Automatic iteration adjustment**: More iterations for smaller data sizes +- **Warm-up runs**: Ensures consistent performance measurements +- **Detailed reporting**: Shows time, throughput, and summary + +## Usage + +### Run the benchmark: + +```bash +pnpm run bench +``` + +### From Node.js: + +```javascript +import { runBenchmark } from "./tests/bench.js"; + +const results = runBenchmark(); +``` + +### Individual size benchmark: + +```javascript +import { benchmarkSize } from "./tests/bench.js"; + +const result = benchmarkSize(1000 * 1000, 10); // 1MB, 10 iterations +console.log(result); +``` + +## Output Format + +The benchmark provides: + +- **Per-size results**: Time and throughput for each data size +- **Summary table**: Comparison across all sizes and methods +- **Best performance**: Highlights the fastest method and size + +Example output: + +``` +🚀 BLAKE3 Performance Benchmark +============================================================ + +📊 Benchmarking 1.0 KB data (100 iterations, 100.0 KB total) +──────────────────────────────────────────────────────────── +ðŸ”đ Single-shot: 2.34ms (42.74 MB/s) +ðŸ”đ Streaming: 2.45ms (40.82 MB/s) +ðŸ”đ Chunked: 2.67ms (37.45 MB/s) + +📈 SUMMARY +============================================================ +Data Size | Single-shot | Streaming | Chunked +──────────────────────────────────────────────────────────── +1.0 KB | 42.74 MB/s | 40.82 MB/s | 37.45 MB/s +64.0 KB | 156.23 MB/s| 148.91 MB/s| 142.67 MB/s +1.0 MB | 234.56 MB/s| 228.34 MB/s| 221.89 MB/s +10.0 MB | 456.78 MB/s| 445.12 MB/s| 438.90 MB/s +100.0 MB | 567.89 MB/s| 556.23 MB/s| 549.67 MB/s + +🏆 BEST PERFORMANCE +──────────────────────────────────────────────────────────── +Method: 
Single-shot +Data Size: 100.0 MB +Throughput: 567.89 MB/s +``` + +## Throughput Units + +The benchmark uses decimal units (power of 1000) for consistency: + +- **MB/s**: Megabytes per second (1,000,000 bytes/second) +- **GB/s**: Gigabytes per second (1,000,000,000 bytes/second) + +## Data Sizes Tested + +- **1 KB**: Small data performance +- **64 KB**: Medium data performance +- **1 MB**: Large data performance +- **10 MB**: Very large data performance +- **100 MB**: Massive data performance + +## Iteration Counts + +- **Small data** (< 1 MB): 100 iterations for statistical accuracy +- **Medium data** (1-10 MB): 10 iterations for reasonable runtime +- **Large data** (> 10 MB): 3 iterations to avoid excessive runtime + +## Notes + +- Random data is generated for each test to ensure realistic performance +- Warm-up runs are performed before timing to ensure consistent results +- All measurements use `performance.now()` for high-precision timing +- The benchmark automatically adjusts iterations based on data size diff --git a/packages/blake3-wasm/assembly/blake3.ts b/packages/blake3-wasm/assembly/blake3.ts index 5091750dc7..1ca3b097ab 100644 --- a/packages/blake3-wasm/assembly/blake3.ts +++ b/packages/blake3-wasm/assembly/blake3.ts @@ -16,7 +16,16 @@ const IV: StaticArray = [ 0x6a09e667, 0xbb67ae85, 0x3c6ef372, 0xa54ff53a, 0x510e527f, 0x9b05688c, 0x1f83d9ab, 0x5be0cd19, ]; -const MSG_PERMUTATION: StaticArray = [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8]; +// Message schedule for each round - this replaces the simple permutation +const MSG_SCHEDULE: StaticArray> = [ + [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15], + [2, 6, 3, 10, 7, 0, 4, 13, 1, 11, 12, 5, 9, 14, 15, 8], + [3, 4, 10, 12, 13, 2, 7, 14, 6, 5, 9, 0, 11, 15, 8, 1], + [10, 7, 12, 9, 14, 3, 13, 15, 4, 0, 11, 2, 5, 8, 1, 6], + [12, 13, 9, 11, 15, 10, 14, 8, 7, 2, 5, 3, 0, 1, 6, 4], + [9, 14, 11, 5, 8, 12, 15, 1, 13, 3, 0, 10, 2, 6, 4, 7], + [11, 15, 5, 0, 1, 9, 8, 6, 14, 10, 2, 12, 3, 4, 
7, 13], +]; // The mixing function, G, which mixes either a column or a diagonal. function g(state: StaticArray, a: i32, b: i32, c: i32, d: i32, mx: u32, my: u32): void { @@ -30,81 +39,115 @@ function g(state: StaticArray, a: i32, b: i32, c: i32, d: i32, mx: u32, my: state[b] = rotr(state[b] ^ state[c], 7); } -function round(state: StaticArray, m: StaticArray): void { +// Optimized round function using MSG_SCHEDULE +function round(state: StaticArray, msg: StaticArray, round: i32): void { + // Select the message schedule based on the round. + const schedule = MSG_SCHEDULE[round]; + // Mix the columns. - g(state, 0, 4, 8, 12, m[0], m[1]); - g(state, 1, 5, 9, 13, m[2], m[3]); - g(state, 2, 6, 10, 14, m[4], m[5]); - g(state, 3, 7, 11, 15, m[6], m[7]); - // Mix the diagonals. - g(state, 0, 5, 10, 15, m[8], m[9]); - g(state, 1, 6, 11, 12, m[10], m[11]); - g(state, 2, 7, 8, 13, m[12], m[13]); - g(state, 3, 4, 9, 14, m[14], m[15]); -} + g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); + g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); + g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); + g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); -function permute(m: StaticArray): void { - const permuted = new StaticArray(16); - for (let i = 0; i < 16; i++) { - permuted[i] = m[MSG_PERMUTATION[i]]; - } - for (let i = 0; i < 16; i++) { - m[i] = permuted[i]; - } + // Mix the diagonals. 
+ g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); + g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); + g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); + g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); } -function compress( - chaining_value: StaticArray, - block_words: StaticArray, +// Optimized compress function based on Rust portable implementation +function compress_pre( + cv: StaticArray, + block: StaticArray, + block_len: u8, counter: u64, - block_len: u32, - flags: u32 + flags: u8 ): StaticArray { - const counter_low = counter as u32; - const counter_high = (counter >> 32) as u32; - const state = new StaticArray(16); + const block_words = words_from_le_bytes_64(block); - // Initialize state - for (let i = 0; i < 8; i++) { - state[i] = chaining_value[i]; - state[i + 8] = IV[i]; - } - state[12] = counter_low; - state[13] = counter_high; - state[14] = block_len; - state[15] = flags; + const state = new StaticArray(16); + // Initialize state more efficiently + state[0] = cv[0]; + state[1] = cv[1]; + state[2] = cv[2]; + state[3] = cv[3]; + state[4] = cv[4]; + state[5] = cv[5]; + state[6] = cv[6]; + state[7] = cv[7]; + state[8] = IV[0]; + state[9] = IV[1]; + state[10] = IV[2]; + state[11] = IV[3]; + state[12] = counter as u32; + state[13] = (counter >> 32) as u32; + state[14] = block_len as u32; + state[15] = flags as u32; + + // Apply 7 rounds using the optimized round function + round(state, block_words, 0); + round(state, block_words, 1); + round(state, block_words, 2); + round(state, block_words, 3); + round(state, block_words, 4); + round(state, block_words, 5); + round(state, block_words, 6); - const block = new StaticArray(16); - for (let i = 0; i < 16; i++) { - block[i] = block_words[i]; - } + return state; +} - // Apply rounds - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, block); - permute(block); - round(state, 
block); - permute(block); - round(state, block); - permute(block); - round(state, block); - - // Final mixing - for (let i = 0; i < 8; i++) { - state[i] ^= state[i + 8]; - state[i + 8] ^= chaining_value[i]; - } +// Optimized compress function that modifies CV in place +function compress_in_place(cv: StaticArray, block: StaticArray, block_len: u8, counter: u64, flags: u8): void { + const state = compress_pre(cv, block, block_len, counter, flags); + + // Final mixing - XOR the halves + cv[0] = state[0] ^ state[8]; + cv[1] = state[1] ^ state[9]; + cv[2] = state[2] ^ state[10]; + cv[3] = state[3] ^ state[11]; + cv[4] = state[4] ^ state[12]; + cv[5] = state[5] ^ state[13]; + cv[6] = state[6] ^ state[14]; + cv[7] = state[7] ^ state[15]; +} - return state; +// Optimized compress function for XOF (extensible output function) +function compress_xof( + cv: StaticArray, + block: StaticArray, + block_len: u8, + counter: u64, + flags: u8 +): StaticArray { + const mut_state = compress_pre(cv, block, block_len, counter, flags); + + // XOR the halves + mut_state[0] ^= mut_state[8]; + mut_state[1] ^= mut_state[9]; + mut_state[2] ^= mut_state[10]; + mut_state[3] ^= mut_state[11]; + mut_state[4] ^= mut_state[12]; + mut_state[5] ^= mut_state[13]; + mut_state[6] ^= mut_state[14]; + mut_state[7] ^= mut_state[15]; + mut_state[8] ^= cv[0]; + mut_state[9] ^= cv[1]; + mut_state[10] ^= cv[2]; + mut_state[11] ^= cv[3]; + mut_state[12] ^= cv[4]; + mut_state[13] ^= cv[5]; + mut_state[14] ^= cv[6]; + mut_state[15] ^= cv[7]; + + return le_bytes_from_words_64(mut_state); } -function words_from_little_endian_bytes(bytes: StaticArray, words: StaticArray): void { - for (let i = 0; i < words.length; i++) { +// Optimized function to convert bytes to words (little-endian) +function words_from_le_bytes_64(bytes: StaticArray): StaticArray { + const words = new StaticArray(16); + for (let i = 0; i < 16; i++) { const offset = i * 4; words[i] = bytes[offset] | @@ -112,6 +155,35 @@ function 
words_from_little_endian_bytes(bytes: StaticArray, words: StaticArr ((bytes[offset + 2] as u32) << 16) | ((bytes[offset + 3] as u32) << 24); } + return words; +} + +// Optimized function to convert words to bytes (little-endian) +function le_bytes_from_words_64(words: StaticArray): StaticArray { + const bytes = new StaticArray(64); + for (let i = 0; i < 16; i++) { + const word = words[i]; + const offset = i * 4; + bytes[offset] = word as u8; + bytes[offset + 1] = (word >> 8) as u8; + bytes[offset + 2] = (word >> 16) as u8; + bytes[offset + 3] = (word >> 24) as u8; + } + return bytes; +} + +// Optimized function to convert words to bytes (32-bit, little-endian) +function le_bytes_from_words_32(words: StaticArray): StaticArray { + const bytes = new StaticArray(32); + for (let i = 0; i < 8; i++) { + const word = words[i]; + const offset = i * 4; + bytes[offset] = word as u8; + bytes[offset + 1] = (word >> 8) as u8; + bytes[offset + 2] = (word >> 16) as u8; + bytes[offset + 3] = (word >> 24) as u8; + } + return bytes; } class Blake3Hasher { @@ -140,11 +212,6 @@ class Blake3Hasher { } const key_words = new StaticArray(8); - // const key_static = new StaticArray(32); - // for (let i = 0; i < 32; i++) { - // key_static[i] = key[i]; - // } - // words_from_little_endian_bytes(key_static, key_words); const dataView = new DataView(key.buffer); for (let i = 0; i < 8; i++) { key_words[i] = dataView.getUint32(i * 4, true); @@ -245,18 +312,14 @@ class ChunkState { let inputPos = 0; while (inputPos < input.length) { if (this.block_len == BLOCK_LEN) { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); - const compressed = compress( + // Use optimized compress_in_place + compress_in_place( this.chaining_value, - block_words, + this.block, + BLOCK_LEN as u8, this.chunk_counter, - BLOCK_LEN, - this.flags | this.start_flag() + (this.flags | this.start_flag()) as u8 ); - for (let i = 0; i < 8; i++) { - this.chaining_value[i] = 
compressed[i]; - } this.blocks_compressed++; this.block = new StaticArray(BLOCK_LEN); this.block_len = 0; @@ -273,11 +336,9 @@ class ChunkState { } output(): Output { - const block_words = new StaticArray(16); - words_from_little_endian_bytes(this.block, block_words); return new Output( this.chaining_value, - block_words, + this.block, this.chunk_counter, this.block_len, this.flags | this.start_flag() | CHUNK_END @@ -287,60 +348,41 @@ class ChunkState { class Output { input_chaining_value: StaticArray; - block_words: StaticArray; + block: StaticArray; + block_len: u8; counter: u64; - block_len: u32; flags: u32; - constructor( - input_chaining_value: StaticArray, - block_words: StaticArray, - counter: u64, - block_len: u32, - flags: u32 - ) { + constructor(input_chaining_value: StaticArray, block: StaticArray, counter: u64, block_len: u8, flags: u32) { this.input_chaining_value = input_chaining_value; - this.block_words = block_words; + this.block = block; this.counter = counter; this.block_len = block_len; this.flags = flags; } chaining_value(): StaticArray { - const compressed = compress(this.input_chaining_value, this.block_words, this.counter, this.block_len, this.flags); - const result = new StaticArray(8); + const cv_copy = new StaticArray(8); for (let i = 0; i < 8; i++) { - result[i] = compressed[i]; + cv_copy[i] = this.input_chaining_value[i]; } - return result; + compress_in_place(cv_copy, this.block, this.block_len, this.counter, this.flags as u8); + return cv_copy; } root_output_bytes(out: Uint8Array): void { let output_block_counter: u64 = 0; for (let i = 0; i < out.length; i += 2 * OUT_LEN) { - const words = compress( + const xof_output = compress_xof( this.input_chaining_value, - this.block_words, - output_block_counter, + this.block, this.block_len, - this.flags | ROOT + output_block_counter, + (this.flags | ROOT) as u8 ); const out_block = out.subarray(i, i + 2 * OUT_LEN); - for (let j = 0; j < words.length; j++) { - const word = words[j]; - const 
offset = j * 4; - if (offset < out_block.length) { - out_block[offset] = word & 0xff; - if (offset + 1 < out_block.length) { - out_block[offset + 1] = (word >> 8) & 0xff; - if (offset + 2 < out_block.length) { - out_block[offset + 2] = (word >> 16) & 0xff; - if (offset + 3 < out_block.length) { - out_block[offset + 3] = (word >> 24) & 0xff; - } - } - } - } + for (let j = 0; j < out_block.length; j++) { + out_block[j] = xof_output[j]; } output_block_counter++; } @@ -353,12 +395,20 @@ function parent_output( key_words: StaticArray, flags: u32 ): Output { - const block_words = new StaticArray(16); - for (let i = 0; i < 8; i++) { - block_words[i] = left_child_cv[i]; - block_words[i + 8] = right_child_cv[i]; + const block = new StaticArray(BLOCK_LEN); + const left_bytes = le_bytes_from_words_32(left_child_cv); + const right_bytes = le_bytes_from_words_32(right_child_cv); + + // Copy left child bytes + for (let i = 0; i < 32; i++) { + block[i] = left_bytes[i]; } - return new Output(key_words, block_words, 0, BLOCK_LEN, PARENT | flags); + // Copy right child bytes + for (let i = 0; i < 32; i++) { + block[32 + i] = right_bytes[i]; + } + + return new Output(key_words, block, 0, BLOCK_LEN as u8, PARENT | flags); } function parent_cv( diff --git a/packages/blake3-wasm/package.json b/packages/blake3-wasm/package.json index 2fd61e316f..3ba75dc328 100644 --- a/packages/blake3-wasm/package.json +++ b/packages/blake3-wasm/package.json @@ -6,6 +6,7 @@ "build:release": "asc assembly/index.ts --target release", "build": "pnpm run build:debug && npm run build:release", "test": "vitest run", + "bench": "node tests/bench.js", "prepare": "pnpm run build" }, "keywords": [ diff --git a/packages/blake3-wasm/tests/bench.js b/packages/blake3-wasm/tests/bench.js new file mode 100644 index 0000000000..e664f8c90d --- /dev/null +++ b/packages/blake3-wasm/tests/bench.js @@ -0,0 +1,190 @@ +/* eslint-disable @typescript-eslint/explicit-module-boundary-types */ +import { blake3, createHasher, update, 
finalize } from "../build/release.js"; + +// Generate random data of specified size +function generateRandomData(size) { + const data = new Uint8Array(size); + for (let i = 0; i < size; i++) { + data[i] = Math.floor(Math.random() * 256); + } + return data; +} + +// Convert bytes to MB +function bytesToMB(bytes) { + return bytes / (1000 * 1000); +} + +// Format time in appropriate units +function formatTime(ms) { + if (ms < 1) { + return `${(ms * 1000).toFixed(2)}Âĩs`; + } else if (ms < 1000) { + return `${ms.toFixed(2)}ms`; + } else { + return `${(ms / 1000).toFixed(2)}s`; + } +} + +// Format throughput in appropriate units +function formatThroughput(mbPerSec) { + if (mbPerSec >= 1000) { + return `${(mbPerSec / 1000).toFixed(2)} GB/s`; + } else { + return `${mbPerSec.toFixed(2)} MB/s`; + } +} + +// Benchmark a single data size +function benchmarkSize(dataSize, iterations = 10) { + const data = generateRandomData(dataSize); + const totalBytes = dataSize * iterations; + + console.log( + `\n📊 Benchmarking ${formatBytes(dataSize)} data (${iterations} iterations, ${formatBytes(totalBytes)} total)` + ); + console.log("─".repeat(60)); + + // Warm up + for (let i = 0; i < 3; i++) { + blake3(data); + } + + // Test 1: Single-shot hashing + const start1 = performance.now(); + for (let i = 0; i < iterations; i++) { + blake3(data); + } + const end1 = performance.now(); + const time1 = end1 - start1; + const throughput1 = bytesToMB(totalBytes) / (time1 / 1000); + + console.log(`🔹 Single-shot: ${formatTime(time1)} (${formatThroughput(throughput1)})`); + + // Test 2: Streaming hashing with hasher + const start2 = performance.now(); + for (let i = 0; i < iterations; i++) { + const hasher = createHasher(); + update(hasher, data); + finalize(hasher); + } + const end2 = performance.now(); + const time2 = end2 - start2; + const throughput2 = bytesToMB(totalBytes) / (time2 / 1000); + + console.log(`🔹 Streaming: ${formatTime(time2)} (${formatThroughput(throughput2)})`); + + // Test
3: Chunked hashing (simulate large files) + const chunkSize = Math.min(64 * 1000, dataSize); // 64KB chunks or data size, whichever is smaller + const start3 = performance.now(); + for (let i = 0; i < iterations; i++) { + const hasher = createHasher(); + let offset = 0; + while (offset < dataSize) { + const chunk = data.slice(offset, offset + chunkSize); + update(hasher, chunk); + offset += chunkSize; + } + finalize(hasher); + } + const end3 = performance.now(); + const time3 = end3 - start3; + const throughput3 = bytesToMB(totalBytes) / (time3 / 1000); + + console.log(`🔹 Chunked: ${formatTime(time3)} (${formatThroughput(throughput3)})`); + + return { + dataSize, + iterations, + totalBytes, + singleShot: { time: time1, throughput: throughput1 }, + streaming: { time: time2, throughput: throughput2 }, + chunked: { time: time3, throughput: throughput3 }, + }; +} + +// Format bytes in human readable format +function formatBytes(bytes) { + if (bytes < 1000) { + return `${bytes} B`; + } else if (bytes < 1000 * 1000) { + return `${(bytes / 1000).toFixed(1)} KB`; + } else if (bytes < 1000 * 1000 * 1000) { + return `${(bytes / (1000 * 1000)).toFixed(1)} MB`; + } else { + return `${(bytes / (1000 * 1000 * 1000)).toFixed(1)} GB`; + } +} + +// Main benchmark function +function runBenchmark() { + console.log("🚀 BLAKE3 Performance Benchmark"); + console.log("=".repeat(60)); + + const sizes = [ + 1000, // 1 KB + 64 * 1000, // 64 KB + 1000 * 1000, // 1 MB + 10 * 1000 * 1000, // 10 MB + 100 * 1000 * 1000, // 100 MB + ]; + + const results = []; + + for (const size of sizes) { + const iterations = size < 1000 * 1000 ? 100 : size < 10 * 1000 * 1000 ?
10 : 3; + const result = benchmarkSize(size, iterations); + results.push(result); + } + + // Summary + console.log("\n📈 SUMMARY"); + console.log("=".repeat(60)); + console.log("Data Size | Single-shot | Streaming | Chunked"); + console.log("─".repeat(60)); + + for (const result of results) { + const size = formatBytes(result.dataSize).padEnd(12); + const single = formatThroughput(result.singleShot.throughput).padEnd(12); + const stream = formatThroughput(result.streaming.throughput).padEnd(12); + const chunk = formatThroughput(result.chunked.throughput); + + console.log(`${size} | ${single} | ${stream} | ${chunk}`); + } + + // Find best performance + let bestThroughput = 0; + let bestMethod = ""; + let bestSize = ""; + + for (const result of results) { + const methods = [ + { name: "Single-shot", throughput: result.singleShot.throughput }, + { name: "Streaming", throughput: result.streaming.throughput }, + { name: "Chunked", throughput: result.chunked.throughput }, + ]; + + for (const method of methods) { + if (method.throughput > bestThroughput) { + bestThroughput = method.throughput; + bestMethod = method.name; + bestSize = formatBytes(result.dataSize); + } + } + } + + console.log("\n🏆 BEST PERFORMANCE"); + console.log("─".repeat(60)); + console.log(`Method: ${bestMethod}`); + console.log(`Data Size: ${bestSize}`); + console.log(`Throughput: ${formatThroughput(bestThroughput)}`); + + return results; +} + +// Run the benchmark if this file is executed directly +if (typeof window === "undefined") { + runBenchmark(); +} + +export { runBenchmark, benchmarkSize, generateRandomData }; From d566e964488b1e1ec0d264dcd0872576bcdce420 Mon Sep 17 00:00:00 2001 From: coyotte508 Date: Thu, 10 Jul 2025 16:04:36 +0200 Subject: [PATCH 44/44] fix benchmark values --- packages/blake3-wasm/BENCHMARK.md | 27 +++++++++++++++------------ packages/blake3-wasm/tests/bench.js | 6 +++--- 2 files changed, 18 insertions(+), 15 deletions(-) diff --git a/packages/blake3-wasm/BENCHMARK.md 
b/packages/blake3-wasm/BENCHMARK.md index 5f529a46bf..f291f878a9 100644 --- a/packages/blake3-wasm/BENCHMARK.md +++ b/packages/blake3-wasm/BENCHMARK.md @@ -49,30 +49,33 @@ The benchmark provides: Example output: ``` -🚀 BLAKE3 Performance Benchmark + BLAKE3 Performance Benchmark ============================================================ 📊 Benchmarking 1.0 KB data (100 iterations, 100.0 KB total) ──────────────────────────────────────────────────────────── -ðŸ”đ Single-shot: 2.34ms (42.74 MB/s) -ðŸ”đ Streaming: 2.45ms (40.82 MB/s) -ðŸ”đ Chunked: 2.67ms (37.45 MB/s) +ðŸ”đ Single-shot: 12.65ms (7.90 MB/s) +ðŸ”đ Streaming: 11.94ms (8.37 MB/s) +ðŸ”đ Chunked: 12.44ms (8.04 MB/s) + +📊 Benchmarking 64.0 KB data (100 iterations, 6.4 MB total) +──────────────────────────────────────────────────────────── +ðŸ”đ Single-shot: 701.26ms (9.13 MB/s) +ðŸ”đ Streaming: 688.19ms (9.30 MB/s) +ðŸ”đ Chunked: 703.23ms (9.10 MB/s) 📈 SUMMARY ============================================================ Data Size | Single-shot | Streaming | Chunked ──────────────────────────────────────────────────────────── -1.0 KB | 42.74 MB/s | 40.82 MB/s | 37.45 MB/s -64.0 KB | 156.23 MB/s| 148.91 MB/s| 142.67 MB/s -1.0 MB | 234.56 MB/s| 228.34 MB/s| 221.89 MB/s -10.0 MB | 456.78 MB/s| 445.12 MB/s| 438.90 MB/s -100.0 MB | 567.89 MB/s| 556.23 MB/s| 549.67 MB/s +1.0 KB | 7.90 MB/s | 8.37 MB/s | 8.04 MB/s +64.0 KB | 9.13 MB/s | 9.30 MB/s | 9.10 MB/s 🏆 BEST PERFORMANCE ──────────────────────────────────────────────────────────── -Method: Single-shot -Data Size: 100.0 MB -Throughput: 567.89 MB/s +Method: Streaming +Data Size: 64.0 KB +Throughput: 9.30 MB/s ``` ## Throughput Units diff --git a/packages/blake3-wasm/tests/bench.js b/packages/blake3-wasm/tests/bench.js index e664f8c90d..abcc29b5ca 100644 --- a/packages/blake3-wasm/tests/bench.js +++ b/packages/blake3-wasm/tests/bench.js @@ -124,9 +124,9 @@ function runBenchmark() { const sizes = [ 1000, // 1 KB 64 * 1000, // 64 KB - 1000 * 1000, // 1 MB - 10 * 
1000 * 1000, // 10 MB - 100 * 1000 * 1000, // 100 MB + // 1000 * 1000, // 1 MB + // 10 * 1000 * 1000, // 10 MB + // 100 * 1000 * 1000, // 100 MB ]; const results = [];