Skip to content

Commit 3f59395

Browse files
authored
Added a mock blockstore to compute CID of large data streams with no/tiny memory usage (#677)
Another production-readiness fix. This is particularly useful when dealing with large files and a necessity in a large-scale production service environment. Prior to this PR, we used the In-Memory Blockstore to compute the CID, which would temporarily store the entire data in memory until the CID is computed (e.g. 10 people uploading 1GB at the same time would require 10GB of memory). Did a sanity test with a 5 GB data stream: the original implementation would consume gigabytes of memory during the computation of the CID; the new implementation consumes zero/little additional memory (beyond that of the process itself).
1 parent 324e38f commit 3f59395

File tree

3 files changed

+126
-3
lines changed

3 files changed

+126
-3
lines changed

src/store/blockstore-mock.ts

Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
import { CID } from 'multiformats';
2+
import type { AbortOptions, AwaitIterable } from 'interface-store';
3+
import type { Blockstore, Pair } from 'interface-blockstore';
4+
5+
/**
 * Mock implementation of the `Blockstore` interface that never stores anything.
 *
 * WARNING!!! Purely to be used with `ipfs-unixfs-importer` to compute a CID without needing to consume any memory.
 * This is particularly useful when dealing with large files and a necessity in a large-scale production service environment.
 *
 * Writes are discarded, and reads behave as if the store were empty:
 * `has()` is always `false`, `isEmpty()` is always `true`, `getAll()` yields nothing,
 * and `get()` returns an empty byte array rather than the block that was "put".
 */
export class BlockstoreMock implements Blockstore {

  /** No-op: nothing needs to be opened. */
  async open(): Promise<void> {
  }

  /** No-op: nothing needs to be closed. */
  async close(): Promise<void> {
  }

  /**
   * Discards the given block.
   * @returns the given `key`, unchanged
   */
  async put(key: CID, _val: Uint8Array, _options?: AbortOptions): Promise<CID> {
    return key;
  }

  /**
   * @returns a new, empty `Uint8Array` regardless of the key, since no block is ever retained
   */
  async get(_key: CID, _options?: AbortOptions): Promise<Uint8Array> {
    return new Uint8Array();
  }

  /**
   * @returns `false` for every key
   */
  async has(_key: CID, _options?: AbortOptions): Promise<boolean> {
    return false;
  }

  /** No-op: there is never anything to delete. */
  async delete(_key: CID, _options?: AbortOptions): Promise<void> {
  }

  /**
   * @returns `true` always
   */
  async isEmpty(_options?: AbortOptions): Promise<boolean> {
    return true;
  }

  /**
   * Passes every entry of `source` to `put()` and yields the entry's CID.
   */
  async * putMany(source: AwaitIterable<Pair>, options?: AbortOptions): AsyncIterable<CID> {
    for await (const entry of source) {
      await this.put(entry.cid, entry.block, options);

      yield entry.cid;
    }
  }

  /**
   * Yields one pair per requested key, using whatever `get()` returns as the block.
   */
  async * getMany(source: AwaitIterable<CID>, options?: AbortOptions): AsyncIterable<Pair> {
    for await (const key of source) {
      yield {
        cid   : key,
        block : await this.get(key, options)
      };
    }
  }

  /**
   * Yields no entries.
   *
   * This class has no backing database to iterate, so the result is an empty sequence,
   * in line with `isEmpty()` reporting `true`.
   */
  async * getAll(_options?: AbortOptions): AsyncIterable<Pair> {
    // intentionally empty: nothing is ever stored
  }

  /**
   * Passes every key of `source` to `delete()` and yields the key.
   */
  async * deleteMany(source: AwaitIterable<CID>, options?: AbortOptions): AsyncIterable<CID> {
    for await (const key of source) {
      await this.delete(key, options);

      yield key;
    }
  }

  /**
   * deletes all entries (no-op, as nothing is ever stored)
   */
  async clear(): Promise<void> {
  }
}

src/utils/cid.ts

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -2,9 +2,9 @@ import * as cbor from '@ipld/dag-cbor';
22

33
import type { Readable } from 'readable-stream';
44

5+
import { BlockstoreMock } from '../store/blockstore-mock.js';
56
import { CID } from 'multiformats/cid';
67
import { importer } from 'ipfs-unixfs-importer';
7-
import { MemoryBlockstore } from 'blockstore-core';
88
import { sha256 } from 'multiformats/hashes/sha2';
99
import { DwnError, DwnErrorCode } from '../core/dwn-error.js';
1010

@@ -77,7 +77,7 @@ export class Cid {
7777
* @returns V1 CID of the DAG comprised by chunking data into unixfs DAG-PB encoded blocks
7878
*/
7979
public static async computeDagPbCidFromBytes(content: Uint8Array): Promise<string> {
80-
const asyncDataBlocks = importer([{ content }], new MemoryBlockstore(), { cidVersion: 1 });
80+
const asyncDataBlocks = importer([{ content }], new BlockstoreMock(), { cidVersion: 1 });
8181

8282
// NOTE: the last block contains the root CID
8383
let block;
@@ -90,7 +90,7 @@ export class Cid {
9090
* @returns V1 CID of the DAG comprised by chunking data into unixfs DAG-PB encoded blocks
9191
*/
9292
public static async computeDagPbCidFromStream(dataStream: Readable): Promise<string> {
93-
const asyncDataBlocks = importer([{ content: dataStream }], new MemoryBlockstore(), { cidVersion: 1 });
93+
const asyncDataBlocks = importer([{ content: dataStream }], new BlockstoreMock(), { cidVersion: 1 });
9494

9595
// NOTE: the last block contains the root CID
9696
let block;
Lines changed: 43 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,43 @@
1+
import chaiAsPromised from 'chai-as-promised';
2+
import chai, { expect } from 'chai';
3+
4+
import { BlockstoreMock } from '../../src/store/blockstore-mock.js';
5+
import { DataStream } from '../../src/index.js';
6+
import { importer } from 'ipfs-unixfs-importer';
7+
import { MemoryBlockstore } from 'blockstore-core';
8+
import { TestDataGenerator } from '../utils/test-data-generator.js';
9+
10+
chai.use(chaiAsPromised);
11+
12+
describe('BlockstoreMock', () => {
  it('should facilitate the same CID computation as other implementations', async () => {

    // Consumes every block emitted by an importer and returns the string form of the last block's CID
    // (the last block contains the root CID), or an empty string when nothing was emitted.
    const drainToRootCid = async (blocks: AsyncIterable<{ cid: { toString(): string } }>): Promise<string> => {
      let lastBlock: { cid: { toString(): string } } | undefined;
      for await (const current of blocks) {
        lastBlock = current;
      }
      return lastBlock ? lastBlock.cid.toString() : '';
    };

    // step through each order of magnitude in size, from 10 bytes up to and including 10MB,
    // to ensure that BlockstoreMock produces the same CID as MemoryBlockstore for the same data
    for (let dataSizeInBytes = 10; dataSizeInBytes <= 10_000_000; dataSizeInBytes *= 10) {
      const dataBytes = TestDataGenerator.randomBytes(dataSizeInBytes);

      // each importer needs its own stream over the same bytes
      const dataStreamForMemoryBlockstore = DataStream.fromBytes(dataBytes);
      const dataStreamForMockBlockstore = DataStream.fromBytes(dataBytes);

      const importerOptions = { cidVersion: 1 } as const;
      const blocksFromMemoryBlockstore = importer([{ content: dataStreamForMemoryBlockstore }], new MemoryBlockstore(), importerOptions);
      const blocksFromMockBlockstore = importer([{ content: dataStreamForMockBlockstore }], new BlockstoreMock(), importerOptions);

      const dataCidByMemoryBlockstore = await drainToRootCid(blocksFromMemoryBlockstore);
      const dataCidByMockBlockstore = await drainToRootCid(blocksFromMockBlockstore);

      expect(dataCidByMockBlockstore).to.exist;
      expect(dataCidByMockBlockstore.length).to.be.greaterThan(0);
      expect(dataCidByMockBlockstore).to.be.equal(dataCidByMemoryBlockstore);
    }
  });
});

0 commit comments

Comments
 (0)