diff --git a/core/indexing/CodebaseIndexer.test.ts b/core/indexing/CodebaseIndexer.test.ts index b697520aaa..f2a31209c8 100644 --- a/core/indexing/CodebaseIndexer.test.ts +++ b/core/indexing/CodebaseIndexer.test.ts @@ -124,14 +124,6 @@ describe("CodebaseIndexer", () => { return updates; } - async function refreshIndexFiles(files: string[]) { - const updates = []; - for await (const update of codebaseIndexer.refreshFiles(files)) { - updates.push(update); - } - return updates; - } - async function getAllIndexedFiles() { const files = await testIndex.getIndexedFilesForTags( await testIde.getTags(testIndex.artifactId), @@ -197,7 +189,7 @@ describe("CodebaseIndexer", () => { test("should successfuly re-index specific files", async () => { // Could add more specific tests for this but uses similar logic const before = await getAllIndexedFiles(); - await refreshIndexFiles(before); + await codebaseIndexer.refreshCodebaseIndexFiles(before); const after = await getAllIndexedFiles(); expect(after.length).toBe(before.length); diff --git a/core/indexing/CodebaseIndexer.ts b/core/indexing/CodebaseIndexer.ts index 86933f8bdf..d2de0c5124 100644 --- a/core/indexing/CodebaseIndexer.ts +++ b/core/indexing/CodebaseIndexer.ts @@ -43,7 +43,7 @@ export class CodebaseIndexer { * - To limit memory usage for indexes that perform computations locally, e.g. FTS * - To make as few requests as possible to the embeddings providers */ - filesPerBatch = 500; + filesPerBatch = 200; private indexingCancellationController: AbortController | undefined; private codebaseIndexingState: IndexingProgressUpdate; private readonly pauseToken: PauseToken; @@ -75,16 +75,10 @@ export class CodebaseIndexer { this.pauseToken = new PauseToken(initialPaused); } - /** - * Set the paused state of the indexer - */ set paused(value: boolean) { this.pauseToken.paused = value; } - /** - * Get the current paused state of the indexer - */ get paused(): boolean { return this.pauseToken.paused; } @@ -235,7 +229,9 @@ export class CodebaseIndexer { } } - async *refreshFiles(files: string[]): AsyncGenerator { + private async *refreshFiles( + files: string[], + ): AsyncGenerator { let progress = 0; if (files.length === 0) { yield { @@ -525,7 +521,7 @@ export class CodebaseIndexer { repoName, )) { yield { - progress: progress, + progress, desc, status: "indexing", }; diff --git a/core/llm/llms/Bedrock.ts b/core/llm/llms/Bedrock.ts index ae595f3f33..3fc2294652 100644 --- a/core/llm/llms/Bedrock.ts +++ b/core/llm/llms/Bedrock.ts @@ -547,7 +547,7 @@ class Bedrock extends BaseLLM { } // EMBED // - async embed(chunks: string[]): Promise { + async _embed(chunks: string[]): Promise { const credentials = await this._getCredentials(); const client = new BedrockRuntimeClient({ region: this.region, diff --git a/core/llm/llms/HuggingFaceTEI.ts b/core/llm/llms/HuggingFaceTEI.ts index 175aedde02..fcf20ac8e4 100644 --- a/core/llm/llms/HuggingFaceTEI.ts +++ b/core/llm/llms/HuggingFaceTEI.ts @@ -25,16 +25,7 @@ class HuggingFaceTEIEmbeddingsProvider extends BaseLLM { }); } - async embed(chunks: string[]) { - const batchedChunks = this.getBatchedChunks(chunks); - - const results = await Promise.all( - batchedChunks.map((batch) => this.doEmbedRequest(batch)), - ); - return results.flat(); - } - - async doEmbedRequest(batch: string[]): Promise { + async _embed(batch: string[]): Promise { const headers: Record = { "Content-Type": "application/json", }; diff --git a/core/llm/llms/SageMaker.ts b/core/llm/llms/SageMaker.ts index bbb87c1310..1d1a950c92 100644 --- a/core/llm/llms/SageMaker.ts +++ b/core/llm/llms/SageMaker.ts @@ -141,7 +141,7 @@ class SageMaker extends BaseLLM { } } - async embed(chunks: string[]) { + async _embed(chunks: string[]) { const credentials = await this._getCredentials(); const client = new SageMakerRuntimeClient({ region: this.region,