Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
127 changes: 98 additions & 29 deletions go/internal/e2e/compaction_e2e_test.go
Original file line number Diff line number Diff line change
@@ -1,15 +1,16 @@
package e2e

import (
"errors"
"strings"
"testing"
"time"

copilot "github.com/github/copilot-sdk/go"
"github.com/github/copilot-sdk/go/internal/e2e/testharness"
)

func TestCompactionE2E(t *testing.T) {
t.Skip("Compaction tests are skipped due to flakiness — re-enable once stabilized")
ctx := testharness.NewTestContext(t)
client := ctx.NewClient()
t.Cleanup(func() { client.ForceStop() })
Expand All @@ -33,19 +34,43 @@ func TestCompactionE2E(t *testing.T) {
t.Fatalf("Failed to create session: %v", err)
}

var compactionStartEvents []copilot.SessionEvent
var compactionCompleteEvents []copilot.SessionEvent

session.On(func(event copilot.SessionEvent) {
switch event.Data.(type) {
// The first prompt leaves the session below the compaction processor's minimum
// message count. The second prompt is therefore the first deterministic point
// at which low thresholds can trigger compaction. Subscribe before any prompts
// are sent so we never miss the events. The complete-event subscription filters
// for Success==true so any transient failed compaction event the daemon may emit
// before a successful retry is ignored (mirrors the dotnet/rust references).
startCh := make(chan copilot.SessionEvent, 1)
completeCh := make(chan copilot.SessionEvent, 1)
errCh := make(chan error, 1)
unsubscribe := session.On(func(event copilot.SessionEvent) {
switch d := event.Data.(type) {
case *copilot.SessionCompactionStartData:
compactionStartEvents = append(compactionStartEvents, event)
select {
case startCh <- event:
default:
}
case *copilot.SessionCompactionCompleteData:
compactionCompleteEvents = append(compactionCompleteEvents, event)
if !d.Success {
return
}
select {
case completeCh <- event:
default:
}
case *copilot.SessionErrorData:
msg := d.Message
if msg == "" {
msg = "session error"
}
select {
case errCh <- errors.New(msg):
default:
}
}
Comment thread
stephentoub marked this conversation as resolved.
})
defer unsubscribe()

// Send multiple messages to fill up the context window
_, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Tell me a story about a dragon. Be detailed."})
if err != nil {
t.Fatalf("Failed to send first message: %v", err)
Expand All @@ -56,38 +81,82 @@ func TestCompactionE2E(t *testing.T) {
t.Fatalf("Failed to send second message: %v", err)
}

_, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now describe the dragon's treasure in great detail."})
if err != nil {
t.Fatalf("Failed to send third message: %v", err)
const compactionTimeout = 60 * time.Second

var startEvent copilot.SessionEvent
select {
case startEvent = <-startCh:
case err := <-errCh:
t.Fatalf("Session error waiting for session.compaction_start event: %v", err)
case <-time.After(compactionTimeout):
t.Fatalf("Timed out waiting for session.compaction_start event")
}

var completeEvent copilot.SessionEvent
select {
case completeEvent = <-completeCh:
case err := <-errCh:
t.Fatalf("Session error waiting for session.compaction_complete event: %v", err)
case <-time.After(compactionTimeout):
t.Fatalf("Timed out waiting for session.compaction_complete event")
}

// Should have triggered compaction at least once
if len(compactionStartEvents) < 1 {
t.Errorf("Expected at least 1 compaction_start event, got %d", len(compactionStartEvents))
startData, ok := startEvent.Data.(*copilot.SessionCompactionStartData)
if !ok {
t.Fatalf("Expected SessionCompactionStartData, got %T", startEvent.Data)
}
if len(compactionCompleteEvents) < 1 {
t.Errorf("Expected at least 1 compaction_complete event, got %d", len(compactionCompleteEvents))
if startData.ConversationTokens == nil || *startData.ConversationTokens <= 0 {
t.Errorf("Expected compaction to report conversation tokens at start, got %v", startData.ConversationTokens)
}

// Compaction should have succeeded
if len(compactionCompleteEvents) > 0 {
lastComplete := compactionCompleteEvents[len(compactionCompleteEvents)-1]
d, ok := lastComplete.Data.(*copilot.SessionCompactionCompleteData)
if !ok || !d.Success {
t.Errorf("Expected compaction to succeed")
}
if ok && d.TokensRemoved != nil && *d.TokensRemoved <= 0 {
t.Errorf("Expected tokensRemoved > 0, got %v", *d.TokensRemoved)
}
completeData, ok := completeEvent.Data.(*copilot.SessionCompactionCompleteData)
if !ok {
t.Fatalf("Expected SessionCompactionCompleteData, got %T", completeEvent.Data)
}
if !completeData.Success {
t.Errorf("Expected compaction to succeed, error=%v", completeData.Error)
}
if completeData.CompactionTokensUsed == nil {
t.Errorf("Expected compaction tokens-used data")
} else if completeData.CompactionTokensUsed.InputTokens == nil || *completeData.CompactionTokensUsed.InputTokens <= 0 {
t.Errorf("Expected compaction call to consume input tokens, got %v", completeData.CompactionTokensUsed.InputTokens)
}
summary := ""
if completeData.SummaryContent != nil {
summary = *completeData.SummaryContent
}
summary = strings.ToLower(summary)
if !strings.Contains(summary, "<overview>") {
t.Errorf("Expected summary to contain <overview>, got: %q", summary)
}
if !strings.Contains(summary, "<history>") {
t.Errorf("Expected summary to contain <history>, got: %q", summary)
}
if !strings.Contains(summary, "<checkpoint_title>") {
t.Errorf("Expected summary to contain <checkpoint_title>, got: %q", summary)
}

_, err = session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "Now describe the dragon's treasure in great detail."})
if err != nil {
t.Fatalf("Failed to send third message: %v", err)
}

// Verify session still works after compaction
answer, err := session.SendAndWait(t.Context(), copilot.MessageOptions{Prompt: "What was the story about?"})
if err != nil {
t.Fatalf("Failed to send verification message: %v", err)
}
if ad, ok := answer.Data.(*copilot.AssistantMessageData); !ok || !strings.Contains(strings.ToLower(ad.Content), "dragon") {
t.Errorf("Expected answer to contain 'dragon', got %v", answer.Data)
ad, ok := answer.Data.(*copilot.AssistantMessageData)
if !ok {
t.Fatalf("Expected assistant message data, got %T", answer.Data)
}
content := strings.ToLower(ad.Content)
// Should remember it was about a dragon (context preserved via summary)
if !strings.Contains(content, "kaedrith") {
t.Errorf("Expected answer to mention 'Kaedrith', got: %q", ad.Content)
}
if !strings.Contains(content, "dragon") {
t.Errorf("Expected answer to mention 'dragon', got: %q", ad.Content)
}
})

Expand Down
101 changes: 70 additions & 31 deletions nodejs/test/e2e/compaction.e2e.test.ts
Original file line number Diff line number Diff line change
@@ -1,9 +1,40 @@
import { describe, expect, it } from "vitest";
import { SessionEvent, approveAll } from "../../src/index.js";
import { approveAll, type CopilotSession, type SessionEvent } from "../../src/index.js";
import { createSdkTestContext } from "./harness/sdkTestContext.js";

// TODO: Compaction tests are skipped due to flakiness — re-enable once stabilized
describe.skip("Compaction", async () => {
const compactionTimeoutMs = 60_000;

/**
 * Resolves with the next session event of `eventType` that satisfies `predicate`.
 *
 * The listener is attached as soon as this function is called, so invoking it before
 * sending a prompt ensures the event cannot be missed. The promise rejects when:
 *   - a `session.error` event arrives before a matching event,
 *   - `predicate` throws, or
 *   - no matching event is observed within `compactionTimeoutMs`.
 * In every outcome the listener is removed and the timeout timer is cleared.
 *
 * @param session - Session whose events are observed.
 * @param eventType - Event type to wait for.
 * @param description - Human-readable label used in the timeout error message.
 * @param predicate - Optional filter; events of `eventType` for which it returns
 *   false are ignored and waiting continues. Defaults to accepting every event.
 * @returns The first matching event, narrowed to `eventType`.
 */
function getNextSessionEvent<TEventType extends SessionEvent["type"]>(
    session: CopilotSession,
    eventType: TEventType,
    description: string,
    predicate: (event: Extract<SessionEvent, { type: TEventType }>) => boolean = () => true
): Promise<Extract<SessionEvent, { type: TEventType }>> {
    return new Promise((resolve, reject) => {
        // Placeholder until session.on() returns the real unsubscribe function.
        let unsubscribe: () => void = () => {};
        const timeout = setTimeout(() => {
            unsubscribe();
            reject(new Error(`Timed out waiting for ${description}`));
        }, compactionTimeoutMs);

        // Single teardown path so no branch can forget the timer or the listener.
        const cleanup = () => {
            clearTimeout(timeout);
            unsubscribe();
        };

        unsubscribe = session.on((event) => {
            if (event.type === eventType) {
                const typedEvent = event as Extract<SessionEvent, { type: TEventType }>;
                let matched: boolean;
                try {
                    matched = predicate(typedEvent);
                } catch (err) {
                    // Surface predicate failures to the waiter instead of letting them
                    // escape into the session's event dispatcher and stall until timeout.
                    cleanup();
                    reject(err);
                    return;
                }
                if (matched) {
                    cleanup();
                    resolve(typedEvent);
                }
            } else if (event.type === "session.error") {
                cleanup();
                const { message, stack } = event.data;
                // Only append the stack when present, so the message never ends in "undefined".
                reject(new Error(stack ? `${message}\n${stack}` : message));
            }
        });
    });
}

describe("Compaction", async () => {
const { copilotClient: client } = await createSdkTestContext();

it("should trigger compaction with low threshold and emit events", async () => {
Expand All @@ -19,48 +50,56 @@ describe.skip("Compaction", async () => {
},
});

const events: SessionEvent[] = [];
session.on((event) => {
events.push(event);
});
// The first prompt leaves the session below the compaction processor's minimum
// message count. The second prompt is therefore the first deterministic point
// at which low thresholds can trigger compaction. Register event waiters before
// any prompts are sent so we never miss the events.
const compactionStartedP = getNextSessionEvent(
session,
"session.compaction_start",
"session.compaction_start"
);
// Wait specifically for a *successful* compaction_complete so that any transient
// failed compaction event the daemon may emit before a successful retry is ignored
// (mirrors the dotnet/rust references).
const compactionCompletedP = getNextSessionEvent(
session,
"session.compaction_complete",
"successful session.compaction_complete",
(event) => event.data.success
);

// Send multiple messages to fill up the context window
// With such low thresholds, even a few messages should trigger compaction
await session.sendAndWait({
prompt: "Tell me a story about a dragon. Be detailed.",
});
await session.sendAndWait({
prompt: "Continue the story with more details about the dragon's castle.",
});
await session.sendAndWait({
prompt: "Now describe the dragon's treasure in great detail.",
});

// Check for compaction events
const compactionStartEvents = events.filter((e) => e.type === "session.compaction_start");
const compactionCompleteEvents = events.filter(
(e) => e.type === "session.compaction_complete"
);
const [startEvent, completeEvent] = await Promise.all([
compactionStartedP,
compactionCompletedP,
]);
Comment thread
stephentoub marked this conversation as resolved.

// Should have triggered compaction at least once
expect(compactionStartEvents.length).toBeGreaterThanOrEqual(1);
expect(compactionCompleteEvents.length).toBeGreaterThanOrEqual(1);
expect(startEvent.data.conversationTokens ?? 0).toBeGreaterThan(0);
expect(completeEvent.data.success).toBe(true);
expect(completeEvent.data.compactionTokensUsed).toBeDefined();
expect(completeEvent.data.compactionTokensUsed?.inputTokens ?? 0).toBeGreaterThan(0);
const summary = (completeEvent.data.summaryContent ?? "").toLowerCase();
expect(summary).toContain("<overview>");
expect(summary).toContain("<history>");
expect(summary).toContain("<checkpoint_title>");

// Compaction should have succeeded
const lastCompactionComplete =
compactionCompleteEvents[compactionCompleteEvents.length - 1];
expect(lastCompactionComplete.data.success).toBe(true);

// Should have removed some tokens
if (lastCompactionComplete.data.tokensRemoved !== undefined) {
expect(lastCompactionComplete.data.tokensRemoved).toBeGreaterThan(0);
}
await session.sendAndWait({
prompt: "Now describe the dragon's treasure in great detail.",
});

// Verify the session still works after compaction
const answer = await session.sendAndWait({ prompt: "What was the story about?" });
expect(answer?.data.content).toBeDefined();
const content = (answer?.data.content ?? "").toLowerCase();
// Should remember it was about a dragon (context preserved via summary)
expect(answer?.data.content?.toLowerCase()).toContain("dragon");
expect(content).toContain("kaedrith");
expect(content).toContain("dragon");
}, 120000);

it("should not emit compaction events when infinite sessions disabled", async () => {
Expand Down
Loading
Loading