Skip to content

Commit af7610d

Browse files
stephentoubCopilot
andcommitted
Fix flaky E2E tests
Make permission handler error coverage assert deterministic replayed tool results instead of waiting for final assistant text, and ensure background-agent tests wait for the completion notification before cleanup. Normalize equivalent replay proxy notification wording across SDK suites. Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>
1 parent 8e67f64 commit af7610d

5 files changed

Lines changed: 102 additions & 9 deletions

File tree

dotnet/test/E2E/PermissionE2ETests.cs

Lines changed: 31 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -226,24 +226,46 @@ await session2.SendAndWaitAsync(new MessageOptions
226226
[Fact]
227227
public async Task Should_Handle_Permission_Handler_Errors_Gracefully()
228228
{
229+
var permissionRequestReceived =
230+
new TaskCompletionSource<PermissionRequest>(TaskCreationOptions.RunContinuationsAsynchronously);
229231
var session = await CreateSessionAsync(new SessionConfig
230232
{
231233
OnPermissionRequest = (request, invocation) =>
232234
{
233-
// Simulate an error in the handler
235+
permissionRequestReceived.TrySetResult(request);
234236
throw new InvalidOperationException("Handler error");
235237
}
236238
});
237239

238-
await session.SendAsync(new MessageOptions
240+
try
239241
{
240-
Prompt = "Run 'echo test'. If you can't, say 'failed'."
241-
});
242-
243-
var message = await TestHelper.GetFinalAssistantMessageAsync(session);
244-
245-
// Should handle the error and deny permission
246-
Assert.Matches("fail|cannot|unable|permission", message?.Data.Content?.ToLowerInvariant() ?? string.Empty);
242+
var exchanges = await SendAndWaitForExchangesAsync(
243+
session,
244+
new MessageOptions
245+
{
246+
Prompt = "Run 'echo test'. If you can't, say 'failed'."
247+
},
248+
minimumCount: 2);
249+
250+
var permissionRequest = await permissionRequestReceived.Task.WaitAsync(TimeSpan.FromSeconds(30));
251+
Assert.IsType<PermissionRequestShell>(permissionRequest);
252+
253+
var toolResultMessage = exchanges
254+
.SelectMany(exchange => exchange.Request.Messages)
255+
.LastOrDefault(message =>
256+
message.Role == "tool" &&
257+
message.StringContent?.Contains("Permission denied", StringComparison.OrdinalIgnoreCase) == true);
258+
259+
Assert.NotNull(toolResultMessage);
260+
Assert.Contains(
261+
"could not request permission",
262+
toolResultMessage.StringContent ?? string.Empty,
263+
StringComparison.OrdinalIgnoreCase);
264+
}
265+
finally
266+
{
267+
await session.DisposeAsync();
268+
}
247269
}
248270

249271
[Fact]

dotnet/test/E2E/RpcTasksAndHandlersE2ETests.cs

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -80,6 +80,22 @@ public async Task Should_Start_Background_Agent_And_Report_Task_Details()
8080
});
8181
Assert.Contains("TASK_AGENT_READY", ready?.Data.Content ?? string.Empty, StringComparison.Ordinal);
8282

83+
var taskCompletionNotification =
84+
new TaskCompletionSource<AssistantMessageEvent>(TaskCreationOptions.RunContinuationsAsynchronously);
85+
using var subscription = session.On<SessionEvent>(evt =>
86+
{
87+
switch (evt)
88+
{
89+
case AssistantMessageEvent assistantMessage
90+
when assistantMessage.Data.Content?.Contains("TASK_AGENT_DONE", StringComparison.Ordinal) == true:
91+
taskCompletionNotification.TrySetResult(assistantMessage);
92+
break;
93+
case SessionErrorEvent error:
94+
taskCompletionNotification.TrySetException(new Exception(error.Data.Message ?? "session error"));
95+
break;
96+
}
97+
});
98+
8399
var started = await session.Rpc.Tasks.StartAgentAsync(
84100
agentType: "general-purpose",
85101
prompt: "Reply with TASK_AGENT_DONE exactly.",
@@ -123,6 +139,7 @@ await TestHelper.WaitForConditionAsync(
123139

124140
Assert.NotNull(task);
125141
Assert.Contains("TASK_AGENT_DONE", task.LatestResponse ?? task.Result ?? string.Empty);
142+
await taskCompletionNotification.Task.WaitAsync(TimeSpan.FromSeconds(30));
126143

127144
if (task.Status == GitHub.Copilot.Rpc.TaskStatus.Idle)
128145
{

python/e2e/test_rpc_tasks_and_handlers_e2e.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,7 @@
2929
UIElicitationResponseAction,
3030
UIHandlePendingElicitationRequest,
3131
)
32+
from copilot.generated.session_events import AssistantMessageData, SessionErrorData
3233
from copilot.session import PermissionHandler
3334

3435
from .testharness import E2ETestContext
@@ -211,6 +212,21 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
211212
session = await ctx.client.create_session(
212213
on_permission_request=PermissionHandler.approve_all,
213214
)
215+
task_completion_notification = asyncio.get_running_loop().create_future()
216+
217+
def on_event(event):
218+
if isinstance(event.data, AssistantMessageData) and "TASK_AGENT_DONE" in (
219+
event.data.content or ""
220+
):
221+
if not task_completion_notification.done():
222+
task_completion_notification.set_result(event)
223+
elif isinstance(event.data, SessionErrorData):
224+
if not task_completion_notification.done():
225+
task_completion_notification.set_exception(
226+
RuntimeError(event.data.message or "session error")
227+
)
228+
229+
unsubscribe = session.on(on_event)
214230
try:
215231
ready = await session.send_and_wait(
216232
"Reply with TASK_AGENT_READY exactly.",
@@ -262,6 +278,7 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
262278
)
263279
assert found_task is not None, f"Task {task_id} disappeared before it completed"
264280
assert "TASK_AGENT_DONE" in (found_task.latest_response or found_task.result or "")
281+
await asyncio.wait_for(task_completion_notification, timeout=30.0)
265282

266283
if found_task.status == TaskInfoStatus.IDLE:
267284
cancel = await session.rpc.tasks.cancel(TasksCancelRequest(id=task_id))
@@ -274,4 +291,5 @@ async def test_should_start_background_agent_and_report_task_details(self, ctx:
274291
after_remove = await session.rpc.tasks.list()
275292
assert not any(t.id == task_id for t in (after_remove.tasks or []))
276293
finally:
294+
unsubscribe()
277295
await session.disconnect()

test/harness/replayingCapiProxy.test.ts

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -324,6 +324,38 @@ describe("ReplayingCapiProxy", () => {
324324
expect(result.conversations[0].messages[0].content).toBe("What is 2+2?");
325325
});
326326

327+
test("normalizes task completion notification wording", async () => {
328+
const unreadNotification = [
329+
"<system_notification>",
330+
'Agent "sdk-background-agent" (general-purpose) has completed successfully. Use read_agent with agent_id "sdk-background-agent" to retrieve unread results.',
331+
"</system_notification>",
332+
].join("\n");
333+
const fullNotification = [
334+
"<system_notification>",
335+
'Agent "sdk-background-agent" (general-purpose) has completed successfully. Use read_agent with agent_id "sdk-background-agent" to retrieve the full results.',
336+
"</system_notification>",
337+
].join("\n");
338+
339+
const requestBody = JSON.stringify({
340+
messages: [
341+
{
342+
role: "user",
343+
content: unreadNotification,
344+
},
345+
],
346+
});
347+
const responseBody = JSON.stringify({
348+
choices: [{ message: { role: "assistant", content: "Done" } }],
349+
});
350+
351+
const outputPath = await createProxy([
352+
{ url: "/chat/completions", requestBody, responseBody },
353+
]);
354+
355+
const result = await readYamlOutput(outputPath);
356+
expect(result.conversations[0].messages[0].content).toBe(fullNotification);
357+
});
358+
327359
test("strips agent_instructions from user messages", async () => {
328360
const requestBody = JSON.stringify({
329361
messages: [

test/harness/replayingCapiProxy.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1034,6 +1034,10 @@ function transformOpenAIRequestMessage(
10341034

10351035
function normalizeUserMessage(content: string): string {
10361036
return normalizeSkillContextFrontmatter(content)
1037+
.replace(
1038+
/Use read_agent with agent_id "([^"]+)" to retrieve unread results\./g,
1039+
'Use read_agent with agent_id "$1" to retrieve the full results.',
1040+
)
10371041
.replace(/<current_datetime>.*?<\/current_datetime>/g, "")
10381042
.replace(/<reminder>[\s\S]*?<\/reminder>/g, "")
10391043
.replace(/<system_reminder>[\s\S]*?<\/system_reminder>/g, "")

0 commit comments

Comments
 (0)