-
Notifications
You must be signed in to change notification settings - Fork 430
Retry one completed Copilot BYOK proxy auth failure as a fresh run #41629
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
5aea718
dd67ccb
a37214d
1275635
8ec2ad7
7f564cb
d8e9cd7
a51963f
e5ef972
c97812f
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -31,6 +31,7 @@ const { | |
| AGENTIC_ENGINE_TIMEOUT_PATTERN, | ||
| isDetectionPhase, | ||
| isAuthenticationFailedError, | ||
| isRetryableProxyAuthenticationFailure, | ||
| isMCPGatewayShutdownError, | ||
| isModelAvailableInReflectData, | ||
| isModelAvailableInReflectFile, | ||
|
|
@@ -1154,6 +1155,23 @@ describe("copilot_harness.cjs", () => { | |
| }); | ||
| }); | ||
|
|
||
| const PROXY_AUTH_FAILURE_OUTPUT = "Authentication failed with provider at http://api-proxy:10002 (HTTP 403)."; | ||
|
|
||
| describe("isRetryableProxyAuthenticationFailure", () => { | ||
| it("returns true for gh-aw proxy auth failures after partial execution", () => { | ||
| expect(isRetryableProxyAuthenticationFailure(PROXY_AUTH_FAILURE_OUTPUT, true)).toBe(true); | ||
| }); | ||
|
|
||
| it("returns false when the auth failure happened before any output was produced", () => { | ||
| expect(isRetryableProxyAuthenticationFailure(PROXY_AUTH_FAILURE_OUTPUT, false)).toBe(false); | ||
| }); | ||
|
|
||
| it("returns false for non-proxy authentication failures", () => { | ||
| expect(isRetryableProxyAuthenticationFailure("Authentication failed (Request ID: ABC123)", true)).toBe(false); | ||
| expect(isRetryableProxyAuthenticationFailure("Authentication failed with provider at https://api.openai.com/v1 (HTTP 401).", true)).toBe(false); | ||
| }); | ||
| }); | ||
|
|
||
| describe("envFlagEnabled", () => { | ||
| it.each(["true", "TRUE", "True", "1", "yes", " YES "])("returns true for '%s'", v => { | ||
| expect(envFlagEnabled(v)).toBe(true); | ||
|
|
@@ -1168,8 +1186,8 @@ describe("copilot_harness.cjs", () => { | |
| }); | ||
| }); | ||
|
|
||
| describe("auth error prevents retry", () => { | ||
| // Inline the same retry logic as the driver, including auth error check | ||
| describe("provider auth retry policy", () => { | ||
| // Inline the same retry logic as the driver for auth-related failures. | ||
| const MCP_POLICY_BLOCKED_PATTERN = /MCP servers were blocked by policy:/; | ||
| const NO_AUTH_INFO_PATTERN = /No authentication information found/; | ||
| const MAX_RETRIES = 3; | ||
|
|
@@ -1184,7 +1202,9 @@ describe("copilot_harness.cjs", () => { | |
| if (result.exitCode === 0) return false; | ||
| // MCP policy errors are persistent — never retry | ||
| if (MCP_POLICY_BLOCKED_PATTERN.test(result.output)) return false; | ||
| if (attempt === 0 && isAuthenticationFailedError(result.output)) return false; | ||
| if (isAuthenticationFailedError(result.output)) { | ||
| return attempt === 0 && isRetryableProxyAuthenticationFailure(result.output, result.hasOutput); | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. The stateless 💡 DetailThe real harness mutates This is a pre-existing test design limitation that becomes more significant as the retry logic grows, but the current tests provide no coverage for multi-attempt state interactions such as:
Consider adding at least one integration-level test that drives the real |
||
| } | ||
| // Auth error on --continue: fall back to fresh run once; on fresh run: bail | ||
| if (NO_AUTH_INFO_PATTERN.test(result.output)) { | ||
| return useContinueOnRetry && attempt < MAX_RETRIES; | ||
|
|
@@ -1197,12 +1217,41 @@ describe("copilot_harness.cjs", () => { | |
| expect(shouldRetry(result, 0, false)).toBe(false); | ||
| }); | ||
|
|
||
| it("does not retry when first attempt reports authentication failed", () => { | ||
| it("retries once when the first attempt hits a proxy auth failure after partial execution", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: PROXY_AUTH_FAILURE_OUTPUT, | ||
| }; | ||
| expect(shouldRetry(result, 0, false)).toBe(true); | ||
| }); | ||
|
|
||
| it("does not retry when proxy auth fails before any output was produced", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: false, | ||
| output: PROXY_AUTH_FAILURE_OUTPUT, | ||
| }; | ||
| expect(shouldRetry(result, 0, false)).toBe(false); | ||
| }); | ||
|
|
||
| it("does not retry generic authentication_failed errors that do not come from the gh-aw proxy", () => { | ||
| const result = { exitCode: 1, hasOutput: true, output: "Authentication failed (Request ID: ABC123)" }; | ||
| expect(shouldRetry(result, 0, false)).toBe(false); | ||
| }); | ||
|
|
||
| it("retries as fresh run when auth fails on a --continue attempt", () => { | ||
| it("retries the first proxy auth failure only once", () => { | ||
| const result = { | ||
| exitCode: 1, | ||
| hasOutput: true, | ||
| output: PROXY_AUTH_FAILURE_OUTPUT, | ||
| }; | ||
| expect(shouldRetry(result, 0, false)).toBe(true); | ||
| expect(shouldRetry(result, 1, false)).toBe(false); | ||
| expect(shouldRetry(result, 2, false)).toBe(false); | ||
| }); | ||
|
|
||
| it("retries as fresh run when no-auth failure happens on a --continue attempt", () => { | ||
| // This replicates the fix: attempt 1 ran for 3+ min then failed mid-stream, | ||
| // attempt 2 (--continue) fails with auth error — driver retries once as fresh run. | ||
| const continueResult = { exitCode: 1, hasOutput: true, output: "Error: No authentication information found." }; | ||
|
|
||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
continueDisabledPermanently = truehere permanently poisons all future retries, not just the next one.💡 Analysis and suggested fix
continueDisabledPermanentlyis not scoped to a single retry — it prevents--continuefrom ever being re-enabled for the entire remaining run. Setting it in the proxy-auth retry branch creates a subtle regression:useContinueOnRetryis forced tofalsebecausecontinueDisabledPermanently = true, so it starts fresh and discards all the valid work done in attempt=1This is wrong.
continueDisabledPermanently = truebelongs in cases where session state is poisoned (null-type tool_call — conversation history causes 400 forever) or where re-enabling--continuewould resurrect a broken recovery path (NoAuthInfo on a--continueattempt). A proxy-auth failure at attempt=0 does not corrupt attempt=1’s freshly-created, clean session state.The one-time retry guarantee is already fully enforced by
attempt === 0;continueDisabledPermanentlyadds nothing to that.useContinueOnRetry = falseis also redundant — it startsfalseby default and is never set totrueat attempt=0.With this fix, if attempt=1 partially succeeds and then fails for a transient reason, attempt=2 will correctly set
useContinueOnRetry = !copilotSDKMode(line 1003) and resume from attempt=1’s valid state instead of discarding it.