fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag that was set once, which
prevented recovery when a single compaction wasn't enough. This change replaces it
with a counter (overflowCompactionAttempts) that allows up to 3 compaction attempts
before giving up. It also adds diagnostic logging on overflow events to help debug
early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
main
Glucksberg 2026-02-05 17:58:37 -04:00 committed by GitHub
parent 4629054403
commit 4e1a7cd60c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 22 deletions

View File

@ -137,6 +137,7 @@ vi.mock("../pi-embedded-helpers.js", async () => {
isFailoverErrorMessage: vi.fn(() => false), isFailoverErrorMessage: vi.fn(() => false),
isAuthAssistantError: vi.fn(() => false), isAuthAssistantError: vi.fn(() => false),
isRateLimitAssistantError: vi.fn(() => false), isRateLimitAssistantError: vi.fn(() => false),
isBillingAssistantError: vi.fn(() => false),
classifyFailoverReason: vi.fn(() => null), classifyFailoverReason: vi.fn(() => null),
formatAssistantErrorText: vi.fn(() => ""), formatAssistantErrorText: vi.fn(() => ""),
pickFallbackThinkingLevel: vi.fn(() => null), pickFallbackThinkingLevel: vi.fn(() => null),
@ -214,7 +215,9 @@ describe("overflow compaction in run loop", () => {
); );
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(log.warn).toHaveBeenCalledWith( expect(log.warn).toHaveBeenCalledWith(
expect.stringContaining("context overflow detected; attempting auto-compaction"), expect.stringContaining(
"context overflow detected (attempt 1/3); attempting auto-compaction",
),
); );
expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded")); expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded"));
// Should not be an error result // Should not be an error result
@ -241,33 +244,70 @@ describe("overflow compaction in run loop", () => {
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed")); expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
}); });
it("returns error if overflow happens again after compaction", async () => { it("retries compaction up to 3 times before giving up", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window"); const overflowError = new Error("request_too_large: Request size exceeds model context window");
// 4 overflow errors: 3 compaction retries + final failure
mockedRunEmbeddedAttempt mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })) .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError })); .mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
mockedCompactDirect.mockResolvedValueOnce({ mockedCompactDirect
.mockResolvedValueOnce({
ok: true, ok: true,
compacted: true, compacted: true,
result: { result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
summary: "Compacted", })
firstKeptEntryId: "entry-3", .mockResolvedValueOnce({
tokensBefore: 180000, ok: true,
}, compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
}); });
const result = await runEmbeddedPiAgent(baseParams); const result = await runEmbeddedPiAgent(baseParams);
// Compaction attempted only once // Compaction attempted 3 times (max)
expect(mockedCompactDirect).toHaveBeenCalledTimes(1); expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
// Two attempts: first overflow -> compact -> retry -> second overflow -> return error // 4 attempts: 3 overflow+compact+retry cycles + final overflow → error
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2); expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
expect(result.meta.error?.kind).toBe("context_overflow"); expect(result.meta.error?.kind).toBe("context_overflow");
expect(result.payloads?.[0]?.isError).toBe(true); expect(result.payloads?.[0]?.isError).toBe(true);
}); });
it("succeeds after second compaction attempt", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
});
const result = await runEmbeddedPiAgent(baseParams);
expect(mockedCompactDirect).toHaveBeenCalledTimes(2);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3);
expect(result.meta.error).toBeUndefined();
});
it("does not attempt compaction for compaction_failure errors", async () => { it("does not attempt compaction for compaction_failure errors", async () => {
const compactionFailureError = new Error( const compactionFailureError = new Error(
"request_too_large: summarization failed - Request size exceeds model context window", "request_too_large: summarization failed - Request size exceeds model context window",

View File

@ -303,7 +303,8 @@ export async function runEmbeddedPiAgent(
} }
} }
let overflowCompactionAttempted = false; const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
try { try {
while (true) { while (true) {
attemptedThinking.add(thinkLevel); attemptedThinking.add(thinkLevel);
@ -373,13 +374,23 @@ export async function runEmbeddedPiAgent(
if (promptError && !aborted) { if (promptError && !aborted) {
const errorText = describeUnknownError(promptError); const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) { if (isContextOverflowError(errorText)) {
const msgCount = attempt.messagesSnapshot?.length ?? 0;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} messages=${msgCount} ` +
`sessionFile=${params.sessionFile} compactionAttempts=${overflowCompactionAttempts} ` +
`error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText); const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure) // Attempt auto-compaction on context overflow (not compaction_failure)
if (!isCompactionFailure && !overflowCompactionAttempted) { if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn( log.warn(
`context overflow detected; attempting auto-compaction for ${provider}/${modelId}`, `context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
); );
overflowCompactionAttempted = true;
const compactResult = await compactEmbeddedPiSessionDirect({ const compactResult = await compactEmbeddedPiSessionDirect({
sessionId: params.sessionId, sessionId: params.sessionId,
sessionKey: params.sessionKey, sessionKey: params.sessionKey,