fix: allow multiple compaction retries on context overflow (#8928)

Previously, overflowCompactionAttempted was a boolean flag set once, preventing
recovery when a single compaction wasn't enough. Replace it with a counter
(overflowCompactionAttempts) that allows up to 3 compaction attempts
(MAX_OVERFLOW_COMPACTION_ATTEMPTS) before giving up. Also add diagnostic logging
on overflow events to help debug early-overflow issues.

Fixes sessions that hit context overflow during long agentic turns with many tool
calls, where one compaction round isn't sufficient to bring context below limits.
main
Glucksberg 2026-02-05 17:58:37 -04:00 committed by GitHub
parent 4629054403
commit 4e1a7cd60c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 73 additions and 22 deletions

View File

@ -137,6 +137,7 @@ vi.mock("../pi-embedded-helpers.js", async () => {
isFailoverErrorMessage: vi.fn(() => false),
isAuthAssistantError: vi.fn(() => false),
isRateLimitAssistantError: vi.fn(() => false),
isBillingAssistantError: vi.fn(() => false),
classifyFailoverReason: vi.fn(() => null),
formatAssistantErrorText: vi.fn(() => ""),
pickFallbackThinkingLevel: vi.fn(() => null),
@ -214,7 +215,9 @@ describe("overflow compaction in run loop", () => {
);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(log.warn).toHaveBeenCalledWith(
expect.stringContaining("context overflow detected; attempting auto-compaction"),
expect.stringContaining(
"context overflow detected (attempt 1/3); attempting auto-compaction",
),
);
expect(log.info).toHaveBeenCalledWith(expect.stringContaining("auto-compaction succeeded"));
// Should not be an error result
@ -241,31 +244,68 @@ describe("overflow compaction in run loop", () => {
expect(log.warn).toHaveBeenCalledWith(expect.stringContaining("auto-compaction failed"));
});
it("returns error if overflow happens again after compaction", async () => {
// Retry-cap scenario: every run attempt reports a context overflow and every
// compaction reports success. The test expects three compaction calls, four run
// attempts, and a final context_overflow error result.
it("retries compaction up to 3 times before giving up", async () => {
// Prompt error returned by every mocked attempt; the assertions below expect it
// to end up reported as a "context_overflow" error.
const overflowError = new Error("request_too_large: Request size exceeds model context window");
// 4 overflow errors: 3 compaction retries + final failure
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
// Three queued compaction results, each reporting ok/compacted with a distinct
// summary and a smaller tokensBefore than the previous one.
mockedCompactDirect
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 3", firstKeptEntryId: "entry-7", tokensBefore: 140000 },
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction attempted 3 times (max)
expect(mockedCompactDirect).toHaveBeenCalledTimes(3);
// 4 attempts: 3 overflow+compact+retry cycles + final overflow → error
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(4);
// The overflow must surface both in the result metadata and as an error payload.
expect(result.meta.error?.kind).toBe("context_overflow");
expect(result.payloads?.[0]?.isError).toBe(true);
});
it("succeeds after second compaction attempt", async () => {
const overflowError = new Error("request_too_large: Request size exceeds model context window");
mockedRunEmbeddedAttempt
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }));
.mockResolvedValueOnce(makeAttemptResult({ promptError: overflowError }))
.mockResolvedValueOnce(makeAttemptResult({ promptError: null }));
mockedCompactDirect.mockResolvedValueOnce({
ok: true,
compacted: true,
result: {
summary: "Compacted",
firstKeptEntryId: "entry-3",
tokensBefore: 180000,
},
});
mockedCompactDirect
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 1", firstKeptEntryId: "entry-3", tokensBefore: 180000 },
})
.mockResolvedValueOnce({
ok: true,
compacted: true,
result: { summary: "Compacted 2", firstKeptEntryId: "entry-5", tokensBefore: 160000 },
});
const result = await runEmbeddedPiAgent(baseParams);
// Compaction attempted only once
expect(mockedCompactDirect).toHaveBeenCalledTimes(1);
// Two attempts: first overflow -> compact -> retry -> second overflow -> return error
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(2);
expect(result.meta.error?.kind).toBe("context_overflow");
expect(result.payloads?.[0]?.isError).toBe(true);
expect(mockedCompactDirect).toHaveBeenCalledTimes(2);
expect(mockedRunEmbeddedAttempt).toHaveBeenCalledTimes(3);
expect(result.meta.error).toBeUndefined();
});
it("does not attempt compaction for compaction_failure errors", async () => {

View File

@ -303,7 +303,8 @@ export async function runEmbeddedPiAgent(
}
}
let overflowCompactionAttempted = false;
const MAX_OVERFLOW_COMPACTION_ATTEMPTS = 3;
let overflowCompactionAttempts = 0;
try {
while (true) {
attemptedThinking.add(thinkLevel);
@ -373,13 +374,23 @@ export async function runEmbeddedPiAgent(
if (promptError && !aborted) {
const errorText = describeUnknownError(promptError);
if (isContextOverflowError(errorText)) {
const msgCount = attempt.messagesSnapshot?.length ?? 0;
log.warn(
`[context-overflow-diag] sessionKey=${params.sessionKey ?? params.sessionId} ` +
`provider=${provider}/${modelId} messages=${msgCount} ` +
`sessionFile=${params.sessionFile} compactionAttempts=${overflowCompactionAttempts} ` +
`error=${errorText.slice(0, 200)}`,
);
const isCompactionFailure = isCompactionFailureError(errorText);
// Attempt auto-compaction on context overflow (not compaction_failure)
if (!isCompactionFailure && !overflowCompactionAttempted) {
if (
!isCompactionFailure &&
overflowCompactionAttempts < MAX_OVERFLOW_COMPACTION_ATTEMPTS
) {
overflowCompactionAttempts++;
log.warn(
`context overflow detected; attempting auto-compaction for ${provider}/${modelId}`,
`context overflow detected (attempt ${overflowCompactionAttempts}/${MAX_OVERFLOW_COMPACTION_ATTEMPTS}); attempting auto-compaction for ${provider}/${modelId}`,
);
overflowCompactionAttempted = true;
const compactResult = await compactEmbeddedPiSessionDirect({
sessionId: params.sessionId,
sessionKey: params.sessionKey,