Verbose: batch rapid tool results

main
Peter Steinberger 2025-12-03 10:11:41 +00:00
parent 527bed2b53
commit 38a03ff2c8
3 changed files with 63 additions and 11 deletions

View File

@ -5,6 +5,7 @@
### Highlights
- **Thinking directives & state:** `/t|/think|/thinking <level>` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking <level>` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`.
- **Verbose directives + session hints:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. Directive-only replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as metadata-only `[🛠️ <tool-name> <arg>]` messages (now streamed as they happen), and new sessions surface a `🧭 New session: <id>` hint.
- **Verbose tool coalescing:** successive tool results of the same tool within ~250ms are batched into one `[🛠️ tool] arg1, arg2` message to reduce WhatsApp noise.
- **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged).
- **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts.
- **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`.

View File

@ -53,6 +53,9 @@ export type CommandReplyResult = {
meta: CommandReplyMeta;
};
// Debounce window for coalescing successive tool_result messages (ms).
// Successive results from the same tool arriving within this window are
// batched into a single `[🛠️ tool] arg1, arg2` partial reply to cut noise.
const TOOL_RESULT_DEBOUNCE_MS = 250;
type ToolMessageLike = {
name?: string;
toolName?: string;
@ -120,6 +123,12 @@ function formatToolPrefix(toolName?: string, meta?: string) {
return extra ? `[🛠️ ${label} ${extra}]` : `[🛠️ ${label}]`;
}
// Render one coalesced tool-result line: the `[🛠️ <tool>]` prefix followed by
// every non-empty meta joined with ", " (e.g. `[🛠️ bash] ls, pwd`). With no
// usable metas, just the bare prefix is returned.
function formatToolAggregate(toolName?: string, metas?: string[]) {
  const entries = metas?.filter((m) => Boolean(m)) ?? [];
  const prefix = formatToolPrefix(toolName);
  return entries.length ? `${prefix} ${entries.join(", ")}` : prefix;
}
export function summarizeClaudeMetadata(payload: unknown): string | undefined {
if (!payload || typeof payload !== "object") return undefined;
const obj = payload as Record<string, unknown>;
@ -321,6 +330,27 @@ export async function runCommandReply(
let queuedMs: number | undefined;
let queuedAhead: number | undefined;
try {
let pendingToolName: string | undefined;
let pendingMetas: string[] = [];
let pendingTimer: NodeJS.Timeout | null = null;
const flushPendingTool = () => {
if (!onPartialReply) return;
if (!pendingToolName && pendingMetas.length === 0) return;
const text = formatToolAggregate(pendingToolName, pendingMetas);
const { text: cleanedText, mediaUrls: mediaFound } =
splitMediaFromOutput(text);
void onPartialReply({
text: cleanedText,
mediaUrls: mediaFound?.length ? mediaFound : undefined,
} as ReplyPayload);
pendingToolName = undefined;
pendingMetas = [];
if (pendingTimer) {
clearTimeout(pendingTimer);
pendingTimer = null;
}
};
const run = async () => {
// Prefer long-lived tau RPC for pi agent to avoid cold starts.
if (agentKind === "pi") {
@ -338,7 +368,7 @@ export async function runCommandReply(
}
return copy;
})();
return await runPiRpc({
const rpcResult = await runPiRpc({
argv: rpcArgv,
cwd: reply.cwd,
prompt: body,
@ -363,13 +393,20 @@ export async function runCommandReply(
) {
const toolName = inferToolName(ev.message);
const meta = inferToolMeta(ev.message);
const prefix = formatToolPrefix(toolName, meta);
const { text: cleanedText, mediaUrls: mediaFound } =
splitMediaFromOutput(prefix);
void onPartialReply({
text: cleanedText,
mediaUrls: mediaFound?.length ? mediaFound : undefined,
} as ReplyPayload);
if (
pendingToolName &&
toolName &&
toolName !== pendingToolName
) {
flushPendingTool();
}
if (!pendingToolName) pendingToolName = toolName;
if (meta) pendingMetas.push(meta);
if (pendingTimer) clearTimeout(pendingTimer);
pendingTimer = setTimeout(
flushPendingTool,
TOOL_RESULT_DEBOUNCE_MS,
);
}
} catch {
// ignore malformed lines
@ -377,6 +414,8 @@ export async function runCommandReply(
}
: undefined,
});
flushPendingTool();
return rpcResult;
}
return await commandRunner(finalArgv, { timeoutMs, cwd: reply.cwd });
};
@ -414,8 +453,20 @@ export async function runCommandReply(
verboseLevel === "on" && !onPartialReply && parsedToolResults.length > 0;
if (includeToolResultsInline) {
for (const tr of parsedToolResults) {
const prefixed = formatToolPrefix(tr.toolName, tr.meta);
const aggregated = parsedToolResults.reduce<
{ toolName?: string; metas: string[] }[]
>((acc, tr) => {
const last = acc.at(-1);
if (last && last.toolName === tr.toolName) {
if (tr.meta) last.metas.push(tr.meta);
} else {
acc.push({ toolName: tr.toolName, metas: tr.meta ? [tr.meta] : [] });
}
return acc;
}, []);
for (const tr of aggregated) {
const prefixed = formatToolAggregate(tr.toolName, tr.metas);
const { text: cleanedText, mediaUrls: mediaFound } =
splitMediaFromOutput(prefixed);
replyItems.push({

View File

@ -744,7 +744,7 @@ describe("config and templating", () => {
expect(rpcSpy).toHaveBeenCalled();
const payloads = Array.isArray(res) ? res : res ? [res] : [];
expect(payloads.length).toBeGreaterThanOrEqual(2);
expect(payloads[0]?.text).toBe("[🛠️ bash ls]");
expect(payloads[0]?.text).toBe("[🛠️ bash] ls");
expect(payloads[1]?.text).toContain("summary");
});