Verbose: batch rapid tool results

2025-12-03 10:11:41 +00:00 · 2025-12-03 10:11:41 +00:00 · 38a03ff2c8
parent 527bed2b53
commit 38a03ff2c8
3 changed files with 63 additions and 11 deletions
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@ -5,6 +5,7 @@
 ### Highlights
 - **Thinking directives & state:** `/t|/think|/thinking <level>` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking <level>` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`.
 - **Verbose directives + session hints:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. A directive-only message replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as metadata-only `[🛠️ <tool-name>] <arg>` messages (now streamed as they happen), and new sessions surface a `🧭 New session: <id>` hint.
 - **Verbose tool coalescing:** successive tool results of the same tool within ~250ms are batched into one `[🛠️ tool] arg1, arg2` message to reduce WhatsApp noise.
 - **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged).
 - **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts.
 - **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`.
--- a/src/auto-reply/command-reply.ts
+++ b/src/auto-reply/command-reply.ts
@ -53,6 +53,9 @@ export type CommandReplyResult = {
  meta: CommandReplyMeta;
 };
 // Debounce window (ms) for coalescing successive tool_result messages: a pending
 // batch is flushed when this long passes without another tool result arriving.
 const TOOL_RESULT_DEBOUNCE_MS = 250;
 type ToolMessageLike = {
  name?: string;
  toolName?: string;
@ -120,6 +123,12 @@ function formatToolPrefix(toolName?: string, meta?: string) {
  return extra ? `[🛠️ ${label} ${extra}]` : `[🛠️ ${label}]`;
 }
 /**
  * Builds the single-line summary for a batch of results from one tool:
  * the bracketed tool prefix followed by the metas joined with ", ".
  * Falsy metas are dropped; when none remain only the prefix is returned.
  */
 function formatToolAggregate(toolName?: string, metas?: string[]) {
  const prefix = formatToolPrefix(toolName);
  const kept: string[] = [];
  for (const entry of metas ?? []) {
    if (entry) kept.push(entry);
  }
  return kept.length > 0 ? `${prefix} ${kept.join(", ")}` : prefix;
 }
 export function summarizeClaudeMetadata(payload: unknown): string | undefined {
  if (!payload || typeof payload !== "object") return undefined;
  const obj = payload as Record<string, unknown>;
@ -321,6 +330,27 @@ export async function runCommandReply(
  let queuedMs: number | undefined;
  let queuedAhead: number | undefined;
  try {
    let pendingToolName: string | undefined;
    let pendingMetas: string[] = [];
    let pendingTimer: NodeJS.Timeout | null = null;
    // Sends the buffered tool results as one partial reply, then resets the
    // buffer and cancels any scheduled flush. No-op without a partial-reply
    // callback or when nothing is buffered.
    const flushPendingTool = () => {
      const nothingBuffered = !pendingToolName && pendingMetas.length === 0;
      if (!onPartialReply || nothingBuffered) return;

      const aggregate = splitMediaFromOutput(
        formatToolAggregate(pendingToolName, pendingMetas),
      );
      const media = aggregate.mediaUrls;
      // Fire-and-forget: the reply is dispatched before the buffer is cleared.
      void onPartialReply({
        text: aggregate.text,
        mediaUrls: media?.length ? media : undefined,
      } as ReplyPayload);

      pendingMetas = [];
      pendingToolName = undefined;
      if (!pendingTimer) return;
      clearTimeout(pendingTimer);
      pendingTimer = null;
    };
    const run = async () => {
      // Prefer long-lived tau RPC for pi agent to avoid cold starts.
      if (agentKind === "pi") {
@ -338,7 +368,7 @@ export async function runCommandReply(
          }
          return copy;
        })();
-        return await runPiRpc({
+        const rpcResult = await runPiRpc({
          argv: rpcArgv,
          cwd: reply.cwd,
          prompt: body,
@ -363,13 +393,20 @@ export async function runCommandReply(
                    ) {
                      const toolName = inferToolName(ev.message);
                      const meta = inferToolMeta(ev.message);
-                      const prefix = formatToolPrefix(toolName, meta);
+                      if (
-                      const { text: cleanedText, mediaUrls: mediaFound } =
+                        pendingToolName &&
-                        splitMediaFromOutput(prefix);
+                        toolName &&
-                      void onPartialReply({
+                        toolName !== pendingToolName
-                        text: cleanedText,
+                      ) {
-                        mediaUrls: mediaFound?.length ? mediaFound : undefined,
+                        flushPendingTool();
-                      } as ReplyPayload);
+                      }
                      if (!pendingToolName) pendingToolName = toolName;
                      if (meta) pendingMetas.push(meta);
                      if (pendingTimer) clearTimeout(pendingTimer);
                      pendingTimer = setTimeout(
                        flushPendingTool,
                        TOOL_RESULT_DEBOUNCE_MS,
                      );
                    }
                  } catch {
                    // ignore malformed lines
@ -377,6 +414,8 @@ export async function runCommandReply(
                }
              : undefined,
        });
        flushPendingTool();
        return rpcResult;
      }
      return await commandRunner(finalArgv, { timeoutMs, cwd: reply.cwd });
    };
@ -414,8 +453,20 @@ export async function runCommandReply(
      verboseLevel === "on" && !onPartialReply && parsedToolResults.length > 0;
    if (includeToolResultsInline) {
-      for (const tr of parsedToolResults) {
+      const aggregated = parsedToolResults.reduce<
-        const prefixed = formatToolPrefix(tr.toolName, tr.meta);
+        { toolName?: string; metas: string[] }[]
      >((acc, tr) => {
        const last = acc.at(-1);
        if (last && last.toolName === tr.toolName) {
          if (tr.meta) last.metas.push(tr.meta);
        } else {
          acc.push({ toolName: tr.toolName, metas: tr.meta ? [tr.meta] : [] });
        }
        return acc;
      }, []);
      for (const tr of aggregated) {
        const prefixed = formatToolAggregate(tr.toolName, tr.metas);
        const { text: cleanedText, mediaUrls: mediaFound } =
          splitMediaFromOutput(prefixed);
        replyItems.push({
--- a/src/index.core.test.ts
+++ b/src/index.core.test.ts
@ -744,7 +744,7 @@ describe("config and templating", () => {
    expect(rpcSpy).toHaveBeenCalled();
    const payloads = Array.isArray(res) ? res : res ? [res] : [];
    expect(payloads.length).toBeGreaterThanOrEqual(2);
-    expect(payloads[0]?.text).toBe("[🛠️ bash ls]");
+    expect(payloads[0]?.text).toBe("[🛠️ bash] ls");
    expect(payloads[1]?.text).toContain("summary");
  });