From 318166f8b0f58c0d2ced5ef200156a1b6c96d7d4 Mon Sep 17 00:00:00 2001 From: Peter Steinberger Date: Wed, 3 Dec 2025 09:40:05 +0000 Subject: [PATCH] Verbose: send tool result metadata only --- CHANGELOG.md | 2 +- README.md | 2 +- docs/thinking.md | 2 +- src/agents/agents.test.ts | 9 ++++ src/agents/pi.ts | 35 +++++++++++++-- src/agents/types.ts | 7 ++- src/auto-reply/command-reply.ts | 77 ++++++++++++++++++++++++--------- src/index.core.test.ts | 5 +-- 8 files changed, 108 insertions(+), 31 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index b47b0b2f7..dad0d1935 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,7 +4,7 @@ ### Highlights - **Thinking directives & state:** `/t|/think|/thinking ` (aliases off|minimal|low|medium|high|max/highest). Inline applies to that message; directive-only message pins the level for the session; `/think:off` clears. Resolution: inline > session override > `inbound.reply.thinkingDefault` > off. Pi/Tau get `--thinking ` (except off); other agents append cue words (`think` → `think hard` → `think harder` → `ultrathink`). Heartbeat probe uses `HEARTBEAT /think:high`. -- **Verbose directives + session hints:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. Directive-only replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as `[🛠️ ] …` messages (now streamed as they happen), and new sessions surface a `🧭 New session: ` hint. +- **Verbose directives + session hints:** `/v|/verbose on|full|off` mirrors thinking: inline > session > config default. Directive-only replies with an acknowledgement; invalid levels return a hint. When enabled, tool results from JSON-emitting agents (Pi/Tau, etc.) are forwarded as metadata-only `[🛠️ ]` messages (now streamed as they happen), and new sessions surface a `🧭 New session: ` hint. 
- **Directive confirmations:** Directive-only messages now reply with an acknowledgement (`Thinking level set to high.` / `Thinking disabled.`) and reject unknown levels with a helpful hint (state is unchanged). - **Pi/Tau stability:** RPC replies buffered until the assistant turn finishes; parsers return consistent `texts[]`; web auto-replies keep a warm Tau RPC process to avoid cold starts. - **Claude prompt flow:** One-time `sessionIntro` with per-message `/think:high` bodyPrefix; system prompt always sent on first turn even with `sendSystemOnce`. diff --git a/README.md b/README.md index 5470a0aa5..b5f3d86dc 100644 --- a/README.md +++ b/README.md @@ -166,7 +166,7 @@ warelay supports running on the same phone number you message from—you chat wi - Levels: `on|full` (same) or `off` (default). Use `/v on`, `/verbose:full`, `/v off`, etc.; colon optional. - Directive-only message sets a session-level verbose flag (`Verbose logging enabled./disabled.`); invalid levels reply with a hint and don’t change state. - Inline directive applies only to that message; resolution: inline > session default > `inbound.reply.verboseDefault` (config) > off. -- When verbose is on **and the agent emits structured tool results (Pi/Tau and other JSON-emitting agents)**, tool results are sent back as separate messages prefixed with `🛠️`. +- When verbose is on **and the agent emits structured tool results (Pi/Tau and other JSON-emitting agents)**, only tool metadata is forwarded: each tool result becomes `[🛠️ ]` (output/body is not inlined). - Starting a new session while verbose is on adds a first reply like `🧭 New session: ` so you can correlate runs. ### Logging (optional) diff --git a/docs/thinking.md b/docs/thinking.md index 311608a9f..fbe0d06aa 100644 --- a/docs/thinking.md +++ b/docs/thinking.md @@ -28,7 +28,7 @@ - Levels: `on|full` or `off` (default). 
- Directive-only message toggles session verbose and replies `Verbose logging enabled.` / `Verbose logging disabled.`; invalid levels return a hint without changing state. - Inline directive affects only that message; session/global defaults apply otherwise. -- When verbose is on, agents that emit structured tool results (Pi/Tau, other JSON agents) send each tool result back as its own message, prefixed with `🛠️`. +- When verbose is on, agents that emit structured tool results (Pi/Tau, other JSON agents) send each tool result back as its own metadata-only message, prefixed with `[🛠️ ]` (the tool output itself is not forwarded). ## Heartbeats - Heartbeat probe body is `HEARTBEAT /think:high`, so it always asks for max thinking on the probe. Inline directive wins; session/global defaults are used only when no directive is present. diff --git a/src/agents/agents.test.ts b/src/agents/agents.test.ts index da40b2109..9a6be1e74 100644 --- a/src/agents/agents.test.ts +++ b/src/agents/agents.test.ts @@ -67,6 +67,15 @@ describe("agent buildArgs + parseOutput helpers", () => { expect((parsed.meta?.usage as { output?: number })?.output).toBe(5); }); + it("piSpec carries tool names when present", () => { + const stdout = + '{"type":"message_end","message":{"role":"tool_result","name":"bash","content":[{"type":"text","text":"ls output"}]}}'; + const parsed = piSpec.parseOutput(stdout); + const tool = parsed.toolResults?.[0] as { text?: string; toolName?: string }; + expect(tool?.text).toBe("ls output"); + expect(tool?.toolName).toBe("bash"); + }); + it("codexSpec parses agent_message and aggregates usage", () => { const stdout = [ '{"type":"item.completed","item":{"type":"agent_message","text":"hi there"}}', diff --git a/src/agents/pi.ts b/src/agents/pi.ts index e8569c85a..2687b1c2d 100644 --- a/src/agents/pi.ts +++ b/src/agents/pi.ts @@ -1,6 +1,11 @@ import path from "node:path"; -import type { AgentMeta, AgentParseResult, AgentSpec } from "./types.js"; +import type { 
+ AgentMeta, + AgentParseResult, + AgentSpec, + AgentToolResult, +} from "./types.js"; type PiAssistantMessage = { role?: string; @@ -9,15 +14,37 @@ type PiAssistantMessage = { model?: string; provider?: string; stopReason?: string; + name?: string; + toolName?: string; + tool_call_id?: string; toolCallId?: string; }; +function inferToolName(msg: PiAssistantMessage): string | undefined { + const candidates = [ + msg.toolName, + msg.name, + msg.toolCallId, + msg.tool_call_id, + ] + .map((c) => (typeof c === "string" ? c.trim() : "")) + .filter(Boolean); + if (candidates.length) return candidates[0]; + + if (msg.role && msg.role.includes(":")) { + const suffix = msg.role.split(":").slice(1).join(":").trim(); + if (suffix) return suffix; + } + + return undefined; +} + function parsePiJson(raw: string): AgentParseResult { const lines = raw.split(/\n+/).filter((l) => l.trim().startsWith("{")); // Collect only completed assistant messages (skip streaming updates/toolcalls). const texts: string[] = []; - const toolResults: string[] = []; + const toolResults: AgentToolResult[] = []; let lastAssistant: PiAssistantMessage | undefined; let lastPushed: string | undefined; @@ -59,7 +86,9 @@ function parsePiJson(raw: string): AgentParseResult { .map((c) => c.text) .join("\n") .trim(); - if (toolText) toolResults.push(toolText); + if (toolText) { + toolResults.push({ text: toolText, toolName: inferToolName(msg) }); + } } } catch { // ignore malformed lines diff --git a/src/agents/types.ts b/src/agents/types.ts index 7d16b2b47..f826188d6 100644 --- a/src/agents/types.ts +++ b/src/agents/types.ts @@ -15,11 +15,16 @@ export type AgentMeta = { extra?: Record; }; +export type AgentToolResult = { + text: string; + toolName?: string; +}; + export type AgentParseResult = { // Plural to support agents that emit multiple assistant turns per prompt. 
texts?: string[]; mediaUrls?: string[]; - toolResults?: string[]; + toolResults?: Array; meta?: AgentMeta; }; diff --git a/src/auto-reply/command-reply.ts b/src/auto-reply/command-reply.ts index e1397219e..b40315599 100644 --- a/src/auto-reply/command-reply.ts +++ b/src/auto-reply/command-reply.ts @@ -2,7 +2,7 @@ import fs from "node:fs/promises"; import path from "node:path"; import { type AgentKind, getAgentSpec } from "../agents/index.js"; -import type { AgentMeta } from "../agents/types.js"; +import type { AgentMeta, AgentToolResult } from "../agents/types.js"; import type { WarelayConfig } from "../config/config.js"; import { isVerbose, logVerbose } from "../globals.js"; import { logError } from "../logger.js"; @@ -53,6 +53,51 @@ export type CommandReplyResult = { meta: CommandReplyMeta; }; +type ToolMessageLike = { + name?: string; + toolName?: string; + tool_call_id?: string; + toolCallId?: string; + role?: string; +}; + +function inferToolName(message?: ToolMessageLike): string | undefined { + if (!message) return undefined; + const candidates = [ + message.toolName, + message.name, + message.toolCallId, + message.tool_call_id, + ] + .map((c) => (typeof c === "string" ? c.trim() : "")) + .filter(Boolean); + if (candidates.length) return candidates[0]; + + if (message.role && message.role.includes(":")) { + const suffix = message.role.split(":").slice(1).join(":").trim(); + if (suffix) return suffix; + } + return undefined; +} + +function normalizeToolResults( + toolResults?: Array, +): AgentToolResult[] { + if (!toolResults) return []; + return toolResults + .map((tr) => (typeof tr === "string" ? { text: tr } : tr)) + .map((tr) => ({ + text: (tr.text ?? 
"").trim(), + toolName: tr.toolName?.trim() || undefined, + })) + .filter((tr) => tr.text.length > 0); +} + +function formatToolPrefix(toolName?: string) { + const label = toolName?.trim() || "tool"; + return `[๐Ÿ› ๏ธ ${label}]`; +} + export function summarizeClaudeMetadata(payload: unknown): string | undefined { if (!payload || typeof payload !== "object") return undefined; const obj = payload as Record; @@ -289,23 +334,14 @@ export async function runCommandReply( ev.message?.role === "tool_result" && Array.isArray(ev.message.content) ) { - const text = ( - ev.message.content as Array<{ text?: string }> - ) - .map((c) => c.text) - .filter((t): t is string => !!t) - .join("\n") - .trim(); - if (text) { - const { text: cleanedText, mediaUrls: mediaFound } = - splitMediaFromOutput(`๐Ÿ› ๏ธ ${text}`); - void onPartialReply({ - text: cleanedText, - mediaUrls: mediaFound?.length - ? mediaFound - : undefined, - } as ReplyPayload); - } + const toolName = inferToolName(ev.message); + const prefix = formatToolPrefix(toolName); + const { text: cleanedText, mediaUrls: mediaFound } = + splitMediaFromOutput(prefix); + void onPartialReply({ + text: cleanedText, + mediaUrls: mediaFound?.length ? mediaFound : undefined, + } as ReplyPayload); } } catch { // ignore malformed lines @@ -341,8 +377,7 @@ export async function runCommandReply( // Collect assistant texts and tool results from parseOutput (tau RPC can emit many). const parsedTexts = parsed?.texts?.map((t) => t.trim()).filter(Boolean) ?? []; - const parsedToolResults = - parsed?.toolResults?.map((t) => t.trim()).filter(Boolean) ?? 
[]; + const parsedToolResults = normalizeToolResults(parsed?.toolResults); type ReplyItem = { text: string; media?: string[] }; const replyItems: ReplyItem[] = []; @@ -352,7 +387,7 @@ export async function runCommandReply( if (includeToolResultsInline) { for (const tr of parsedToolResults) { - const prefixed = `๐Ÿ› ๏ธ ${tr}`; + const prefixed = formatToolPrefix(tr.toolName); const { text: cleanedText, mediaUrls: mediaFound } = splitMediaFromOutput(prefixed); replyItems.push({ diff --git a/src/index.core.test.ts b/src/index.core.test.ts index c2bf466ce..78743c4a7 100644 --- a/src/index.core.test.ts +++ b/src/index.core.test.ts @@ -719,7 +719,7 @@ describe("config and templating", () => { const rpcSpy = vi.spyOn(tauRpc, "runPiRpc").mockResolvedValue({ stdout: '{"type":"message","message":{"role":"assistant","content":[{"type":"text","text":"summary"}]}}\n' + - '{"type":"message_end","message":{"role":"tool_result","content":[{"type":"text","text":"ls output"}]}}', + '{"type":"message_end","message":{"role":"tool_result","name":"bash","content":[{"type":"text","text":"ls output"}]}}', stderr: "", code: 0, signal: null, @@ -744,8 +744,7 @@ describe("config and templating", () => { expect(rpcSpy).toHaveBeenCalled(); const payloads = Array.isArray(res) ? res : res ? [res] : []; expect(payloads.length).toBeGreaterThanOrEqual(2); - expect(payloads[0]?.text).toContain("๐Ÿ› ๏ธ"); - expect(payloads[0]?.text).toContain("ls output"); + expect(payloads[0]?.text).toBe("[๐Ÿ› ๏ธ bash]"); expect(payloads[1]?.text).toContain("summary"); });