openclaw/src/commands/agent.ts

458 lines
13 KiB
TypeScript

import crypto from "node:crypto";
import { lookupContextTokens } from "../agents/context.js";
import {
DEFAULT_CONTEXT_TOKENS,
DEFAULT_MODEL,
DEFAULT_PROVIDER,
} from "../agents/defaults.js";
import { runEmbeddedPiAgent } from "../agents/pi-embedded.js";
import {
DEFAULT_AGENT_WORKSPACE_DIR,
ensureAgentWorkspace,
} from "../agents/workspace.js";
import { chunkText } from "../auto-reply/chunk.js";
import type { MsgContext } from "../auto-reply/templating.js";
import {
normalizeThinkLevel,
normalizeVerboseLevel,
type ThinkLevel,
type VerboseLevel,
} from "../auto-reply/thinking.js";
import { type CliDeps, createDefaultDeps } from "../cli/deps.js";
import { type ClawdisConfig, loadConfig } from "../config/config.js";
import {
DEFAULT_IDLE_MINUTES,
loadSessionStore,
resolveSessionKey,
resolveSessionTranscriptPath,
resolveStorePath,
type SessionEntry,
saveSessionStore,
} from "../config/sessions.js";
import { emitAgentEvent } from "../infra/agent-events.js";
import { defaultRuntime, type RuntimeEnv } from "../runtime.js";
import { normalizeE164 } from "../utils.js";
type AgentCommandOpts = {
message: string;
to?: string;
sessionId?: string;
thinking?: string;
thinkingOnce?: string;
verbose?: string;
json?: boolean;
timeout?: string;
deliver?: boolean;
surface?: string;
provider?: string; // delivery provider (whatsapp|telegram|...)
bestEffortDeliver?: boolean;
abortSignal?: AbortSignal;
};
type SessionResolution = {
sessionId: string;
sessionKey?: string;
sessionEntry?: SessionEntry;
sessionStore?: Record<string, SessionEntry>;
storePath: string;
isNewSession: boolean;
persistedThinking?: ThinkLevel;
persistedVerbose?: VerboseLevel;
};
function resolveSession(opts: {
cfg: ClawdisConfig;
to?: string;
sessionId?: string;
}): SessionResolution {
const sessionCfg = opts.cfg.inbound?.session;
const scope = sessionCfg?.scope ?? "per-sender";
const mainKey = sessionCfg?.mainKey ?? "main";
const idleMinutes = Math.max(
sessionCfg?.idleMinutes ?? DEFAULT_IDLE_MINUTES,
1,
);
const idleMs = idleMinutes * 60_000;
const storePath = resolveStorePath(sessionCfg?.store);
const sessionStore = loadSessionStore(storePath);
const now = Date.now();
const ctx: MsgContext | undefined = opts.to?.trim()
? { From: opts.to }
: undefined;
let sessionKey: string | undefined = ctx
? resolveSessionKey(scope, ctx, mainKey)
: undefined;
let sessionEntry = sessionKey ? sessionStore[sessionKey] : undefined;
// If a session id was provided, prefer to re-use its entry (by id) even when no key was derived.
if (
opts.sessionId &&
(!sessionEntry || sessionEntry.sessionId !== opts.sessionId)
) {
const foundKey = Object.keys(sessionStore).find(
(key) => sessionStore[key]?.sessionId === opts.sessionId,
);
if (foundKey) {
sessionKey = sessionKey ?? foundKey;
sessionEntry = sessionStore[foundKey];
}
}
const fresh = sessionEntry && sessionEntry.updatedAt >= now - idleMs;
const sessionId =
opts.sessionId?.trim() ||
(fresh ? sessionEntry?.sessionId : undefined) ||
crypto.randomUUID();
const isNewSession = !fresh && !opts.sessionId;
const persistedThinking =
fresh && sessionEntry?.thinkingLevel
? normalizeThinkLevel(sessionEntry.thinkingLevel)
: undefined;
const persistedVerbose =
fresh && sessionEntry?.verboseLevel
? normalizeVerboseLevel(sessionEntry.verboseLevel)
: undefined;
return {
sessionId,
sessionKey,
sessionEntry,
sessionStore,
storePath,
isNewSession,
persistedThinking,
persistedVerbose,
};
}
export async function agentCommand(
opts: AgentCommandOpts,
runtime: RuntimeEnv = defaultRuntime,
deps: CliDeps = createDefaultDeps(),
) {
const body = (opts.message ?? "").trim();
if (!body) throw new Error("Message (--message) is required");
if (!opts.to && !opts.sessionId) {
throw new Error("Pass --to <E.164> or --session-id to choose a session");
}
const cfg = loadConfig();
const agentCfg = cfg.inbound?.agent;
const workspaceDirRaw = cfg.inbound?.workspace ?? DEFAULT_AGENT_WORKSPACE_DIR;
const workspace = await ensureAgentWorkspace({
dir: workspaceDirRaw,
ensureBootstrapFiles: true,
});
const workspaceDir = workspace.dir;
const allowFrom = (cfg.inbound?.allowFrom ?? [])
.map((val) => normalizeE164(val))
.filter((val) => val.length > 1);
const thinkOverride = normalizeThinkLevel(opts.thinking);
const thinkOnce = normalizeThinkLevel(opts.thinkingOnce);
if (opts.thinking && !thinkOverride) {
throw new Error(
"Invalid thinking level. Use one of: off, minimal, low, medium, high.",
);
}
if (opts.thinkingOnce && !thinkOnce) {
throw new Error(
"Invalid one-shot thinking level. Use one of: off, minimal, low, medium, high.",
);
}
const verboseOverride = normalizeVerboseLevel(opts.verbose);
if (opts.verbose && !verboseOverride) {
throw new Error('Invalid verbose level. Use "on" or "off".');
}
const timeoutSecondsRaw =
opts.timeout !== undefined
? Number.parseInt(String(opts.timeout), 10)
: (agentCfg?.timeoutSeconds ?? 600);
if (Number.isNaN(timeoutSecondsRaw) || timeoutSecondsRaw <= 0) {
throw new Error("--timeout must be a positive integer (seconds)");
}
const timeoutMs = Math.max(timeoutSecondsRaw, 1) * 1000;
const sessionResolution = resolveSession({
cfg,
to: opts.to,
sessionId: opts.sessionId,
});
const {
sessionId,
sessionKey,
sessionEntry,
sessionStore,
storePath,
isNewSession,
persistedThinking,
persistedVerbose,
} = sessionResolution;
const resolvedThinkLevel =
thinkOnce ??
thinkOverride ??
persistedThinking ??
(agentCfg?.thinkingDefault as ThinkLevel | undefined);
const resolvedVerboseLevel =
verboseOverride ??
persistedVerbose ??
(agentCfg?.verboseDefault as VerboseLevel | undefined);
// Persist explicit /command overrides to the session store when we have a key.
if (sessionStore && sessionKey) {
const entry = sessionEntry ??
sessionStore[sessionKey] ?? { sessionId, updatedAt: Date.now() };
const next: SessionEntry = { ...entry, sessionId, updatedAt: Date.now() };
if (thinkOverride) {
if (thinkOverride === "off") delete next.thinkingLevel;
else next.thinkingLevel = thinkOverride;
}
if (verboseOverride) {
if (verboseOverride === "off") delete next.verboseLevel;
else next.verboseLevel = verboseOverride;
}
sessionStore[sessionKey] = next;
await saveSessionStore(storePath, sessionStore);
}
const provider = agentCfg?.provider?.trim() || DEFAULT_PROVIDER;
const model = agentCfg?.model?.trim() || DEFAULT_MODEL;
const sessionFile = resolveSessionTranscriptPath(sessionId);
const startedAt = Date.now();
emitAgentEvent({
runId: sessionId,
stream: "job",
data: {
state: "started",
startedAt,
to: opts.to ?? null,
sessionId,
isNewSession,
},
});
let result: Awaited<ReturnType<typeof runEmbeddedPiAgent>>;
try {
result = await runEmbeddedPiAgent({
sessionId,
sessionFile,
workspaceDir,
prompt: body,
provider,
model,
thinkLevel: resolvedThinkLevel,
verboseLevel: resolvedVerboseLevel,
timeoutMs,
runId: sessionId,
abortSignal: opts.abortSignal,
onAgentEvent: (evt) => {
emitAgentEvent({
runId: sessionId,
stream: evt.stream,
data: evt.data,
});
},
});
emitAgentEvent({
runId: sessionId,
stream: "job",
data: {
state: "done",
startedAt,
endedAt: Date.now(),
to: opts.to ?? null,
sessionId,
durationMs: Date.now() - startedAt,
aborted: result.meta.aborted ?? false,
},
});
} catch (err) {
emitAgentEvent({
runId: sessionId,
stream: "job",
data: {
state: "error",
startedAt,
endedAt: Date.now(),
to: opts.to ?? null,
sessionId,
durationMs: Date.now() - startedAt,
error: String(err),
},
});
throw err;
}
// Update token+model fields in the session store.
if (sessionStore && sessionKey) {
const usage = result.meta.agentMeta?.usage;
const modelUsed = result.meta.agentMeta?.model ?? model;
const contextTokens =
agentCfg?.contextTokens ??
lookupContextTokens(modelUsed) ??
DEFAULT_CONTEXT_TOKENS;
const entry = sessionStore[sessionKey] ?? {
sessionId,
updatedAt: Date.now(),
};
const next: SessionEntry = {
...entry,
sessionId,
updatedAt: Date.now(),
model: modelUsed,
contextTokens,
};
next.abortedLastRun = result.meta.aborted ?? false;
if (usage) {
const input = usage.input ?? 0;
const output = usage.output ?? 0;
const promptTokens =
input + (usage.cacheRead ?? 0) + (usage.cacheWrite ?? 0);
next.inputTokens = input;
next.outputTokens = output;
next.totalTokens =
promptTokens > 0 ? promptTokens : (usage.total ?? input);
}
sessionStore[sessionKey] = next;
await saveSessionStore(storePath, sessionStore);
}
const payloads = result.payloads ?? [];
const deliver = opts.deliver === true;
const bestEffortDeliver = opts.bestEffortDeliver === true;
const deliveryProvider = (opts.provider ?? "whatsapp").toLowerCase();
const whatsappTarget = opts.to ? normalizeE164(opts.to) : allowFrom[0];
const telegramTarget = opts.to?.trim() || undefined;
const logDeliveryError = (err: unknown) => {
const message = `Delivery failed (${deliveryProvider}): ${String(err)}`;
runtime.error?.(message);
if (!runtime.error) runtime.log(message);
};
if (deliver) {
if (deliveryProvider === "whatsapp" && !whatsappTarget) {
const err = new Error(
"Delivering to WhatsApp requires --to <E.164> or inbound.allowFrom[0]",
);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (deliveryProvider === "telegram" && !telegramTarget) {
const err = new Error("Delivering to Telegram requires --to <chatId>");
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (deliveryProvider === "webchat") {
const err = new Error(
"Delivering to WebChat is not supported via `clawdis agent`; use WebChat RPC instead.",
);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
if (
deliveryProvider !== "whatsapp" &&
deliveryProvider !== "telegram" &&
deliveryProvider !== "webchat"
) {
const err = new Error(`Unknown provider: ${deliveryProvider}`);
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
}
if (opts.json) {
const normalizedPayloads = payloads.map((p) => ({
text: p.text ?? "",
mediaUrl: p.mediaUrl ?? null,
mediaUrls: p.mediaUrls ?? (p.mediaUrl ? [p.mediaUrl] : undefined),
}));
runtime.log(
JSON.stringify(
{ payloads: normalizedPayloads, meta: result.meta },
null,
2,
),
);
if (!deliver) return;
}
if (payloads.length === 0) {
runtime.log("No reply from agent.");
return;
}
for (const payload of payloads) {
const mediaList =
payload.mediaUrls ?? (payload.mediaUrl ? [payload.mediaUrl] : []);
if (!opts.json) {
const lines: string[] = [];
if (payload.text) lines.push(payload.text.trimEnd());
for (const url of mediaList) lines.push(`MEDIA:${url}`);
runtime.log(lines.join("\n"));
}
if (!deliver) continue;
const text = payload.text ?? "";
const media = mediaList;
if (!text && media.length === 0) continue;
if (deliveryProvider === "whatsapp" && whatsappTarget) {
try {
const primaryMedia = media[0];
await deps.sendMessageWhatsApp(whatsappTarget, text, {
verbose: false,
mediaUrl: primaryMedia,
});
for (const extra of media.slice(1)) {
await deps.sendMessageWhatsApp(whatsappTarget, "", {
verbose: false,
mediaUrl: extra,
});
}
} catch (err) {
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
continue;
}
if (deliveryProvider === "telegram" && telegramTarget) {
try {
if (media.length === 0) {
for (const chunk of chunkText(text, 4000)) {
await deps.sendMessageTelegram(telegramTarget, chunk, {
verbose: false,
});
}
} else {
let first = true;
for (const url of media) {
const caption = first ? text : "";
first = false;
await deps.sendMessageTelegram(telegramTarget, caption, {
verbose: false,
mediaUrl: url,
});
}
}
} catch (err) {
if (!bestEffortDeliver) throw err;
logDeliveryError(err);
}
}
}
}