feat(mac): host PeekabooBridge for ui

main
Peter Steinberger 2025-12-13 16:55:41 +00:00
parent fd566bda14
commit c17440f5b4
21 changed files with 1197 additions and 422 deletions

View File

@ -1,6 +1,15 @@
{ {
"originHash" : "ee7127ff91914397f9991e22a0b06ab0bca0d83582adeed6011198c49167631b", "originHash" : "5de6834e5cb92c45c61a2e6792b780ac231c5741def70f1efa9ec857fa12f8cb",
"pins" : [ "pins" : [
{
"identity" : "eventsource",
"kind" : "remoteSourceControl",
"location" : "https://github.com/mattt/eventsource.git",
"state" : {
"revision" : "ca2a9d90cbe49e09b92f4b6ebd922c03ebea51d0",
"version" : "1.3.0"
}
},
{ {
"identity" : "menubarextraaccess", "identity" : "menubarextraaccess",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",
@ -19,6 +28,96 @@
"version" : "2.8.1" "version" : "2.8.1"
} }
}, },
{
"identity" : "swift-algorithms",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-algorithms",
"state" : {
"revision" : "87e50f483c54e6efd60e885f7f5aa946cee68023",
"version" : "1.2.1"
}
},
{
"identity" : "swift-asn1",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-asn1.git",
"state" : {
"revision" : "810496cf121e525d660cd0ea89a758740476b85f",
"version" : "1.5.1"
}
},
{
"identity" : "swift-async-algorithms",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-async-algorithms",
"state" : {
"revision" : "6c050d5ef8e1aa6342528460db614e9770d7f804",
"version" : "1.1.1"
}
},
{
"identity" : "swift-collections",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-collections",
"state" : {
"branch" : "main",
"revision" : "8e5e4a8f3617283b556064574651fc0869943c9a"
}
},
{
"identity" : "swift-configuration",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-configuration",
"state" : {
"branch" : "main",
"revision" : "3528deb75256d7dcbb0d71fa75077caae0a8c749"
}
},
{
"identity" : "swift-crypto",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-crypto.git",
"state" : {
"revision" : "6f70fa9eab24c1fd982af18c281c4525d05e3095",
"version" : "4.2.0"
}
},
{
"identity" : "swift-log",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-log.git",
"state" : {
"revision" : "bc386b95f2a16ccd0150a8235e7c69eab2b866ca",
"version" : "1.8.0"
}
},
{
"identity" : "swift-numerics",
"kind" : "remoteSourceControl",
"location" : "https://github.com/apple/swift-numerics.git",
"state" : {
"revision" : "0c0290ff6b24942dadb83a929ffaaa1481df04a2",
"version" : "1.1.1"
}
},
{
"identity" : "swift-sdk",
"kind" : "remoteSourceControl",
"location" : "https://github.com/modelcontextprotocol/swift-sdk.git",
"state" : {
"revision" : "c0407a0b52677cb395d824cac2879b963075ba8c",
"version" : "0.10.2"
}
},
{
"identity" : "swift-service-lifecycle",
"kind" : "remoteSourceControl",
"location" : "https://github.com/swift-server/swift-service-lifecycle",
"state" : {
"revision" : "1de37290c0ab3c5a96028e0f02911b672fd42348",
"version" : "2.9.1"
}
},
{ {
"identity" : "swift-subprocess", "identity" : "swift-subprocess",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",

View File

@ -18,6 +18,9 @@ let package = Package(
.package(url: "https://github.com/swiftlang/swift-subprocess.git", from: "0.1.0"), .package(url: "https://github.com/swiftlang/swift-subprocess.git", from: "0.1.0"),
.package(url: "https://github.com/sparkle-project/Sparkle", from: "2.8.1"), .package(url: "https://github.com/sparkle-project/Sparkle", from: "2.8.1"),
.package(path: "../shared/ClawdisKit"), .package(path: "../shared/ClawdisKit"),
.package(path: "../../Peekaboo/Core/PeekabooCore"),
.package(path: "../../Peekaboo/Core/PeekabooAutomationKit"),
.package(path: "../../Peekaboo/Core/PeekabooVisualizer"),
], ],
targets: [ targets: [
.target( .target(
@ -42,6 +45,9 @@ let package = Package(
.product(name: "MenuBarExtraAccess", package: "MenuBarExtraAccess"), .product(name: "MenuBarExtraAccess", package: "MenuBarExtraAccess"),
.product(name: "Subprocess", package: "swift-subprocess"), .product(name: "Subprocess", package: "swift-subprocess"),
.product(name: "Sparkle", package: "Sparkle"), .product(name: "Sparkle", package: "Sparkle"),
.product(name: "PeekabooBridge", package: "PeekabooCore"),
.product(name: "PeekabooAutomationKit", package: "PeekabooAutomationKit"),
.product(name: "PeekabooVisualizer", package: "PeekabooVisualizer"),
], ],
resources: [ resources: [
.copy("Resources/Clawdis.icns"), .copy("Resources/Clawdis.icns"),
@ -55,6 +61,8 @@ let package = Package(
dependencies: [ dependencies: [
"ClawdisIPC", "ClawdisIPC",
"ClawdisProtocol", "ClawdisProtocol",
.product(name: "PeekabooBridge", package: "PeekabooCore"),
.product(name: "PeekabooAutomationKit", package: "PeekabooAutomationKit"),
], ],
swiftSettings: [ swiftSettings: [
.enableUpcomingFeature("StrictConcurrency"), .enableUpcomingFeature("StrictConcurrency"),

View File

@ -155,6 +155,15 @@ final class AppState: ObservableObject {
didSet { self.ifNotPreview { UserDefaults.standard.set(self.canvasEnabled, forKey: canvasEnabledKey) } } didSet { self.ifNotPreview { UserDefaults.standard.set(self.canvasEnabled, forKey: canvasEnabledKey) } }
} }
/// Whether the app should host the PeekabooBridge.
///
/// Outside of previews, every change is written to `UserDefaults` under
/// `peekabooBridgeEnabledKey` and forwarded to `PeekabooBridgeHostCoordinator`.
@Published var peekabooBridgeEnabled: Bool {
    didSet {
        self.ifNotPreview {
            let defaults = UserDefaults.standard
            defaults.set(self.peekabooBridgeEnabled, forKey: peekabooBridgeEnabledKey)
            // The coordinator API is async, so the toggle is applied from an unstructured task.
            Task {
                await PeekabooBridgeHostCoordinator.shared.setEnabled(self.peekabooBridgeEnabled)
            }
        }
    }
}
@Published var attachExistingGatewayOnly: Bool { @Published var attachExistingGatewayOnly: Bool {
didSet { didSet {
self.ifNotPreview { self.ifNotPreview {
@ -231,6 +240,8 @@ final class AppState: ObservableObject {
let storedPort = UserDefaults.standard.integer(forKey: webChatPortKey) let storedPort = UserDefaults.standard.integer(forKey: webChatPortKey)
self.webChatPort = storedPort > 0 ? storedPort : 18788 self.webChatPort = storedPort > 0 ? storedPort : 18788
self.canvasEnabled = UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true self.canvasEnabled = UserDefaults.standard.object(forKey: canvasEnabledKey) as? Bool ?? true
self.peekabooBridgeEnabled = UserDefaults.standard
.object(forKey: peekabooBridgeEnabledKey) as? Bool ?? true
self.attachExistingGatewayOnly = UserDefaults.standard.bool(forKey: attachExistingGatewayOnlyKey) self.attachExistingGatewayOnly = UserDefaults.standard.bool(forKey: attachExistingGatewayOnlyKey)
if !self.isPreview { if !self.isPreview {

View File

@ -24,6 +24,7 @@ let webChatEnabledKey = "clawdis.webChatEnabled"
let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled" let webChatSwiftUIEnabledKey = "clawdis.webChatSwiftUIEnabled"
let webChatPortKey = "clawdis.webChatPort" let webChatPortKey = "clawdis.webChatPort"
let canvasEnabledKey = "clawdis.canvasEnabled" let canvasEnabledKey = "clawdis.canvasEnabled"
let peekabooBridgeEnabledKey = "clawdis.peekabooBridgeEnabled"
let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled" let deepLinkAgentEnabledKey = "clawdis.deepLinkAgentEnabled"
let deepLinkKeyKey = "clawdis.deepLinkKey" let deepLinkKeyKey = "clawdis.deepLinkKey"
let modelCatalogPathKey = "clawdis.modelCatalogPath" let modelCatalogPathKey = "clawdis.modelCatalogPath"

View File

@ -58,53 +58,6 @@ enum ControlRequestHandler {
let result = await AgentRPC.shared.status() let result = await AgentRPC.shared.status()
return Response(ok: result.ok, message: result.error) return Response(ok: result.ok, message: result.error)
case .uiListScreens:
let screens = await MainActor.run { UIScreenService.listScreens() }
let payload = try JSONEncoder().encode(screens)
return Response(ok: true, payload: payload)
case let .uiScreenshot(screenIndex, windowID):
let authorized = await PermissionManager
.ensure([.screenRecording], interactive: false)[.screenRecording] ?? false
guard authorized else { return Response(ok: false, message: "screen recording permission missing") }
let resolution: (screenIndex: Int?, displayID: UInt32?) = await Task { @MainActor in
if let screenIndex,
let match = UIScreenService.listScreens().first(where: { $0.index == screenIndex })
{
return (screenIndex, match.displayID)
}
return (nil, nil)
}.value
let data = await Task { @MainActor in
await Screenshotter.capture(displayID: resolution.displayID, windowID: windowID)
}.value
guard let data else {
return Response(ok: false, message: "screenshot failed")
}
let dir = FileManager.default.temporaryDirectory.appendingPathComponent("clawdis-ui", isDirectory: true)
try? FileManager.default.createDirectory(at: dir, withIntermediateDirectories: true)
let outURL = dir.appendingPathComponent("screenshot-\(Int(Date().timeIntervalSince1970 * 1000)).png")
do {
try data.write(to: outURL)
} catch {
return Response(ok: false, message: "failed to write screenshot: \(error.localizedDescription)")
}
let size = ScreenshotSize.readPNGSize(data: data)
let result = UIScreenshotResult(
path: outURL.path,
width: size?.width ?? 0,
height: size?.height ?? 0,
screenIndex: resolution.screenIndex,
displayID: resolution.displayID,
windowID: windowID)
let payload = try JSONEncoder().encode(result)
return Response(ok: true, payload: payload)
case let .runShell(command, cwd, env, timeoutSec, needsSR): case let .runShell(command, cwd, env, timeoutSec, needsSR):
if needsSR { if needsSR {
let authorized = await PermissionManager let authorized = await PermissionManager

View File

@ -57,6 +57,11 @@ struct GeneralSettings: View {
subtitle: "Allow the agent to show and control the Canvas panel.", subtitle: "Allow the agent to show and control the Canvas panel.",
binding: self.$state.canvasEnabled) binding: self.$state.canvasEnabled)
SettingsToggleRow(
title: "Enable Peekaboo Bridge",
subtitle: "Allow signed tools to drive UI automation via `clawdis-mac ui …`.",
binding: self.$state.peekabooBridgeEnabled)
SettingsToggleRow( SettingsToggleRow(
title: "Enable debug tools", title: "Enable debug tools",
subtitle: "Show the Debug tab with development utilities.", subtitle: "Show the Debug tab with development utilities.",

View File

@ -183,6 +183,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
Task { await HealthStore.shared.refresh(onDemand: true) } Task { await HealthStore.shared.refresh(onDemand: true) }
Task { await PortGuardian.shared.sweep(mode: AppStateStore.shared.connectionMode) } Task { await PortGuardian.shared.sweep(mode: AppStateStore.shared.connectionMode) }
Task { await self.socketServer.start() } Task { await self.socketServer.start() }
Task { await PeekabooBridgeHostCoordinator.shared.setEnabled(AppStateStore.shared.peekabooBridgeEnabled) }
self.scheduleFirstRunOnboardingIfNeeded() self.scheduleFirstRunOnboardingIfNeeded()
// Developer/testing helper: auto-open WebChat when launched with --webchat // Developer/testing helper: auto-open WebChat when launched with --webchat
@ -202,6 +203,7 @@ final class AppDelegate: NSObject, NSApplicationDelegate {
Task { await AgentRPC.shared.shutdown() } Task { await AgentRPC.shared.shutdown() }
Task { await GatewayConnection.shared.shutdown() } Task { await GatewayConnection.shared.shutdown() }
Task { await self.socketServer.stop() } Task { await self.socketServer.stop() }
Task { await PeekabooBridgeHostCoordinator.shared.stop() }
} }
@MainActor @MainActor

View File

@ -0,0 +1,254 @@
import Foundation
import os
import PeekabooAutomationKit
import PeekabooBridge
import PeekabooFoundation
import PeekabooVisualizer
/// Owns the lifetime of the PeekabooBridge host that runs inside the app.
///
/// At most one host exists at a time; `setEnabled(_:)` is the single entry point used to
/// bring it up or tear it down.
@MainActor
final class PeekabooBridgeHostCoordinator {
    static let shared = PeekabooBridgeHostCoordinator()

    private let logger = Logger(subsystem: "com.steipete.clawdis", category: "PeekabooBridge")
    private var host: PeekabooBridgeHost?
    private var services: ClawdisPeekabooBridgeServices?

    /// Starts the host when `enabled` is true (no-op if already running), otherwise stops it.
    func setEnabled(_ enabled: Bool) async {
        guard enabled else {
            await self.stop()
            return
        }
        await self.startIfNeeded()
    }

    /// Stops the running host, if any, and releases the host and its services.
    ///
    /// NOTE(review): `host` is cleared only after `host.stop()` has been awaited. A
    /// `setEnabled(true)` that interleaves during that await still sees a non-nil host and
    /// returns without starting a new one. Confirm whether rapid off/on toggles need handling.
    func stop() async {
        guard let runningHost = self.host else { return }
        await runningHost.stop()
        self.host = nil
        self.services = nil
        self.logger.info("PeekabooBridge host stopped")
    }

    /// Creates and starts the host unless one is already registered.
    private func startIfNeeded() async {
        if self.host != nil { return }

        // Only clients from this team ID are allowlisted; no individual bundles are.
        let trustedTeams: Set<String> = ["Y5PE65HELJ"]
        let trustedBundles: Set<String> = []
        let socketPath = PeekabooBridgeConstants.clawdisSocketPath

        let bridgeServices = ClawdisPeekabooBridgeServices()
        let bridgeServer = PeekabooBridgeServer(
            services: bridgeServices,
            hostKind: .gui,
            allowlistedTeams: trustedTeams,
            allowlistedBundles: trustedBundles)
        let bridgeHost = PeekabooBridgeHost(
            socketPath: socketPath,
            server: bridgeServer,
            allowedTeamIDs: trustedTeams,
            requestTimeoutSec: 10)

        // Register before awaiting `start()` so an overlapping call sees the host and bails out.
        self.services = bridgeServices
        self.host = bridgeHost
        await bridgeHost.start()
        self.logger.info("PeekabooBridge host started at \(socketPath, privacy: .public)")
    }
}
/// Service bundle handed to the PeekabooBridge server.
///
/// Every service that accepts a feedback client shares one visualizer-backed client, and
/// the automation service shares the snapshot manager exposed through `snapshots`.
@MainActor
private final class ClawdisPeekabooBridgeServices: PeekabooBridgeServiceProviding {
    let permissions: PermissionsService
    let screenCapture: any ScreenCaptureServiceProtocol
    let automation: any UIAutomationServiceProtocol
    let windows: any WindowManagementServiceProtocol
    let applications: any ApplicationServiceProtocol
    let menu: any MenuServiceProtocol
    let dock: any DockServiceProtocol
    let dialogs: any DialogServiceProtocol
    let snapshots: any SnapshotManagerProtocol

    init() {
        let logService = LoggingService(subsystem: "com.steipete.clawdis.peekaboo")
        let feedback = PeekabooVisualizerFeedbackClient(client: .shared)

        // Snapshots live in memory only; artifacts are kept on cleanup.
        // NOTE(review): `snapshotValidityWindow` is presumably in seconds - confirm.
        let snapshotOptions = InMemorySnapshotManager.Options(
            snapshotValidityWindow: 600,
            maxSnapshots: 50,
            deleteArtifactsOnCleanup: false)
        let snapshotStore = InMemorySnapshotManager(options: snapshotOptions)

        let appService = ApplicationService(feedbackClient: feedback)

        // Wrap the plain capture service so captures also trigger visual feedback.
        let rawCapture = ScreenCaptureService(loggingService: logService)
        let capture = FeedbackScreenCaptureService(base: rawCapture, feedbackClient: feedback)

        self.permissions = PermissionsService()
        self.snapshots = snapshotStore
        self.applications = appService
        self.screenCapture = capture
        self.automation = UIAutomationService(
            snapshotManager: snapshotStore,
            loggingService: logService,
            searchPolicy: .balanced,
            feedbackClient: feedback)
        self.windows = WindowManagementService(applicationService: appService, feedbackClient: feedback)
        self.menu = MenuService(applicationService: appService, feedbackClient: feedback)
        self.dock = DockService(feedbackClient: feedback)
        self.dialogs = DialogService(feedbackClient: feedback)
    }
}
/// Adapts a `VisualizationClient` to the `AutomationFeedbackClient` protocol.
///
/// Most methods forward their arguments unchanged. Window operations and space-switch
/// directions are translated into the visualizer's own enum types first.
@MainActor
private final class PeekabooVisualizerFeedbackClient: AutomationFeedbackClient {
    private let visualizer: VisualizationClient

    init(client: VisualizationClient) {
        self.visualizer = client
    }

    func connect() {
        self.visualizer.connect()
    }

    func showClickFeedback(at point: CGPoint, type: ClickType) async -> Bool {
        await self.visualizer.showClickFeedback(at: point, type: type)
    }

    func showTypingFeedback(keys: [String], duration: TimeInterval, cadence: TypingCadence) async -> Bool {
        await self.visualizer.showTypingFeedback(keys: keys, duration: duration, cadence: cadence)
    }

    func showScrollFeedback(at point: CGPoint, direction: ScrollDirection, amount: Int) async -> Bool {
        await self.visualizer.showScrollFeedback(at: point, direction: direction, amount: amount)
    }

    func showHotkeyDisplay(keys: [String], duration: TimeInterval) async -> Bool {
        await self.visualizer.showHotkeyDisplay(keys: keys, duration: duration)
    }

    func showSwipeGesture(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        await self.visualizer.showSwipeGesture(from: from, to: to, duration: duration)
    }

    func showMouseMovement(from: CGPoint, to: CGPoint, duration: TimeInterval) async -> Bool {
        await self.visualizer.showMouseMovement(from: from, to: to, duration: duration)
    }

    /// Translates the automation-side operation kind case-by-case, then forwards.
    func showWindowOperation(_ kind: WindowOperationKind, windowRect: CGRect, duration: TimeInterval) async -> Bool {
        let operation: WindowOperation
        switch kind {
        case .close:
            operation = .close
        case .minimize:
            operation = .minimize
        case .maximize:
            operation = .maximize
        case .move:
            operation = .move
        case .resize:
            operation = .resize
        case .setBounds:
            operation = .setBounds
        case .focus:
            operation = .focus
        }
        return await self.visualizer.showWindowOperation(operation, windowRect: windowRect, duration: duration)
    }

    func showDialogInteraction(
        element: DialogElementType,
        elementRect: CGRect,
        action: DialogActionType) async -> Bool
    {
        await self.visualizer.showDialogInteraction(element: element, elementRect: elementRect, action: action)
    }

    func showMenuNavigation(menuPath: [String]) async -> Bool {
        await self.visualizer.showMenuNavigation(menuPath: menuPath)
    }

    /// Any direction other than `.left` is shown as a switch to the right.
    func showSpaceSwitch(from: Int, to: Int, direction: SpaceSwitchDirection) async -> Bool {
        let visualDirection: SpaceDirection
        if direction == .left {
            visualDirection = .left
        } else {
            visualDirection = .right
        }
        return await self.visualizer.showSpaceSwitch(from: from, to: to, direction: visualDirection)
    }

    func showAppLaunch(appName: String, iconPath: String?) async -> Bool {
        await self.visualizer.showAppLaunch(appName: appName, iconPath: iconPath)
    }

    func showAppQuit(appName: String, iconPath: String?) async -> Bool {
        await self.visualizer.showAppQuit(appName: appName, iconPath: iconPath)
    }

    func showScreenshotFlash(in rect: CGRect) async -> Bool {
        await self.visualizer.showScreenshotFlash(in: rect)
    }

    func showWatchCapture(in rect: CGRect) async -> Bool {
        await self.visualizer.showWatchCapture(in: rect)
    }
}
/// Screen-capture decorator that shows visual feedback after each successful capture.
///
/// The capture itself is delegated to `base`. Feedback is shown only when the capture did
/// not throw and a rectangle for the captured region is available.
@MainActor
private final class FeedbackScreenCaptureService: ScreenCaptureServiceProtocol {
    private let base: any ScreenCaptureServiceProtocol
    private let feedbackClient: any AutomationFeedbackClient

    init(base: any ScreenCaptureServiceProtocol, feedbackClient: any AutomationFeedbackClient) {
        self.base = base
        self.feedbackClient = feedbackClient
    }

    /// Captures a display; feedback covers the display bounds from the capture metadata.
    func captureScreen(
        displayIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let captured = try await self.base.captureScreen(
            displayIndex: displayIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let region = captured.metadata.displayInfo?.bounds
        await self.announce(visualizerMode, over: region)
        return captured
    }

    /// Captures an application window; feedback covers the window bounds from the metadata.
    func captureWindow(
        appIdentifier: String,
        windowIndex: Int?,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let captured = try await self.base.captureWindow(
            appIdentifier: appIdentifier,
            windowIndex: windowIndex,
            visualizerMode: visualizerMode,
            scale: scale)
        let region = captured.metadata.windowInfo?.bounds
        await self.announce(visualizerMode, over: region)
        return captured
    }

    /// Captures the frontmost window; feedback covers the window bounds from the metadata.
    func captureFrontmost(
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let captured = try await self.base.captureFrontmost(visualizerMode: visualizerMode, scale: scale)
        let region = captured.metadata.windowInfo?.bounds
        await self.announce(visualizerMode, over: region)
        return captured
    }

    /// Captures an explicit rectangle; feedback covers that same rectangle.
    func captureArea(
        _ rect: CGRect,
        visualizerMode: CaptureVisualizerMode,
        scale: CaptureScalePreference) async throws -> CaptureResult
    {
        let captured = try await self.base.captureArea(rect, visualizerMode: visualizerMode, scale: scale)
        await self.announce(visualizerMode, over: rect)
        return captured
    }

    func hasScreenRecordingPermission() async -> Bool {
        await self.base.hasScreenRecordingPermission()
    }

    /// Shows the feedback matching `mode` over `region`; does nothing when `region` is nil.
    /// The feedback client's success flag is intentionally ignored.
    private func announce(_ mode: CaptureVisualizerMode, over region: CGRect?) async {
        if let region {
            switch mode {
            case .screenshotFlash:
                _ = await self.feedbackClient.showScreenshotFlash(in: region)
            case .watchCapture:
                _ = await self.feedbackClient.showWatchCapture(in: region)
            }
        }
    }
}

View File

@ -1,80 +0,0 @@
import AppKit
import CoreGraphics
import Foundation
@preconcurrency import ScreenCaptureKit
import VideoToolbox
/// One-shot screenshot helper built on ScreenCaptureKit.
enum Screenshotter {
    /// Captures a single frame and returns it as PNG data.
    ///
    /// - Parameters:
    ///   - displayID: Display to capture. When nil, the first display in the shareable
    ///     content is used.
    ///   - windowID: Window to capture. When it matches a shareable window, that window is
    ///     captured on its own; otherwise the target display is captured instead.
    /// - Returns: PNG data, or nil when shareable content is unavailable, no capture target
    ///   can be resolved, the frame output cannot be registered, or the stream fails to start.
    @MainActor
    static func capture(displayID: UInt32?, windowID: UInt32?) async -> Data? {
        guard let content = try? await SCShareableContent.current else { return nil }

        let targetDisplay: SCDisplay? = if let displayID {
            content.displays.first(where: { $0.displayID == displayID })
        } else {
            content.displays.first
        }

        let filter: SCContentFilter
        if let windowID, let win = content.windows.first(where: { $0.windowID == windowID }) {
            filter = SCContentFilter(desktopIndependentWindow: win)
        } else if let display = targetDisplay {
            filter = SCContentFilter(display: display, excludingWindows: [])
        } else {
            return nil
        }

        let config = SCStreamConfiguration()
        if let display = targetDisplay {
            // Output is sized to the target display, also when a single window is captured.
            config.width = display.width
            config.height = display.height
        }
        config.scalesToFit = true
        config.colorSpaceName = CGColorSpace.displayP3

        let stream = SCStream(filter: filter, configuration: config, delegate: nil)
        let grabber = FrameGrabber()
        do {
            try stream.addStreamOutput(
                grabber,
                type: .screen,
                sampleHandlerQueue: DispatchQueue(label: "com.steipete.clawdis.sshot"))
        } catch {
            // Without a registered output no frame can ever reach the grabber, so waiting
            // on it would never finish. Report the capture as failed instead.
            return nil
        }

        do {
            try await stream.startCapture()
            let data = await grabber.awaitPNG()
            // Best effort: the frame is already in hand, so a failing stop is not fatal.
            try? await stream.stopCapture()
            return data
        } catch {
            return nil
        }
    }
}
/// Stream output that converts the first usable screen frame to PNG and hands it to a
/// single awaiting caller.
///
/// Frames arrive on the stream's sample-handler queue while `awaitPNG()` runs on the
/// caller's task, so all mutable state is guarded by `lock`. A frame that arrives before
/// anyone is waiting is buffered rather than dropped.
final class FrameGrabber: NSObject, SCStreamOutput {
    private let lock = NSLock()
    private var continuation: CheckedContinuation<Data?, Never>?
    /// PNG produced before `awaitPNG()` installed its continuation.
    private var bufferedPNG: Data?
    private var delivered = false

    /// Suspends until the first frame has been encoded and returns its PNG data.
    ///
    /// Returns immediately when the frame is already available. The buffered frame is
    /// handed out once; a later call returns nil.
    func awaitPNG() async -> Data? {
        await withCheckedContinuation { cont in
            self.lock.lock()
            guard self.delivered else {
                // No frame yet: park the continuation for the stream callback to resume.
                self.continuation = cont
                self.lock.unlock()
                return
            }
            let ready = self.bufferedPNG
            self.bufferedPNG = nil
            self.lock.unlock()
            cont.resume(returning: ready)
        }
    }

    nonisolated func stream(
        _ stream: SCStream,
        didOutputSampleBuffer sampleBuffer: CMSampleBuffer,
        of outputType: SCStreamOutputType)
    {
        guard outputType == .screen else { return }
        // Cheap early exit so later frames are not encoded once one has been delivered.
        if self.hasDelivered() { return }
        guard let imageBuffer = sampleBuffer.imageBuffer else { return }
        var cgImage: CGImage?
        let result = VTCreateCGImageFromCVPixelBuffer(imageBuffer, options: nil, imageOut: &cgImage)
        guard result == noErr, let cgImage else { return }
        let rep = NSBitmapImageRep(cgImage: cgImage)
        guard let data = rep.representation(using: .png, properties: [:]) else { return }

        self.lock.lock()
        if self.delivered {
            self.lock.unlock()
            return
        }
        self.delivered = true
        let waiter = self.continuation
        self.continuation = nil
        if waiter == nil {
            // Nobody is waiting yet; keep the frame for the upcoming `awaitPNG()` call.
            self.bufferedPNG = data
        }
        self.lock.unlock()

        // Resume outside the lock so the awaiting task never contends with this callback.
        waiter?.resume(returning: data)
    }

    /// Thread-safe read of the delivery flag.
    private func hasDelivered() -> Bool {
        self.lock.lock()
        defer { self.lock.unlock() }
        return self.delivered
    }
}

View File

@ -1,44 +0,0 @@
import AppKit
import ClawdisIPC
import CoreGraphics
/// Enumerates the attached screens for the UI commands.
enum UIScreenService {
    /// Returns one `UIScreenInfo` per entry of `NSScreen.screens`, indexed by its position
    /// in that array.
    ///
    /// NOTE(review): `isPrimary` is true for the screen equal to `NSScreen.main`. Confirm
    /// that this is the intended meaning of "primary" rather than the first entry of
    /// `NSScreen.screens`.
    static func listScreens() -> [UIScreenInfo] {
        let attached = NSScreen.screens
        let focused = NSScreen.main

        var infos: [UIScreenInfo] = []
        infos.reserveCapacity(attached.count)
        for (position, screen) in attached.enumerated() {
            let info = UIScreenInfo(
                index: position,
                name: screen.peekabooName,
                frame: screen.frame,
                visibleFrame: screen.visibleFrame,
                isPrimary: screen == focused,
                scaleFactor: screen.backingScaleFactor,
                displayID: screen.displayID)
            infos.append(info)
        }
        return infos
    }
}
private extension NSScreen {
    /// Display identifier read from the `NSScreenNumber` device-description entry,
    /// or 0 when that entry is missing or not a number.
    var displayID: UInt32 {
        let key = NSDeviceDescriptionKey("NSScreenNumber")
        let number = self.deviceDescription[key] as? NSNumber
        return number?.uint32Value ?? 0
    }

    /// Match Peekaboo's `ScreenService` naming (built-in vs. resolution fallback).
    var peekabooName: String {
        let id = self.displayID
        if id == 0 { return "Display" }
        if CGDisplayIsBuiltin(id) != 0 { return "Built-in Display" }
        // External display: prefer a resolution-based label when the mode is available.
        guard let mode = CGDisplayCopyDisplayMode(id) else { return "External Display" }
        return "\(mode.pixelWidth)×\(mode.pixelHeight) Display"
    }
}

View File

@ -15,6 +15,11 @@ struct ClawdisCLI {
exit(code) exit(code)
} }
if args.first == "ui" {
let code = try await UICLI.run(args: Array(args.dropFirst()), jsonOutput: jsonOutput)
exit(code)
}
let parsed = try parseCommandLine(args: args) let parsed = try parseCommandLine(args: args)
let response = try await send(request: parsed.request) let response = try await send(request: parsed.request)
@ -42,8 +47,6 @@ struct ClawdisCLI {
var kind: Kind var kind: Kind
enum Kind { enum Kind {
case uiScreens
case uiScreenshot
case generic case generic
} }
} }
@ -100,29 +103,6 @@ struct ClawdisCLI {
if caps.isEmpty { caps = Capability.allCases } if caps.isEmpty { caps = Capability.allCases }
return ParsedCLIRequest(request: .ensurePermissions(caps, interactive: interactive), kind: .generic) return ParsedCLIRequest(request: .ensurePermissions(caps, interactive: interactive), kind: .generic)
case "ui":
guard let sub = args.first else { throw CLIError.help }
args = Array(args.dropFirst())
switch sub {
case "screens":
return ParsedCLIRequest(request: .uiListScreens, kind: .uiScreens)
case "screenshot":
var screenIndex: Int?
var windowID: UInt32?
while !args.isEmpty {
let arg = args.removeFirst()
switch arg {
case "--screen-index": screenIndex = args.popFirst().flatMap(Int.init)
case "--window-id": windowID = args.popFirst().flatMap(UInt32.init)
default: break
}
}
return ParsedCLIRequest(request: .uiScreenshot(screenIndex: screenIndex, windowID: windowID), kind: .uiScreenshot)
default:
throw CLIError.help
}
case "run": case "run":
var cwd: String? var cwd: String?
var env: [String: String] = [:] var env: [String: String] = [:]
@ -333,24 +313,6 @@ struct ClawdisCLI {
} }
switch parsed.kind { switch parsed.kind {
case .uiScreens:
let screens = try self.decodePayload([UIScreenInfo].self, payload: response.payload)
if screens.isEmpty {
FileHandle.standardOutput.write(Data("No screens\n".utf8))
return
}
for s in screens {
let primary = s.isPrimary ? " (primary)" : ""
let size = "\(Int(s.frame.width))×\(Int(s.frame.height))"
let scale = String(format: "%.1f", Double(s.scaleFactor))
let line = "Display \(s.index + 1)\(primary): \(s.name) \(size) @\(scale)x (id \(s.displayID))\n"
FileHandle.standardOutput.write(Data(line.utf8))
}
case .uiScreenshot:
let result = try self.decodePayload(UIScreenshotResult.self, payload: response.payload)
FileHandle.standardOutput.write(Data((result.path + "\n").utf8))
case .generic: case .generic:
if let payload = response.payload, let text = String(data: payload, encoding: .utf8), !text.isEmpty { if let payload = response.payload, let text = String(data: payload, encoding: .utf8), !text.isEmpty {
FileHandle.standardOutput.write(payload) FileHandle.standardOutput.write(payload)
@ -370,22 +332,6 @@ struct ClawdisCLI {
] ]
switch parsed.kind { switch parsed.kind {
case .uiScreens:
if let payload = response.payload,
let obj = try? JSONSerialization.jsonObject(with: payload) {
output["result"] = obj
} else {
output["result"] = []
}
case .uiScreenshot:
if let payload = response.payload,
let obj = try? JSONSerialization.jsonObject(with: payload) {
output["result"] = obj
} else {
output["result"] = NSNull()
}
case .generic: case .generic:
if let payload = response.payload, !payload.isEmpty { if let payload = response.payload, !payload.isEmpty {
if let obj = try? JSONSerialization.jsonObject(with: payload) { if let obj = try? JSONSerialization.jsonObject(with: payload) {
@ -424,8 +370,12 @@ struct ClawdisCLI {
[--interactive] [--interactive]
UI: UI:
clawdis-mac ui screens clawdis-mac ui screenshot [...]
clawdis-mac ui screenshot [--screen-index <n>] [--window-id <u32>] clawdis-mac ui see [...]
clawdis-mac ui click ...
clawdis-mac ui type ...
clawdis-mac ui wait ...
clawdis-mac ui --help
Shell: Shell:
clawdis-mac run [--cwd <path>] [--env KEY=VAL] [--timeout <sec>] clawdis-mac run [--cwd <path>] [--env KEY=VAL] [--timeout <sec>]

View File

@ -0,0 +1,589 @@
import Foundation
import Darwin
import PeekabooAutomationKit
import PeekabooBridge
import PeekabooFoundation
enum UICLI {
/// Entry point for `clawdis-mac ui …`.
///
/// - Parameters:
///   - args: Arguments after the `ui` token; the first one selects the subcommand.
///   - jsonOutput: Whether results are written as JSON instead of plain text.
/// - Returns: Process exit code. 0 for help or a successful command; 1 for an unknown
///   subcommand (after printing help).
/// - Throws: Errors from resolving the bridge host or from the selected subcommand.
static func run(args: [String], jsonOutput: Bool) async throws -> Int32 {
    /// Subcommands understood by the `ui` command group, keyed by their CLI spelling.
    enum Subcommand: String {
        case permissions
        case frontmost
        case apps
        case windows
        case screenshot
        case see
        case click
        case typeText = "type"
        case waitFor = "wait"
    }

    var args = args
    guard let sub = args.first else {
        self.printHelp()
        return 0
    }
    args.removeFirst()

    if sub == "--help" || sub == "-h" || sub == "help" {
        self.printHelp()
        return 0
    }

    // Validate the subcommand before connecting, so a typo yields usage help instead of
    // a "no host reachable" error when no bridge host is running.
    guard let command = Subcommand(rawValue: sub) else {
        self.printHelp()
        return 1
    }

    let context = try await self.resolveContext()

    switch command {
    case .permissions:
        return try await self.runPermissions(args: args, jsonOutput: jsonOutput, context: context)
    case .frontmost:
        return try await self.runFrontmost(args: args, jsonOutput: jsonOutput, context: context)
    case .apps:
        return try await self.runApps(args: args, jsonOutput: jsonOutput, context: context)
    case .windows:
        return try await self.runWindows(args: args, jsonOutput: jsonOutput, context: context)
    case .screenshot:
        return try await self.runScreenshot(args: args, jsonOutput: jsonOutput, context: context)
    case .see:
        return try await self.runSee(args: args, jsonOutput: jsonOutput, context: context)
    case .click:
        return try await self.runClick(args: args, jsonOutput: jsonOutput, context: context)
    case .typeText:
        return try await self.runType(args: args, jsonOutput: jsonOutput, context: context)
    case .waitFor:
        return try await self.runWait(args: args, jsonOutput: jsonOutput, context: context)
    }
}
// MARK: - Context
/// Connection state shared by all `ui` subcommands.
private struct Context {
/// Bridge client that completed the handshake in `resolveContext()`.
let client: PeekabooBridgeClient
/// Human-readable "<host kind> via <socket path>" label, echoed as `host` in JSON output.
let hostDescription: String
}
/// Connects to the first PeekabooBridge host that completes a handshake.
///
/// A non-empty `PEEKABOO_BRIDGE_SOCKET` environment variable restricts the search to
/// that socket; otherwise the Peekaboo socket is tried before the Clawdis socket.
///
/// - Throws: The host's error envelope when a host rejects this client as unauthorized,
///   or an `NSError` in domain `clawdis.ui` when no candidate could be reached.
private static func resolveContext() async throws -> Context {
    let override = ProcessInfo.processInfo.environment["PEEKABOO_BRIDGE_SOCKET"] ?? ""
    let socketPaths: [String]
    if override.isEmpty {
        socketPaths = [
            PeekabooBridgeConstants.peekabooSocketPath,
            PeekabooBridgeConstants.clawdisSocketPath,
        ]
    } else {
        socketPaths = [override]
    }

    let me = PeekabooBridgeClientIdentity(
        bundleIdentifier: Bundle.main.bundleIdentifier,
        teamIdentifier: nil,
        processIdentifier: getpid(),
        hostname: Host.current().name)

    for path in socketPaths {
        let bridge = PeekabooBridgeClient(socketPath: path, requestTimeoutSec: 10)
        do {
            let reply = try await bridge.handshake(client: me, requestedHost: nil)
            let label = "\(reply.hostKind.rawValue) via \(path)"
            return Context(client: bridge, hostDescription: label)
        } catch let envelope as PeekabooBridgeErrorEnvelope where envelope.code == .unauthorizedClient {
            // A host answered but refused this client: stop searching and surface it.
            throw envelope
        } catch {
            // Any other failure just means this socket is unusable; try the next one.
            continue
        }
    }

    throw NSError(domain: "clawdis.ui", code: 1, userInfo: [
        NSLocalizedDescriptionKey: "No PeekabooBridge host reachable (run Peekaboo.app or Clawdis.app).",
    ])
}
// MARK: - Commands
/// Handles `ui permissions [status]`: prints the host's permission status.
///
/// - Parameters:
///   - args: Remaining arguments; the first selects the action and defaults to `status`.
///   - jsonOutput: Whether to write the result as JSON.
///   - context: Connected bridge client plus host label.
/// - Returns: 0 on success or after an explicit help request; 1 for an unknown action
///   (after printing help).
/// - Throws: Errors from the bridge client or from JSON conversion.
private static func runPermissions(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    switch args.first ?? "status" {
    case "status":
        break
    case "--help", "-h", "help":
        // A help request prints usage and exits successfully, like the sibling commands,
        // instead of falling through to a permissions query.
        self.printHelp()
        return 0
    default:
        self.printHelp()
        return 1
    }

    let status = try await context.client.permissionsStatus()
    if jsonOutput {
        try self.writeJSON([
            "ok": true,
            "host": context.hostDescription,
            "result": try self.toJSONObject(status),
        ])
    } else {
        FileHandle.standardOutput.write(Data((self.formatPermissions(status) + "\n").utf8))
    }
    return 0
}
/// Handles `ui frontmost`: reports the frontmost application and, when present, its
/// focused window. Arguments are ignored.
private static func runFrontmost(args _: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    let frontApp = try await context.client.getFrontmostApplication()
    let focused = try await context.client.getFocusedWindow()

    guard jsonOutput else {
        let stdout = FileHandle.standardOutput
        let bundleID = frontApp.bundleIdentifier ?? "<unknown>"
        stdout.write(Data("\(bundleID) (pid \(frontApp.processIdentifier))\n".utf8))
        if let focused {
            stdout.write(Data("window \(focused.windowID): \(focused.title)\n".utf8))
        }
        return 0
    }

    // JSON has no "absent" value for the window, so a missing window becomes null.
    let windowObject: Any
    if let focused {
        windowObject = try self.toJSONObject(focused)
    } else {
        windowObject = NSNull()
    }
    try self.writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "app": try self.toJSONObject(frontApp),
        "window": windowObject,
    ])
    return 0
}
/// Handles `ui apps`: lists applications known to the host, one
/// `<bundle id><TAB><name>` line per app in text mode. Arguments are ignored.
private static func runApps(args _: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    let running = try await context.client.listApplications()

    guard jsonOutput else {
        let stdout = FileHandle.standardOutput
        for entry in running {
            let bundleID = entry.bundleIdentifier ?? "<unknown>"
            stdout.write(Data("\(bundleID)\t\(entry.name)\n".utf8))
        }
        return 0
    }

    try self.writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "result": try self.toJSONObject(running),
    ])
    return 0
}
/// Handles `ui windows [--bundle-id <id>]`: lists windows of the given application, or
/// of the frontmost one when no non-empty bundle id is supplied.
///
/// Unrecognized arguments are skipped. A help flag prints usage and returns 0.
private static func runWindows(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var requestedBundle: String?
    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        switch flag {
        case "--bundle-id":
            requestedBundle = remaining.popFirst()
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            break
        }
    }

    let target: WindowTarget
    if let requestedBundle, !requestedBundle.isEmpty {
        target = .application(requestedBundle)
    } else {
        target = .frontmost
    }

    let found = try await context.client.listWindows(target: target)
    guard jsonOutput else {
        let stdout = FileHandle.standardOutput
        for entry in found {
            stdout.write(Data("\(entry.windowID)\t\(entry.title)\n".utf8))
        }
        return 0
    }

    try self.writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "result": try self.toJSONObject(found),
    ])
    return 0
}
/// Handles `ui screenshot`: captures an image through the host, writes it to a temporary
/// PNG and prints the file path (or a JSON object containing it).
///
/// Target selection, in priority order: a non-empty `--bundle-id` captures that app's
/// window (optionally `--window-index`), else `--screen-index` captures that display,
/// else the frontmost window is captured. `--watch` switches the visual feedback mode and
/// `--scale native|1x|logical|logical1x` picks the scale. Unrecognized arguments and
/// unparsable values are skipped.
private static func runScreenshot(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var screenIndex: Int?
    var appBundle: String?
    var windowIndex: Int?
    var feedbackMode: CaptureVisualizerMode = .screenshotFlash
    var scale: CaptureScalePreference = .logical1x

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        switch flag {
        case "--screen-index":
            screenIndex = remaining.popFirst().flatMap(Int.init)
        case "--bundle-id":
            appBundle = remaining.popFirst()
        case "--window-index":
            windowIndex = remaining.popFirst().flatMap(Int.init)
        case "--watch":
            feedbackMode = .watchCapture
        case "--scale":
            // Unknown or missing scale names leave the current preference untouched.
            let requested = remaining.popFirst()?.lowercased()
            if requested == "native" {
                scale = .native
            } else if let requested, ["1x", "logical", "logical1x"].contains(requested) {
                scale = .logical1x
            }
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            break
        }
    }

    let shot: CaptureResult
    if let appBundle, !appBundle.isEmpty {
        shot = try await context.client.captureWindow(
            appIdentifier: appBundle,
            windowIndex: windowIndex,
            visualizerMode: feedbackMode,
            scale: scale)
    } else if let screenIndex {
        shot = try await context.client.captureScreen(
            displayIndex: screenIndex,
            visualizerMode: feedbackMode,
            scale: scale)
    } else {
        shot = try await context.client.captureFrontmost(visualizerMode: feedbackMode, scale: scale)
    }

    let savedPath = try self.writeTempPNG(shot.imageData)
    guard jsonOutput else {
        FileHandle.standardOutput.write(Data("\(savedPath)\n".utf8))
        return 0
    }

    try self.writeJSON([
        "ok": true,
        "host": context.hostDescription,
        "path": savedPath,
        "metadata": try self.toJSONObject(shot.metadata),
        "warning": shot.warning ?? "",
    ])
    return 0
}
/// Handles `ui see`: captures a screenshot, runs element detection on it, and records both
/// against a snapshot.
///
/// Steps, in order:
/// 1. Capture the window of `--bundle-id` (optionally `--window-index`), or the frontmost
///    target when no bundle id is given; in the latter case the bundle id is taken from the
///    capture metadata.
/// 2. Pick the snapshot: an explicit `--snapshot-id`, else the most recent snapshot for the
///    bundle id (lookup failures are ignored), else a newly created snapshot.
/// 3. Write the PNG to a temp file and store it with the capture's application/window details.
/// 4. Detect elements in the image and store the detection result.
///
/// - Parameters:
///   - args: Remaining command-line arguments after the subcommand; unknown ones are ignored.
///   - jsonOutput: Selects a JSON envelope instead of the screenshot path followed by one
///     tab-separated line per detected element.
///   - context: Supplies the bridge client and the host description.
/// - Returns: Always `0` (also after printing help); failures are reported by throwing.
private static func runSee(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var windowIndex: Int?
    var requestedSnapshot: String?

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        switch flag {
        case "--bundle-id":
            bundleId = remaining.popFirst()
        case "--window-index":
            windowIndex = remaining.popFirst().flatMap { Int($0) }
        case "--snapshot-id":
            requestedSnapshot = remaining.popFirst()
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            break
        }
    }

    let client = context.client

    let capture: CaptureResult
    if let bundleId, !bundleId.isEmpty {
        capture = try await client.captureWindow(
            appIdentifier: bundleId,
            windowIndex: windowIndex,
            visualizerMode: .screenshotFlash,
            scale: .logical1x)
    } else {
        capture = try await client.captureFrontmost(visualizerMode: .screenshotFlash, scale: .logical1x)
        // Adopt whatever application the capture reports so later steps can key on it.
        bundleId = capture.metadata.applicationInfo?.bundleIdentifier
    }

    let resolvedSnapshotId: String
    if let requestedSnapshot, !requestedSnapshot.isEmpty {
        resolvedSnapshotId = requestedSnapshot
    } else if let bundleId, !bundleId.isEmpty,
              let existing = try? await client.getMostRecentSnapshot(applicationBundleId: bundleId)
    {
        resolvedSnapshotId = existing
    } else {
        resolvedSnapshotId = try await client.createSnapshot()
    }

    let appInfo = capture.metadata.applicationInfo
    let windowInfo = capture.metadata.windowInfo

    let screenshotPath = try self.writeTempPNG(capture.imageData)
    try await client.storeScreenshot(
        snapshotId: resolvedSnapshotId,
        screenshotPath: screenshotPath,
        applicationBundleId: bundleId,
        applicationProcessId: appInfo?.processIdentifier,
        applicationName: appInfo?.name,
        windowTitle: windowInfo?.title,
        windowBounds: windowInfo?.bounds)

    let windowContext = WindowContext(
        applicationName: appInfo?.name,
        windowTitle: windowInfo?.title,
        windowBounds: windowInfo?.bounds)
    let detection = try await client.detectElements(
        in: capture.imageData,
        snapshotId: resolvedSnapshotId,
        windowContext: windowContext)
    try await client.storeDetectionResult(snapshotId: resolvedSnapshotId, result: detection)

    guard jsonOutput else {
        FileHandle.standardOutput.write(Data((screenshotPath + "\n").utf8))
        for element in detection.elements.all {
            let rect = element.bounds
            // Newlines in labels would break the one-line-per-element format.
            let label = (element.label ?? element.value ?? "").replacingOccurrences(of: "\n", with: " ")
            let geometry =
                "\(Int(rect.origin.x)),\(Int(rect.origin.y)) \(Int(rect.size.width))x\(Int(rect.size.height))"
            let row = "\(element.id)\t\(element.type)\t\(geometry)\t\(label)\n"
            FileHandle.standardOutput.write(Data(row.utf8))
        }
        return 0
    }

    let host = context.hostDescription
    let payload = try self.toJSONObject(detection)
    try self.writeJSON([
        "ok": true,
        "host": host,
        "snapshotId": resolvedSnapshotId,
        "screenshotPath": screenshotPath,
        "result": payload,
    ])
    return 0
}
/// Handles `ui click`: clicks a previously detected element.
///
/// `--on <elementId>` is required. The snapshot comes from `--snapshot-id`, or is resolved
/// implicitly from `--bundle-id` / the frontmost application. `--double` and `--right`
/// change the click type; when both are given the later one wins. Unknown arguments are ignored.
///
/// - Parameters:
///   - args: Remaining command-line arguments after the subcommand.
///   - jsonOutput: When true, writes an `ok`/`host` envelope on success; text mode prints nothing.
///   - context: Supplies the bridge client and the host description.
/// - Returns: Always `0` (also after printing help).
/// - Throws: `NSError` (`clawdis.ui`, code 2) when `--on` is missing or empty, plus any error
///   from snapshot resolution or the click itself.
private static func runClick(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var elementId: String?
    var clickType: ClickType = .single

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        switch flag {
        case "--bundle-id":
            bundleId = remaining.popFirst()
        case "--snapshot-id":
            snapshotId = remaining.popFirst()
        case "--on":
            elementId = remaining.popFirst()
        case "--double":
            clickType = .double
        case "--right":
            clickType = .right
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            break
        }
    }

    guard let elementId, !elementId.isEmpty else {
        let info = [
            NSLocalizedDescriptionKey: "Missing --on <elementId> (run `clawdis-mac ui see` first).",
        ]
        throw NSError(domain: "clawdis.ui", code: 2, userInfo: info)
    }

    let effectiveSnapshotId = try await self.resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    try await context.client.click(
        target: .elementId(elementId),
        clickType: clickType,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        try self.writeJSON([
            "ok": true,
            "host": context.hostDescription,
        ])
    }
    return 0
}
/// Handles `ui type`: types text, optionally into a specific element.
///
/// Text is collected from every `--text <value>` and from any bare argument that is not a
/// recognised option; the pieces are joined with single spaces and then trimmed of
/// surrounding whitespace and newlines. `--delay-ms` keeps its previous value (default 20)
/// when the supplied value is not an integer.
///
/// - Parameters:
///   - args: Remaining command-line arguments after the subcommand.
///   - jsonOutput: When true, writes an `ok`/`host` envelope on success; text mode prints nothing.
///   - context: Supplies the bridge client and the host description.
/// - Returns: Always `0` (also after printing help).
/// - Throws: `NSError` (`clawdis.ui`, code 3) when the resulting text is empty, plus any error
///   from snapshot resolution or the typing call.
private static func runType(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var targetElement: String?
    var shouldClear = false
    var delayMs = 20
    var pieces: [String] = []

    while !remaining.isEmpty {
        let token = remaining.removeFirst()
        switch token {
        case "--bundle-id":
            bundleId = remaining.popFirst()
        case "--snapshot-id":
            snapshotId = remaining.popFirst()
        case "--into":
            targetElement = remaining.popFirst()
        case "--clear":
            shouldClear = true
        case "--delay-ms":
            // The value is consumed even when it does not parse.
            if let parsed = remaining.popFirst().flatMap({ Int($0) }) {
                delayMs = parsed
            }
        case "--text":
            if let value = remaining.popFirst() {
                pieces.append(value)
            }
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            // Bare words are treated as additional text.
            pieces.append(token)
        }
    }

    let text = pieces
        .joined(separator: " ")
        .trimmingCharacters(in: .whitespacesAndNewlines)
    if text.isEmpty {
        let info = [
            NSLocalizedDescriptionKey: "Missing text (use --text <value>).",
        ]
        throw NSError(domain: "clawdis.ui", code: 3, userInfo: info)
    }

    let effectiveSnapshotId = try await self.resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    try await context.client.type(
        text: text,
        target: targetElement,
        clearExisting: shouldClear,
        typingDelay: delayMs,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        try self.writeJSON([
            "ok": true,
            "host": context.hostDescription,
        ])
    }
    return 0
}
/// Handles `ui wait`: waits for an element to be found.
///
/// `--on <elementId>` is required. `--timeout <sec>` keeps its previous value (default 10)
/// when the supplied value does not parse as a `Double`. Unknown arguments are ignored.
///
/// - Parameters:
///   - args: Remaining command-line arguments after the subcommand.
///   - jsonOutput: Selects a JSON envelope instead of a `found` / `not found` line.
///   - context: Supplies the bridge client and the host description.
/// - Returns: `0` when the element was found (or help was printed), `1` when it was not.
/// - Throws: `NSError` (`clawdis.ui`, code 4) when `--on` is missing or empty, plus any error
///   from snapshot resolution or the wait call.
private static func runWait(args: [String], jsonOutput: Bool, context: Context) async throws -> Int32 {
    var remaining = args
    var bundleId: String?
    var snapshotId: String?
    var elementId: String?
    var timeoutSec: Double = 10

    while !remaining.isEmpty {
        let flag = remaining.removeFirst()
        switch flag {
        case "--bundle-id":
            bundleId = remaining.popFirst()
        case "--snapshot-id":
            snapshotId = remaining.popFirst()
        case "--on":
            elementId = remaining.popFirst()
        case "--timeout":
            if let parsed = remaining.popFirst().flatMap({ Double($0) }) {
                timeoutSec = parsed
            }
        case "--help", "-h", "help":
            self.printHelp()
            return 0
        default:
            break
        }
    }

    guard let elementId, !elementId.isEmpty else {
        let info = [
            NSLocalizedDescriptionKey: "Missing --on <elementId>.",
        ]
        throw NSError(domain: "clawdis.ui", code: 4, userInfo: info)
    }

    let effectiveSnapshotId = try await self.resolveImplicitSnapshotId(
        snapshotId: snapshotId,
        bundleId: bundleId,
        client: context.client)

    let outcome = try await context.client.waitForElement(
        target: .elementId(elementId),
        timeout: timeoutSec,
        snapshotId: effectiveSnapshotId)

    if jsonOutput {
        let host = context.hostDescription
        let payload = try self.toJSONObject(outcome)
        try self.writeJSON([
            "ok": true,
            "host": host,
            "result": payload,
        ])
    } else {
        let message = outcome.found ? "found\n" : "not found\n"
        FileHandle.standardOutput.write(Data(message.utf8))
    }

    // The exit status mirrors the outcome so shell callers can branch on it.
    return outcome.found ? 0 : 1
}
/// Resolves the snapshot id an action should operate on.
///
/// Resolution order:
/// 1. A non-empty explicit `snapshotId` is returned unchanged.
/// 2. Otherwise the most recent snapshot is looked up for `bundleId`, falling back to the
///    frontmost application's bundle identifier when `bundleId` is nil or empty.
///
/// - Parameters:
///   - snapshotId: Snapshot id supplied by the caller, if any.
///   - bundleId: Bundle identifier supplied by the caller, if any.
///   - client: Bridge client used for the frontmost-application and snapshot lookups.
/// - Returns: The snapshot id to use.
/// - Throws: `NSError` in domain `clawdis.ui` with code 5 when no bundle id can be determined,
///   or code 6 when the snapshot lookup fails (the original failure is attached under
///   `NSUnderlyingErrorKey`). Errors from `getFrontmostApplication()` propagate unchanged.
private static func resolveImplicitSnapshotId(
    snapshotId: String?,
    bundleId: String?,
    client: PeekabooBridgeClient) async throws -> String
{
    if let snapshotId, !snapshotId.isEmpty { return snapshotId }

    let resolvedBundle: String? = if let bundleId, !bundleId.isEmpty {
        bundleId
    } else {
        try await client.getFrontmostApplication().bundleIdentifier
    }
    guard let resolvedBundle, !resolvedBundle.isEmpty else {
        throw NSError(domain: "clawdis.ui", code: 5, userInfo: [
            NSLocalizedDescriptionKey: "Could not determine bundle id for implicit snapshot.",
        ])
    }

    do {
        return try await client.getMostRecentSnapshot(applicationBundleId: resolvedBundle)
    } catch {
        // Keep the friendly hint, but preserve the real cause (transport, permission or
        // decoding failure) so it is not misreported as a plain "no snapshot" condition.
        throw NSError(domain: "clawdis.ui", code: 6, userInfo: [
            NSLocalizedDescriptionKey: "No recent snapshot for \(resolvedBundle). Run `clawdis-mac ui see --bundle-id \(resolvedBundle)` first.",
            NSUnderlyingErrorKey: error as NSError,
        ])
    }
}
// MARK: - IO helpers
/// Writes `data` to a timestamped file in the temporary directory.
///
/// The file is named `clawdis-ui-<ISO 8601 timestamp with fractional seconds>.png`, with the
/// colons of the timestamp replaced by dashes, and is written atomically.
///
/// - Parameter data: Bytes to write; they are written as-is.
/// - Returns: The file-system path of the written file.
/// - Throws: Any error raised while writing the file.
private static func writeTempPNG(_ data: Data) throws -> String {
    let formatter = ISO8601DateFormatter()
    formatter.formatOptions = [.withInternetDateTime, .withFractionalSeconds]

    let timestamp = formatter.string(from: Date())
    // Swap every ":" for "-" so the stamp contains no colons.
    let safeStamp = String(timestamp.map { $0 == ":" ? "-" : $0 })

    let destination = FileManager.default.temporaryDirectory
        .appendingPathComponent("clawdis-ui-\(safeStamp).png")
    try data.write(to: destination, options: .atomic)
    return destination.path
}
/// Renders a permissions status as a single space-separated line.
///
/// Each permission appears as `<name>=ok` or `<name>=missing`, in the fixed order
/// screen-recording, accessibility, applescript.
///
/// - Parameter status: The permission flags to render.
/// - Returns: The formatted line, without a trailing newline.
private static func formatPermissions(_ status: PermissionsStatus) -> String {
    let checks: [(label: String, granted: Bool)] = [
        ("screen-recording", status.screenRecording),
        ("accessibility", status.accessibility),
        ("applescript", status.appleScript),
    ]
    return checks
        .map { check in "\(check.label)=\(check.granted ? "ok" : "missing")" }
        .joined(separator: " ")
}
/// Converts an `Encodable` value into a Foundation JSON object suitable for embedding in a
/// dictionary passed to `JSONSerialization`.
///
/// The value is encoded with `JSONEncoder` (dates as ISO 8601) and parsed back with
/// `JSONSerialization`. Top-level scalars (strings, numbers, booleans) are accepted as well
/// as arrays and dictionaries, so the helper works for every `Encodable` type.
///
/// - Parameter value: The value to convert.
/// - Returns: The parsed JSON object (dictionary, array, or scalar).
/// - Throws: Encoding errors from `JSONEncoder` or parsing errors from `JSONSerialization`.
private static func toJSONObject<T: Encodable>(_ value: T) throws -> Any {
    let encoder = JSONEncoder()
    encoder.dateEncodingStrategy = .iso8601
    let data = try encoder.encode(value)
    // Without `.fragmentsAllowed` a top-level scalar (e.g. an encoded String) is rejected.
    return try JSONSerialization.jsonObject(with: data, options: [.fragmentsAllowed])
}
/// Writes `obj` to stdout as pretty-printed JSON followed by a newline.
///
/// Keys are emitted in sorted order so the output is byte-for-byte stable across runs;
/// without this, key order follows dictionary iteration order, which is not guaranteed.
///
/// - Parameter obj: A dictionary containing only JSON-compatible values.
/// - Throws: Any error raised by `JSONSerialization` while serializing `obj`.
private static func writeJSON(_ obj: [String: Any]) throws {
    let data = try JSONSerialization.data(withJSONObject: obj, options: [.prettyPrinted, .sortedKeys])
    FileHandle.standardOutput.write(data)
    // Terminate the document with a line feed so shells and line-based readers behave.
    FileHandle.standardOutput.write(Data([0x0A]))
}
/// Prints the usage text for the `ui` subcommands to standard error.
///
/// The text lists every subcommand with its options, followed by notes on bridge
/// preference and the default timeout.
private static func printHelp() {
    // Possessive apostrophes restored in the Notes section ("Peekaboo.app's bridge").
    let usage = """
        clawdis-mac ui UI automation via PeekabooBridge
        Usage:
        clawdis-mac [--json] ui <command> ...
        Commands:
        permissions status
        frontmost
        apps
        windows [--bundle-id <id>]
        screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]
        see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]
        click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]
        type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]
        wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]
        Notes:
        - Prefers Peekaboo.app's bridge, then Clawdis.app's bridge.
        - Default timeout is 10 seconds per action.
        """
    FileHandle.standardError.write(Data((usage + "\n").utf8))
}
}

View File

@ -50,64 +50,6 @@ public struct CanvasPlacement: Codable, Sendable {
} }
} }
// MARK: - UI (Peekaboo-aligned types)
/// Display info aligned with Peekaboo's `ScreenService.ScreenInfo`:
/// - `index` is the 0-based position in `NSScreen.screens` at runtime.
/// - `frame`/`visibleFrame` are AppKit screen rectangles (bottom-left origin).
///
/// The type is a plain immutable value that is `Codable` and `Sendable`.
public struct UIScreenInfo: Codable, Sendable {
/// 0-based position of the screen in `NSScreen.screens` at runtime.
public let index: Int
/// Screen name. NOTE(review): presumably the localized display name; confirm against the producer.
public let name: String
/// Full screen rectangle in AppKit coordinates (bottom-left origin).
public let frame: CGRect
/// Visible screen rectangle in AppKit coordinates (bottom-left origin).
public let visibleFrame: CGRect
/// Whether this is the primary screen. NOTE(review): exact definition of "primary" is set by the producer; confirm.
public let isPrimary: Bool
/// Scale factor of the screen. NOTE(review): presumably the backing scale factor; confirm.
public let scaleFactor: CGFloat
/// Display identifier. NOTE(review): presumably a `CGDirectDisplayID`; confirm.
public let displayID: UInt32
/// Creates a screen description; each argument is stored unchanged in the property of the same name.
public init(
index: Int,
name: String,
frame: CGRect,
visibleFrame: CGRect,
isPrimary: Bool,
scaleFactor: CGFloat,
displayID: UInt32)
{
self.index = index
self.name = name
self.frame = frame
self.visibleFrame = visibleFrame
self.isPrimary = isPrimary
self.scaleFactor = scaleFactor
self.displayID = displayID
}
}
/// Result of a screenshot request: where the image was written, its dimensions, and
/// optional identifiers for what was captured.
///
/// The type is a plain immutable value that is `Codable` and `Sendable`.
public struct UIScreenshotResult: Codable, Sendable {
/// Path of the screenshot. NOTE(review): presumably a file-system path to the written image; confirm.
public let path: String
/// Image width. NOTE(review): unit (pixels vs. points) is not visible here; confirm.
public let width: Int
/// Image height. NOTE(review): unit (pixels vs. points) is not visible here; confirm.
public let height: Int
/// Index of the captured screen, when applicable; defaults to nil in the initializer.
public let screenIndex: Int?
/// Identifier of the captured display, when applicable; defaults to nil in the initializer.
public let displayID: UInt32?
/// Identifier of the captured window, when applicable; defaults to nil in the initializer.
public let windowID: UInt32?
/// Creates a result; each argument is stored unchanged in the property of the same name.
/// The three identifier arguments are optional and default to nil.
public init(
path: String,
width: Int,
height: Int,
screenIndex: Int? = nil,
displayID: UInt32? = nil,
windowID: UInt32? = nil)
{
self.path = path
self.width = width
self.height = height
self.screenIndex = screenIndex
self.displayID = displayID
self.windowID = windowID
}
}
public enum Request: Sendable { public enum Request: Sendable {
case notify( case notify(
title: String, title: String,
@ -116,8 +58,6 @@ public enum Request: Sendable {
priority: NotificationPriority?, priority: NotificationPriority?,
delivery: NotificationDelivery?) delivery: NotificationDelivery?)
case ensurePermissions([Capability], interactive: Bool) case ensurePermissions([Capability], interactive: Bool)
case uiListScreens
case uiScreenshot(screenIndex: Int?, windowID: UInt32?)
case runShell( case runShell(
command: [String], command: [String],
cwd: String?, cwd: String?,
@ -158,7 +98,6 @@ extension Request: Codable {
case type case type
case title, body, sound, priority, delivery case title, body, sound, priority, delivery
case caps, interactive case caps, interactive
case screenIndex, windowID
case command, cwd, env, timeoutSec, needsScreenRecording case command, cwd, env, timeoutSec, needsScreenRecording
case message, thinking, session, deliver, to case message, thinking, session, deliver, to
case rpcStatus case rpcStatus
@ -174,8 +113,6 @@ extension Request: Codable {
private enum Kind: String, Codable { private enum Kind: String, Codable {
case notify case notify
case ensurePermissions case ensurePermissions
case uiListScreens
case uiScreenshot
case runShell case runShell
case status case status
case agent case agent
@ -205,14 +142,6 @@ extension Request: Codable {
try container.encode(caps, forKey: .caps) try container.encode(caps, forKey: .caps)
try container.encode(interactive, forKey: .interactive) try container.encode(interactive, forKey: .interactive)
case .uiListScreens:
try container.encode(Kind.uiListScreens, forKey: .type)
case let .uiScreenshot(screenIndex, windowID):
try container.encode(Kind.uiScreenshot, forKey: .type)
try container.encodeIfPresent(screenIndex, forKey: .screenIndex)
try container.encodeIfPresent(windowID, forKey: .windowID)
case let .runShell(command, cwd, env, timeoutSec, needsSR): case let .runShell(command, cwd, env, timeoutSec, needsSR):
try container.encode(Kind.runShell, forKey: .type) try container.encode(Kind.runShell, forKey: .type)
try container.encode(command, forKey: .command) try container.encode(command, forKey: .command)
@ -289,14 +218,6 @@ extension Request: Codable {
let interactive = try container.decode(Bool.self, forKey: .interactive) let interactive = try container.decode(Bool.self, forKey: .interactive)
self = .ensurePermissions(caps, interactive: interactive) self = .ensurePermissions(caps, interactive: interactive)
case .uiListScreens:
self = .uiListScreens
case .uiScreenshot:
let screenIndex = try container.decodeIfPresent(Int.self, forKey: .screenIndex)
let windowID = try container.decodeIfPresent(UInt32.self, forKey: .windowID)
self = .uiScreenshot(screenIndex: screenIndex, windowID: windowID)
case .runShell: case .runShell:
let command = try container.decode([String].self, forKey: .command) let command = try container.decode([String].self, forKey: .command)
let cwd = try container.decodeIfPresent(String.self, forKey: .cwd) let cwd = try container.decodeIfPresent(String.self, forKey: .cwd)

View File

@ -1,10 +1,10 @@
--- ---
summary: "Spec for the Clawdis macOS companion menu bar app and XPC broker" summary: "Spec for the Clawdis macOS companion menu bar app and local broker (control socket + PeekabooBridge)"
read_when: read_when:
- Implementing macOS app features - Implementing macOS app features
- Touching XPC/CLI bridging - Touching broker/CLI bridging
--- ---
# Clawdis macOS Companion (menu bar + XPC broker) # Clawdis macOS Companion (menu bar + local broker)
Author: steipete · Status: draft spec · Date: 2025-12-05 Author: steipete · Status: draft spec · Date: 2025-12-05
@ -12,21 +12,24 @@ Author: steipete · Status: draft spec · Date: 2025-12-05
- Single macOS menu-bar app named **Clawdis** that: - Single macOS menu-bar app named **Clawdis** that:
- Shows native notifications for Clawdis/clawdis events. - Shows native notifications for Clawdis/clawdis events.
- Owns TCC prompts (Notifications, Accessibility, Screen Recording, Automation/AppleScript, Microphone, Speech Recognition). - Owns TCC prompts (Notifications, Accessibility, Screen Recording, Automation/AppleScript, Microphone, Speech Recognition).
- Brokers privileged actions (screen capture, shell with elevated UI context) via XPC. - Brokers privileged actions via local IPC:
- Clawdis control socket (app-specific actions like notify/run)
- PeekabooBridge socket (`bridge.sock`) for UI automation (see `docs/mac/peekaboo.md`)
- Provides a tiny CLI (`clawdis-mac`) that talks to the app; Node/TS shells out to it. - Provides a tiny CLI (`clawdis-mac`) that talks to the app; Node/TS shells out to it.
- Replace the separate notifier helper pattern (Oracle) with a built-in notifier. - Replace the separate notifier helper pattern (Oracle) with a built-in notifier.
- Offer a first-run experience similar to VibeTunnel's onboarding (permissions + CLI install). - Offer a first-run experience similar to VibeTunnel's onboarding (permissions + CLI install).
## High-level design ## High-level design
- SwiftPM package in `apps/macos/` (macOS 15+, Swift 6): - SwiftPM package in `apps/macos/` (macOS 15+, Swift 6).
- Dependency: `https://github.com/ChimeHQ/AsyncXPCConnection` (>=0.6.0). - Targets:
- Targets: - `ClawdisIPC` (shared Codable types + helpers for app-specific commands).
- `ClawdisIPC` (shared Codable types + helpers). - `Clawdis` (LSUIElement MenuBarExtra app; hosts control socket + optional PeekabooBridgeHost).
- `Clawdis` (LSUIElement MenuBarExtra app; embeds XPC listener and notifier). - `ClawdisCLI` (`clawdis-mac`; prints text by default, `--json` for scripts).
- `ClawdisCLI` (client that forms requests, talks XPC, prints JSON for scripts). - Bundle ID: `com.steipete.clawdis`.
- Bundle ID: `com.steipete.clawdis`; XPC service name: `com.steipete.clawdis.xpc`.
- The CLI lives in the app bundle `Contents/Helpers/clawdis-mac`; dev symlink `bin/clawdis-mac` points there. - The CLI lives in the app bundle `Contents/Helpers/clawdis-mac`; dev symlink `bin/clawdis-mac` points there.
- Node/TS layer calls the CLI; no direct XPC from Node. - Node/TS layer calls the CLI; no direct privileged API calls from Node.
Note: `docs/mac/xpc.md` describes an aspirational long-term Mach/XPC architecture. The current direction for UI automation is PeekabooBridge (socket-based).
## IPC contract (ClawdisIPC) ## IPC contract (ClawdisIPC)
- Codable enums; small payloads (<1 MB enforced in listener): - Codable enums; small payloads (<1 MB enforced in listener):
@ -36,13 +39,15 @@ enum Capability { notifications, accessibility, screenRecording, appleScript, mi
enum Request { enum Request {
notify(title, body, sound?) notify(title, body, sound?)
ensurePermissions([Capability], interactive: Bool) ensurePermissions([Capability], interactive: Bool)
uiScreenshot(screenIndex?, windowID?)
runShell(command:[String], cwd?, env?, timeoutSec?, needsScreenRecording: Bool) runShell(command:[String], cwd?, env?, timeoutSec?, needsScreenRecording: Bool)
status status
} }
struct Response { ok: Bool; message?: String; payload?: Data } struct Response { ok: Bool; message?: String; payload?: Data }
``` ```
- Listener rejects oversize/unknown cases and validates the caller by code signature TeamID (with a `DEBUG`-only same-UID escape hatch controlled by `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`). - The control-socket server rejects oversize/unknown cases and validates the caller by code signature TeamID (with a `DEBUG`-only same-UID escape hatch controlled by `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`).
UI automation is not part of `ClawdisIPC.Request`:
- `clawdis-mac ui …` speaks **PeekabooBridge** (see `docs/mac/peekaboo.md`).
## App UX (Clawdis) ## App UX (Clawdis)
- MenuBarExtra icon only (LSUIElement; no Dock). - MenuBarExtra icon only (LSUIElement; no Dock).
@ -52,28 +57,37 @@ struct Response { ok: Bool; message?: String; payload?: Data }
- Permissions: live status + “Request” buttons for Notifications/Accessibility/Screen Recording; links to System Settings. - Permissions: live status + “Request” buttons for Notifications/Accessibility/Screen Recording; links to System Settings.
- Debug (when enabled): PID/log links, restart/reveal app shortcuts, manual test notification. - Debug (when enabled): PID/log links, restart/reveal app shortcuts, manual test notification.
- About: version, links, license. - About: version, links, license.
- Pause behavior: matches Trimmy's “Auto Trim” toggle. When paused, XPC listener returns `ok=false, message="clawdis paused"` for actions that would touch TCC (notify/run/screenshot). State is persisted (UserDefaults) and surfaced in menu and status view. - Pause behavior: matches Trimmy's “Auto Trim” toggle. When paused, the broker returns `ok=false, message="clawdis paused"` for actions that would touch TCC. State is persisted (UserDefaults) and surfaced in menu and status view.
- Onboarding (VibeTunnel-inspired): Welcome → What it does → Install CLI (shows `ln -s .../clawdis-mac /usr/local/bin`) → Permissions checklist with live status → Test notification → Done. Re-show when `welcomeVersion` bumps or CLI/app version mismatch. - Onboarding (VibeTunnel-inspired): Welcome → What it does → Install CLI (shows `ln -s .../clawdis-mac /usr/local/bin`) → Permissions checklist with live status → Test notification → Done. Re-show when `welcomeVersion` bumps or CLI/app version mismatch.
## Built-in services ## Built-in services
- NotificationManager: UNUserNotificationCenter primary; AppleScript `display notification` fallback; respects the `--sound` value on each request. - NotificationManager: UNUserNotificationCenter primary; AppleScript `display notification` fallback; respects the `--sound` value on each request.
- PermissionManager: checks/requests Notifications, Accessibility (AX), Screen Recording (capture probe); publishes changes for UI. - PermissionManager: checks/requests Notifications, Accessibility (AX), Screen Recording (capture probe); publishes changes for UI.
- ScreenCaptureManager: window/display PNG capture; gated on permission. - UI automation + capture: provided by **PeekabooBridgeHost** when enabled (see `docs/mac/peekaboo.md`).
- ShellExecutor: executes `Process` with timeout; rejects when `needsScreenRecording` and permission missing; returns stdout/stderr in payload. - ShellExecutor: executes `Process` with timeout; rejects when `needsScreenRecording` and permission missing; returns stdout/stderr in payload.
- XPCListener actor: routes Request → managers; logs via OSLog. - ControlSocketServer actor: routes Request → managers; logs via OSLog.
## CLI (`clawdis-mac`) ## CLI (`clawdis-mac`)
- Subcommands (text by default; `--json` for machine output; non-zero exit on failure): - Subcommands (text by default; `--json` for machine output; non-zero exit on failure):
- `notify --title --body [--sound] [--priority passive|active|timeSensitive] [--delivery system|overlay|auto]` - `notify --title --body [--sound] [--priority passive|active|timeSensitive] [--delivery system|overlay|auto]`
- `ensure-permissions --cap accessibility --cap screenRecording [--interactive]` - `ensure-permissions --cap accessibility --cap screenRecording [--interactive]`
- `ui screens` - `ui permissions status`
- `ui screenshot [--screen-index N] [--window-id N]` - `ui frontmost`
- `ui apps`
- `ui windows [--bundle-id <id>]`
- `ui screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]`
- `ui see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]`
- `ui click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]`
- `ui type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]`
- `ui wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]`
- `run -- cmd args... [--cwd] [--env KEY=VAL] [--timeout 30] [--needs-screen-recording]` - `run -- cmd args... [--cwd] [--env KEY=VAL] [--timeout 30] [--needs-screen-recording]`
- `status` - `status`
- Sounds: supply any macOS alert name with `--sound` per notification; omit the flag to use the system default. There is no longer a persisted “default sound” in the app UI. - Sounds: supply any macOS alert name with `--sound` per notification; omit the flag to use the system default. There is no longer a persisted “default sound” in the app UI.
- Priority: `timeSensitive` is best-effort and falls back to `active` unless the app is signed with the Time Sensitive Notifications entitlement. - Priority: `timeSensitive` is best-effort and falls back to `active` unless the app is signed with the Time Sensitive Notifications entitlement.
- Delivery: `overlay` and `auto` show an in-app toast panel (bypasses Notification Center/Focus). - Delivery: `overlay` and `auto` show an in-app toast panel (bypasses Notification Center/Focus).
- Internals: builds a `ClawdisIPC.Request`, sends it to the running app over the local control socket, and prints text by default (or JSON with `--json`). - Internals:
- For app-specific commands (`notify`, `ensure-permissions`, `run`, `status`): build `ClawdisIPC.Request`, send over the control socket.
- For UI automation (`ui …`): connect to PeekabooBridge hosts (Peekaboo.app → Clawdis.app) and send one JSON request per command (see `docs/mac/peekaboo.md`).
## Integration with clawdis/Clawdis (Node/TS) ## Integration with clawdis/Clawdis (Node/TS)
- Add helper module that shells to `clawdis-mac`: - Add helper module that shells to `clawdis-mac`:
@ -135,6 +149,6 @@ Notes:
## Open questions / decisions ## Open questions / decisions
- Where to place the dev symlink `bin/clawdis-mac` (repo root vs. `apps/macos/bin`)? - Where to place the dev symlink `bin/clawdis-mac` (repo root vs. `apps/macos/bin`)?
- Should `runShell` support streaming stdout/stderr (XPC with AsyncSequence) or just buffered? (Start buffered; streaming later.) - Should `runShell` support streaming stdout/stderr (IPC with AsyncSequence) or just buffered? (Start buffered; streaming later.)
- Icon: reuse Clawdis lobster or new mac-specific glyph? - Icon: reuse Clawdis lobster or new mac-specific glyph?
- Sparkle updates: bundled via Sparkle; release builds point at `https://raw.githubusercontent.com/steipete/clawdis/main/appcast.xml` and enable auto-checks, while debug builds leave the feed blank and disable checks. - Sparkle updates: bundled via Sparkle; release builds point at `https://raw.githubusercontent.com/steipete/clawdis/main/appcast.xml` and enable auto-checks, while debug builds leave the feed blank and disable checks.

View File

@ -69,7 +69,7 @@ Implementation notes:
## Agent API surface (proposed) ## Agent API surface (proposed)
Expose Canvas via the existing `clawdis-mac` → XPC → app routing so the agent can: Expose Canvas via the existing `clawdis-mac` → control socket → app routing so the agent can:
- Show/hide the panel. - Show/hide the panel.
- Navigate to a path (relative to the session root). - Navigate to a path (relative to the session root).
- Evaluate JavaScript and optionally return results. - Evaluate JavaScript and optionally return results.

View File

@ -8,7 +8,7 @@ read_when:
Date: 2025-12-06 · Status: draft · Owner: steipete Date: 2025-12-06 · Status: draft · Owner: steipete
## Goal ## Goal
Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement app (instead of a launchd agent) while keeping all TCC-sensitive work inside the Swift app/XPC and wiring the existing “Clawdis Active” toggle to start/stop the child. Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement app (instead of a launchd agent) while keeping all TCC-sensitive work inside the Swift app/broker layer and wiring the existing “Clawdis Active” toggle to start/stop the child.
## When to prefer the child-process mode ## When to prefer the child-process mode
- You want gateway lifetime strictly coupled to the menu-bar app (dies when the app quits) and controlled by the “Clawdis Active” toggle without touching launchd. - You want gateway lifetime strictly coupled to the menu-bar app (dies when the app quits) and controlled by the “Clawdis Active” toggle without touching launchd.
@ -18,12 +18,13 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
## Tradeoffs vs. launchd ## Tradeoffs vs. launchd
- **Pros:** tighter coupling to UI state; simpler surface (no plist install/bootout); easier to stream stdout/stderr; fewer moving parts for beta users. - **Pros:** tighter coupling to UI state; simpler surface (no plist install/bootout); easier to stream stdout/stderr; fewer moving parts for beta users.
- **Cons:** no built-in KeepAlive/login auto-start; app crash kills gateway; you must build your own restart/backoff; Activity Monitor will show both processes under the app; still need correct TCC handling (see below). - **Cons:** no built-in KeepAlive/login auto-start; app crash kills gateway; you must build your own restart/backoff; Activity Monitor will show both processes under the app; still need correct TCC handling (see below).
- **TCC:** behaviorally, child processes often inherit the parent app's “responsible process” for TCC, but this is *not a contract*. Continue to route all protected actions through the Swift app/XPC so prompts stay tied to the signed app bundle. - **TCC:** behaviorally, child processes often inherit the parent app's “responsible process” for TCC, but this is *not a contract*. Continue to route all protected actions through the Swift app/broker so prompts stay tied to the signed app bundle.
## TCC guardrails (must keep) ## TCC guardrails (must keep)
- Screen Recording, Accessibility, mic, and speech prompts must originate from the Swift app/XPC. The Node child should never call these APIs directly; use the existing XPC/CLI broker (`clawdis-mac`) for: - Screen Recording, Accessibility, mic, and speech prompts must originate from the signed Swift app/broker. The Node child should never call these APIs directly; use the CLI broker (`clawdis-mac`) for:
- `ensure-permissions` - `ensure-permissions`
- `ui screenshot` / ScreenCaptureKit work - `ui screenshot` (via PeekabooBridge host)
- other `ui …` automation (see/click/type/scroll/wait) when implemented
- mic/speech permission checks - mic/speech permission checks
- notifications - notifications
- shell runs that need `needs-screen-recording` - shell runs that need `needs-screen-recording`
@ -48,7 +49,7 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
## Packaging and signing ## Packaging and signing
- Bundle the gateway payload (dist + production node_modules) under `Contents/Resources/Gateway/`; rely on host Node ≥22 instead of embedding a runtime. - Bundle the gateway payload (dist + production node_modules) under `Contents/Resources/Gateway/`; rely on host Node ≥22 instead of embedding a runtime.
- Codesign native addons and dylibs inside the bundle; no nested runtime binary to sign now. - Codesign native addons and dylibs inside the bundle; no nested runtime binary to sign now.
- Host runtime should not call TCC APIs directly; keep privileged work inside the app/XPC. - Host runtime should not call TCC APIs directly; keep privileged work inside the app/broker.
## Logging and observability ## Logging and observability
- Stream child stdout/stderr to `/tmp/clawdis-gateway.log`; surface the last N lines in the Debug tab. - Stream child stdout/stderr to `/tmp/clawdis-gateway.log`; surface the last N lines in the Debug tab.
@ -58,14 +59,14 @@ Run the Node-based Clawdis/clawdis gateway as a direct child of the LSUIElement
## Failure/edge cases ## Failure/edge cases
- App crash/quit kills the gateway. Decide if that is acceptable for the deployment tier; otherwise, stick with launchd for production and keep child-process for dev/experiments. - App crash/quit kills the gateway. Decide if that is acceptable for the deployment tier; otherwise, stick with launchd for production and keep child-process for dev/experiments.
- If the gateway exits repeatedly, back off (e.g., 1s/2s/5s/10s) and give up after N attempts with a menu warning. - If the gateway exits repeatedly, back off (e.g., 1s/2s/5s/10s) and give up after N attempts with a menu warning.
- Respect the existing pause semantics: when paused, the XPC should return `ok=false, "clawdis paused"`; the gateway should avoid calling privileged routes while paused. - Respect the existing pause semantics: when paused, the broker should return `ok=false, "clawdis paused"`; the gateway should avoid calling privileged routes while paused.
## Open questions / follow-ups ## Open questions / follow-ups
- Do we need dual-mode (launchd for prod, child for dev)? If yes, gate via a setting or build flag. - Do we need dual-mode (launchd for prod, child for dev)? If yes, gate via a setting or build flag.
- Embedding a runtime is off the table for now; we rely on host Node for size/simplicity. Revisit only if host PATH drift becomes painful. - Embedding a runtime is off the table for now; we rely on host Node for size/simplicity. Revisit only if host PATH drift becomes painful.
- Do we want a tiny signed helper for rare TCC actions that cannot be brokered via XPC? - Do we want a tiny signed helper for rare TCC actions that cannot be brokered via the Swift app/broker?
## Decision snapshot (current recommendation) ## Decision snapshot (current recommendation)
- Keep all TCC surfaces in the Swift app/XPC. - Keep all TCC surfaces in the Swift app/broker (control socket + PeekabooBridgeHost).
- Implement `GatewayProcessManager` with Swift Subprocess to start/stop the gateway on the “Clawdis Active” toggle. - Implement `GatewayProcessManager` with Swift Subprocess to start/stop the gateway on the “Clawdis Active” toggle.
- Maintain the launchd path as a fallback for uptime/login persistence until child-mode proves stable. - Maintain the launchd path as a fallback for uptime/login persistence until child-mode proves stable.

View File

@ -22,5 +22,5 @@ Shapes & sizes
- Scurry uses leg wiggle up to ~1.0 with a small horizontal jiggle; it's additive to any existing idle wiggle. - Scurry uses leg wiggle up to ~1.0 with a small horizontal jiggle; it's additive to any existing idle wiggle.
Behavioral notes Behavioral notes
- No external CLI/XPC toggle for ears/working; keep it internal to the app's own signals to avoid accidental flapping. - No external CLI/broker toggle for ears/working; keep it internal to the app's own signals to avoid accidental flapping.
- Keep TTLs short (<10s) so the icon returns to baseline quickly if a job hangs. - Keep TTLs short (<10s) so the icon returns to baseline quickly if a job hangs.

View File

@ -1,44 +1,80 @@
--- ---
summary: "Plan for integrating Peekaboo automation + visualizer into Clawdis macOS app (via clawdis-mac)" summary: "Plan for integrating Peekaboo automation into Clawdis via PeekabooBridge (socket-based TCC broker)"
read_when: read_when:
- Adding UI automation commands - Adding UI automation commands
- Integrating Peekaboo as a submodule - Integrating Peekaboo as a submodule
- Changing clawdis-mac IPC/output formats - Changing clawdis-mac IPC/output formats
--- ---
# Peekaboo in Clawdis (macOS UI automation + visualizer) # Peekaboo Bridge in Clawdis (macOS UI automation broker)
## Goal ## TL;DR
Reuse Peekaboos mac automation “core” inside **Clawdis.app** so we piggyback on Clawdis existing TCC grants (Screen Recording, Accessibility, etc.). The CLI (`clawdis-mac`) stays a thin synchronous trigger surface for **single actions** (no batches), returning errors cleanly. - **Peekaboo removed its XPC helper** and now exposes privileged automation via a **UNIX domain socket bridge** (`PeekabooBridge` / `PeekabooBridgeHost`, socket name `bridge.sock`).
- Clawdis integrates by **hosting the same bridge** inside **Clawdis.app** (optional, user-toggleable), and by making `clawdis-mac ui …` act as a **bridge client**.
- For **visualizations**, we keep them in **Peekaboo.app** (best UX); Clawdis stays a thin broker host. No visualizer toggle in Clawdis.
Non-goals: Non-goals:
- No AI/agent runtime parts from Peekaboo (no Tachikoma/MCP/Commander entrypoints). - No auto-launching Peekaboo.app.
- No auto-onboarding or System Settings deep-linking from the automation layer (Clawdis onboarding already handles that). - No onboarding deep links from the automation endpoint (Clawdis onboarding already handles permissions).
- No AI provider/agent runtime dependencies in Clawdis (avoid pulling Tachikoma/MCP into the Clawdis app/CLI).
## Where code lives ## Big refactor (Dec 2025): XPC → Bridge
- **Clawdis.app (macOS)**: owns all automation + visualization + TCC prompts. Peekaboo's privileged execution moved from “CLI → XPC helper” to “CLI → socket bridge host”. For Clawdis this is a win:
- **`clawdis-mac` CLI**: sends one request, waits, prints result, exits non-zero on failure. - It matches the existing “local socket + codesign checks” approach.
- **Gateway/Node/TS**: shells out to `clawdis-mac` when it needs TCC-backed actions. - It lets us piggyback on **either** Peekaboo.app's permissions **or** Clawdis.app's permissions (whichever is running).
- It avoids “two apps with two TCC bubbles” unless needed.
Transport: existing UNIX domain socket (`controlSocketPath`) already used by `clawdis-mac`. Reference (Peekaboo submodule): `docs/bridge-host.md`.
## Dependencies (submodule strategy) ## Architecture
Integrate Peekaboo via git submodule (nested submodules OK). ### Processes
- **Bridge hosts** (provide TCC-backed automation):
- **Peekaboo.app** (preferred; also provides visualizations + controls)
- **Clawdis.app** (secondary; “thin host” only)
- **Bridge clients** (trigger single actions):
- `clawdis-mac ui …`
- Node/Gateway shells out to `clawdis-mac`
Consume only: ### Host discovery (client-side)
- `PeekabooAutomationKit` (AX automation, element detection, capture helpers; no Tachikoma/MCP). Order is deliberate:
- `AXorcist` (input driving / AX helpers). 1. Peekaboo.app host (full UX)
- `PeekabooVisualizer` (overlay visualizations). 2. Clawdis.app host (piggyback on Clawdis permissions)
Important nuance: Socket paths (convention; exact paths must match Peekaboo):
- `PeekabooAutomationKit` is a standalone SwiftPM package and does **not** require Tachikoma/MCP/Commander. - Peekaboo: `~/Library/Application Support/Peekaboo/bridge.sock`
- `PeekabooVisualizer` ships as a product inside `PeekabooCore/Package.swift`. That package declares other dependencies (including a path dependency to Tachikoma). SwiftPM will still need those paths to exist during dependency resolution even if we dont build those targets. - Clawdis: `~/Library/Application Support/clawdis/bridge.sock`
- If this becomes annoying for Clawdis, the follow-up is to extract `PeekabooVisualizer` into its own standalone Swift package that depends only on `PeekabooFoundation`/`PeekabooProtocols`/`PeekabooExternalDependencies`.
No auto-launch: if a host isn't reachable, the command fails with a clear error (start Peekaboo.app or Clawdis.app).
Override (debugging): set `PEEKABOO_BRIDGE_SOCKET=/path/to/bridge.sock`.
### Protocol shape
- **Single request per connection**: connect → write one JSON request → half-close → read one JSON response → close.
- **Timeout**: 10 seconds end-to-end per action (client enforced; host should also enforce per-operation).
- **Errors**: human-readable string by default; structured envelope in `--json`.
## Dependency strategy (submodule)
Integrate Peekaboo via git submodule (nested submodules are OK).
Path in Clawdis repo:
- `./Peekaboo` (Swabble-style; keep stable so SwiftPM path deps don't churn).
What Clawdis should use:
- **Client side**: `PeekabooBridge` (socket client + protocol models).
- **Host side (Clawdis.app)**: `PeekabooBridgeHost` + the minimal Peekaboo services needed to implement operations.
What Clawdis should *not* embed:
- **Visualizer UI**: keep it in Peekaboo.app for now (toggle + controls live there).
- **XPC**: don't reintroduce helper targets; use the bridge.
## IPC / CLI surface ## IPC / CLI surface
### Namespacing ### Namespacing
Add new automation commands behind a `ui` prefix: Add new automation commands behind a `ui` prefix:
- `clawdis-mac ui …` for UI automation + visualization-related actions. - `clawdis-mac ui …` for UI automation + visualization-related actions.
- Keep existing top-level commands (`notify`, `run`, `canvas …`, etc.) for compatibility, but do a clean cutover for screenshots: remove the legacy top-level `screenshot` command and ship only `clawdis-mac ui screenshot`. - Keep existing top-level commands (`notify`, `run`, `canvas …`, etc.) for compatibility.
Screenshot cutover:
- Remove legacy screenshot endpoints/commands.
- Ship only `clawdis-mac ui screenshot` (no aliases).
### Output format ### Output format
Change `clawdis-mac` to default to human text output: Change `clawdis-mac` to default to human text output:
@ -50,14 +86,14 @@ This applies globally, not only `ui` commands.
Note (current state as of 2025-12-13): `clawdis-mac` prints text by default; use `--json` for structured output. Note (current state as of 2025-12-13): `clawdis-mac` prints text by default; use `--json` for structured output.
### Timeouts ### Timeouts
Default timeout for UI actions: **10 seconds** end-to-end (CLI already defaults to 10s). Default timeout for UI actions: **10 seconds** end-to-end.
- CLI: keep the fail-fast default at 10s (unless a command explicitly requests longer).
- Server: only has a ~5s read/decode timeout today; UI operations must also enforce their own per-action timeout so “wait for element” can fail deterministically.
## Coordinate model (multi-display) ## Coordinate model (multi-display)
Requirement: coordinates are **per screen**, not global. Requirement: coordinates are **per screen**, not global.
Proposed API shape: Standardize for the CLI (agent-friendly): **top-left origin per screen**.
Proposed request shape:
- Requests accept `screenIndex` + `{x, y}` in that screen's local coordinate space. - Requests accept `screenIndex` + `{x, y}` in that screen's local coordinate space.
- Clawdis.app converts to global CG coordinates using `NSScreen.screens[screenIndex].frame.origin`. - Clawdis.app converts to global CG coordinates using `NSScreen.screens[screenIndex].frame.origin`.
- Responses should echo both: - Responses should echo both:
@ -68,53 +104,48 @@ Proposed API shape:
Ordering: use `NSScreen.screens` ordering consistently (documented in the CLI help + JSON schema). Ordering: use `NSScreen.screens` ordering consistently (documented in the CLI help + JSON schema).
## Targeting (per app/window) ## Targeting (per app/window)
Expose window/app targeting in the IPC surface (based on Peekaboos existing `WindowTarget` model): Expose window/app targeting in the UI surface (align with Peekaboo targeting):
- frontmost - frontmost
- by app name / bundle id - by app name / bundle id
- by window title substring - by window title substring
- by (app, index) - by (app, index)
- by window id
Current `clawdis-mac ui …` support:
- `--bundle-id <id>` for app targeting
- `--window-index <n>` (0-based) for disambiguating within an app when capturing (see/screenshot)
All “see/click/type/scroll/wait” requests should accept a target (default: frontmost). All “see/click/type/scroll/wait” requests should accept a target (default: frontmost).
## “See” + click packs (Playwright-style) ## “See” + click packs (Playwright-style)
Peekaboo already has the core ingredients: Behavior stays aligned with Peekaboo:
- element detection yielding stable IDs (e.g., `B1`, `T3`) - `ui see` returns element IDs (e.g. `B1`, `T3`) with bounds/labels.
- bounds + labels/values - Follow-up actions reference those IDs without re-scanning.
- snapshot IDs to allow follow-up actions without re-scanning
Clawdiss `ui see` should: `clawdis-mac ui see` should:
- capture (optionally targeted) window/screen - capture (optionally targeted) window/screen
- return a **snapshot id** - return a screenshot **file path** (default: temp directory)
- return a list of elements with `{id, type, label/value?, bounds}` - return a list of elements (text or JSON)
- optionally return screenshot path/bytes (pref: path)
Snapshot lifecycle requirement: Snapshot lifecycle requirement:
- Clawdis runs long-lived in memory, so “snapshot state” should be **in-memory by default** (no disk-backed JSON concept). - Host apps are long-lived, so snapshot state should be **in-memory by default**.
- Peekaboo already supports this via an `InMemorySnapshotManager` (keep disk-backed snapshots as an optional debug mode later). - Snapshot scoping: “implicit snapshot” is **per target bundle id** (reuse last snapshot for that app when snapshot id is omitted).
Practical flow (agent-friendly):
- `clawdis-mac ui frontmost` returns the focused app (bundle id) + focused window (title/id) so follow-up calls can pass `--bundle-id …`.
- `clawdis-mac ui see --bundle-id X` updates the implicit snapshot for `X`.
- `clawdis-mac ui click --bundle-id X --on B1` reuses the most recent snapshot for `X` when `--snapshot-id` is omitted.
## Visualizer integration ## Visualizer integration
Visualizer must be user-toggleable via a Clawdis setting. Keep visualizations in **Peekaboo.app** for now.
- Clawdis hosts the bridge, but does not render overlays.
Implementation sketch: - Any “visualizer enabled/disabled” setting is controlled in Peekaboo.app.
- Add a Clawdis UserDefaults-backed setting (e.g. `clawdis.ui.visualizerEnabled`).
- Implement Peekaboos `VisualizerSettingsProviding` in Clawdis (`visualizerEnabled`, animation speed, and per-effect toggles).
- Create a Clawdis-specific `AutomationFeedbackClient` that forwards PeekabooAutomationKit feedback events into a shared `VisualizerCoordinator`.
Current state:
- `PeekabooVisualizer` already includes the visualization implementation (SwiftUI overlay views + coordinator).
The visualizer is intentionally display-only (no clickable overlays needed).
## Screenshots (legacy → Peekaboo takeover) ## Screenshots (legacy → Peekaboo takeover)
Clawdis uses `clawdis-mac ui screenshot` and returns a file path (default location: temp directory) instead of raw image bytes. Clawdis uses `clawdis-mac ui screenshot` and returns a file path (default location: temp directory) instead of raw image bytes.
Migration plan: Migration plan:
- Replace capture implementation with PeekabooAutomationKits capture service so we share: - Bridge host performs capture and returns a temp file path.
- per-screen mapping - No legacy aliases; make the old screenshot surface disappear cleanly.
- window/app targeting
- visual feedback (flash / watch HUD) when enabled
- Keep writing images to a file path on the app side and returning the path (text-friendly), with `--json` providing the structured metadata.
- No aliases: remove the old `Request.screenshot` and introduce a new `Request.uiScreenshot` (or similar) so the new behavior is explicit and theres no “legacy mode” to maintain.
## Permissions behavior ## Permissions behavior
If required permissions are missing: If required permissions are missing:
@ -122,17 +153,32 @@ If required permissions are missing:
- do not try to open System Settings from the automation endpoint - do not try to open System Settings from the automation endpoint
## Security (socket auth) ## Security (socket auth)
Clawdis socket is protected by: Both hosts must enforce:
- filesystem perms on the socket path (owner read/write only) - filesystem perms on the socket path (owner read/write only)
- server-side caller check: - server-side caller validation:
- requires the caller's code signature TeamID to be `Y5PE65HELJ` - require the caller's code signature TeamID to be `Y5PE65HELJ`
- in `DEBUG` builds only, an explicit escape hatch allows same-UID clients when `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` is set (development convenience) - optional bundle-id allowlist for tighter scoping
This ensures “any local process” cant drive the privileged surface just because it runs under the same macOS user. Debug-only escape hatch (development convenience):
- “allow same-UID callers” means: *skip codesign checks for clients running under the same Unix user*.
- This must be **opt-in**, **DEBUG-only**, and guarded by an env var (Peekaboo uses `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1`).
## Current `clawdis-mac ui` commands (Dec 2025)
All commands default to text output. Add `--json` right after `clawdis-mac` for a structured envelope.
- `clawdis-mac ui permissions status`
- `clawdis-mac ui frontmost`
- `clawdis-mac ui apps`
- `clawdis-mac ui windows [--bundle-id <id>]`
- `clawdis-mac ui screenshot [--screen-index <n>] [--bundle-id <id>] [--window-index <n>] [--watch] [--scale native|1x]`
- `clawdis-mac ui see [--bundle-id <id>] [--window-index <n>] [--snapshot-id <id>]`
- `clawdis-mac ui click --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--double|--right]`
- `clawdis-mac ui type --text <value> [--into <elementId>] [--bundle-id <id>] [--snapshot-id <id>] [--clear] [--delay-ms <n>]`
- `clawdis-mac ui wait --on <elementId> [--bundle-id <id>] [--snapshot-id <id>] [--timeout <sec>]`
## Next integration steps (after this doc) ## Next integration steps (after this doc)
1. Add Peekaboo as a git submodule (and required nested submodules). 1. Add Peekaboo as a git submodule (nested submodules OK).
2. Wire SwiftPM deps in `apps/macos/Package.swift` to import `PeekabooAutomationKit` + `PeekabooVisualizer`. 2. Add a small `clawdis-mac ui …` surface that speaks PeekabooBridge (text by default, `--json` for structured).
3. Extend `ClawdisIPC.Request` with `ui.*` commands (`see/click/type/scroll/wait/screenshot/windows/screens`). 3. Host `PeekabooBridgeHost` inside Clawdis.app behind a single setting (“Enable Peekaboo Bridge”, default on).
4. Implement handlers in Clawdis.app and route through PeekabooAutomationKit services. 4. Implement the minimum operation set needed for agents (see/click/type/scroll/wait/screenshot, plus list apps/windows/screens).
5. Update `clawdis-mac` output defaults (text + `--json`), and adjust any internal call sites that relied on JSON-by-default. 5. Keep all protocol decisions aligned with Peekaboo (coordinate system, element IDs, snapshot scoping, error envelopes).

View File

@ -1,19 +1,29 @@
--- ---
summary: "macOS XPC architecture for Clawdis app, CLI helper, and gateway bridge" summary: "macOS IPC architecture for Clawdis app, CLI helper, and gateway bridge (control socket + XPC + PeekabooBridge)"
read_when: read_when:
- Editing XPC contracts or menu bar app IPC - Editing IPC contracts or menu bar app IPC
--- ---
# Clawdis macOS XPC architecture (Dec 2025) # Clawdis macOS IPC architecture (Dec 2025)
Note: the current implementation primarily uses a local UNIX-domain control socket (`controlSocketPath`) between `clawdis-mac` and the app. This doc describes the intended long-term XPC/Mach-service architecture and the security constraints; update it as the implementation converges. Note: the current implementation primarily uses a local UNIX-domain control socket (`controlSocketPath`) between `clawdis-mac` and the app. This doc captures the intended long-term Mach/XPC direction and the security constraints, and also documents the separate PeekabooBridge socket used for UI automation.
## Goals ## Goals
- Single GUI app instance that owns all TCC-facing work (notifications, screen recording, mic, speech, AppleScript). - Single GUI app instance that owns all TCC-facing work (notifications, screen recording, mic, speech, AppleScript).
- A small surface for automation: the `clawdis-mac` CLI and the Node gateway talk to the app via a local XPC channel. - A small surface for automation: the `clawdis-mac` CLI and the Node gateway talk to the app via local IPC.
- Predictable permissions: always the same signed bundle ID, launched by launchd, so TCC grants stick. - Predictable permissions: always the same signed bundle ID, launched by launchd, so TCC grants stick.
- Limit who can connect: only signed clients from our team (with an explicit DEBUG-only escape hatch for development). - Limit who can connect: only signed clients from our team (with an explicit DEBUG-only escape hatch for development).
## How it works ## How it works
### Control socket (current)
- `clawdis-mac` talks to the app via a local UNIX socket (`controlSocketPath`) for app-specific requests (notify, status, ensure-permissions, run, etc.).
### PeekabooBridge (UI automation)
- UI automation uses a separate UNIX socket named `bridge.sock` and the PeekabooBridge JSON protocol.
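- Host preference order (client-side): Peekaboo.app → Clawdis.app. There is no auto-launch; if neither host is reachable, the command fails with a clear error (see `docs/mac/peekaboo.md`).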
- Security: bridge hosts require TeamID `Y5PE65HELJ`; DEBUG-only same-UID escape hatch is guarded by `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (Peekaboo convention).
- See: `docs/mac/peekaboo.md` for the Clawdis plan and naming.
### Mach/XPC (future direction)
- The app registers a Mach service named `com.steipete.clawdis.xpc` via a user LaunchAgent at `~/Library/LaunchAgents/com.steipete.clawdis.plist`. - The app registers a Mach service named `com.steipete.clawdis.xpc` via a user LaunchAgent at `~/Library/LaunchAgents/com.steipete.clawdis.plist`.
- The launch agent runs `dist/Clawdis.app/Contents/MacOS/Clawdis` with `RunAtLoad=true`, `KeepAlive=false`, and a `MachServices` entry for the XPC name. - The launch agent runs `dist/Clawdis.app/Contents/MacOS/Clawdis` with `RunAtLoad=true`, `KeepAlive=false`, and a `MachServices` entry for the XPC name.
- The app hosts the XPC listener (`NSXPCListener(machServiceName:)`) and exports `ClawdisXPCService`. - The app hosts the XPC listener (`NSXPCListener(machServiceName:)`) and exports `ClawdisXPCService`.
@ -35,6 +45,8 @@ Note: the current implementation primarily uses a local UNIX-domain control sock
- RunAtLoad without KeepAlive means the app starts once; if it crashes it stays down (no unwanted respawn), but CLI calls will re-spawn via launchd. - RunAtLoad without KeepAlive means the app starts once; if it crashes it stays down (no unwanted respawn), but CLI calls will re-spawn via launchd.
## Hardening notes ## Hardening notes
- Prefer requiring a TeamID match for all privileged surfaces. The codebase currently has a `DEBUG`-only same-UID escape hatch gated behind `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` for local development. - Prefer requiring a TeamID match for all privileged surfaces.
- Clawdis control socket: `CLAWDIS_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (DEBUG-only) may allow same-UID callers for local development.
- PeekabooBridge: `PEEKABOO_ALLOW_UNSIGNED_SOCKET_CLIENTS=1` (DEBUG-only) may allow same-UID callers for local development.
- All communication remains local-only; no network sockets are exposed. - All communication remains local-only; no network sockets are exposed.
- TCC prompts originate only from the GUI app bundle; run scripts/package-mac-app.sh so the signed bundle ID stays stable. - TCC prompts originate only from the GUI app bundle; run scripts/package-mac-app.sh so the signed bundle ID stays stable.

View File

@ -2,18 +2,49 @@ import { spawn } from "node:child_process";
import net from "node:net"; import net from "node:net";
import { afterEach, describe, expect, it } from "vitest"; import { afterEach, describe, expect, it } from "vitest";
const waitForText = async ( const waitForPortOpen = async (
chunks: string[], proc: ReturnType<typeof spawn>,
pattern: RegExp, chunksOut: string[],
chunksErr: string[],
port: number,
timeoutMs: number, timeoutMs: number,
) => { ) => {
const startedAt = Date.now(); const startedAt = Date.now();
while (Date.now() - startedAt < timeoutMs) { while (Date.now() - startedAt < timeoutMs) {
const joined = chunks.join(""); if (proc.exitCode !== null) {
if (pattern.test(joined)) return; const stdout = chunksOut.join("");
const stderr = chunksErr.join("");
throw new Error(
`gateway exited before listening (code=${String(proc.exitCode)} signal=${String(proc.signalCode)})\n` +
`--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
);
}
try {
await new Promise<void>((resolve, reject) => {
const socket = net.connect({ host: "127.0.0.1", port });
socket.once("connect", () => {
socket.destroy();
resolve();
});
socket.once("error", (err) => {
socket.destroy();
reject(err);
});
});
return;
} catch {
// keep polling
}
await new Promise((resolve) => setTimeout(resolve, 10)); await new Promise((resolve) => setTimeout(resolve, 10));
} }
throw new Error(`timeout waiting for ${String(pattern)}`); const stdout = chunksOut.join("");
const stderr = chunksErr.join("");
throw new Error(
`timeout waiting for gateway to listen on port ${port}\n` +
`--- stdout ---\n${stdout}\n--- stderr ---\n${stderr}`,
);
}; };
const getFreePort = async () => { const getFreePort = async () => {
@ -67,9 +98,11 @@ describe("gateway SIGTERM", () => {
child.stdout?.on("data", (d) => out.push(String(d))); child.stdout?.on("data", (d) => out.push(String(d)));
child.stderr?.on("data", (d) => err.push(String(d))); child.stderr?.on("data", (d) => err.push(String(d)));
await waitForText( await waitForPortOpen(
proc,
out, out,
new RegExp(`gateway listening on ws://127\\.0\\.0\\.1:${port}\\b`), err,
port,
20_000, 20_000,
); );

View File

@ -1015,7 +1015,7 @@ describe("web auto-reply", () => {
it( it(
"compresses common formats to jpeg under the cap", "compresses common formats to jpeg under the cap",
{ timeout: 15_000 }, { timeout: 45_000 },
async () => { async () => {
const formats = [ const formats = [
{ {