Docs: voice overlay plan and fix web mocks

main
Peter Steinberger 2025-12-09 03:25:55 +01:00
parent 3a42979e53
commit 99a3102134
5 changed files with 117 additions and 11 deletions

View File

@ -28,19 +28,71 @@ enum VoiceWakeChime: Codable, Equatable, Sendable {
struct VoiceWakeChimeCatalog {
/// Options shown in the picker.
static let systemOptions: [String] = [
// NOTE(review): `systemOptions` is declared a second time directly after this literal
// (closure-initialized form). Only one declaration can remain for the type to compile;
// presumably this fixed list is the superseded version — confirm before keeping it.
"Glass", // default
"Ping",
"Pop",
"Frog",
"Submarine",
"Funk",
"Tink",
]
/// Sound names offered in the chime picker.
///
/// The list is the union of every name found on disk (`discoveredSoundMap`) and a
/// built-in fallback list, so the fallback names are always present even when
/// discovery finds nothing. "Glass" is always the first entry; the remaining names
/// follow in case-insensitive alphabetical order with duplicates removed.
static let systemOptions: [String] = {
    let discovered = Self.discoveredSoundMap.keys
    let fallback: [String] = [
        "Glass", // default
        "Ping",
        "Pop",
        "Frog",
        "Submarine",
        "Funk",
        "Tink",
        "Basso",
        "Blow",
        "Bottle",
        "Hero",
        "Morse",
        "Purr",
        "Sosumi",
        "Mail Sent",
    ]
    // Keep Glass first, then present the rest alphabetically without duplicates.
    var names = Set(discovered).union(fallback)
    names.remove("Glass")
    let sorted = names.sorted { lhs, rhs in
        switch lhs.localizedCaseInsensitiveCompare(rhs) {
        case .orderedAscending:
            return true
        case .orderedDescending:
            return false
        case .orderedSame:
            // Names that differ only by case compare as equal here. Break the tie
            // explicitly so the order does not depend on Set iteration order,
            // which is not stable across launches.
            return lhs < rhs
        }
    }
    return ["Glass"] + sorted
}()
/// Label shown to the user for a stored sound name.
///
/// Currently an identity mapping: the raw name is displayed as-is.
/// - Parameter raw: The sound name as stored in the catalog.
/// - Returns: `raw`, unchanged.
static func displayName(for raw: String) -> String {
    raw
}
/// Looks up the file backing a discovered sound.
///
/// - Parameter name: Sound name, i.e. the file name without its extension.
/// - Returns: The URL recorded in `discoveredSoundMap` for `name`, or `nil` when
///   no file with that name was discovered.
static func url(for name: String) -> URL? {
    Self.discoveredSoundMap[name]
}
/// File extensions accepted during sound discovery.
///
/// Entries are lowercase and without the leading dot; discovery lowercases each
/// file's extension before checking membership.
private static let allowedExtensions: Set<String> = [
    "aif",
    "aiff",
    "caf",
    "m4a",
    "mp3",
    "wav",
]
/// Directories scanned for sound files, listed in priority order.
///
/// Discovery walks this array front to back and keeps the first file found for
/// each name, so earlier entries win over later ones.
private static let searchRoots: [URL] = {
    let userSounds = FileManager.default.homeDirectoryForCurrentUser
        .appendingPathComponent("Library/Sounds")
    let fixedPaths = [
        "/Library/Sounds",
        "/System/Applications/Mail.app/Contents/Resources", // Mail swoosh
        "/System/Library/Sounds",
    ]
    return [userSounds] + fixedPaths.map { URL(fileURLWithPath: $0) }
}()
/// Maps each discovered sound name (file name without extension) to its file URL.
///
/// Every directory in `searchRoots` is listed non-recursively, skipping hidden
/// files; directories that cannot be read are ignored. Only files whose lowercased
/// extension is in `allowedExtensions` are considered. When the same name occurs
/// more than once, the first match wins: earlier roots beat later ones, and within
/// a single root the file whose full name sorts first is chosen.
private static let discoveredSoundMap: [String: URL] = {
    let allowed = self.allowedExtensions
    var map: [String: URL] = [:]
    for root in self.searchRoots {
        guard let contents = try? FileManager.default.contentsOfDirectory(
            at: root,
            includingPropertiesForKeys: nil,
            options: [.skipsHiddenFiles])
        else { continue }
        // The listing order is undefined, so sort by file name to make the choice
        // between same-named files (e.g. "Foo.aiff" vs "Foo.wav") deterministic.
        let candidates = contents
            .filter { allowed.contains($0.pathExtension.lowercased()) }
            .sorted { $0.lastPathComponent < $1.lastPathComponent }
        for url in candidates {
            let name = url.deletingPathExtension().lastPathComponent
            // Preserve the first match in priority order.
            if map[name] == nil {
                map[name] = url
            }
        }
    }
    return map
}()
}
@MainActor
@ -62,7 +114,13 @@ enum VoiceWakeChimePlayer {
case .none:
return nil
case let .system(name):
return NSSound(named: NSSound.Name(name))
if let named = NSSound(named: NSSound.Name(name)) {
return named
}
if let url = VoiceWakeChimeCatalog.url(for: name) {
return NSSound(contentsOf: url, byReference: false)
}
return nil
case let .custom(_, bookmark):
var stale = false

43
docs/mac/voice-overlay.md Normal file
View File

@ -0,0 +1,43 @@
## Voice Overlay Lifecycle (macOS)
Audience: macOS app contributors. Goal: keep the voice overlay predictable when wake-word and push-to-talk overlap.
### Current intent
- If the overlay is already visible from wake-word and the user presses the hotkey, the hotkey session *adopts* the existing text instead of resetting it. The overlay stays up while the hotkey is held. When the user releases: send if there is trimmed text, otherwise dismiss.
- Wake-word alone still auto-sends on silence; push-to-talk sends immediately on release.
### Proposed architecture (to implement next)
1. **VoiceSessionCoordinator (actor)**
- Owns exactly one `VoiceSession` at a time.
- API (token-based): `beginWakeCapture`, `beginPushToTalk`, `updatePartial`, `endCapture`, `cancel`, `applyCooldown`.
- Drops callbacks that carry stale tokens (prevents old recognizers from reopening the overlay).
2. **VoiceSession (model)**
- Fields: `token`, `source` (wakeWord|pushToTalk), committed/volatile text, chime flags, timers (auto-send, idle), `overlayMode` (display|editing|sending), cooldown deadline.
3. **Overlay binding**
- `VoiceSessionPublisher` (`ObservableObject`) mirrors the active session into SwiftUI.
- `VoiceWakeOverlayView` renders only via the publisher; it never mutates global singletons directly.
- Overlay user actions (`sendNow`, `dismiss`, `edit`) call back into the coordinator with the session token.
4. **Unified send path**
- On `endCapture`: if trimmed text is empty → dismiss; else `performSend(session:)` (plays send chime once, forwards, dismisses).
- Push-to-talk: no delay; wake-word: optional delay for auto-send.
- Apply a short cooldown to the wake runtime after push-to-talk finishes so wake-word doesn't immediately retrigger.
5. **Logging**
- Coordinator emits `.info` logs in subsystem `com.steipete.clawdis`, categories `voicewake.overlay` and `voicewake.chime`.
- Key events: `session_started`, `adopted_by_push_to_talk`, `partial`, `finalized`, `send`, `dismiss`, `cancel`, `cooldown`.
### Debugging checklist
- Stream logs while reproducing a sticky overlay:
```bash
sudo log stream --predicate 'subsystem == "com.steipete.clawdis" AND category CONTAINS "voicewake"' --level info --style compact
```
- Verify only one active session token; stale callbacks should be dropped by the coordinator.
- Ensure push-to-talk release always calls `endCapture` with the active token; if text is empty, expect `dismiss` without chime or send.
### Migration steps (suggested)
1. Add `VoiceSessionCoordinator`, `VoiceSession`, and `VoiceSessionPublisher`.
2. Refactor `VoiceWakeRuntime` to create/update/end sessions instead of touching `VoiceWakeOverlayController` directly.
3. Refactor `VoicePushToTalk` to adopt existing sessions and call `endCapture` on release; apply runtime cooldown.
4. Wire `VoiceWakeOverlayController` to the publisher; remove direct calls from runtime/PTT.
5. Add integration tests for session adoption, cooldown, and empty-text dismissal.

View File

@ -6,6 +6,7 @@ import { getReplyFromConfig } from "./reply.js";
// Mock implementations for "../web/session.js". They are created inside vi.hoisted so
// the vi.mock factory below can return this same object as the mocked module.
const webMocks = vi.hoisted(() => ({
// Returns a promise that resolves to true.
webAuthExists: vi.fn().mockResolvedValue(true),
// Returns 120_000 (presumably milliseconds, going by the name — confirm against the real module).
getWebAuthAgeMs: vi.fn().mockReturnValue(120_000),
// Returns a fixed object with an `e164` field set to "+1999".
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
}));
// Every import of "../web/session.js" receives `webMocks` instead of the real module.
vi.mock("../web/session.js", () => webMocks);

View File

@ -7,6 +7,7 @@ const mocks = vi.hoisted(() => ({
resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"),
webAuthExists: vi.fn().mockResolvedValue(true),
getWebAuthAgeMs: vi.fn().mockReturnValue(5000),
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
logWebSelfId: vi.fn(),
}));
@ -17,6 +18,7 @@ vi.mock("../config/sessions.js", () => ({
// Replace "../web/session.js" with a module whose listed exports each delegate to the
// matching entry on the hoisted `mocks` object.
vi.mock("../web/session.js", () => ({
webAuthExists: mocks.webAuthExists,
getWebAuthAgeMs: mocks.getWebAuthAgeMs,
readWebSelfId: mocks.readWebSelfId,
logWebSelfId: mocks.logWebSelfId,
}));
vi.mock("../config/config.js", () => ({

View File

@ -179,7 +179,9 @@ export async function startControlChannel(
respond(undefined, false, `unknown method: ${parsed.method}`);
break;
}
logDebug(`control: ${parsed.method} responded in ${Date.now() - started}ms`);
logDebug(
`control: ${parsed.method} responded in ${Date.now() - started}ms`,
);
} catch (err) {
logError(
`control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,