Docs: voice overlay plan and fix web mocks
parent
3a42979e53
commit
99a3102134
|
|
@ -28,19 +28,71 @@ enum VoiceWakeChime: Codable, Equatable, Sendable {
|
|||
|
||||
struct VoiceWakeChimeCatalog {
|
||||
/// Options shown in the picker.
|
||||
static let systemOptions: [String] = [
|
||||
"Glass", // default
|
||||
"Ping",
|
||||
"Pop",
|
||||
"Frog",
|
||||
"Submarine",
|
||||
"Funk",
|
||||
"Tink",
|
||||
]
|
||||
static let systemOptions: [String] = {
|
||||
let discovered = Self.discoveredSoundMap.keys
|
||||
let fallback: [String] = [
|
||||
"Glass", // default
|
||||
"Ping",
|
||||
"Pop",
|
||||
"Frog",
|
||||
"Submarine",
|
||||
"Funk",
|
||||
"Tink",
|
||||
"Basso",
|
||||
"Blow",
|
||||
"Bottle",
|
||||
"Hero",
|
||||
"Morse",
|
||||
"Purr",
|
||||
"Sosumi",
|
||||
"Mail Sent",
|
||||
]
|
||||
|
||||
// Keep Glass first, then present the rest alphabetically without duplicates.
|
||||
var names = Set(discovered).union(fallback)
|
||||
names.remove("Glass")
|
||||
let sorted = names.sorted { $0.localizedCaseInsensitiveCompare($1) == .orderedAscending }
|
||||
return ["Glass"] + sorted
|
||||
}()
|
||||
|
||||
static func displayName(for raw: String) -> String {
|
||||
return raw
|
||||
}
|
||||
|
||||
static func url(for name: String) -> URL? {
|
||||
return self.discoveredSoundMap[name]
|
||||
}
|
||||
|
||||
private static let allowedExtensions: Set<String> = [
|
||||
"aif", "aiff", "caf", "wav", "m4a", "mp3",
|
||||
]
|
||||
|
||||
private static let searchRoots: [URL] = [
|
||||
FileManager.default.homeDirectoryForCurrentUser.appendingPathComponent("Library/Sounds"),
|
||||
URL(fileURLWithPath: "/Library/Sounds"),
|
||||
URL(fileURLWithPath: "/System/Applications/Mail.app/Contents/Resources"), // Mail “swoosh”
|
||||
URL(fileURLWithPath: "/System/Library/Sounds"),
|
||||
]
|
||||
|
||||
private static let discoveredSoundMap: [String: URL] = {
|
||||
var map: [String: URL] = [:]
|
||||
for root in self.searchRoots {
|
||||
guard let contents = try? FileManager.default.contentsOfDirectory(
|
||||
at: root,
|
||||
includingPropertiesForKeys: nil,
|
||||
options: [.skipsHiddenFiles])
|
||||
else { continue }
|
||||
|
||||
for url in contents where self.allowedExtensions.contains(url.pathExtension.lowercased()) {
|
||||
let name = url.deletingPathExtension().lastPathComponent
|
||||
// Preserve the first match in priority order.
|
||||
if map[name] == nil {
|
||||
map[name] = url
|
||||
}
|
||||
}
|
||||
}
|
||||
return map
|
||||
}()
|
||||
}
|
||||
|
||||
@MainActor
|
||||
|
|
@ -62,7 +114,13 @@ enum VoiceWakeChimePlayer {
|
|||
case .none:
|
||||
return nil
|
||||
case let .system(name):
|
||||
return NSSound(named: NSSound.Name(name))
|
||||
if let named = NSSound(named: NSSound.Name(name)) {
|
||||
return named
|
||||
}
|
||||
if let url = VoiceWakeChimeCatalog.url(for: name) {
|
||||
return NSSound(contentsOf: url, byReference: false)
|
||||
}
|
||||
return nil
|
||||
|
||||
case let .custom(_, bookmark):
|
||||
var stale = false
|
||||
|
|
|
|||
|
|
@ -0,0 +1,43 @@
|
|||
## Voice Overlay Lifecycle (macOS)
|
||||
|
||||
Audience: macOS app contributors. Goal: keep the voice overlay predictable when wake-word and push-to-talk overlap.
|
||||
|
||||
### Current intent
|
||||
- If the overlay is already visible from wake-word and the user presses the hotkey, the hotkey session *adopts* the existing text instead of resetting it. The overlay stays up while the hotkey is held. When the user releases the hotkey, the text is sent if it is non-empty after trimming; otherwise the overlay is dismissed.
|
||||
- Wake-word alone still auto-sends on silence; push-to-talk sends immediately on release.
|
||||
|
||||
### Proposed architecture (to implement next)
|
||||
1. **VoiceSessionCoordinator (actor)**
|
||||
- Owns exactly one `VoiceSession` at a time.
|
||||
- API (token-based): `beginWakeCapture`, `beginPushToTalk`, `updatePartial`, `endCapture`, `cancel`, `applyCooldown`.
|
||||
- Drops callbacks that carry stale tokens (prevents old recognizers from reopening the overlay).
|
||||
2. **VoiceSession (model)**
|
||||
- Fields: `token`, `source` (wakeWord|pushToTalk), committed/volatile text, chime flags, timers (auto-send, idle), `overlayMode` (display|editing|sending), cooldown deadline.
|
||||
3. **Overlay binding**
|
||||
- `VoiceSessionPublisher` (`ObservableObject`) mirrors the active session into SwiftUI.
|
||||
- `VoiceWakeOverlayView` renders only via the publisher; it never mutates global singletons directly.
|
||||
- Overlay user actions (`sendNow`, `dismiss`, `edit`) call back into the coordinator with the session token.
|
||||
4. **Unified send path**
|
||||
- On `endCapture`: if trimmed text is empty → dismiss; else `performSend(session:)` (plays send chime once, forwards, dismisses).
|
||||
- Push-to-talk sends with no delay; wake-word may apply an optional delay before auto-send.
|
||||
- Apply a short cooldown to the wake runtime after push-to-talk finishes so wake-word doesn’t immediately retrigger.
|
||||
5. **Logging**
|
||||
- Coordinator emits `.info` logs in subsystem `com.steipete.clawdis`, categories `voicewake.overlay` and `voicewake.chime`.
|
||||
- Key events: `session_started`, `adopted_by_push_to_talk`, `partial`, `finalized`, `send`, `dismiss`, `cancel`, `cooldown`.
|
||||
|
||||
### Debugging checklist
|
||||
- Stream logs while reproducing a sticky overlay:
|
||||
|
||||
```bash
|
||||
sudo log stream --predicate 'subsystem == "com.steipete.clawdis" AND category CONTAINS "voicewake"' --level info --style compact
|
||||
```
|
||||
- Verify only one active session token; stale callbacks should be dropped by the coordinator.
|
||||
- Ensure push-to-talk release always calls `endCapture` with the active token; if text is empty, expect `dismiss` without chime or send.
|
||||
|
||||
### Migration steps (suggested)
|
||||
1. Add `VoiceSessionCoordinator`, `VoiceSession`, and `VoiceSessionPublisher`.
|
||||
2. Refactor `VoiceWakeRuntime` to create/update/end sessions instead of touching `VoiceWakeOverlayController` directly.
|
||||
3. Refactor `VoicePushToTalk` to adopt existing sessions and call `endCapture` on release; apply runtime cooldown.
|
||||
4. Wire `VoiceWakeOverlayController` to the publisher; remove direct calls from runtime/PTT.
|
||||
5. Add integration tests for session adoption, cooldown, and empty-text dismissal.
|
||||
|
||||
|
|
@ -6,6 +6,7 @@ import { getReplyFromConfig } from "./reply.js";
|
|||
const webMocks = vi.hoisted(() => ({
|
||||
webAuthExists: vi.fn().mockResolvedValue(true),
|
||||
getWebAuthAgeMs: vi.fn().mockReturnValue(120_000),
|
||||
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
|
||||
}));
|
||||
|
||||
vi.mock("../web/session.js", () => webMocks);
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ const mocks = vi.hoisted(() => ({
|
|||
resolveStorePath: vi.fn().mockReturnValue("/tmp/sessions.json"),
|
||||
webAuthExists: vi.fn().mockResolvedValue(true),
|
||||
getWebAuthAgeMs: vi.fn().mockReturnValue(5000),
|
||||
readWebSelfId: vi.fn().mockReturnValue({ e164: "+1999" }),
|
||||
logWebSelfId: vi.fn(),
|
||||
}));
|
||||
|
||||
|
|
@ -17,6 +18,7 @@ vi.mock("../config/sessions.js", () => ({
|
|||
vi.mock("../web/session.js", () => ({
|
||||
webAuthExists: mocks.webAuthExists,
|
||||
getWebAuthAgeMs: mocks.getWebAuthAgeMs,
|
||||
readWebSelfId: mocks.readWebSelfId,
|
||||
logWebSelfId: mocks.logWebSelfId,
|
||||
}));
|
||||
vi.mock("../config/config.js", () => ({
|
||||
|
|
|
|||
|
|
@ -179,7 +179,9 @@ export async function startControlChannel(
|
|||
respond(undefined, false, `unknown method: ${parsed.method}`);
|
||||
break;
|
||||
}
|
||||
logDebug(`control: ${parsed.method} responded in ${Date.now() - started}ms`);
|
||||
logDebug(
|
||||
`control: ${parsed.method} responded in ${Date.now() - started}ms`,
|
||||
);
|
||||
} catch (err) {
|
||||
logError(
|
||||
`control: ${parsed.method} failed in ${Date.now() - started}ms: ${String(err)}`,
|
||||
|
|
|
|||
Loading…
Reference in New Issue