fix(mac): keep voice wake listening

main
Peter Steinberger 2025-12-12 20:13:41 +00:00
parent 08a473fb35
commit 356b6e0483
5 changed files with 25 additions and 12 deletions

View File

@ -151,6 +151,9 @@ actor VoicePushToTalk {
VoiceWakeOverlayController.shared.dismiss() VoiceWakeOverlayController.shared.dismiss()
} }
self.isCapturing = false self.isCapturing = false
// If push-to-talk fails to start after pausing wake-word, ensure we resume listening.
await VoiceWakeRuntime.shared.applyPushToTalkCooldown()
await VoiceWakeRuntime.shared.refresh(state: AppStateStore.shared)
} }
} }

View File

@ -29,12 +29,6 @@ final class VoiceSessionCoordinator: ObservableObject {
attributed: NSAttributedString? = nil, attributed: NSAttributedString? = nil,
forwardEnabled: Bool = false) -> UUID forwardEnabled: Bool = false) -> UUID
{ {
// If a send is in-flight, ignore new sessions to avoid token churn.
if VoiceWakeOverlayController.shared.model.isSending {
self.logger.info("coordinator drop start while sending")
return self.session?.token ?? UUID()
}
let token = UUID() let token = UUID()
self.logger.info("coordinator start token=\(token.uuidString) source=\(source.rawValue) len=\(text.count)") self.logger.info("coordinator start token=\(token.uuidString) source=\(source.rawValue) len=\(text.count)")
let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text) let attributedText = attributed ?? VoiceWakeOverlayController.shared.makeAttributed(from: text)
@ -127,4 +121,13 @@ final class VoiceSessionCoordinator: ObservableObject {
private func clearSession() { private func clearSession() {
self.session = nil self.session = nil
} }
/// Overlay dismiss completion callback (manual X, empty, auto-dismiss after send).
/// Ensures the wake-word recognizer is resumed if Voice Wake is enabled.
///
/// - Parameter token: Token of the overlay session that was just dismissed, or `nil`
///   if the overlay had no active token at dismiss time.
func overlayDidDismiss(token: UUID?) {
// Clear the stored session only when it is the one that was dismissed;
// a `nil` or non-matching token leaves the current session untouched.
if let token, self.session?.token == token {
self.clearSession()
}
// The refresh runs regardless of whether a session was cleared, so every dismiss path
// asks the wake runtime to re-evaluate. It is launched in an unstructured Task because
// this method is synchronous and the runtime call must be awaited.
Task { await VoiceWakeRuntime.shared.refresh(state: AppStateStore.shared) }
}
} }

View File

@ -52,10 +52,6 @@ final class VoiceWakeOverlayController: ObservableObject {
forwardEnabled: Bool = false, forwardEnabled: Bool = false,
isFinal: Bool = false) -> UUID isFinal: Bool = false) -> UUID
{ {
if self.model.isSending {
self.logger.log(level: .info, "overlay drop session_start while sending")
return self.activeToken ?? UUID()
}
let message = """ let message = """
overlay session_start source=\(source.rawValue) \ overlay session_start source=\(source.rawValue) \
len=\(transcript.count) len=\(transcript.count)
@ -218,6 +214,7 @@ final class VoiceWakeOverlayController: ObservableObject {
window.animator().alphaValue = 0 window.animator().alphaValue = 0
} completionHandler: { } completionHandler: {
Task { @MainActor in Task { @MainActor in
let dismissedToken = self.activeToken
window.orderOut(nil) window.orderOut(nil)
self.model.isVisible = false self.model.isVisible = false
self.model.level = 0 self.model.level = 0
@ -229,6 +226,7 @@ final class VoiceWakeOverlayController: ObservableObject {
AppStateStore.shared.celebrateSend() AppStateStore.shared.celebrateSend()
} }
AppStateStore.shared.stopVoiceEars() AppStateStore.shared.stopVoiceEars()
VoiceSessionCoordinator.shared.overlayDidDismiss(token: dismissedToken)
} }
} }
} }

View File

@ -404,8 +404,6 @@ actor VoiceWakeRuntime {
private func restartRecognizerIfIdleAndOverlayHidden() async { private func restartRecognizerIfIdleAndOverlayHidden() async {
if self.isCapturing { return } if self.isCapturing { return }
let overlayVisible = await MainActor.run { VoiceWakeOverlayController.shared.isVisible }
if overlayVisible { return }
self.restartRecognizer() self.restartRecognizer()
} }

View File

@ -19,6 +19,17 @@ Updated: 2025-12-12 · Owners: mac app
- Overlay is driven via `VoiceWakeOverlayController` with committed/volatile coloring. - Overlay is driven via `VoiceWakeOverlayController` with committed/volatile coloring.
- After send, recognizer restarts cleanly to listen for the next trigger. - After send, recognizer restarts cleanly to listen for the next trigger.
## Lifecycle invariants
- If Voice Wake is enabled and permissions are granted, the wake-word recognizer should be listening (except during an explicit push-to-talk capture).
- Overlay visibility (including manual dismiss via the X button) must never prevent the recognizer from resuming.
## Sticky overlay failure mode (previous)
Previously, if the overlay got stuck visible and you manually closed it, Voice Wake could appear “dead” because the runtime's restart attempt could be blocked by overlay visibility and no subsequent restart was scheduled.
Hardening:
- Wake runtime restart is no longer blocked by overlay visibility.
- Overlay dismiss completion triggers a `VoiceWakeRuntime.refresh(...)` via `VoiceSessionCoordinator`, so manual X-dismiss always resumes listening.
## Push-to-talk specifics ## Push-to-talk specifics
- Hotkey detection uses a global `.flagsChanged` monitor for **right Option** (`keyCode 61` + `.option`). We only observe events (no swallowing). - Hotkey detection uses a global `.flagsChanged` monitor for **right Option** (`keyCode 61` + `.option`). We only observe events (no swallowing).
- Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release. - Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release.