fix(mac): switch push-to-talk to right option
parent
c3c6880382
commit
bc01488a75
|
|
@ -68,7 +68,7 @@ clawdis relay --verbose
|
||||||
## macOS Companion App (Clawdis.app)
|
## macOS Companion App (Clawdis.app)
|
||||||
|
|
||||||
- **On-device Voice Wake:** listens for wake words (e.g. “Claude”) using Apple’s on-device speech recognizer (macOS 26+). macOS still shows the standard Speech/Mic permissions prompt, but audio stays on device.
|
- **On-device Voice Wake:** listens for wake words (e.g. “Claude”) using Apple’s on-device speech recognizer (macOS 26+). macOS still shows the standard Speech/Mic permissions prompt, but audio stays on device.
|
||||||
- **Push-to-talk (Cmd+Fn):** hold the hotkey to speak; the voice overlay shows live partials and sends when you release.
|
- **Push-to-talk (Right Option hold):** hold right Option to speak; the voice overlay shows live partials and sends when you release.
|
||||||
- **Config tab:** pick the model from your local Pi model catalog (`pi-mono/packages/ai/src/models.generated.ts`), or enter a custom model ID; edit session store path and context tokens.
|
- **Config tab:** pick the model from your local Pi model catalog (`pi-mono/packages/ai/src/models.generated.ts`), or enter a custom model ID; edit session store path and context tokens.
|
||||||
- **Voice settings:** language + additional languages, mic picker, live level meter, trigger-word table, and a built-in test harness.
|
- **Voice settings:** language + additional languages, mic picker, live level meter, trigger-word table, and a built-in test harness.
|
||||||
- **Menu bar toggle:** enable/disable Voice Wake from the menu bar; respects Dock-icon preference.
|
- **Menu bar toggle:** enable/disable Voice Wake from the menu bar; respects Dock-icon preference.
|
||||||
|
|
|
||||||
|
|
@ -3,14 +3,13 @@ import AVFoundation
|
||||||
import OSLog
|
import OSLog
|
||||||
import Speech
|
import Speech
|
||||||
|
|
||||||
/// Observes Cmd+Fn and starts a push-to-talk capture while both are held.
|
/// Observes right Option and starts a push-to-talk capture while it is held.
|
||||||
@MainActor
|
@MainActor
|
||||||
final class VoicePushToTalkHotkey {
|
final class VoicePushToTalkHotkey {
|
||||||
static let shared = VoicePushToTalkHotkey()
|
static let shared = VoicePushToTalkHotkey()
|
||||||
|
|
||||||
private var monitor: Any?
|
private var monitor: Any?
|
||||||
private var fnDown = false
|
private var optionDown = false // right option only
|
||||||
private var commandDown = false
|
|
||||||
private var active = false
|
private var active = false
|
||||||
|
|
||||||
func setEnabled(_ enabled: Bool) {
|
func setEnabled(_ enabled: Bool) {
|
||||||
|
|
@ -23,7 +22,7 @@ final class VoicePushToTalkHotkey {
|
||||||
|
|
||||||
private func startMonitoring() {
|
private func startMonitoring() {
|
||||||
guard self.monitor == nil else { return }
|
guard self.monitor == nil else { return }
|
||||||
// Listen-only global monitor; Fn only surfaces on .flagsChanged and cannot be registered as a hotkey.
|
// Listen-only global monitor; we rely on Input Monitoring permission to receive events.
|
||||||
self.monitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
|
self.monitor = NSEvent.addGlobalMonitorForEvents(matching: .flagsChanged) { [weak self] event in
|
||||||
guard let self else { return }
|
guard let self else { return }
|
||||||
self.updateModifierState(from: event)
|
self.updateModifierState(from: event)
|
||||||
|
|
@ -35,23 +34,17 @@ final class VoicePushToTalkHotkey {
|
||||||
NSEvent.removeMonitor(monitor)
|
NSEvent.removeMonitor(monitor)
|
||||||
self.monitor = nil
|
self.monitor = nil
|
||||||
}
|
}
|
||||||
self.fnDown = false
|
self.optionDown = false
|
||||||
self.commandDown = false
|
|
||||||
self.active = false
|
self.active = false
|
||||||
}
|
}
|
||||||
|
|
||||||
private func updateModifierState(from event: NSEvent) {
|
private func updateModifierState(from event: NSEvent) {
|
||||||
switch event.keyCode {
|
// Right Option (keyCode 61) acts as a hold-to-talk modifier.
|
||||||
case 63: // Fn
|
if event.keyCode == 61 {
|
||||||
self.fnDown = event.modifierFlags.contains(.function)
|
self.optionDown = event.modifierFlags.contains(.option)
|
||||||
case 55, 54: // Left / Right command
|
|
||||||
self.commandDown = event.modifierFlags.contains(.command)
|
|
||||||
default:
|
|
||||||
break
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// “Walkie-talkie” chord is live only while both keys stay down.
|
let chordActive = self.optionDown
|
||||||
let chordActive = self.fnDown && self.commandDown
|
|
||||||
if chordActive && !self.active {
|
if chordActive && !self.active {
|
||||||
self.active = true
|
self.active = true
|
||||||
Task {
|
Task {
|
||||||
|
|
|
||||||
|
|
@ -50,8 +50,8 @@ struct VoiceWakeSettings: View {
|
||||||
.disabled(!voiceWakeSupported)
|
.disabled(!voiceWakeSupported)
|
||||||
|
|
||||||
SettingsToggleRow(
|
SettingsToggleRow(
|
||||||
title: "Hold Cmd+Fn to talk",
|
title: "Hold Right Option to talk",
|
||||||
subtitle: "Push-to-talk mode that starts listening while you hold the hotkey and shows the preview overlay.",
|
subtitle: "Push-to-talk mode that starts listening while you hold the key and shows the preview overlay.",
|
||||||
binding: self.$state.voicePushToTalkEnabled)
|
binding: self.$state.voicePushToTalkEnabled)
|
||||||
.disabled(!voiceWakeSupported)
|
.disabled(!voiceWakeSupported)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,7 @@ Updated: 2025-12-08 · Owners: mac app
|
||||||
|
|
||||||
## Modes
|
## Modes
|
||||||
- **Wake-word mode** (default): always-on Speech recognizer waits for trigger tokens (`swabbleTriggerWords`). On match it starts capture, shows the overlay with partial text, and auto-sends after silence.
|
- **Wake-word mode** (default): always-on Speech recognizer waits for trigger tokens (`swabbleTriggerWords`). On match it starts capture, shows the overlay with partial text, and auto-sends after silence.
|
||||||
- **Push-to-talk (Cmd+Fn)**: hold Cmd+Fn to capture immediately—no trigger needed. The overlay appears while held; releasing finalizes and forwards after a short delay so you can tweak text.
|
- **Push-to-talk (Right Option hold)**: hold the right Option key to capture immediately—no trigger needed. The overlay appears while held; releasing finalizes and forwards after a short delay so you can tweak text.
|
||||||
|
|
||||||
## Runtime behavior (wake-word)
|
## Runtime behavior (wake-word)
|
||||||
- Speech recognizer lives in `VoiceWakeRuntime`.
|
- Speech recognizer lives in `VoiceWakeRuntime`.
|
||||||
|
|
@ -15,11 +15,11 @@ Updated: 2025-12-08 · Owners: mac app
|
||||||
- After send, recognizer restarts cleanly to listen for the next trigger.
|
- After send, recognizer restarts cleanly to listen for the next trigger.
|
||||||
|
|
||||||
## Push-to-talk specifics
|
## Push-to-talk specifics
|
||||||
- Hotkey detection uses a global `.flagsChanged` monitor: Fn is `keyCode 63` and flagged via `.function`; Command is `keyCode 55/54`. We only **observe** events (no swallowing).
|
- Hotkey detection uses a global `.flagsChanged` monitor for **right Option** (`keyCode 61` + `.option`). We only observe events (no swallowing).
|
||||||
- Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release.
|
- Capture pipeline lives in `VoicePushToTalk`: starts Speech immediately, streams partials to the overlay, and calls `VoiceWakeForwarder` on release.
|
||||||
- When push-to-talk starts we pause the wake-word runtime to avoid dueling audio taps; it restarts automatically after release.
|
- When push-to-talk starts we pause the wake-word runtime to avoid dueling audio taps; it restarts automatically after release.
|
||||||
- Permissions: requires Microphone + Speech. macOS will prompt the first time; seeing events needs Accessibility approval.
|
- Permissions: requires Microphone + Speech; receiving global key events requires Accessibility/Input Monitoring approval.
|
||||||
- Fn caveat: some external keyboards don’t expose Fn; fall back to a standard shortcut if needed.
|
- External keyboards: some may not report right Option as expected—offer a fallback shortcut if users report missed key presses.
|
||||||
|
|
||||||
## User-facing settings
|
## User-facing settings
|
||||||
- **Voice Wake** toggle: enables wake-word runtime.
|
- **Voice Wake** toggle: enables wake-word runtime.
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue