vOOice/VoiceInk/PlaybackController.swift
Jake Shore de1c1e51aa Add hybrid streaming transcription for improved accuracy
- Implement real-time streaming preview using Parakeet EOU (160ms chunks)
- Add batch transcription on completion for accurate final result
- Prefer Whisper large-v3-turbo (2.7% WER) over Parakeet (6.05% WER) when available
- Remove audio preprocessing that hurts ASR accuracy (gain control, noise reduction)
- Add streaming audio callback support in Recorder and CoreAudioRecorder
- Raw audio passthrough - SDK handles resampling internally

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
2026-01-16 07:35:53 -05:00

138 lines
3.9 KiB
Swift

import AppKit
import Combine
import Foundation
import SwiftUI
import MediaRemoteAdapter
/// Pauses system media playback when a recording starts and resumes it afterwards.
///
/// Playback state is learned from `MediaRemoteAdapter.MediaController` callbacks
/// and cached in `isMediaPlaying` / `lastKnownTrackInfo`. The feature is gated by
/// the persisted `isPauseMediaEnabled` preference.
///
/// NOTE(review): mutable state is touched from the media-controller callback and
/// from the async `pauseMedia()` / `resumeMedia()` entry points without explicit
/// synchronization; the executor the callbacks arrive on is not visible here.
final class PlaybackController: ObservableObject {
    /// Process-wide shared instance; the initializer is private.
    static let shared = PlaybackController()

    /// UserDefaults key under which the "pause media while recording" preference is stored.
    private static let pauseMediaEnabledKey = "isPauseMediaEnabled"

    private var mediaController: MediaRemoteAdapter.MediaController

    /// True when `pauseMedia()` decided to pause playback for the current recording.
    private var wasPlayingWhenRecordingStarted = false
    /// Latest playing flag reported by the media controller callback.
    private var isMediaPlaying = false
    /// Latest track info reported by the media controller callback.
    private var lastKnownTrackInfo: TrackInfo?
    /// Bundle identifier of the app whose playback `pauseMedia()` paused.
    private var originalMediaAppBundleId: String?
    /// Pending delayed "play" scheduled by `resumeMedia()`; cancelled by `pauseMedia()`.
    private var resumeTask: Task<Void, Never>?

    /// Whether media should be paused while recording.
    ///
    /// Every change is written to `UserDefaults` and starts or stops media tracking.
    @Published var isPauseMediaEnabled: Bool = UserDefaults.standard.bool(forKey: PlaybackController.pauseMediaEnabledKey) {
        didSet {
            UserDefaults.standard.set(isPauseMediaEnabled, forKey: PlaybackController.pauseMediaEnabledKey)
            if isPauseMediaEnabled {
                startMediaTracking()
            } else {
                stopMediaTracking()
            }
        }
    }

    private init() {
        mediaController = MediaRemoteAdapter.MediaController()

        // Persist an explicit `false` the first time so the key always exists.
        if !UserDefaults.standard.contains(key: PlaybackController.pauseMediaEnabledKey) {
            UserDefaults.standard.set(false, forKey: PlaybackController.pauseMediaEnabledKey)
        }

        setupMediaControllerCallbacks()

        // `didSet` does not run for the property's initial value, so tracking
        // has to be started explicitly when the preference is already on.
        if isPauseMediaEnabled {
            startMediaTracking()
        }
    }

    /// Wires the media controller callbacks that keep the cached playback state current.
    private func setupMediaControllerCallbacks() {
        mediaController.onTrackInfoReceived = { [weak self] trackInfo in
            guard let self = self else { return }
            self.isMediaPlaying = trackInfo?.payload.isPlaying ?? false
            self.lastKnownTrackInfo = trackInfo
        }

        // Listener termination is intentionally ignored.
        mediaController.onListenerTerminated = { }
    }

    /// Starts listening for media updates.
    private func startMediaTracking() {
        mediaController.startListening()
    }

    /// Stops listening for media updates and discards all cached playback state.
    private func stopMediaTracking() {
        mediaController.stopListening()
        isMediaPlaying = false
        lastKnownTrackInfo = nil
        wasPlayingWhenRecordingStarted = false
        originalMediaAppBundleId = nil
    }

    /// Pauses the currently playing media, remembering which app was playing so that
    /// `resumeMedia()` can restart it later.
    ///
    /// Any pending resume is cancelled first. Nothing is paused unless the feature is
    /// enabled, media is reported as playing, and the playing app's bundle identifier
    /// is known.
    func pauseMedia() async {
        // A new pause request supersedes any resume that is still waiting out its delay.
        resumeTask?.cancel()
        resumeTask = nil

        wasPlayingWhenRecordingStarted = false
        originalMediaAppBundleId = nil

        guard isPauseMediaEnabled,
              isMediaPlaying,
              lastKnownTrackInfo?.payload.isPlaying == true,
              let bundleId = lastKnownTrackInfo?.payload.bundleIdentifier else {
            return
        }

        wasPlayingWhenRecordingStarted = true
        originalMediaAppBundleId = bundleId

        // Short 50 ms pause before issuing the command.
        // NOTE(review): the reason for this delay is not visible in this file.
        try? await Task.sleep(nanoseconds: 50_000_000)

        mediaController.pause()
    }

    /// Resumes media that `pauseMedia()` paused, after the configured resumption delay.
    ///
    /// Playback is resumed only when the feature is enabled, this controller paused the
    /// media, the originating app is still running, and the most recent track info still
    /// belongs to that app and reports it as not playing.
    func resumeMedia() async {
        let shouldResume = wasPlayingWhenRecordingStarted
        let originalBundleId = originalMediaAppBundleId
        let delay = MediaController.shared.audioResumptionDelay

        // Consume the recorded state before any suspension point. Clearing it only
        // after awaiting the delayed task would wipe the state written by a
        // `pauseMedia()` call that arrives during the delay, so media paused for the
        // next recording would never be resumed.
        wasPlayingWhenRecordingStarted = false
        originalMediaAppBundleId = nil

        guard isPauseMediaEnabled,
              shouldResume,
              let bundleId = originalBundleId else {
            return
        }

        guard isAppStillRunning(bundleId: bundleId) else {
            return
        }

        guard let currentTrackInfo = lastKnownTrackInfo,
              let currentBundleId = currentTrackInfo.payload.bundleIdentifier,
              currentBundleId == bundleId,
              currentTrackInfo.payload.isPlaying == false else {
            return
        }

        // Clamp to zero: converting a negative value to UInt64 traps at runtime.
        let delayNanoseconds = UInt64(max(0, delay) * 1_000_000_000)

        let task = Task {
            try? await Task.sleep(nanoseconds: delayNanoseconds)

            // `pauseMedia()` cancels this task when a new recording starts.
            guard !Task.isCancelled else {
                return
            }

            mediaController.play()
        }

        resumeTask = task
        await task.value
    }

    /// Returns whether any running application has the given bundle identifier.
    private func isAppStillRunning(bundleId: String) -> Bool {
        let runningApps = NSWorkspace.shared.runningApplications
        return runningApps.contains { $0.bundleIdentifier == bundleId }
    }
}
extension UserDefaults {
    /// Key under which the "pause media while recording" flag is stored.
    private static let pauseMediaFlagKey = "isPauseMediaEnabled"

    /// Typed accessor for the "pause media while recording" flag.
    ///
    /// Reads via `bool(forKey:)`, so a missing key reads as `false`.
    var isPauseMediaEnabled: Bool {
        get {
            return self.bool(forKey: UserDefaults.pauseMediaFlagKey)
        }
        set(enabled) {
            self.set(enabled, forKey: UserDefaults.pauseMediaFlagKey)
        }
    }
}