- Implement real-time streaming preview using Parakeet EOU (160ms chunks)
- Add batch transcription on completion for accurate final result
- Prefer Whisper large-v3-turbo (2.7% WER) over Parakeet (6.05% WER) when available
- Remove audio preprocessing that hurts ASR accuracy (gain control, noise reduction)
- Add streaming audio callback support in Recorder and CoreAudioRecorder
- Raw audio passthrough - SDK handles resampling internally

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
281 lines
9.9 KiB
Swift
281 lines
9.9 KiB
Swift
import Foundation
|
|
import AVFoundation
|
|
import CoreAudio
|
|
import os
|
|
|
|
/// Coordinates a single microphone recording session.
///
/// Owns the underlying `CoreAudioRecorder`, reacts to input-device change/switch
/// notifications, publishes a normalized level meter, and pauses media / mutes system
/// audio while a recording is running (restoring both when it stops).
@MainActor
class Recorder: NSObject, ObservableObject {
    /// Signature of the real-time audio tap handed to `CoreAudioRecorder`.
    typealias StreamingAudioCallback = (
        _ samples: UnsafePointer<Float32>,
        _ frameCount: UInt32,
        _ sampleRate: Double,
        _ channelCount: UInt32
    ) -> Void

    /// UserDefaults key under which the most recently used input device ID is persisted.
    private static let lastUsedDeviceDefaultsKey = "lastUsedMicrophoneDeviceID"

    /// The active recorder; `nil` whenever no recording is in progress.
    private var recorder: CoreAudioRecorder?
    private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "Recorder")
    private let deviceManager = AudioDeviceManager.shared
    private var deviceObserver: NSObjectProtocol?
    private var deviceSwitchObserver: NSObjectProtocol?
    /// Guards against overlapping device-change / device-switch handling.
    private var isReconfiguring = false
    private let mediaController = MediaController.shared
    private let playbackController = PlaybackController.shared

    /// Latest input level, normalized to 0...1 (see `normalizedLevel(forDecibels:)`).
    @Published var audioMeter = AudioMeter(averagePower: 0, peakPower: 0)

    private var audioLevelCheckTask: Task<Void, Never>?
    private var audioMeterUpdateTask: Task<Void, Never>?
    private var audioRestorationTask: Task<Void, Never>?
    /// Set once the meter has risen above the silence threshold in the current session.
    private var hasDetectedAudioInCurrentSession = false

    /// Stored streaming callback - applied when CoreAudioRecorder is created.
    /// `stopRecording()` clears only the live recorder's callback, so this stored value
    /// is re-applied on the next `startRecording` until it is replaced or set to `nil`.
    private var pendingStreamingCallback: StreamingAudioCallback?

    enum RecorderError: Error {
        case couldNotStartRecording
    }

    override init() {
        super.init()
        setupDeviceChangeObserver()
        setupDeviceSwitchObserver()
    }

    // MARK: - Device observation

    /// Registers for device-change events and forwards them to `handleDeviceChange()`.
    private func setupDeviceChangeObserver() {
        deviceObserver = AudioDeviceConfiguration.createDeviceChangeObserver { [weak self] in
            Task {
                await self?.handleDeviceChange()
            }
        }
    }

    /// Registers for `.audioDeviceSwitchRequired` and forwards each notification to
    /// `handleDeviceSwitchRequired(_:)`.
    private func setupDeviceSwitchObserver() {
        deviceSwitchObserver = NotificationCenter.default.addObserver(
            forName: .audioDeviceSwitchRequired,
            object: nil,
            queue: .main
        ) { [weak self] notification in
            Task {
                await self?.handleDeviceSwitchRequired(notification)
            }
        }
    }

    /// Handles a device change while recording: waits 200 ms, then posts
    /// `.toggleMiniRecorder`. Ignored when idle or when another reconfiguration is running.
    private func handleDeviceChange() async {
        guard !isReconfiguring else { return }
        guard recorder != nil else { return }

        isReconfiguring = true
        defer { isReconfiguring = false }

        try? await Task.sleep(nanoseconds: 200_000_000)

        NotificationCenter.default.post(name: .toggleMiniRecorder, object: nil)
    }

    /// Switches the live recording to the device named in the notification's
    /// `"newDeviceID"` user-info entry. On failure the recording is stopped and the
    /// user is notified via `handleRecordingError(_:)`.
    private func handleDeviceSwitchRequired(_ notification: Notification) async {
        guard !isReconfiguring else { return }
        guard let recorder = recorder else { return }
        guard let userInfo = notification.userInfo,
              let newDeviceID = userInfo["newDeviceID"] as? AudioDeviceID else {
            logger.error("Device switch notification missing newDeviceID")
            return
        }

        // Prevent concurrent device switches and handleDeviceChange() interference
        isReconfiguring = true
        defer { isReconfiguring = false }

        logger.notice("🎙️ Device switch required: switching to device \(newDeviceID)")

        do {
            try recorder.switchDevice(to: newDeviceID)

            // Notify user about the switch
            if let deviceName = deviceManager.availableDevices.first(where: { $0.id == newDeviceID })?.name {
                NotificationManager.shared.showNotification(
                    title: "Switched to: \(deviceName)",
                    type: .info
                )
            }

            logger.notice("🎙️ Successfully switched recording to device \(newDeviceID)")
        } catch {
            logger.error("❌ Failed to switch device: \(error.localizedDescription)")

            // If switch fails, stop recording and notify user
            await handleRecordingError(error)
        }
    }

    // MARK: - Recording lifecycle

    /// Starts recording from the current input device into `url`.
    ///
    /// - Parameter url: Destination file passed to `CoreAudioRecorder`.
    /// - Throws: `RecorderError.couldNotStartRecording` if the underlying recorder fails
    ///   to start; in that case `stopRecording()` has already been called.
    func startRecording(toOutputFile url: URL) async throws {
        deviceManager.isRecordingActive = true

        // Resolve the device once so the notification, the persisted ID and the device
        // that is actually opened always refer to the same input.
        let deviceID = deviceManager.getCurrentDevice()
        let lastDeviceID = UserDefaults.standard.string(forKey: Self.lastUsedDeviceDefaultsKey)

        // Only announce the device when it differs from the one used last time.
        if String(deviceID) != lastDeviceID,
           let deviceName = deviceManager.availableDevices.first(where: { $0.id == deviceID })?.name {
            NotificationManager.shared.showNotification(
                title: "Using: \(deviceName)",
                type: .info
            )
        }
        UserDefaults.standard.set(String(deviceID), forKey: Self.lastUsedDeviceDefaultsKey)

        hasDetectedAudioInCurrentSession = false

        do {
            let coreAudioRecorder = CoreAudioRecorder()
            recorder = coreAudioRecorder

            // Apply any pending streaming callback that was set before recording started
            if let callback = pendingStreamingCallback {
                coreAudioRecorder.streamingAudioCallback = callback
                logger.notice("🎙️ Applied pending streaming callback to recorder")
            }

            try coreAudioRecorder.startRecording(toOutputFile: url, deviceID: deviceID)

            // A new session supersedes any still-pending unmute/resume from the last stop.
            audioRestorationTask?.cancel()
            audioRestorationTask = nil

            Task { [weak self] in
                guard let self = self else { return }
                await self.playbackController.pauseMedia()
                _ = await self.mediaController.muteSystemAudio()
            }

            audioLevelCheckTask?.cancel()
            audioMeterUpdateTask?.cancel()

            startAudioMeterUpdates()
            startNoAudioWatchdog()
        } catch {
            logger.error("Failed to create audio recorder: \(error.localizedDescription)")
            stopRecording()
            throw RecorderError.couldNotStartRecording
        }
    }

    /// Refreshes `audioMeter` roughly every 17 ms until the recorder goes away or the
    /// task is cancelled.
    private func startAudioMeterUpdates() {
        // Weak capture: the loop must not keep `self` alive, otherwise the cancellation
        // in `deinit` could never run while the task exists.
        audioMeterUpdateTask = Task { @MainActor [weak self] in
            while self?.recorder != nil, !Task.isCancelled {
                self?.updateAudioMeter()
                try? await Task.sleep(nanoseconds: 17_000_000)
            }
        }
    }

    /// Shows a "No Audio Detected" warning 5 s after start and again 12 s later, unless
    /// audio has been detected (or the task was cancelled) in the meantime.
    private func startNoAudioWatchdog() {
        audioLevelCheckTask = Task { @MainActor [weak self] in
            let notificationChecks: [TimeInterval] = [5.0, 12.0]

            for delay in notificationChecks {
                try? await Task.sleep(nanoseconds: UInt64(delay * 1_000_000_000))

                if Task.isCancelled { return }

                // Stop once audio has been heard, or if the owner has been deallocated.
                guard let self = self, !self.hasDetectedAudioInCurrentSession else { return }

                NotificationManager.shared.showNotification(
                    title: "No Audio Detected",
                    type: .warning
                )
            }
        }
    }

    /// Stops the current recording (if any), resets the meter, and schedules unmuting
    /// system audio and resuming media playback.
    func stopRecording() {
        audioLevelCheckTask?.cancel()
        audioMeterUpdateTask?.cancel()
        recorder?.streamingAudioCallback = nil // Clear streaming callback
        recorder?.stopRecording()
        recorder = nil
        audioMeter = AudioMeter(averagePower: 0, peakPower: 0)

        audioRestorationTask = Task {
            await mediaController.unmuteSystemAudio()
            await playbackController.resumeMedia()
        }
        deviceManager.isRecordingActive = false
    }

    /// Sets a callback to receive real-time audio samples for streaming transcription.
    /// The callback is invoked on the audio thread - do not perform blocking operations.
    /// Note: The callback is stored and applied when recording starts (CoreAudioRecorder is created lazily).
    ///
    /// - Parameter callback: The tap to install, or `nil` to remove it.
    func setStreamingAudioCallback(_ callback: StreamingAudioCallback?) {
        pendingStreamingCallback = callback
        // Also apply immediately if recorder already exists
        recorder?.streamingAudioCallback = callback
    }

    /// Logs `error`, stops the recording, and shows a "Recording Failed" notification.
    private func handleRecordingError(_ error: Error) async {
        logger.error("❌ Recording error occurred: \(error.localizedDescription)")

        // Stop the recording
        stopRecording()

        // Notify the user about the recording failure
        NotificationManager.shared.showNotification(
            title: "Recording Failed: \(error.localizedDescription)",
            type: .error
        )
    }

    // MARK: - Metering

    /// Maps a power reading in dB onto 0...1 using a visible range of -60 dB ... 0 dB.
    /// Values below -60 dB map to 0, values at or above 0 dB map to 1.
    private func normalizedLevel(forDecibels power: Float) -> Float {
        let minVisibleDb: Float = -60.0
        let maxVisibleDb: Float = 0.0

        if power < minVisibleDb {
            return 0.0
        } else if power >= maxVisibleDb {
            return 1.0
        } else {
            return (power - minVisibleDb) / (maxVisibleDb - minVisibleDb)
        }
    }

    /// Reads the recorder's current average/peak power, normalizes both, records whether
    /// any audio has been heard this session, and publishes the new meter value.
    private func updateAudioMeter() {
        guard let recorder = recorder else { return }

        let newAudioMeter = AudioMeter(
            averagePower: Double(normalizedLevel(forDecibels: recorder.averagePower)),
            peakPower: Double(normalizedLevel(forDecibels: recorder.peakPower))
        )

        if !hasDetectedAudioInCurrentSession && newAudioMeter.averagePower > 0.01 {
            hasDetectedAudioInCurrentSession = true
        }

        // Publish only real changes so observers are not invalidated ~60 times per
        // second while the level is constant (e.g. during silence).
        if newAudioMeter != audioMeter {
            audioMeter = newAudioMeter
        }
    }

    // MARK: - Cleanup

    deinit {
        audioLevelCheckTask?.cancel()
        audioMeterUpdateTask?.cancel()
        audioRestorationTask?.cancel()
        if let observer = deviceObserver {
            NotificationCenter.default.removeObserver(observer)
        }
        if let observer = deviceSwitchObserver {
            NotificationCenter.default.removeObserver(observer)
        }
    }
}
|
|
|
|
/// A snapshot of the input level, with an average and a peak component.
///
/// `Recorder` publishes these with both values normalized to the range 0...1.
struct AudioMeter: Equatable {
    /// Average input power.
    let averagePower: Double
    /// Peak input power.
    let peakPower: Double
}