From e6a48ee1fd5ce88a0489ba7971a9641000681a83 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sun, 20 Apr 2025 20:09:53 +0545 Subject: [PATCH] Refactor recorder to use AVAudioEngine and improve error handling --- VoiceInk/Recorder.swift | 268 ++++++++++++---------------- VoiceInk/Whisper/WhisperState.swift | 2 +- 2 files changed, 118 insertions(+), 152 deletions(-) diff --git a/VoiceInk/Recorder.swift b/VoiceInk/Recorder.swift index 72949a4..7dc4036 100644 --- a/VoiceInk/Recorder.swift +++ b/VoiceInk/Recorder.swift @@ -3,29 +3,27 @@ import AVFoundation import CoreAudio import os -@MainActor // Change to MainActor since we need to interact with UI +@MainActor class Recorder: ObservableObject { - private var recorder: AVAudioRecorder? + private var engine: AVAudioEngine? + private var file: AVAudioFile? private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "Recorder") private let deviceManager = AudioDeviceManager.shared private var deviceObserver: NSObjectProtocol? private var isReconfiguring = false private let mediaController = MediaController.shared @Published var audioMeter = AudioMeter(averagePower: 0, peakPower: 0) - private var levelMonitorTimer: Timer? + private var latestBuffer: AVAudioPCMBuffer? enum RecorderError: Error { case couldNotStartRecording - case deviceConfigurationFailed } init() { - logger.info("Initializing Recorder") setupDeviceChangeObserver() } private func setupDeviceChangeObserver() { - logger.info("Setting up device change observer") deviceObserver = AudioDeviceConfiguration.createDeviceChangeObserver { [weak self] in Task { await self?.handleDeviceChange() @@ -34,214 +32,182 @@ class Recorder: ObservableObject { } private func handleDeviceChange() async { - guard !isReconfiguring else { - logger.warning("Device change already in progress, skipping") - return - } - - logger.info("Handling device change") + guard !isReconfiguring else { return } isReconfiguring = true - // If we're recording, we need to stop and restart with new device - if recorder != nil { - logger.info("Active recording detected during device change") - let currentURL = recorder?.url - let currentDelegate = recorder?.delegate - + if engine != nil { + let currentURL = file?.url stopRecording() - - // Wait briefly for the device change to take effect - logger.info("Waiting for device change to take effect") - try? await Task.sleep(nanoseconds: 100_000_000) // 0.1 seconds + try? await Task.sleep(nanoseconds: 100_000_000) if let url = currentURL { do { - logger.info("Attempting to restart recording with new device") - try await startRecording(toOutputFile: url, delegate: currentDelegate) - logger.info("Successfully reconfigured recording with new device") - } catch { - logger.error("Failed to restart recording after device change: \(error.localizedDescription)") - } + try await startRecording(toOutputFile: url) + } catch {} } } - isReconfiguring = false - logger.info("Device change handling completed") } private func configureAudioSession(with deviceID: AudioDeviceID) async throws { - logger.info("Starting audio session configuration for device ID: \(deviceID)") - - // Add a small delay to ensure device is ready after system changes - try? await Task.sleep(nanoseconds: 50_000_000) // 0.05 seconds - + try? await Task.sleep(nanoseconds: 50_000_000) do { - // Get the audio format from the selected device let format = try AudioDeviceConfiguration.configureAudioSession(with: deviceID) - logger.info("Got audio format - Sample rate: \(format.mSampleRate), Channels: \(format.mChannelsPerFrame)") - - // Configure the device for recording try AudioDeviceConfiguration.setDefaultInputDevice(deviceID) - logger.info("Successfully set default input device") } catch { - logger.error("Audio session configuration failed: \(error.localizedDescription)") - logger.error("Device ID: \(deviceID)") - if let deviceName = deviceManager.getDeviceName(deviceID: deviceID) { - logger.error("Failed device name: \(deviceName)") - } throw error } - - // Add another small delay to allow configuration to settle - try? await Task.sleep(nanoseconds: 50_000_000) // 0.05 seconds - - if let deviceName = deviceManager.getDeviceName(deviceID: deviceID) { - logger.info("Successfully configured recorder with device: \(deviceName) (ID: \(deviceID))") - } + try? await Task.sleep(nanoseconds: 50_000_000) } - func startRecording(toOutputFile url: URL, delegate: AVAudioRecorderDelegate?) async throws { - logger.info("Starting recording process") - - // Check if we need to mute system audio + func startRecording(toOutputFile url: URL) async throws { let wasMuted = await mediaController.muteSystemAudio() - if wasMuted { - logger.info("System audio muted for recording") - } - - // Get the current selected device let deviceID = deviceManager.getCurrentDevice() if deviceID != 0 { do { - logger.info("Configuring audio session with device ID: \(deviceID)") - if let deviceName = deviceManager.getDeviceName(deviceID: deviceID) { - logger.info("Attempting to configure device: \(deviceName)") - } try await configureAudioSession(with: deviceID) - logger.info("Successfully configured audio session") - } catch { - logger.error("Failed to configure audio device: \(error.localizedDescription), Device ID: \(deviceID)") - if let deviceName = deviceManager.getDeviceName(deviceID: deviceID) { - logger.error("Failed device name: \(deviceName)") - } - logger.info("Falling back to default device") - } - } else { - logger.info("Using default audio device (no custom device selected)") + } catch {} } - logger.info("Setting up recording with settings: 16000Hz, 1 channel, PCM format") - let recordSettings: [String : Any] = [ + engine = AVAudioEngine() + let inputNode = engine!.inputNode + let inputFormat = inputNode.outputFormat(forBus: 0) + + let whisperSettings: [String: Any] = [ AVFormatIDKey: Int(kAudioFormatLinearPCM), AVSampleRateKey: 16000.0, AVNumberOfChannelsKey: 1, - AVEncoderAudioQualityKey: AVAudioQuality.high.rawValue + AVLinearPCMBitDepthKey: 16, + AVLinearPCMIsFloatKey: false, + AVLinearPCMIsBigEndianKey: false, + AVLinearPCMIsNonInterleaved: false ] + let processingFormat = AVAudioFormat( + commonFormat: .pcmFormatFloat32, + sampleRate: 16000.0, + channels: 1, + interleaved: false + )! + do { - logger.info("Initializing AVAudioRecorder with URL: \(url.path)") - let recorder = try AVAudioRecorder(url: url, settings: recordSettings) - recorder.delegate = delegate - recorder.isMeteringEnabled = true // Enable metering + file = try AVAudioFile(forWriting: url, settings: whisperSettings) + } catch { + logger.error("Failed to create audio file: \(error.localizedDescription)") + await mediaController.unmuteSystemAudio() + throw RecorderError.couldNotStartRecording + } + + inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { [weak self] buffer, _ in + guard let self = self else { return } - logger.info("Attempting to start recording...") - if recorder.record() { - logger.info("Recording started successfully") - self.recorder = recorder - startLevelMonitoring() - } else { - logger.error("Failed to start recording - recorder.record() returned false") - logger.error("Current device ID: \(deviceID)") - if let deviceName = deviceManager.getDeviceName(deviceID: deviceID) { - logger.error("Current device name: \(deviceName)") + let processedBuffer: AVAudioPCMBuffer + if buffer.format != processingFormat { + guard let converter = AVAudioConverter(from: buffer.format, to: processingFormat), + let newBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, + frameCapacity: AVAudioFrameCount(Double(buffer.frameLength) * + (16000.0 / buffer.format.sampleRate))) else { + self.logger.error("Failed to create converter or buffer") + return } - // Restore system audio if we muted it but failed to start recording - await mediaController.unmuteSystemAudio() + var error: NSError? + let status = converter.convert(to: newBuffer, error: &error) { _, outStatus in + outStatus.pointee = .haveData + return buffer + } - throw RecorderError.couldNotStartRecording + if status == .error || error != nil { + self.logger.error("Format conversion failed: \(error?.localizedDescription ?? "unknown error")") + return + } + + processedBuffer = newBuffer + } else { + processedBuffer = buffer } + + Task { @MainActor in + self.latestBuffer = processedBuffer + self.calculateAndUpdateAudioLevel(buffer: processedBuffer) + } + + do { + guard let int16Converter = AVAudioConverter(from: processedBuffer.format, to: self.file!.processingFormat), + let int16Buffer = AVAudioPCMBuffer(pcmFormat: self.file!.processingFormat, + frameCapacity: processedBuffer.frameLength) else { + self.logger.error("Failed to create int16 converter") + return + } + + var conversionError: NSError? + let conversionStatus = int16Converter.convert(to: int16Buffer, error: &conversionError) { _, outStatus in + outStatus.pointee = .haveData + return processedBuffer + } + + if conversionStatus == .error || conversionError != nil { + self.logger.error("Int16 conversion failed") + return + } + + try self.file?.write(from: int16Buffer) + } catch { + self.logger.error("Failed to write audio buffer: \(error.localizedDescription)") + } + } + + do { + try engine!.start() } catch { - logger.error("Error creating AVAudioRecorder: \(error.localizedDescription)") - logger.error("Recording settings used: \(recordSettings)") - logger.error("Output URL: \(url.path)") - - // Restore system audio if we muted it but failed to start recording await mediaController.unmuteSystemAudio() - - throw error + throw RecorderError.couldNotStartRecording } } func stopRecording() { - logger.info("Stopping recording") - stopLevelMonitoring() - recorder?.stop() - recorder?.delegate = nil // Remove delegate - recorder = nil - - // Force a device change notification to trigger system audio profile reset - logger.info("Triggering audio device change notification") + audioMeter = AudioMeter(averagePower: 0, peakPower: 0) + engine?.inputNode.removeTap(onBus: 0) + engine?.stop() + engine = nil + file = nil NotificationCenter.default.post(name: NSNotification.Name("AudioDeviceChanged"), object: nil) - - // Restore system audio if we muted it Task { await mediaController.unmuteSystemAudio() } - - logger.info("Recording stopped successfully") } - private func startLevelMonitoring() { - levelMonitorTimer = Timer.scheduledTimer(withTimeInterval: 0.05, repeats: true) { [weak self] _ in - guard let self = self else { return } - self.updateAudioLevel() + private func calculateAndUpdateAudioLevel(buffer: AVAudioPCMBuffer) { + guard let floatData = buffer.floatChannelData else { return } + let channelData = floatData[0] + let frameLength = Int(buffer.frameLength) + + var sum: Float = 0 + var peak: Float = 0 + for i in 0..