From 6c1fc3b869771918bbd376b1cb2006c34a8eef39 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sun, 11 May 2025 10:05:47 +0545 Subject: [PATCH] Rewire audio device selection system --- VoiceInk/Recorder.swift | 166 ++++++------------ .../Services/AudioDeviceConfiguration.swift | 52 +----- VoiceInk/Services/AudioDeviceManager.swift | 4 +- 3 files changed, 55 insertions(+), 167 deletions(-) diff --git a/VoiceInk/Recorder.swift b/VoiceInk/Recorder.swift index 1a5d083..55262ac 100644 --- a/VoiceInk/Recorder.swift +++ b/VoiceInk/Recorder.swift @@ -5,15 +5,13 @@ import os @MainActor class Recorder: ObservableObject { - private var engine: AVAudioEngine? - private var file: AVAudioFile? + private var recorder: AVAudioRecorder? private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "Recorder") private let deviceManager = AudioDeviceManager.shared private var deviceObserver: NSObjectProtocol? private var isReconfiguring = false private let mediaController = MediaController.shared @Published var audioMeter = AudioMeter(averagePower: 0, peakPower: 0) - private var latestBuffer: AVAudioPCMBuffer? enum RecorderError: Error { case couldNotStartRecording @@ -34,9 +32,9 @@ class Recorder: ObservableObject { private func handleDeviceChange() async { guard !isReconfiguring else { return } isReconfiguring = true - - if engine != nil { - let currentURL = file?.url + + if recorder != nil { + let currentURL = recorder?.url stopRecording() try? await Task.sleep(nanoseconds: 100_000_000) @@ -63,10 +61,11 @@ class Recorder: ObservableObject { func startRecording(toOutputFile url: URL) async throws { deviceManager.isRecordingActive = true - + Task { await mediaController.muteSystemAudio() } + let deviceID = deviceManager.getCurrentDevice() if deviceID != 0 { do { @@ -76,11 +75,7 @@ class Recorder: ObservableObject { } } - engine = AVAudioEngine() - let inputNode = engine!.inputNode - let inputFormat = inputNode.outputFormat(forBus: 0) - - let whisperSettings: [String: Any] = [ + let recordSettings: [String: Any] = [ AVFormatIDKey: Int(kAudioFormatLinearPCM), AVSampleRateKey: 16000.0, AVNumberOfChannelsKey: 1, @@ -90,127 +85,68 @@ class Recorder: ObservableObject { AVLinearPCMIsNonInterleaved: false ] - let processingFormat = AVAudioFormat( - commonFormat: .pcmFormatFloat32, - sampleRate: 16000.0, - channels: 1, - interleaved: false - )! - do { - file = try AVAudioFile(forWriting: url, settings: whisperSettings) + recorder = try AVAudioRecorder(url: url, settings: recordSettings) + recorder?.isMeteringEnabled = true + + if recorder?.record() == false { + logger.error("❌ Could not start recording") + throw RecorderError.couldNotStartRecording + } + + Task { + while recorder != nil { + updateAudioMeter() + try? await Task.sleep(nanoseconds: 33_000_000) + } + } + } catch { - logger.error("Failed to create audio file: \(error.localizedDescription)") - stopRecording() - throw RecorderError.couldNotStartRecording - } - - inputNode.installTap(onBus: 0, bufferSize: 1024, format: inputFormat) { [weak self] buffer, _ in - guard let self = self else { return } - - let processedBuffer: AVAudioPCMBuffer - if buffer.format != processingFormat { - guard let converter = AVAudioConverter(from: buffer.format, to: processingFormat), - let newBuffer = AVAudioPCMBuffer(pcmFormat: processingFormat, - frameCapacity: AVAudioFrameCount(Double(buffer.frameLength) * - (16000.0 / buffer.format.sampleRate))) else { - self.logger.error("Failed to create converter or buffer") - return - } - - var error: NSError? - let status = converter.convert(to: newBuffer, error: &error) { _, outStatus in - outStatus.pointee = .haveData - return buffer - } - - if status == .error || error != nil { - self.logger.error("Format conversion failed: \(error?.localizedDescription ?? "unknown error")") - return - } - - processedBuffer = newBuffer - } else { - processedBuffer = buffer - } - - Task { @MainActor in - self.latestBuffer = processedBuffer - self.calculateAndUpdateAudioLevel(buffer: processedBuffer) - } - - do { - guard let int16Converter = AVAudioConverter(from: processedBuffer.format, to: self.file!.processingFormat), - let int16Buffer = AVAudioPCMBuffer(pcmFormat: self.file!.processingFormat, - frameCapacity: processedBuffer.frameLength) else { - self.logger.error("Failed to create int16 converter") - return - } - - var conversionError: NSError? - let conversionStatus = int16Converter.convert(to: int16Buffer, error: &conversionError) { _, outStatus in - outStatus.pointee = .haveData - return processedBuffer - } - - if conversionStatus == .error || conversionError != nil { - self.logger.error("Int16 conversion failed") - return - } - - try self.file?.write(from: int16Buffer) - } catch { - self.logger.error("Failed to write audio buffer: \(error.localizedDescription)") - } - } - - do { - try engine!.start() - } catch { - logger.error("❌ Failed to start audio engine: \(error.localizedDescription)") + logger.error("Failed to create audio recorder: \(error.localizedDescription)") stopRecording() throw RecorderError.couldNotStartRecording } } func stopRecording() { - let wasRunning = engine != nil - defer { - deviceManager.isRecordingActive = false - engine?.stop() - engine = nil - } - + recorder?.stop() + recorder = nil audioMeter = AudioMeter(averagePower: 0, peakPower: 0) - engine?.inputNode.removeTap(onBus: 0) - file = nil - NotificationCenter.default.post(name: NSNotification.Name("AudioDeviceChanged"), object: nil) Task { await mediaController.unmuteSystemAudio() } + deviceManager.isRecordingActive = false } - private func calculateAndUpdateAudioLevel(buffer: AVAudioPCMBuffer) { - guard let floatData = buffer.floatChannelData else { return } - let channelData = floatData[0] - let frameLength = Int(buffer.frameLength) + private func updateAudioMeter() { + guard let recorder = recorder else { return } + recorder.updateMeters() - var sum: Float = 0 - var peak: Float = 0 - for i in 0..= maxVisibleDb { + normalizedAverage = 1.0 + } else { + normalizedAverage = (averagePower - minVisibleDb) / (maxVisibleDb - minVisibleDb) } - let rms = sqrt(sum / Float(frameLength)) - let peakValue = peak + let normalizedPeak: Float + if peakPower < minVisibleDb { + normalizedPeak = 0.0 + } else if peakPower >= maxVisibleDb { + normalizedPeak = 1.0 + } else { + normalizedPeak = (peakPower - minVisibleDb) / (maxVisibleDb - minVisibleDb) + } - let multiplier: Double = 20.0 - let scaledRMS = min(Double(rms) * multiplier, 1.0) - let scaledPeak = min(Double(peakValue) * multiplier, 1.0) - - audioMeter = AudioMeter(averagePower: scaledRMS, peakPower: scaledPeak) + audioMeter = AudioMeter(averagePower: Double(normalizedAverage), peakPower: Double(normalizedPeak)) } deinit { diff --git a/VoiceInk/Services/AudioDeviceConfiguration.swift b/VoiceInk/Services/AudioDeviceConfiguration.swift index 9914d7a..2648a98 100644 --- a/VoiceInk/Services/AudioDeviceConfiguration.swift +++ b/VoiceInk/Services/AudioDeviceConfiguration.swift @@ -6,9 +6,7 @@ import os class AudioDeviceConfiguration { private static let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioDeviceConfiguration") - /// Configures audio session for a specific device - /// - Parameter deviceID: The ID of the audio device to configure - /// - Returns: A tuple containing the configured format and any error that occurred + static func configureAudioSession(with deviceID: AudioDeviceID) throws -> AudioStreamBasicDescription { var propertySize = UInt32(MemoryLayout.size) var streamFormat = AudioStreamBasicDescription() @@ -56,53 +54,10 @@ class AudioDeviceConfiguration { throw AudioConfigurationError.failedToGetDeviceFormat(status: status) } - // Ensure we're using a standard PCM format - streamFormat.mFormatID = kAudioFormatLinearPCM - streamFormat.mFormatFlags = kAudioFormatFlagIsFloat | kAudioFormatFlagIsPacked - return streamFormat } - /// Sets up an audio device for the audio unit - /// - Parameters: - /// - deviceID: The ID of the audio device - /// - audioUnit: The audio unit to configure - static func configureAudioUnit(_ audioUnit: AudioUnit, with deviceID: AudioDeviceID) throws { - var deviceIDCopy = deviceID - let propertySize = UInt32(MemoryLayout.size) - - // First disable the audio unit - let resetStatus = AudioUnitReset(audioUnit, kAudioUnitScope_Global, 0) - if resetStatus != noErr { - logger.error("Failed to reset audio unit: \(resetStatus)") - } - - logger.info("Configuring audio unit for device ID: \(deviceID)") - let setDeviceResult = AudioUnitSetProperty( - audioUnit, - kAudioOutputUnitProperty_CurrentDevice, - kAudioUnitScope_Global, - 0, - &deviceIDCopy, - propertySize - ) - - if setDeviceResult != noErr { - logger.error("Failed to set audio unit device: \(setDeviceResult)") - logger.error("Device ID: \(deviceID)") - if let deviceName = AudioDeviceManager.shared.getDeviceName(deviceID: deviceID) { - logger.error("Failed device name: \(deviceName)") - } - throw AudioConfigurationError.failedToSetAudioUnitDevice(status: setDeviceResult) - } - - logger.info("Successfully configured audio unit") - // Add a small delay to allow the device to settle - Thread.sleep(forTimeInterval: 0.1) - } - - /// Sets the default input device for recording - /// - Parameter deviceID: The ID of the audio device + static func setDefaultInputDevice(_ deviceID: AudioDeviceID) throws { var deviceIDCopy = deviceID let propertySize = UInt32(MemoryLayout.size) @@ -147,7 +102,6 @@ class AudioDeviceConfiguration { enum AudioConfigurationError: LocalizedError { case failedToGetDeviceFormat(status: OSStatus) - case failedToSetAudioUnitDevice(status: OSStatus) case failedToSetInputDevice(status: OSStatus) case failedToGetAudioUnit @@ -155,8 +109,6 @@ enum AudioConfigurationError: LocalizedError { switch self { case .failedToGetDeviceFormat(let status): return "Failed to get device format: \(status)" - case .failedToSetAudioUnitDevice(let status): - return "Failed to set audio unit device: \(status)" case .failedToSetInputDevice(let status): return "Failed to set input device: \(status)" case .failedToGetAudioUnit: diff --git a/VoiceInk/Services/AudioDeviceManager.swift b/VoiceInk/Services/AudioDeviceManager.swift index f6f3828..a146d58 100644 --- a/VoiceInk/Services/AudioDeviceManager.swift +++ b/VoiceInk/Services/AudioDeviceManager.swift @@ -21,7 +21,7 @@ class AudioDeviceManager: ObservableObject { @Published var selectedDeviceID: AudioDeviceID? @Published var inputMode: AudioInputMode = .systemDefault @Published var prioritizedDevices: [PrioritizedDevice] = [] - private var fallbackDeviceID: AudioDeviceID? + var fallbackDeviceID: AudioDeviceID? var isRecordingActive: Bool = false @@ -44,7 +44,7 @@ class AudioDeviceManager: ObservableObject { setupDeviceChangeNotifications() } - private func setupFallbackDevice() { + func setupFallbackDevice() { let deviceID: AudioDeviceID? = getDeviceProperty( deviceID: AudioObjectID(kAudioObjectSystemObject), selector: kAudioHardwarePropertyDefaultInputDevice