From 57e5d456a6093da25ec9f30222ea05c6f76e413d Mon Sep 17 00:00:00 2001 From: Beingpax Date: Mon, 3 Mar 2025 21:34:09 +0545 Subject: [PATCH] feat: Enhance audio visualization and recording integration - Improved audio meter visualization, removed debug logs, optimized updates --- VoiceInk.xcodeproj/project.pbxproj | 4 +- VoiceInk/AudioEngine.swift | 236 ------------------------ VoiceInk/Recorder.swift | 54 +++++- VoiceInk/Views/MiniRecorderView.swift | 4 +- VoiceInk/Views/MiniWindowManager.swift | 8 +- VoiceInk/Views/NotchRecorderView.swift | 107 +++++++---- VoiceInk/Views/NotchWindowManager.swift | 10 +- VoiceInk/VisualizerView.swift | 89 ++++++--- VoiceInk/VoiceInk.swift | 1 - VoiceInk/Whisper/WhisperState.swift | 88 +++------ 10 files changed, 226 insertions(+), 375 deletions(-) delete mode 100644 VoiceInk/AudioEngine.swift diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index 0b945f0..114cc77 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -483,7 +483,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 14.0; - MARKETING_VERSION = 0.96; + MARKETING_VERSION = 0.97; PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; @@ -516,7 +516,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 14.0; - MARKETING_VERSION = 0.96; + MARKETING_VERSION = 0.97; PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_EMIT_LOC_STRINGS = YES; diff --git a/VoiceInk/AudioEngine.swift b/VoiceInk/AudioEngine.swift deleted file mode 100644 index 12f31de..0000000 --- a/VoiceInk/AudioEngine.swift +++ /dev/null @@ -1,236 +0,0 @@ -import Foundation -import AVFoundation -import CoreAudio -import os - -class AudioEngine: ObservableObject { - private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioEngine") - private lazy var engine = AVAudioEngine() - private lazy var mixer = AVAudioMixerNode() - @Published var isRunning = false - @Published var audioLevel: CGFloat = 0.0 - - private var lastUpdateTime: TimeInterval = 0 - private var inputTap: Any? - private let updateInterval: TimeInterval = 0.05 - private let deviceManager = AudioDeviceManager.shared - private var deviceObserver: NSObjectProtocol? - private var isConfiguring = false - - init() { - setupDeviceChangeObserver() - } - - private func setupDeviceChangeObserver() { - deviceObserver = AudioDeviceConfiguration.createDeviceChangeObserver { [weak self] in - guard let self = self else { return } - if self.isRunning { - self.handleDeviceChange() - } - } - } - - private func handleDeviceChange() { - guard !isConfiguring else { - logger.warning("Device change already in progress, skipping") - return - } - - isConfiguring = true - logger.info("Handling device change - Current engine state: \(self.isRunning ? "Running" : "Stopped")") - - // Stop the engine first - stopAudioEngine() - - // Log device change details - let currentDeviceID = deviceManager.getCurrentDevice() - if let deviceName = deviceManager.getDeviceName(deviceID: currentDeviceID) { - logger.info("Switching to device: \(deviceName) (ID: \(currentDeviceID))") - } - - // Wait a bit for the system to process the device change - DispatchQueue.main.asyncAfter(deadline: .now() + 0.1) { [weak self] in - guard let self = self else { return } - - // Try to start with new device - self.startAudioEngine() - self.isConfiguring = false - logger.info("Device change handling completed") - } - } - - private func setupAudioEngine() { - guard inputTap == nil else { return } - - let bus = 0 - - // Get the current device (either selected or fallback) - let currentDeviceID = deviceManager.getCurrentDevice() - - if currentDeviceID != 0 { - do { - logger.info("Setting up audio engine with device ID: \(currentDeviceID)") - // Log the device type (helps identify Bluetooth devices) - if let deviceName = deviceManager.getDeviceName(deviceID: currentDeviceID) { - let isBluetoothDevice = deviceName.lowercased().contains("bluetooth") - logger.info("Device type: \(isBluetoothDevice ? "Bluetooth" : "Standard") - \(deviceName)") - } - - try configureAudioSession(with: currentDeviceID) - } catch { - logger.error("Audio engine setup failed: \(error.localizedDescription)") - logger.error("Device ID: \(currentDeviceID)") - if let deviceName = deviceManager.getDeviceName(deviceID: currentDeviceID) { - logger.error("Failed device name: \(deviceName)") - } - // Don't return here, let it try with default device - } - } else { - logger.info("No specific device available, using system default") - } - - // Wait briefly for device configuration to take effect - Thread.sleep(forTimeInterval: 0.05) - - // Log input format details - let inputFormat = engine.inputNode.inputFormat(forBus: bus) - logger.info(""" - Input format details: - - Sample Rate: \(inputFormat.sampleRate) - - Channel Count: \(inputFormat.channelCount) - - Common Format: \(inputFormat.commonFormat.rawValue) - - - Channel Layout: \(inputFormat.channelLayout?.layoutTag ?? 0) - """) - - inputTap = engine.inputNode.installTap(onBus: bus, bufferSize: 1024, format: inputFormat) { [weak self] (buffer, time) in - self?.processAudioBuffer(buffer) - } - } - - private func configureAudioSession(with deviceID: AudioDeviceID) throws { - logger.info("Starting audio session configuration for device ID: \(deviceID)") - // Get the audio format from the selected device - let streamFormat = try AudioDeviceConfiguration.configureAudioSession(with: deviceID) - logger.info("Got stream format: \(streamFormat.mSampleRate)Hz, \(streamFormat.mChannelsPerFrame) channels") - - // Configure the input node to use the selected device - let inputNode = engine.inputNode - guard let audioUnit = inputNode.audioUnit else { - logger.error("Failed to get audio unit from input node") - throw AudioConfigurationError.failedToGetAudioUnit - } - logger.info("Got audio unit from input node") - - // Set the device for the audio unit - try AudioDeviceConfiguration.configureAudioUnit(audioUnit, with: deviceID) - logger.info("Configured audio unit with device") - - // Reset the engine to apply the new configuration - engine.stop() - try engine.reset() - logger.info("Reset audio engine") - - // Use async dispatch instead of thread sleep - DispatchQueue.global().async { - Thread.sleep(forTimeInterval: 0.05) - self.logger.info("Audio configuration delay completed") - } - } - - func startAudioEngine() { - guard !isRunning else { return } - - logger.info("Starting audio engine") - - do { - setupAudioEngine() - logger.info("Audio engine setup completed") - - try engine.prepare() - logger.info("Audio engine prepared") - - try engine.start() - isRunning = true - - // Log active device and configuration details - let currentDeviceID = deviceManager.getCurrentDevice() - if let deviceName = deviceManager.getDeviceName(deviceID: currentDeviceID) { - let isBluetoothDevice = deviceName.lowercased().contains("bluetooth") - logger.info(""" - Audio engine started successfully: - - Device: \(deviceName) - - Device ID: \(currentDeviceID) - - Device Type: \(isBluetoothDevice ? "Bluetooth" : "Standard") - - Engine Status: Running - """) - } - } catch { - logger.error(""" - Audio engine start failed: - - Error: \(error.localizedDescription) - - Error Details: \(error) - - Current Device ID: \(self.deviceManager.getCurrentDevice()) - - Engine State: \(self.engine.isRunning ? "Running" : "Stopped") - """) - // Clean up on failure - stopAudioEngine() - } - } - - func stopAudioEngine() { - guard isRunning else { return } - - logger.info("Stopping audio engine") - if let tap = inputTap { - engine.inputNode.removeTap(onBus: 0) - inputTap = nil - } - - engine.stop() - - // Complete cleanup of the engine - engine = AVAudioEngine() // Create a fresh instance - mixer = AVAudioMixerNode() // Reset mixer - - isRunning = false - audioLevel = 0.0 - logger.info("Audio engine stopped and reset") - } - - private func processAudioBuffer(_ buffer: AVAudioPCMBuffer) { - guard let channelData = buffer.floatChannelData?[0] else { return } - let frameCount = buffer.frameLength - - let currentTime = CACurrentMediaTime() - guard currentTime - lastUpdateTime >= updateInterval else { return } - lastUpdateTime = currentTime - - // Use vDSP for faster processing - var sum: Float = 0 - for frame in 0.. audioThreshold { + .onChange(of: audioMeter) { newMeter in + + if isActive { updateBars() } else { resetBars() @@ -303,47 +313,70 @@ struct NotchAudioVisualizer: View { for i in 0.. CGFloat { - let normalizedLevel = max(0, audioLevel - audioThreshold) - let amplifiedLevel = pow(normalizedLevel, 0.6) - let baseHeight = amplifiedLevel * maxHeight * 1.7 - let variation = CGFloat.random(in: -2...2) + private func calculateTargetHeight(for index: Int) -> BarLevel { let positionFactor = CGFloat(index) / CGFloat(barCount - 1) let curve = sin(positionFactor * .pi) - return max(minHeight, min(baseHeight * curve + variation, maxHeight)) + let randomFactor = Double.random(in: 0.8...1.2) + let averageBase = audioMeter.averagePower * randomFactor + let peakBase = audioMeter.peakPower * randomFactor + + let averageHeight = CGFloat(averageBase) * maxHeight * 1.7 * curve + let peakHeight = CGFloat(peakBase) * maxHeight * 1.7 * curve + + let finalAverage = max(minHeight, min(averageHeight, maxHeight)) + let finalPeak = max(minHeight, min(peakHeight, maxHeight)) + + + return BarLevel( + average: finalAverage, + peak: finalPeak + ) } } struct NotchVisualizerBar: View { - let height: CGFloat + let averageHeight: CGFloat + let peakHeight: CGFloat let color: Color var body: some View { - RoundedRectangle(cornerRadius: 1.5) - .fill( - LinearGradient( - gradient: Gradient(colors: [ - color.opacity(0.6), - color.opacity(0.8), - color - ]), - startPoint: .bottom, - endPoint: .top + ZStack(alignment: .bottom) { + // Average level bar + RoundedRectangle(cornerRadius: 1.5) + .fill( + LinearGradient( + gradient: Gradient(colors: [ + color.opacity(0.6), + color.opacity(0.8), + color + ]), + startPoint: .bottom, + endPoint: .top + ) ) - ) - .frame(width: 2, height: height) - .animation(.spring(response: 0.2, dampingFraction: 0.7, blendDuration: 0), value: height) + .frame(width: 2, height: averageHeight) + + } + .animation(.spring(response: 0.2, dampingFraction: 0.7, blendDuration: 0), value: averageHeight) + .animation(.spring(response: 0.2, dampingFraction: 0.7, blendDuration: 0), value: peakHeight) } } @@ -355,7 +388,11 @@ struct NotchStaticVisualizer: View { var body: some View { HStack(spacing: 2) { ForEach(0.. 0.01 ? 1 : 0) + } + .frame(maxHeight: geometry.size.height, alignment: .bottom) } } } diff --git a/VoiceInk/VoiceInk.swift b/VoiceInk/VoiceInk.swift index 8744cfa..2960230 100644 --- a/VoiceInk/VoiceInk.swift +++ b/VoiceInk/VoiceInk.swift @@ -95,7 +95,6 @@ struct VoiceInkApp: App { WindowManager.shared.configureWindow(window) }) .onDisappear { - whisperState.audioEngine.stopAudioEngine() whisperState.unloadModel() } } else { diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index f6167a9..28c8115 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -58,15 +58,14 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { case couldNotLocateModel } - private let modelsDirectory: URL - private let recordingsDirectory: URL - private var transcriptionStartTime: Date? - private var enhancementService: AIEnhancementService? + let modelsDirectory: URL + let recordingsDirectory: URL + private let enhancementService: AIEnhancementService? private let licenseViewModel: LicenseViewModel + private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") + private var transcriptionStartTime: Date? private var notchWindowManager: NotchWindowManager? private var miniWindowManager: MiniWindowManager? - var audioEngine: AudioEngine - private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) { self.modelContext = modelContext @@ -74,7 +73,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { self.recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] .appendingPathComponent("com.prakashjoshipax.VoiceInk") .appendingPathComponent("Recordings") - self.audioEngine = AudioEngine() self.enhancementService = enhancementService self.licenseViewModel = LicenseViewModel() @@ -150,7 +148,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { await recorder.stopRecording() isRecording = false isVisualizerActive = false - audioEngine.stopAudioEngine() if let recordedFile { let duration = Date().timeIntervalSince(transcriptionStartTime ?? Date()) await transcribeAudio(recordedFile, duration: duration) @@ -166,10 +163,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { create: true) .appending(path: "output.wav") - if !self.audioEngine.isRunning { - self.audioEngine.startAudioEngine() - } - try await self.recorder.startRecording(toOutputFile: file, delegate: self) self.isRecording = true @@ -192,7 +185,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { self.messageLog += "\(error.localizedDescription)\n" self.isRecording = false self.isVisualizerActive = false - self.audioEngine.stopAudioEngine() } } } else { @@ -479,30 +471,17 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { await toggleRecord() } } else { - // Serialize audio operations to prevent deadlocks + // Start recording first, then show UI Task { - do { - // First start the audio engine - await MainActor.run { - audioEngine.startAudioEngine() - } - - // Small delay to ensure audio system is ready - try await Task.sleep(nanoseconds: 50_000_000) // 50ms - - // Now play the sound - SoundManager.shared.playStartSound() - - // Show UI - await MainActor.run { - showRecorderPanel() - isMiniRecorderVisible = true - } - - // Finally start recording - await toggleRecord() - } catch { - logger.error("Error during recorder initialization: \(error)") + // Start recording immediately + await toggleRecord() + + // Play sound and show UI after recording has started + SoundManager.shared.playStartSound() + + await MainActor.run { + showRecorderPanel() + isMiniRecorderVisible = true } } } @@ -512,25 +491,21 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { logger.info("Showing recorder panel, type: \(self.recorderType)") if recorderType == "notch" { if notchWindowManager == nil { - notchWindowManager = NotchWindowManager(whisperState: self, audioEngine: audioEngine) + notchWindowManager = NotchWindowManager(whisperState: self, recorder: recorder) logger.info("Created new notch window manager") } notchWindowManager?.show() } else { if miniWindowManager == nil { - miniWindowManager = MiniWindowManager(whisperState: self, audioEngine: audioEngine) + miniWindowManager = MiniWindowManager(whisperState: self, recorder: recorder) logger.info("Created new mini window manager") } miniWindowManager?.show() } - // Audio engine is now started separately in handleToggleMiniRecorder - // SoundManager.shared.playStartSound() - Moved to handleToggleMiniRecorder logger.info("Recorder panel shown successfully") } private func hideRecorderPanel() { - audioEngine.stopAudioEngine() - if isRecording { Task { await toggleRecord() @@ -542,30 +517,20 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { if isMiniRecorderVisible { await dismissMiniRecorder() } else { - // Start a parallel task for both UI and recording - Task { - // Play start sound first - SoundManager.shared.playStartSound() - - // Start audio engine immediately - this can happen in parallel - audioEngine.startAudioEngine() - - // Show UI (this is quick now that we removed animations) - await MainActor.run { - showRecorderPanel() // Modified version that doesn't start audio engine - isMiniRecorderVisible = true - } - - // Start recording - await toggleRecord() + // Start recording first + await toggleRecord() + + // Play sound and show UI after recording has started + SoundManager.shared.playStartSound() + + await MainActor.run { + showRecorderPanel() + isMiniRecorderVisible = true } } } private func cleanupResources() async { - audioEngine.stopAudioEngine() - try? await Task.sleep(nanoseconds: 100_000_000) - if !isRecording && !isProcessing { await whisperContext?.releaseResources() whisperContext = nil @@ -616,7 +581,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { whisperContext = nil isModelLoaded = false - audioEngine.stopAudioEngine() if let recordedFile = recordedFile { try? FileManager.default.removeItem(at: recordedFile) self.recordedFile = nil