From 516309cb35c32908f32f0910032c11a6b53afcad Mon Sep 17 00:00:00 2001 From: Beingpax Date: Wed, 9 Jul 2025 16:27:23 +0545 Subject: [PATCH] Refactor to state machine for recording states & whisperstate file --- VoiceInk/HotkeyManager.swift | 17 +- .../Views/Recorder/MiniRecorderView.swift | 171 +++++----- .../Views/Recorder/NotchRecorderView.swift | 194 ++++++----- ...t => WhisperState+LocalModelManager.swift} | 45 ++- .../WhisperState+ModelManagement.swift | 46 +++ VoiceInk/Whisper/WhisperState+UI.swift | 11 +- VoiceInk/Whisper/WhisperState.swift | 304 ++++-------------- 7 files changed, 364 insertions(+), 424 deletions(-) rename VoiceInk/Whisper/{WhisperState+ModelManager.swift => WhisperState+LocalModelManager.swift} (91%) create mode 100644 VoiceInk/Whisper/WhisperState+ModelManagement.swift diff --git a/VoiceInk/HotkeyManager.swift b/VoiceInk/HotkeyManager.swift index 0168613..66d2975 100644 --- a/VoiceInk/HotkeyManager.swift +++ b/VoiceInk/HotkeyManager.swift @@ -26,6 +26,11 @@ class HotkeyManager: ObservableObject { private var whisperState: WhisperState private var miniRecorderShortcutManager: MiniRecorderShortcutManager + // MARK: - Helper Properties + private var canProcessHotkeyAction: Bool { + whisperState.recordingState != .transcribing && whisperState.recordingState != .enhancing + } + // NSEvent monitoring for modifier keys private var globalEventMonitor: Any? private var localEventMonitor: Any? @@ -246,7 +251,7 @@ class HotkeyManager: ObservableObject { if isHandsFreeMode { isHandsFreeMode = false Task { @MainActor in - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() } return @@ -254,7 +259,7 @@ class HotkeyManager: ObservableObject { if !whisperState.isMiniRecorderVisible { Task { @MainActor in - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() } } @@ -268,7 +273,7 @@ class HotkeyManager: ObservableObject { isHandsFreeMode = true } else { Task { @MainActor in - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() } } @@ -291,13 +296,13 @@ class HotkeyManager: ObservableObject { if isShortcutHandsFreeMode { isShortcutHandsFreeMode = false - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() return } if !whisperState.isMiniRecorderVisible { - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() } } @@ -314,7 +319,7 @@ class HotkeyManager: ObservableObject { if pressDuration < briefPressThreshold { isShortcutHandsFreeMode = true } else { - guard !whisperState.isTranscribing && !whisperState.isProcessing else { return } + guard canProcessHotkeyAction else { return } await whisperState.handleToggleMiniRecorder() } } diff --git a/VoiceInk/Views/Recorder/MiniRecorderView.swift b/VoiceInk/Views/Recorder/MiniRecorderView.swift index 5de8c27..7c47ad6 100644 --- a/VoiceInk/Views/Recorder/MiniRecorderView.swift +++ b/VoiceInk/Views/Recorder/MiniRecorderView.swift @@ -10,102 +10,119 @@ struct MiniRecorderView: View { @EnvironmentObject private var enhancementService: AIEnhancementService + private var backgroundView: some View { + ZStack { + Color.black.opacity(0.9) + LinearGradient( + colors: [ + Color.black.opacity(0.95), + Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9) + ], + startPoint: .top, + endPoint: .bottom + ) + VisualEffectView(material: .hudWindow, blendingMode: .withinWindow) + .opacity(0.05) + } + .clipShape(Capsule()) + } + + private var statusView: some View { + Group { + let currentState = whisperState.recordingState + + if currentState == .enhancing { + Text("Enhancing") + .foregroundColor(.white) + .font(.system(size: 10, weight: .medium, design: .default)) + .lineLimit(1) + .minimumScaleFactor(0.5) + } else if currentState == .transcribing { + Text("Transcribing") + .foregroundColor(.white) + .font(.system(size: 10, weight: .medium, design: .default)) + .lineLimit(1) + .minimumScaleFactor(0.5) + } else if currentState == .recording { + AudioVisualizer( + audioMeter: recorder.audioMeter, + color: .white, + isActive: currentState == .recording + ) + } else { + StaticVisualizer(color: .white) + } + } + } + + private var rightButton: some View { + Group { + if powerModeManager.isPowerModeEnabled { + NotchToggleButton( + isEnabled: powerModeManager.isPowerModeEnabled, + icon: powerModeManager.currentActiveConfiguration.emoji, + color: .orange, + disabled: false + ) { + showPowerModePopover.toggle() + } + .frame(width: 24) + .padding(.trailing, 8) + .popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) { + PowerModePopover() + } + } else { + NotchToggleButton( + isEnabled: enhancementService.isEnhancementEnabled, + icon: enhancementService.activePrompt?.icon.rawValue ?? "brain", + color: .blue, + disabled: false + ) { + if enhancementService.isEnhancementEnabled { + showEnhancementPromptPopover.toggle() + } else { + enhancementService.isEnhancementEnabled = true + } + } + .frame(width: 24) + .padding(.trailing, 8) + .popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) { + EnhancementPromptPopover() + .environmentObject(enhancementService) + } + } + } + } + var body: some View { Group { if windowManager.isVisible { Capsule() .fill(.clear) - .background( - ZStack { - Color.black.opacity(0.9) - LinearGradient( - colors: [ - Color.black.opacity(0.95), - Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9) - ], - startPoint: .top, - endPoint: .bottom - ) - VisualEffectView(material: .hudWindow, blendingMode: .withinWindow) - .opacity(0.05) - } - .clipShape(Capsule()) - ) + .background(backgroundView) .overlay { Capsule() .strokeBorder(Color.white.opacity(0.1), lineWidth: 0.5) } .overlay { HStack(spacing: 0) { + let isRecording = whisperState.recordingState == .recording + let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing + NotchRecordButton( - isRecording: whisperState.isRecording, - isProcessing: whisperState.isProcessing + isRecording: isRecording, + isProcessing: isProcessing ) { Task { await whisperState.toggleRecord() } } .frame(width: 24) .padding(.leading, 8) - Group { - if whisperState.isEnhancing { - Text("Enhancing") - .foregroundColor(.white) - .font(.system(size: 10, weight: .medium, design: .default)) - .lineLimit(1) - .minimumScaleFactor(0.5) - } else if whisperState.isTranscribing { - Text("Transcribing") - .foregroundColor(.white) - .font(.system(size: 10, weight: .medium, design: .default)) - .lineLimit(1) - .minimumScaleFactor(0.5) - } else if whisperState.isRecording { - AudioVisualizer( - audioMeter: recorder.audioMeter, - color: .white, - isActive: whisperState.isRecording - ) - } else { - StaticVisualizer(color: .white) - } - } - .frame(maxWidth: .infinity) - .padding(.horizontal, 8) + statusView + .frame(maxWidth: .infinity) + .padding(.horizontal, 8) - if powerModeManager.isPowerModeEnabled { - NotchToggleButton( - isEnabled: powerModeManager.isPowerModeEnabled, - icon: powerModeManager.currentActiveConfiguration.emoji, - color: .orange, - disabled: false - ) { - showPowerModePopover.toggle() - } - .frame(width: 24) - .padding(.trailing, 8) - .popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) { - PowerModePopover() - } - } else { - NotchToggleButton( - isEnabled: enhancementService.isEnhancementEnabled, - icon: enhancementService.activePrompt?.icon.rawValue ?? "brain", - color: .blue, - disabled: false - ) { - if enhancementService.isEnhancementEnabled { - showEnhancementPromptPopover.toggle() - } else { - enhancementService.isEnhancementEnabled = true - } - } - .frame(width: 24) - .padding(.trailing, 8) - .popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) { - EnhancementPromptPopover() - .environmentObject(enhancementService) - } - } + rightButton } .padding(.vertical, 8) } diff --git a/VoiceInk/Views/Recorder/NotchRecorderView.swift b/VoiceInk/Views/Recorder/NotchRecorderView.swift index ea5c5cb..2dc3c1e 100644 --- a/VoiceInk/Views/Recorder/NotchRecorderView.swift +++ b/VoiceInk/Views/Recorder/NotchRecorderView.swift @@ -31,95 +31,119 @@ struct NotchRecorderView: View { return 200 } + private var leftSection: some View { + HStack(spacing: 8) { + let isRecording = whisperState.recordingState == .recording + let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing + + NotchRecordButton( + isRecording: isRecording, + isProcessing: isProcessing + ) { + Task { await whisperState.toggleRecord() } + } + .frame(width: 22) + + rightToggleButton + + Spacer() + } + .frame(width: 64) + .padding(.leading, 16) + } + + private var rightToggleButton: some View { + Group { + if powerModeManager.isPowerModeEnabled { + NotchToggleButton( + isEnabled: powerModeManager.isPowerModeEnabled, + icon: powerModeManager.currentActiveConfiguration.emoji, + color: .orange, + disabled: false + ) { + showPowerModePopover.toggle() + } + .frame(width: 22) + .popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) { + PowerModePopover() + } + } else { + NotchToggleButton( + isEnabled: enhancementService.isEnhancementEnabled, + icon: enhancementService.activePrompt?.icon.rawValue ?? "brain", + color: .blue, + disabled: false + ) { + if enhancementService.isEnhancementEnabled { + showEnhancementPromptPopover.toggle() + } else { + enhancementService.isEnhancementEnabled = true + } + } + .frame(width: 22) + .popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) { + EnhancementPromptPopover() + .environmentObject(enhancementService) + } + } + } + } + + private var centerSection: some View { + Rectangle() + .fill(Color.clear) + .frame(width: exactNotchWidth) + .contentShape(Rectangle()) + } + + private var rightSection: some View { + HStack(spacing: 0) { + Spacer() + statusDisplay + } + .frame(width: 84) + .padding(.trailing, 16) + } + + private var statusDisplay: some View { + Group { + let currentState = whisperState.recordingState + + if currentState == .enhancing { + Text("Enhancing") + .foregroundColor(.white) + .font(.system(size: 10, weight: .medium, design: .default)) + .lineLimit(1) + .minimumScaleFactor(0.5) + } else if currentState == .transcribing { + Text("Transcribing") + .foregroundColor(.white) + .font(.system(size: 10, weight: .medium, design: .default)) + .lineLimit(1) + .minimumScaleFactor(0.5) + } else if currentState == .recording { + AudioVisualizer( + audioMeter: recorder.audioMeter, + color: .white, + isActive: currentState == .recording + ) + .scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center) + } else { + StaticVisualizer(color: .white) + .scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center) + } + } + .frame(width: 70) + .padding(.trailing, 8) + } + var body: some View { Group { if windowManager.isVisible { HStack(spacing: 0) { - HStack(spacing: 8) { - NotchRecordButton( - isRecording: whisperState.isRecording, - isProcessing: whisperState.isProcessing - ) { - Task { await whisperState.toggleRecord() } - } - .frame(width: 22) - - if powerModeManager.isPowerModeEnabled { - NotchToggleButton( - isEnabled: powerModeManager.isPowerModeEnabled, - icon: powerModeManager.currentActiveConfiguration.emoji, - color: .orange, - disabled: false - ) { - showPowerModePopover.toggle() - } - .frame(width: 22) - .popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) { - PowerModePopover() - } - } else { - NotchToggleButton( - isEnabled: enhancementService.isEnhancementEnabled, - icon: enhancementService.activePrompt?.icon.rawValue ?? "brain", - color: .blue, - disabled: false - ) { - if enhancementService.isEnhancementEnabled { - showEnhancementPromptPopover.toggle() - } else { - enhancementService.isEnhancementEnabled = true - } - } - .frame(width: 22) - .popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) { - EnhancementPromptPopover() - .environmentObject(enhancementService) - } - } - - Spacer() - } - .frame(width: 64) - .padding(.leading, 16) - - Rectangle() - .fill(Color.clear) - .frame(width: exactNotchWidth) - .contentShape(Rectangle()) - - HStack(spacing: 0) { - Spacer() - - Group { - if whisperState.isEnhancing { - Text("Enhancing") - .foregroundColor(.white) - .font(.system(size: 10, weight: .medium, design: .default)) - .lineLimit(1) - .minimumScaleFactor(0.5) - } else if whisperState.isTranscribing { - Text("Transcribing") - .foregroundColor(.white) - .font(.system(size: 10, weight: .medium, design: .default)) - .lineLimit(1) - .minimumScaleFactor(0.5) - } else if whisperState.isRecording { - AudioVisualizer( - audioMeter: recorder.audioMeter, - color: .white, - isActive: whisperState.isRecording - ) - .scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center) - } else { - StaticVisualizer(color: .white) - .scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center) - } - } - .frame(width: 70) - .padding(.trailing, 8) - } - .frame(width: 84) - .padding(.trailing, 16) + leftSection + centerSection + rightSection } .frame(height: menuBarHeight) .frame(maxWidth: windowManager.isVisible ? .infinity : 0) diff --git a/VoiceInk/Whisper/WhisperState+ModelManager.swift b/VoiceInk/Whisper/WhisperState+LocalModelManager.swift similarity index 91% rename from VoiceInk/Whisper/WhisperState+ModelManager.swift rename to VoiceInk/Whisper/WhisperState+LocalModelManager.swift index b05900f..d73e76c 100644 --- a/VoiceInk/Whisper/WhisperState+ModelManager.swift +++ b/VoiceInk/Whisper/WhisperState+LocalModelManager.swift @@ -3,8 +3,51 @@ import os import Zip import SwiftUI + +struct WhisperModel: Identifiable { + let id = UUID() + let name: String + let url: URL + var coreMLEncoderURL: URL? // Path to the unzipped .mlmodelc directory + var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil } + + var downloadURL: String { + "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)" + } + + var filename: String { + "\(name).bin" + } + + // Core ML related properties + var coreMLZipDownloadURL: String? { + // Only non-quantized models have Core ML versions + guard !name.contains("q5") && !name.contains("q8") else { return nil } + return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip" + } + + var coreMLEncoderDirectoryName: String? { + guard coreMLZipDownloadURL != nil else { return nil } + return "\(name)-encoder.mlmodelc" + } +} + +private class TaskDelegate: NSObject, URLSessionTaskDelegate { + private let continuation: CheckedContinuation + + init(_ continuation: CheckedContinuation) { + self.continuation = continuation + } + + func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) { + continuation.resume() + } +} + // MARK: - Model Management Extension extension WhisperState { + + // MARK: - Model Directory Management @@ -255,7 +298,7 @@ extension WhisperState { UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel") loadedLocalModel = nil - canTranscribe = false + recordingState = .idle UserDefaults.standard.removeObject(forKey: "CurrentModel") } } catch { diff --git a/VoiceInk/Whisper/WhisperState+ModelManagement.swift b/VoiceInk/Whisper/WhisperState+ModelManagement.swift new file mode 100644 index 0000000..dc21002 --- /dev/null +++ b/VoiceInk/Whisper/WhisperState+ModelManagement.swift @@ -0,0 +1,46 @@ +import Foundation +import SwiftUI + +@MainActor +extension WhisperState { + // Loads the default transcription model from UserDefaults + func loadCurrentTranscriptionModel() { + if let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"), + let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) { + currentTranscriptionModel = savedModel + } + } + + // Function to set any transcription model as default + func setDefaultTranscriptionModel(_ model: any TranscriptionModel) { + self.currentTranscriptionModel = model + UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel") + + // For cloud models, clear the old loadedLocalModel + if model.provider != .local { + self.loadedLocalModel = nil + } + + // Enable transcription for cloud models immediately since they don't need loading + if model.provider != .local { + self.recordingState = .idle + self.isModelLoaded = true + } + + // Post notification about the model change + NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name]) + } + + func refreshAllAvailableModels() { + let currentModelId = currentTranscriptionModel?.id + allAvailableModels = PredefinedModels.models + + // If there was a current default model, find its new version in the refreshed list and update it. + // This handles cases where the default model was edited. + if let currentId = currentModelId, + let updatedModel = allAvailableModels.first(where: { $0.id == currentId }) + { + setDefaultTranscriptionModel(updatedModel) + } + } +} \ No newline at end of file diff --git a/VoiceInk/Whisper/WhisperState+UI.swift b/VoiceInk/Whisper/WhisperState+UI.swift index 3597d81..b320434 100644 --- a/VoiceInk/Whisper/WhisperState+UI.swift +++ b/VoiceInk/Whisper/WhisperState+UI.swift @@ -36,7 +36,7 @@ extension WhisperState { func toggleMiniRecorder() async { if isMiniRecorderVisible { - if isRecording { + if recordingState == .recording { await toggleRecord() } else { await cancelRecording() @@ -59,7 +59,7 @@ extension WhisperState { NotificationManager.shared.dismissNotification() } - if isRecording { + if recordingState == .recording { await recorder.stopRecording() } @@ -67,12 +67,7 @@ extension WhisperState { hideRecorderPanel() await MainActor.run { - isRecording = false - isVisualizerActive = false - isProcessing = false - isEnhancing = false - isTranscribing = false - canTranscribe = true + recordingState = .idle isMiniRecorderVisible = false } } diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index 868d79d..6f0f98d 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -6,11 +6,18 @@ import AppKit import KeyboardShortcuts import os +// MARK: - Recording State Machine +enum RecordingState: Equatable { + case idle + case recording + case transcribing + case enhancing +} + @MainActor class WhisperState: NSObject, ObservableObject { + @Published var recordingState: RecordingState = .idle @Published var isModelLoaded = false - @Published var canTranscribe = false - @Published var isRecording = false @Published var loadedLocalModel: WhisperModel? @Published var currentTranscriptionModel: (any TranscriptionModel)? @Published var isModelLoading = false @@ -18,10 +25,7 @@ class WhisperState: NSObject, ObservableObject { @Published var allAvailableModels: [any TranscriptionModel] = PredefinedModels.models @Published var clipboardMessage = "" @Published var miniRecorderError: String? - @Published var isProcessing = false @Published var shouldCancelRecording = false - @Published var isTranscribing = false - @Published var isEnhancing = false @Published var isAutoCopyEnabled: Bool = UserDefaults.standard.object(forKey: "IsAutoCopyEnabled") as? Bool ?? true { didSet { UserDefaults.standard.set(isAutoCopyEnabled, forKey: "IsAutoCopyEnabled") @@ -33,10 +37,6 @@ class WhisperState: NSObject, ObservableObject { } } - @Published var isVisualizerActive = false - - - @Published var isMiniRecorderVisible = false { didSet { if isMiniRecorderVisible { @@ -125,27 +125,22 @@ class WhisperState: NSObject, ObservableObject { } func toggleRecord() async { - if isRecording { - logger.notice("🛑 Stopping recording") - await MainActor.run { - isRecording = false - isVisualizerActive = false - } + if recordingState == .recording { await recorder.stopRecording() if let recordedFile { if !shouldCancelRecording { await transcribeAudio(recordedFile) } else { - logger.info("🛑 Transcription and paste aborted in toggleRecord due to shouldCancelRecording flag.") await MainActor.run { - isProcessing = false - isTranscribing = false - canTranscribe = true + recordingState = .idle } await cleanupModelResources() } } else { logger.error("❌ No recorded file found after stopping recording") + await MainActor.run { + recordingState = .idle + } } } else { guard currentTranscriptionModel != nil else { @@ -158,29 +153,23 @@ class WhisperState: NSObject, ObservableObject { return } shouldCancelRecording = false - logger.notice("🎙️ Starting recording sequence...") requestRecordPermission { [self] granted in if granted { Task { do { - // --- Prepare temporary file URL within Application Support base directory --- - let baseAppSupportDirectory = self.recordingsDirectory.deletingLastPathComponent() - let file = baseAppSupportDirectory.appendingPathComponent("output.wav") - // Ensure the base directory exists - try? FileManager.default.createDirectory(at: baseAppSupportDirectory, withIntermediateDirectories: true) - // Clean up any old temporary file first - self.recordedFile = file - - try await self.recorder.startRecording(toOutputFile: file) - self.logger.notice("✅ Audio engine started successfully.") - + // --- Prepare permanent file URL --- + let fileName = "\(UUID().uuidString).wav" + let permanentURL = self.recordingsDirectory.appendingPathComponent(fileName) + self.recordedFile = permanentURL + + try await self.recorder.startRecording(toOutputFile: permanentURL) + await MainActor.run { - self.isRecording = true - self.isVisualizerActive = true + self.recordingState = .recording } await ActiveWindowService.shared.applyConfigurationForCurrentApp() - + // Only load model if it's a local model and not already loaded if let model = self.currentTranscriptionModel, model.provider == .local { if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }), @@ -192,23 +181,19 @@ class WhisperState: NSObject, ObservableObject { } } } - + if let enhancementService = self.enhancementService, enhancementService.useScreenCaptureContext { await enhancementService.captureScreenContext() } - + } catch { self.logger.error("❌ Failed to start recording: \(error.localizedDescription)") await MainActor.run { - self.isRecording = false - self.isVisualizerActive = false - } - if let url = self.recordedFile { - try? FileManager.default.removeItem(at: url) - self.recordedFile = nil - self.logger.notice("🗑️ Cleaned up temporary recording file after failed start.") + self.recordingState = .idle } + // Do not remove the file on a failed start, to preserve all recordings. + self.recordedFile = nil } } } else { @@ -219,32 +204,20 @@ class WhisperState: NSObject, ObservableObject { } private func requestRecordPermission(response: @escaping (Bool) -> Void) { -#if os(macOS) response(true) -#else - AVAudioSession.sharedInstance().requestRecordPermission { granted in - response(granted) - } -#endif } - private func transcribeAudio(_ url: URL) async { if shouldCancelRecording { - logger.info("🎤 Transcription and paste aborted at the beginning of transcribeAudio due to shouldCancelRecording flag.") await MainActor.run { - isProcessing = false - isTranscribing = false - canTranscribe = true + recordingState = .idle } await cleanupModelResources() return } await MainActor.run { - isProcessing = true - isTranscribing = true - canTranscribe = false + recordingState = .transcribing } defer { @@ -257,11 +230,7 @@ class WhisperState: NSObject, ObservableObject { logger.notice("🔄 Starting transcription...") - var permanentURL: URL? - do { - permanentURL = try saveRecordingPermanently(url) - guard let model = currentTranscriptionModel else { throw WhisperStateError.transcriptionFailed } @@ -280,8 +249,6 @@ class WhisperState: NSObject, ObservableObject { var text = try await transcriptionService.transcribe(audioURL: url, model: model) let transcriptionDuration = Date().timeIntervalSince(transcriptionStart) - await MainActor.run { self.isTranscribing = false } - if await checkCancellationAndCleanup() { return } text = text.trimmingCharacters(in: .whitespacesAndNewlines) @@ -307,19 +274,14 @@ class WhisperState: NSObject, ObservableObject { do { if await checkCancellationAndCleanup() { return } - await MainActor.run { self.isEnhancing = true } + await MainActor.run { self.recordingState = .enhancing } let textForAI = promptDetectionResult?.processedText ?? text - defer { - Task { @MainActor in - self.isEnhancing = false - } - } let (enhancedText, enhancementDuration) = try await enhancementService.enhance(textForAI) let newTranscription = Transcription( text: originalText, duration: actualDuration, enhancedText: enhancedText, - audioFileURL: permanentURL?.absoluteString, + audioFileURL: url.absoluteString, transcriptionModelName: model.displayName, aiEnhancementModelName: enhancementService.getAIService()?.currentModel, transcriptionDuration: transcriptionDuration, @@ -333,7 +295,7 @@ class WhisperState: NSObject, ObservableObject { text: originalText, duration: actualDuration, enhancedText: "Enhancement failed: \(error.localizedDescription)", - audioFileURL: permanentURL?.absoluteString, + audioFileURL: url.absoluteString, transcriptionModelName: model.displayName, transcriptionDuration: transcriptionDuration ) @@ -351,7 +313,7 @@ class WhisperState: NSObject, ObservableObject { let newTranscription = Transcription( text: originalText, duration: actualDuration, - audioFileURL: permanentURL?.absoluteString, + audioFileURL: url.absoluteString, transcriptionModelName: model.displayName, transcriptionDuration: transcriptionDuration ) @@ -379,7 +341,6 @@ class WhisperState: NSObject, ObservableObject { ClipboardManager.copyToClipboard(text) } } - try? FileManager.default.removeItem(at: url) if let result = promptDetectionResult, let enhancementService = enhancementService, @@ -393,48 +354,34 @@ class WhisperState: NSObject, ObservableObject { } } catch { - if let permanentURL = permanentURL { - do { - let audioAsset = AVURLAsset(url: permanentURL) - let duration = CMTimeGetSeconds(try await audioAsset.load(.duration)) + do { + let audioAsset = AVURLAsset(url: url) + let duration = CMTimeGetSeconds(try await audioAsset.load(.duration)) + + await MainActor.run { + let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription + let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? "" + let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)" - await MainActor.run { - let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription - let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? "" - let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)" - - let failedTranscription = Transcription( - text: "Transcription Failed: \(fullErrorText)", - duration: duration, - enhancedText: nil, - audioFileURL: permanentURL.absoluteString - ) - - modelContext.insert(failedTranscription) - try? modelContext.save() - } - } catch { - // Silently continue if failed transcription record can't be saved + let failedTranscription = Transcription( + text: "Transcription Failed: \(fullErrorText)", + duration: duration, + enhancedText: nil, + audioFileURL: url.absoluteString + ) + + modelContext.insert(failedTranscription) + try? modelContext.save() } + } catch { + logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)") } await MainActor.run { - if permanentURL != nil { - NotificationManager.shared.showNotification( - title: "Transcription Failed. Tap to retry.", - type: .error, - onTap: { [weak self] in - Task { - await self?.retryLastTranscription() - } - } - ) - } else { - NotificationManager.shared.showNotification( - title: "Recording Failed", - type: .error - ) - } + NotificationManager.shared.showNotification( + title: "Transcription Failed", + type: .error + ) } await self.dismissMiniRecorder() @@ -444,107 +391,10 @@ class WhisperState: NSObject, ObservableObject { } } - private func saveRecordingPermanently(_ tempURL: URL) throws -> URL { - let fileName = "\(UUID().uuidString).wav" - let permanentURL = recordingsDirectory.appendingPathComponent(fileName) - try FileManager.default.copyItem(at: tempURL, to: permanentURL) - return permanentURL - } - - func retryLastTranscription() async { - do { - let descriptor = FetchDescriptor( - sortBy: [SortDescriptor(\.timestamp, order: .reverse)] - ) - let transcriptions = try modelContext.fetch(descriptor) - - guard let lastTranscription = transcriptions.first, - lastTranscription.text.hasPrefix("Transcription Failed"), - let audioURLString = lastTranscription.audioFileURL, - let audioURL = URL(string: audioURLString) else { - return - } - - guard let model = currentTranscriptionModel else { - throw WhisperStateError.transcriptionFailed - } - - let transcriptionService = AudioTranscriptionService(modelContext: modelContext, whisperState: self) - let newTranscription = try await transcriptionService.retranscribeAudio(from: audioURL, using: model) - - await MainActor.run { - NotificationManager.shared.showNotification( - title: "Transcription Successful", - type: .success - ) - - let textToPaste = newTranscription.enhancedText ?? newTranscription.text - DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) { - CursorPaster.pasteAtCursor(textToPaste + " ", shouldPreserveClipboard: !self.isAutoCopyEnabled) - - if self.isAutoCopyEnabled { - ClipboardManager.copyToClipboard(textToPaste) - } - } - } - - } catch { - await MainActor.run { - NotificationManager.shared.showNotification( - title: "Retry Failed", - type: .error - ) - } - } - } - - // Loads the default transcription model from UserDefaults - private func loadCurrentTranscriptionModel() { - if let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"), - let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) { - currentTranscriptionModel = savedModel - } - } - - // Function to set any transcription model as default - func setDefaultTranscriptionModel(_ model: any TranscriptionModel) { - self.currentTranscriptionModel = model - UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel") - - // For cloud models, clear the old loadedLocalModel - if model.provider != .local { - self.loadedLocalModel = nil - } - - // Enable transcription for cloud models immediately since they don't need loading - if model.provider != .local { - self.canTranscribe = true - self.isModelLoaded = true - } - - logger.info("Default transcription model set to: \(model.name) (\(model.provider.rawValue))") - - // Post notification about the model change - NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name]) - } - func getEnhancementService() -> AIEnhancementService? { return enhancementService } - func refreshAllAvailableModels() { - let currentModelId = currentTranscriptionModel?.id - allAvailableModels = PredefinedModels.models - - // If there was a current default model, find its new version in the refreshed list and update it. - // This handles cases where the default model was edited. - if let currentId = currentModelId, - let updatedModel = allAvailableModels.first(where: { $0.id == currentId }) - { - setDefaultTranscriptionModel(updatedModel) - } - } - private func checkCancellationAndCleanup() async -> Bool { if shouldCancelRecording { await cleanupAndDismiss() @@ -561,46 +411,6 @@ class WhisperState: NSObject, ObservableObject { } } -struct WhisperModel: Identifiable { - let id = UUID() - let name: String - let url: URL - var coreMLEncoderURL: URL? // Path to the unzipped .mlmodelc directory - var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil } - - var downloadURL: String { - "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)" - } - - var filename: String { - "\(name).bin" - } - - // Core ML related properties - var coreMLZipDownloadURL: String? { - // Only non-quantized models have Core ML versions - guard !name.contains("q5") && !name.contains("q8") else { return nil } - return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip" - } - - var coreMLEncoderDirectoryName: String? { - guard coreMLZipDownloadURL != nil else { return nil } - return "\(name)-encoder.mlmodelc" - } -} - -private class TaskDelegate: NSObject, URLSessionTaskDelegate { - private let continuation: CheckedContinuation - - init(_ continuation: CheckedContinuation) { - self.continuation = continuation - } - - func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) { - continuation.resume() - } -} - extension Notification.Name { static let toggleMiniRecorder = Notification.Name("toggleMiniRecorder") static let didChangeModel = Notification.Name("didChangeModel")