From b1524970c1c3a8f6835db7c1f6bde2534c6961c1 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Tue, 10 Jun 2025 23:23:43 +0545 Subject: [PATCH] fix: power mode model selection & removed current model in favor of currentTranscriptionModel --- VoiceInk.xcodeproj/project.pbxproj | 12 ++++----- VoiceInk/Models/PredefinedModels.swift | 6 ++--- VoiceInk/PowerMode/PowerModeConfigView.swift | 5 +--- .../Services/LocalTranscriptionService.swift | 4 ++- VoiceInk/Views/ContentView.swift | 2 +- VoiceInk/Views/Metrics/MetricsSetupView.swift | 2 +- VoiceInk/Views/MetricsView.swift | 2 +- .../OnboardingModelDownloadView.swift | 20 ++++++++------- VoiceInk/Views/RecordView.swift | 2 +- .../Whisper/WhisperState+ModelManager.swift | 17 ++++++------- VoiceInk/Whisper/WhisperState.swift | 25 ++++--------------- 11 files changed, 41 insertions(+), 56 deletions(-) diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index 8ba1075..d9a306e 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -7,8 +7,8 @@ objects = { /* Begin PBXBuildFile section */ - E18D06332DF821F60035A1CE /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E11CB51D2DB1F8AF00F9F3ED /* whisper.xcframework */; }; - E18D06342DF821F60035A1CE /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E11CB51D2DB1F8AF00F9F3ED /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; + E1362A1D2DF893B600E86C6E /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E136D0102DA3EE57000E1E8A /* whisper.xcframework */; }; + E1362A1E2DF893B600E86C6E /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E136D0102DA3EE57000E1E8A /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = 
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; }; E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; }; E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; }; @@ -33,13 +33,13 @@ /* End PBXContainerItemProxy section */ /* Begin PBXCopyFilesBuildPhase section */ - E18D06352DF821F60035A1CE /* Embed Frameworks */ = { + E1362A1F2DF893B600E86C6E /* Embed Frameworks */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; dstPath = ""; dstSubfolderSpec = 10; files = ( - E18D06342DF821F60035A1CE /* whisper.xcframework in Embed Frameworks */, + E1362A1E2DF893B600E86C6E /* whisper.xcframework in Embed Frameworks */, ); name = "Embed Frameworks"; runOnlyForDeploymentPostprocessing = 0; @@ -80,7 +80,7 @@ E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */, E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */, E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */, - E18D06332DF821F60035A1CE /* whisper.xcframework in Frameworks */, + E1362A1D2DF893B600E86C6E /* whisper.xcframework in Frameworks */, E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -142,7 +142,7 @@ E11473AC2CBE0F0A00318EE4 /* Sources */, E11473AD2CBE0F0A00318EE4 /* Frameworks */, E11473AE2CBE0F0A00318EE4 /* Resources */, - E18D06352DF821F60035A1CE /* Embed Frameworks */, + E1362A1F2DF893B600E86C6E /* Embed Frameworks */, ); buildRules = ( ); diff --git a/VoiceInk/Models/PredefinedModels.swift b/VoiceInk/Models/PredefinedModels.swift index b064c95..0a76c97 100644 --- a/VoiceInk/Models/PredefinedModels.swift +++ b/VoiceInk/Models/PredefinedModels.swift @@ -129,7 +129,7 @@ import Foundation displayName: "Whisper Large v3 Turbo (Groq)", description: "Groq's ultra-fast Whisper Large v3 Turbo model with lightning-speed inference", provider: .groq, - speed: 0.7, + speed: 0.65, 
accuracy: 0.96, isMultilingual: true, supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true) @@ -139,8 +139,8 @@ import Foundation displayName: "Scribe v1 (ElevenLabs)", description: "ElevenLabs' Scribe model for fast and accurate transcription.", provider: .elevenLabs, - speed: 0.8, - accuracy: 0.95, + speed: 0.75, + accuracy: 0.98, isMultilingual: true, supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true) ), diff --git a/VoiceInk/PowerMode/PowerModeConfigView.swift b/VoiceInk/PowerMode/PowerModeConfigView.swift index 3134018..eee0cc8 100644 --- a/VoiceInk/PowerMode/PowerModeConfigView.swift +++ b/VoiceInk/PowerMode/PowerModeConfigView.swift @@ -364,7 +364,7 @@ struct ConfigurationView: View { // Create a simple binding that uses current model if nil let modelBinding = Binding( get: { - selectedTranscriptionModelName ?? whisperState.currentTranscriptionModel?.name ?? whisperState.availableModels.first?.name + selectedTranscriptionModelName ?? whisperState.usableModels.first?.name }, set: { selectedTranscriptionModelName = $0 } ) @@ -375,9 +375,6 @@ struct ConfigurationView: View { .foregroundColor(.secondary) Picker("", selection: modelBinding) { - Text("Default (\(whisperState.currentTranscriptionModel?.displayName ?? "None"))") - .tag(nil as String?) - ForEach(whisperState.usableModels, id: \.name) { model in Text(model.displayName).tag(model.name as String?) 
} diff --git a/VoiceInk/Services/LocalTranscriptionService.swift b/VoiceInk/Services/LocalTranscriptionService.swift index bba3600..9dde34b 100644 --- a/VoiceInk/Services/LocalTranscriptionService.swift +++ b/VoiceInk/Services/LocalTranscriptionService.swift @@ -25,7 +25,9 @@ class LocalTranscriptionService: TranscriptionService { if let whisperState = whisperState, await whisperState.isModelLoaded, let loadedContext = await whisperState.whisperContext, - await whisperState.currentModel?.name == localModel.name { + let currentModel = await whisperState.currentTranscriptionModel, + currentModel.provider == .local, + currentModel.name == localModel.name { logger.notice("✅ Using already loaded model: \(localModel.name)") whisperContext = loadedContext diff --git a/VoiceInk/Views/ContentView.swift b/VoiceInk/Views/ContentView.swift index 3387e47..6fe3e21 100644 --- a/VoiceInk/Views/ContentView.swift +++ b/VoiceInk/Views/ContentView.swift @@ -169,7 +169,7 @@ struct ContentView: View { private var isSetupComplete: Bool { hasLoadedData && - whisperState.currentModel != nil && + whisperState.currentTranscriptionModel != nil && KeyboardShortcuts.getShortcut(for: .toggleMiniRecorder) != nil && AXIsProcessTrusted() && CGPreflightScreenCaptureAccess() diff --git a/VoiceInk/Views/Metrics/MetricsSetupView.swift b/VoiceInk/Views/Metrics/MetricsSetupView.swift index 4eed1bd..f167f24 100644 --- a/VoiceInk/Views/Metrics/MetricsSetupView.swift +++ b/VoiceInk/Views/Metrics/MetricsSetupView.swift @@ -78,7 +78,7 @@ struct MetricsSetupView: View { title = "Enable Screen Recording" description = "Allow VoiceInk to understand context from your screen for transcript Enhancement" default: - isCompleted = whisperState.currentModel != nil + isCompleted = whisperState.currentTranscriptionModel != nil icon = "arrow.down" title = "Download Model" description = "Choose and download an AI model" diff --git a/VoiceInk/Views/MetricsView.swift b/VoiceInk/Views/MetricsView.swift index c162f04..e264194 
100644 --- a/VoiceInk/Views/MetricsView.swift +++ b/VoiceInk/Views/MetricsView.swift @@ -68,7 +68,7 @@ struct MetricsView: View { private var isSetupComplete: Bool { hasLoadedData && - whisperState.currentModel != nil && + whisperState.currentTranscriptionModel != nil && KeyboardShortcuts.getShortcut(for: .toggleMiniRecorder) != nil && AXIsProcessTrusted() && CGPreflightScreenCaptureAccess() diff --git a/VoiceInk/Views/Onboarding/OnboardingModelDownloadView.swift b/VoiceInk/Views/Onboarding/OnboardingModelDownloadView.swift index 22d992a..a0c478b 100644 --- a/VoiceInk/Views/Onboarding/OnboardingModelDownloadView.swift +++ b/VoiceInk/Views/Onboarding/OnboardingModelDownloadView.swift @@ -149,8 +149,8 @@ struct OnboardingModelDownloadView: View { } private func checkModelStatus() { - if let model = whisperState.availableModels.first(where: { $0.name == turboModel.name }) { - isModelSet = whisperState.currentModel?.name == model.name + if whisperState.availableModels.contains(where: { $0.name == turboModel.name }) { + isModelSet = whisperState.currentTranscriptionModel?.name == turboModel.name } } @@ -159,11 +159,13 @@ struct OnboardingModelDownloadView: View { withAnimation { showTutorial = true } - } else if let model = whisperState.availableModels.first(where: { $0.name == turboModel.name }) { - Task { - await whisperState.setDefaultModel(model) - withAnimation { - isModelSet = true + } else if whisperState.availableModels.contains(where: { $0.name == turboModel.name }) { + if let modelToSet = whisperState.allAvailableModels.first(where: { $0.name == turboModel.name }) { + Task { + await whisperState.setDefaultTranscriptionModel(modelToSet) + withAnimation { + isModelSet = true + } } } } else { @@ -172,8 +174,8 @@ struct OnboardingModelDownloadView: View { } Task { await whisperState.downloadModel(turboModel) - if let model = whisperState.availableModels.first(where: { $0.name == turboModel.name }) { - await whisperState.setDefaultModel(model) + if let modelToSet = 
whisperState.allAvailableModels.first(where: { $0.name == turboModel.name }) { + await whisperState.setDefaultTranscriptionModel(modelToSet) withAnimation { isModelSet = true isDownloading = false diff --git a/VoiceInk/Views/RecordView.swift b/VoiceInk/Views/RecordView.swift index 3a9c106..dd6639e 100644 --- a/VoiceInk/Views/RecordView.swift +++ b/VoiceInk/Views/RecordView.swift @@ -39,7 +39,7 @@ struct RecordView: View { Text("VOICEINK") .font(.system(size: 42, weight: .bold)) - if whisperState.currentModel != nil { + if whisperState.currentTranscriptionModel != nil { Text("Powered by Whisper AI") .font(.system(size: 15)) .foregroundColor(.secondary) diff --git a/VoiceInk/Whisper/WhisperState+ModelManager.swift b/VoiceInk/Whisper/WhisperState+ModelManager.swift index 215b39d..6b4423e 100644 --- a/VoiceInk/Whisper/WhisperState+ModelManager.swift +++ b/VoiceInk/Whisper/WhisperState+ModelManager.swift @@ -44,18 +44,12 @@ extension WhisperState { await whisperContext?.setPrompt(currentPrompt) isModelLoaded = true - currentModel = model + loadedLocalModel = model } catch { throw WhisperStateError.modelLoadFailed } } - func setDefaultModel(_ model: WhisperModel) async { - currentModel = model - UserDefaults.standard.set(model.name, forKey: "CurrentModel") - canTranscribe = true - } - // MARK: - Model Download & Management /// Helper function to download a file from a URL with progress tracking @@ -255,9 +249,14 @@ extension WhisperState { // Update model state availableModels.removeAll { $0.id == model.id } - if currentModel?.id == model.id { - currentModel = nil + if currentTranscriptionModel?.name == model.name { + + currentTranscriptionModel = nil + UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel") + + loadedLocalModel = nil canTranscribe = false + UserDefaults.standard.removeObject(forKey: "CurrentModel") } } catch { logError("Error deleting model: \(model.name)", error) diff --git a/VoiceInk/Whisper/WhisperState.swift 
b/VoiceInk/Whisper/WhisperState.swift index e2b03ef..27ce581 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -11,7 +11,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { @Published var isModelLoaded = false @Published var canTranscribe = false @Published var isRecording = false - @Published var currentModel: WhisperModel? + @Published var loadedLocalModel: WhisperModel? @Published var currentTranscriptionModel: (any TranscriptionModel)? @Published var isModelLoading = false @Published var availableModels: [WhisperModel] = [] @@ -112,11 +112,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { createRecordingsDirectoryIfNeeded() loadAvailableModels() loadCurrentTranscriptionModel() - - if let savedModelName = UserDefaults.standard.string(forKey: "CurrentModel"), - let savedModel = availableModels.first(where: { $0.name == savedModelName }) { - currentModel = savedModel - } } private func createRecordingsDirectoryIfNeeded() { @@ -403,16 +398,11 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { return permanentURL } + // Loads the default transcription model from UserDefaults private func loadCurrentTranscriptionModel() { if let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"), let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) { currentTranscriptionModel = savedModel - - // If it's a local model, also set it as currentModel for backward compatibility - if let localModel = savedModel as? 
LocalModel, - let whisperModel = availableModels.first(where: { $0.name == localModel.name }) { - currentModel = whisperModel - } } } @@ -422,14 +412,9 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { self.currentTranscriptionModel = model UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel") - // If it's a local model, also update currentModel for backward compatibility - if let localModel = model as? LocalModel, - let whisperModel = self.availableModels.first(where: { $0.name == localModel.name }) { - self.currentModel = whisperModel - UserDefaults.standard.set(whisperModel.name, forKey: "CurrentModel") - } else { - // For cloud models, clear the old currentModel - self.currentModel = nil + // For cloud models, clear the old loadedLocalModel + if model.provider != .local { + self.loadedLocalModel = nil } // Enable transcription for cloud models immediately since they don't need loading