From e2e6c3ac944f42c83471fd1044751e81cf327a1e Mon Sep 17 00:00:00 2001 From: Beingpax Date: Tue, 10 Jun 2025 17:13:55 +0545 Subject: [PATCH] feat: Migrate Power Mode to support cloud models --- VoiceInk.xcodeproj/project.pbxproj | 12 ++-- VoiceInk/PowerMode/ActiveWindowService.swift | 4 +- VoiceInk/PowerMode/PowerModeConfig.swift | 59 +++++++++++++++++-- VoiceInk/PowerMode/PowerModeConfigView.swift | 27 ++++----- .../PowerMode/PowerModeViewComponents.swift | 4 +- 5 files changed, 76 insertions(+), 30 deletions(-) diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index 83048c8..8ba1075 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -7,8 +7,8 @@ objects = { /* Begin PBXBuildFile section */ - E10340512DE1F666008BCBE5 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E136D0102DA3EE57000E1E8A /* whisper.xcframework */; }; - E10340522DE1F666008BCBE5 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E136D0102DA3EE57000E1E8A /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; + E18D06332DF821F60035A1CE /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E11CB51D2DB1F8AF00F9F3ED /* whisper.xcframework */; }; + E18D06342DF821F60035A1CE /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E11CB51D2DB1F8AF00F9F3ED /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; }; E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; }; E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; }; @@ -33,13 +33,13 @@ /* End PBXContainerItemProxy section */ /* Begin PBXCopyFilesBuildPhase section */ - E1C7A8132DE06FC70034EDA0 /* Embed Frameworks */ = { + E18D06352DF821F60035A1CE /* Embed Frameworks */ = { isa = PBXCopyFilesBuildPhase; buildActionMask = 2147483647; dstPath = ""; dstSubfolderSpec = 10; files = ( - E10340522DE1F666008BCBE5 /* whisper.xcframework in Embed Frameworks */, + E18D06342DF821F60035A1CE /* whisper.xcframework in Embed Frameworks */, ); name = "Embed Frameworks"; runOnlyForDeploymentPostprocessing = 0; @@ -80,7 +80,7 @@ E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */, E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */, E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */, - E10340512DE1F666008BCBE5 /* whisper.xcframework in Frameworks */, + E18D06332DF821F60035A1CE /* whisper.xcframework in Frameworks */, E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -142,7 +142,7 @@ E11473AC2CBE0F0A00318EE4 /* Sources */, E11473AD2CBE0F0A00318EE4 /* Frameworks */, E11473AE2CBE0F0A00318EE4 /* Resources */, - E1C7A8132DE06FC70034EDA0 /* Embed Frameworks */, + E18D06352DF821F60035A1CE /* Embed Frameworks */, ); buildRules = ( ); diff --git a/VoiceInk/PowerMode/ActiveWindowService.swift b/VoiceInk/PowerMode/ActiveWindowService.swift index 21a21e7..3388ac2 100644 --- a/VoiceInk/PowerMode/ActiveWindowService.swift +++ b/VoiceInk/PowerMode/ActiveWindowService.swift @@ -116,7 +116,7 @@ class ActiveWindowService: ObservableObject { } if let whisperState = self.whisperState, - let modelName = config.selectedWhisperModel, + let modelName = config.selectedTranscriptionModelName, let selectedModel = await whisperState.allAvailableModels.first(where: { $0.name == modelName }) { let currentModelName = await MainActor.run { whisperState.currentTranscriptionModel?.name } @@ -127,7 +127,7 @@ class ActiveWindowService: ObservableObject { await whisperState.setDefaultTranscriptionModel(selectedModel) // The cleanup and load cycle is only necessary for local models. - if selectedModel.provider == .local { + if selectedModel.provider == ModelProvider.local { // Unload any previously loaded model to free up memory. await whisperState.cleanupModelResources() diff --git a/VoiceInk/PowerMode/PowerModeConfig.swift b/VoiceInk/PowerMode/PowerModeConfig.swift index ae1b994..827e19e 100644 --- a/VoiceInk/PowerMode/PowerModeConfig.swift +++ b/VoiceInk/PowerMode/PowerModeConfig.swift @@ -8,15 +8,22 @@ struct PowerModeConfig: Codable, Identifiable, Equatable { var urlConfigs: [URLConfig]? var isAIEnhancementEnabled: Bool var selectedPrompt: String? - var selectedWhisperModel: String? + var selectedTranscriptionModelName: String? var selectedLanguage: String? var useScreenCapture: Bool var selectedAIProvider: String? var selectedAIModel: String? + + // Custom coding keys to handle migration from selectedWhisperModel + enum CodingKeys: String, CodingKey { + case id, name, emoji, appConfigs, urlConfigs, isAIEnhancementEnabled, selectedPrompt, selectedLanguage, useScreenCapture, selectedAIProvider, selectedAIModel + case selectedWhisperModel // Old key + case selectedTranscriptionModelName // New key + } - init(id: UUID = UUID(), name: String, emoji: String, appConfigs: [AppConfig]? = nil, + init(id: UUID = UUID(), name: String, emoji: String, appConfigs: [AppConfig]? = nil, urlConfigs: [URLConfig]? = nil, isAIEnhancementEnabled: Bool, selectedPrompt: String? = nil, - selectedWhisperModel: String? = nil, selectedLanguage: String? = nil, useScreenCapture: Bool = false, + selectedTranscriptionModelName: String? = nil, selectedLanguage: String? = nil, useScreenCapture: Bool = false, selectedAIProvider: String? = nil, selectedAIModel: String? = nil) { self.id = id self.name = name @@ -28,9 +35,49 @@ struct PowerModeConfig: Codable, Identifiable, Equatable { self.useScreenCapture = useScreenCapture self.selectedAIProvider = selectedAIProvider ?? UserDefaults.standard.string(forKey: "selectedAIProvider") self.selectedAIModel = selectedAIModel - self.selectedWhisperModel = selectedWhisperModel ?? UserDefaults.standard.string(forKey: "CurrentModel") + self.selectedTranscriptionModelName = selectedTranscriptionModelName ?? UserDefaults.standard.string(forKey: "CurrentTranscriptionModel") self.selectedLanguage = selectedLanguage ?? UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "en" } + + init(from decoder: Decoder) throws { + let container = try decoder.container(keyedBy: CodingKeys.self) + id = try container.decode(UUID.self, forKey: .id) + name = try container.decode(String.self, forKey: .name) + emoji = try container.decode(String.self, forKey: .emoji) + appConfigs = try container.decodeIfPresent([AppConfig].self, forKey: .appConfigs) + urlConfigs = try container.decodeIfPresent([URLConfig].self, forKey: .urlConfigs) + isAIEnhancementEnabled = try container.decode(Bool.self, forKey: .isAIEnhancementEnabled) + selectedPrompt = try container.decodeIfPresent(String.self, forKey: .selectedPrompt) + selectedLanguage = try container.decodeIfPresent(String.self, forKey: .selectedLanguage) + useScreenCapture = try container.decode(Bool.self, forKey: .useScreenCapture) + selectedAIProvider = try container.decodeIfPresent(String.self, forKey: .selectedAIProvider) + selectedAIModel = try container.decodeIfPresent(String.self, forKey: .selectedAIModel) + + if let newModelName = try container.decodeIfPresent(String.self, forKey: .selectedTranscriptionModelName) { + selectedTranscriptionModelName = newModelName + } else if let oldModelName = try container.decodeIfPresent(String.self, forKey: .selectedWhisperModel) { + selectedTranscriptionModelName = oldModelName + } else { + selectedTranscriptionModelName = nil + } + } + + func encode(to encoder: Encoder) throws { + var container = encoder.container(keyedBy: CodingKeys.self) + try container.encode(id, forKey: .id) + try container.encode(name, forKey: .name) + try container.encode(emoji, forKey: .emoji) + try container.encodeIfPresent(appConfigs, forKey: .appConfigs) + try container.encodeIfPresent(urlConfigs, forKey: .urlConfigs) + try container.encode(isAIEnhancementEnabled, forKey: .isAIEnhancementEnabled) + try container.encodeIfPresent(selectedPrompt, forKey: .selectedPrompt) + try container.encodeIfPresent(selectedLanguage, forKey: .selectedLanguage) + try container.encode(useScreenCapture, forKey: .useScreenCapture) + try container.encodeIfPresent(selectedAIProvider, forKey: .selectedAIProvider) + try container.encodeIfPresent(selectedAIModel, forKey: .selectedAIModel) + try container.encodeIfPresent(selectedTranscriptionModelName, forKey: .selectedTranscriptionModelName) + } + static func == (lhs: PowerModeConfig, rhs: PowerModeConfig) -> Bool { lhs.id == rhs.id @@ -96,7 +143,7 @@ class PowerModeManager: ObservableObject { defaultConfig = config } else { // Get default values from UserDefaults if available - let defaultModelName = UserDefaults.standard.string(forKey: "CurrentModel") + let defaultModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel") let defaultLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "en" defaultConfig = PowerModeConfig( @@ -105,7 +152,7 @@ class PowerModeManager: ObservableObject { emoji: "⚙️", isAIEnhancementEnabled: false, selectedPrompt: nil, - selectedWhisperModel: defaultModelName, + selectedTranscriptionModelName: defaultModelName, selectedLanguage: defaultLanguage ) saveDefaultConfig() diff --git a/VoiceInk/PowerMode/PowerModeConfigView.swift b/VoiceInk/PowerMode/PowerModeConfigView.swift index dcf2aae..3134018 100644 --- a/VoiceInk/PowerMode/PowerModeConfigView.swift +++ b/VoiceInk/PowerMode/PowerModeConfigView.swift @@ -16,7 +16,7 @@ struct ConfigurationView: View { @State private var isShowingAppPicker = false @State private var isAIEnhancementEnabled: Bool @State private var selectedPromptId: UUID? - @State private var selectedWhisperModelName: String? + @State private var selectedTranscriptionModelName: String? @State private var selectedLanguage: String? @State private var installedApps: [(url: URL, name: String, bundleId: String, icon: NSImage)] = [] @State private var searchText = "" @@ -56,10 +56,10 @@ struct ConfigurationView: View { // Simplified computed property for effective model name private var effectiveModelName: String? { - if let model = selectedWhisperModelName { + if let model = selectedTranscriptionModelName { return model } - return whisperState.currentModel?.name ?? whisperState.availableModels.first?.name + return whisperState.currentTranscriptionModel?.name } init(mode: ConfigurationMode, powerModeManager: PowerModeManager) { @@ -71,7 +71,7 @@ struct ConfigurationView: View { case .add: _isAIEnhancementEnabled = State(initialValue: true) _selectedPromptId = State(initialValue: nil) - _selectedWhisperModelName = State(initialValue: nil) + _selectedTranscriptionModelName = State(initialValue: nil) _selectedLanguage = State(initialValue: nil) _configName = State(initialValue: "") _selectedEmoji = State(initialValue: "✏️") @@ -84,7 +84,7 @@ struct ConfigurationView: View { let latestConfig = powerModeManager.getConfiguration(with: config.id) ?? config _isAIEnhancementEnabled = State(initialValue: latestConfig.isAIEnhancementEnabled) _selectedPromptId = State(initialValue: latestConfig.selectedPrompt.flatMap { UUID(uuidString: $0) }) - _selectedWhisperModelName = State(initialValue: latestConfig.selectedWhisperModel) + _selectedTranscriptionModelName = State(initialValue: latestConfig.selectedTranscriptionModelName) _selectedLanguage = State(initialValue: latestConfig.selectedLanguage) _configName = State(initialValue: latestConfig.name) _selectedEmoji = State(initialValue: latestConfig.emoji) @@ -98,7 +98,7 @@ struct ConfigurationView: View { let latestConfig = powerModeManager.defaultConfig _isAIEnhancementEnabled = State(initialValue: latestConfig.isAIEnhancementEnabled) _selectedPromptId = State(initialValue: latestConfig.selectedPrompt.flatMap { UUID(uuidString: $0) }) - _selectedWhisperModelName = State(initialValue: latestConfig.selectedWhisperModel) + _selectedTranscriptionModelName = State(initialValue: latestConfig.selectedTranscriptionModelName) _selectedLanguage = State(initialValue: latestConfig.selectedLanguage) _configName = State(initialValue: latestConfig.name) _selectedEmoji = State(initialValue: latestConfig.emoji) @@ -364,9 +364,9 @@ struct ConfigurationView: View { // Create a simple binding that uses current model if nil let modelBinding = Binding( get: { - selectedWhisperModelName ?? whisperState.currentModel?.name ?? whisperState.availableModels.first?.name + selectedTranscriptionModelName ?? whisperState.currentTranscriptionModel?.name ?? whisperState.availableModels.first?.name }, - set: { selectedWhisperModelName = $0 } + set: { selectedTranscriptionModelName = $0 } ) HStack { @@ -375,12 +375,11 @@ struct ConfigurationView: View { .foregroundColor(.secondary) Picker("", selection: modelBinding) { - Text("Default") + Text("Default (\(whisperState.currentTranscriptionModel?.displayName ?? "None"))") .tag(nil as String?) ForEach(whisperState.usableModels, id: \.name) { model in - Text(model.displayName) - .tag(model.name as String?) + Text(model.displayName).tag(model.name as String?) } } .labelsHidden() @@ -672,7 +671,7 @@ struct ConfigurationView: View { urlConfigs: websiteConfigs.isEmpty ? nil : websiteConfigs, isAIEnhancementEnabled: isAIEnhancementEnabled, selectedPrompt: selectedPromptId?.uuidString, - selectedWhisperModel: selectedWhisperModelName, + selectedTranscriptionModelName: selectedTranscriptionModelName, selectedLanguage: selectedLanguage, useScreenCapture: useScreenCapture, selectedAIProvider: selectedAIProvider, @@ -684,7 +683,7 @@ struct ConfigurationView: View { updatedConfig.emoji = selectedEmoji updatedConfig.isAIEnhancementEnabled = isAIEnhancementEnabled updatedConfig.selectedPrompt = selectedPromptId?.uuidString - updatedConfig.selectedWhisperModel = selectedWhisperModelName + updatedConfig.selectedTranscriptionModelName = selectedTranscriptionModelName updatedConfig.selectedLanguage = selectedLanguage updatedConfig.appConfigs = selectedAppConfigs.isEmpty ? nil : selectedAppConfigs updatedConfig.urlConfigs = websiteConfigs.isEmpty ? nil : websiteConfigs @@ -699,7 +698,7 @@ struct ConfigurationView: View { updatedConfig.emoji = selectedEmoji updatedConfig.isAIEnhancementEnabled = isAIEnhancementEnabled updatedConfig.selectedPrompt = selectedPromptId?.uuidString - updatedConfig.selectedWhisperModel = selectedWhisperModelName + updatedConfig.selectedTranscriptionModelName = selectedTranscriptionModelName updatedConfig.selectedLanguage = selectedLanguage updatedConfig.useScreenCapture = useScreenCapture updatedConfig.selectedAIProvider = selectedAIProvider diff --git a/VoiceInk/PowerMode/PowerModeViewComponents.swift b/VoiceInk/PowerMode/PowerModeViewComponents.swift index 1f317b7..9ac35fb 100644 --- a/VoiceInk/PowerMode/PowerModeViewComponents.swift +++ b/VoiceInk/PowerMode/PowerModeViewComponents.swift @@ -104,7 +104,7 @@ struct ConfigurationRow: View { } private var selectedModel: String? { - if let modelName = config.selectedWhisperModel, + if let modelName = config.selectedTranscriptionModelName, let model = whisperState.allAvailableModels.first(where: { $0.name == modelName }) { return model.displayName } @@ -116,7 +116,7 @@ struct ConfigurationRow: View { if langCode == "auto" { return "Auto" } if langCode == "en" { return "English" } - if let modelName = config.selectedWhisperModel, + if let modelName = config.selectedTranscriptionModelName, let model = whisperState.allAvailableModels.first(where: { $0.name == modelName }), let langName = model.supportedLanguages[langCode] { return langName