diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index 7c60bb8..356f30a 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -7,13 +7,14 @@ objects = { /* Begin PBXBuildFile section */ + E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1304F732E3B9E8A0001F9E2 /* FluidAudio */; }; + E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; }; + E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; }; - E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; }; E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; }; E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; }; E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; }; E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; }; - E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; }; E1F5FA7A2DA6CBF900B1FD8A /* Zip in 
Frameworks */ = {isa = PBXBuildFile; productRef = E1F5FA792DA6CBF900B1FD8A /* Zip */; }; /* End PBXBuildFile section */ @@ -41,7 +42,7 @@ dstPath = ""; dstSubfolderSpec = 10; files = ( - E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */, + E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */, E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */, ); name = "Embed Frameworks"; @@ -53,6 +54,7 @@ E11473B02CBE0F0A00318EE4 /* VoiceInk.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = VoiceInk.app; sourceTree = BUILT_PRODUCTS_DIR; }; E11473C32CBE0F0B00318EE4 /* VoiceInkTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; E11473CD2CBE0F0B00318EE4 /* VoiceInkUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; }; + E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../Downloads/build-apple/whisper.xcframework"; sourceTree = ""; }; E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../whisper.cpp/build-apple/whisper.xcframework"; sourceTree = ""; }; /* End PBXFileReference section */ @@ -81,9 +83,10 @@ files = ( E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */, E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */, + E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */, + E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */, E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */, E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */, - E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */, 
E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */, ); runOnlyForDeploymentPostprocessing = 0; @@ -129,6 +132,7 @@ E114741C2CBE1DE200318EE4 /* Frameworks */ = { isa = PBXGroup; children = ( + E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */, E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */, ); name = Frameworks; @@ -160,6 +164,7 @@ E1ADD45E2CC544F100303ECB /* Sparkle */, E1F5FA792DA6CBF900B1FD8A /* Zip */, E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */, + E1304F732E3B9E8A0001F9E2 /* FluidAudio */, ); productName = VoiceInk; productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */; @@ -249,6 +254,7 @@ E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */, E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */, E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */, + E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */, ); preferredProjectObjectVersion = 77; productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */; @@ -618,6 +624,14 @@ /* End XCConfigurationList section */ /* Begin XCRemoteSwiftPackageReference section */ + E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */ = { + isa = XCRemoteSwiftPackageReference; + repositoryURL = "https://github.com/FluidInference/FluidAudio"; + requirement = { + branch = main; + kind = branch; + }; + }; E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts"; @@ -661,6 +675,11 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ + E1304F732E3B9E8A0001F9E2 /* FluidAudio */ = { + isa = XCSwiftPackageProductDependency; + package = E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */; + productName = FluidAudio; + }; E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = { isa = XCSwiftPackageProductDependency; package = 
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */; diff --git a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 713844b..b0ebaa8 100644 --- a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,6 +1,15 @@ { - "originHash" : "ef9c2994fdcb030d4d27f817e99251821e662f56f62355a728a019e924262633", + "originHash" : "b78069b2535604c42957e4e3be638514547280f6779f44a2b633aab9602881d9", "pins" : [ + { + "identity" : "fluidaudio", + "kind" : "remoteSourceControl", + "location" : "https://github.com/FluidInference/FluidAudio", + "state" : { + "branch" : "main", + "revision" : "2de87c32c320e2f28839c3a9682bc7bd0ea45be7" + } + }, { "identity" : "keyboardshortcuts", "kind" : "remoteSourceControl", diff --git a/VoiceInk/Models/PredefinedModels.swift b/VoiceInk/Models/PredefinedModels.swift index 6bf4e76..d3c80aa 100644 --- a/VoiceInk/Models/PredefinedModels.swift +++ b/VoiceInk/Models/PredefinedModels.swift @@ -87,8 +87,8 @@ import Foundation supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple) ), - // Fluid Audio Model - FluidAudioModel( + // Parakeet Model + ParakeetModel( name: "parakeet-tdt-0.6b", displayName: "Parakeet", description: "NVIDIA's insanely fast Parakeet model for lightning-fast transcription.", @@ -96,7 +96,7 @@ import Foundation speed: 0.99, accuracy: 0.94, ramUsage: 0.8, - supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .fluidAudio) + supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet) ), // Local Models diff --git a/VoiceInk/Models/TranscriptionModel.swift b/VoiceInk/Models/TranscriptionModel.swift index 5515531..4ea95b3 100644 --- a/VoiceInk/Models/TranscriptionModel.swift +++ 
b/VoiceInk/Models/TranscriptionModel.swift @@ -3,6 +3,7 @@ import Foundation // Enum to differentiate between model providers enum ModelProvider: String, Codable, Hashable, CaseIterable { case local = "Local" + case parakeet = "Parakeet" case groq = "Groq" case elevenLabs = "ElevenLabs" case deepgram = "Deepgram"
@@ -46,6 +47,24 @@ struct NativeAppleModel: TranscriptionModel {
     let supportedLanguages: [String: String]
 }
 
+// Transcription model entry whose provider is always `.parakeet`
+struct ParakeetModel: TranscriptionModel {
+    let id: UUID = UUID()
+    let name: String
+    let displayName: String
+    let description: String
+    let provider: ModelProvider = .parakeet
+    let size: String
+    let speed: Double
+    let accuracy: Double
+    let ramUsage: Double
+    // Multilingual exactly when more than one language is listed.
+    var isMultilingualModel: Bool {
+        return supportedLanguages.count >= 2
+    }
+    let supportedLanguages: [String: String]
+}
+
 // A new struct for cloud models
 struct CloudModel: TranscriptionModel {
     let id: UUID
diff --git a/VoiceInk/PowerMode/ActiveWindowService.swift b/VoiceInk/PowerMode/ActiveWindowService.swift index 3388ac2..5d05139 100644 --- a/VoiceInk/PowerMode/ActiveWindowService.swift +++ b/VoiceInk/PowerMode/ActiveWindowService.swift @@ -126,25 +126,23 @@ class ActiveWindowService: ObservableObject { // Set the new model as default. This works for both local and cloud models. await whisperState.setDefaultTranscriptionModel(selectedModel) - // The cleanup and load cycle is only necessary for local models. - if selectedModel.provider == ModelProvider.local { - // Unload any previously loaded model to free up memory. + switch selectedModel.provider { + case .local: await whisperState.cleanupModelResources() - // Load the new local model into memory. 
if let localModel = await whisperState.availableModels.first(where: { $0.name == selectedModel.name }) { do { try await whisperState.loadModel(localModel) - logger.info("✅ Power Mode: Successfully loaded local model '\(localModel.name)'.") } catch { logger.error("❌ Power Mode: Failed to load local model '\(localModel.name)': \(error.localizedDescription)") } } - } else { - // For cloud models, no in-memory loading is needed, but we should still - // clean up if the *previous* model was a local one. + + case .parakeet: + await whisperState.cleanupModelResources() + + default: await whisperState.cleanupModelResources() - logger.info("✅ Power Mode: Switched to cloud model '\(selectedModel.name)'. No local load needed.") } } } diff --git a/VoiceInk/Recorder.swift b/VoiceInk/Recorder.swift index f6a7104..12937ee 100644 --- a/VoiceInk/Recorder.swift +++ b/VoiceInk/Recorder.swift @@ -150,9 +150,16 @@ class Recorder: ObservableObject { recorder?.stop() recorder = nil audioMeter = AudioMeter(averagePower: 0, peakPower: 0) + Task { + // Complete system audio operations first await mediaController.unmuteSystemAudio() await playbackController.resumeMedia() + + // Then play stop sound on main thread after audio operations are complete + await MainActor.run { + SoundManager.shared.playStopSound() + } } deviceManager.isRecordingActive = false } diff --git a/VoiceInk/Resources/Sounds/pastes.mp3 b/VoiceInk/Resources/Sounds/pastess.mp3 similarity index 100% rename from VoiceInk/Resources/Sounds/pastes.mp3 rename to VoiceInk/Resources/Sounds/pastess.mp3 diff --git a/VoiceInk/Resources/Sounds/recstart.mp3 b/VoiceInk/Resources/Sounds/recstart.mp3 index be15dac..fe3e8b2 100755 Binary files a/VoiceInk/Resources/Sounds/recstart.mp3 and b/VoiceInk/Resources/Sounds/recstart.mp3 differ diff --git a/VoiceInk/Resources/Sounds/recstop.mp3 b/VoiceInk/Resources/Sounds/recstop.mp3 new file mode 100755 index 0000000..d5d4f76 Binary files /dev/null and b/VoiceInk/Resources/Sounds/recstop.mp3 differ 
diff --git a/VoiceInk/Services/AudioFileTranscriptionManager.swift b/VoiceInk/Services/AudioFileTranscriptionManager.swift index 2a31e19..560dccf 100644 --- a/VoiceInk/Services/AudioFileTranscriptionManager.swift +++ b/VoiceInk/Services/AudioFileTranscriptionManager.swift @@ -21,6 +21,7 @@ class AudioTranscriptionManager: ObservableObject { private var localTranscriptionService: LocalTranscriptionService? private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() + private var parakeetTranscriptionService: ParakeetTranscriptionService? enum ProcessingPhase { case idle @@ -69,6 +70,11 @@ class AudioTranscriptionManager: ObservableObject { localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState) } + // Initialize parakeet transcription service if needed + if parakeetTranscriptionService == nil { + parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) + } + // Process audio file processingPhase = .processingAudio let samples = try await audioProcessor.processAudioToSamples(url) @@ -96,6 +102,8 @@ class AudioTranscriptionManager: ObservableObject { switch currentModel.provider { case .local: text = try await localTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel) + case .parakeet: + text = try await parakeetTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel) case .nativeApple: text = try await nativeAppleTranscriptionService.transcribe(audioURL: permanentURL, model: currentModel) default: // Cloud models diff --git a/VoiceInk/Services/AudioFileTranscriptionService.swift b/VoiceInk/Services/AudioFileTranscriptionService.swift index 5005f0d..40420ce 100644 --- a/VoiceInk/Services/AudioFileTranscriptionService.swift +++ b/VoiceInk/Services/AudioFileTranscriptionService.swift @@ -18,6 
+18,7 @@ class AudioTranscriptionService: ObservableObject { private let localTranscriptionService: LocalTranscriptionService private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() + private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) enum TranscriptionError: Error { case noAudioFile @@ -50,6 +51,8 @@ class AudioTranscriptionService: ObservableObject { switch model.provider { case .local: text = try await localTranscriptionService.transcribe(audioURL: url, model: model) + case .parakeet: + text = try await parakeetTranscriptionService.transcribe(audioURL: url, model: model) case .nativeApple: text = try await nativeAppleTranscriptionService.transcribe(audioURL: url, model: model) default: // Cloud models
diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift
new file mode 100644
index 0000000..15566c7
--- /dev/null
+++ b/VoiceInk/Services/ParakeetTranscriptionService.swift
@@ -0,0 +1,109 @@
+import Foundation
+import AVFoundation
+import FluidAudio
+
+/// Transcription backend that runs audio through FluidAudio's `AsrManager`.
+class ParakeetTranscriptionService: TranscriptionService {
+    /// Manager created by `loadModel()`; released again at the end of every `transcribe` call.
+    private var asrManager: AsrManager?
+    /// Optional target directory handed to `AsrModels.downloadAndLoad(to:)`.
+    private let customModelsDirectory: URL?
+    @Published var isModelLoaded = false
+
+    init(customModelsDirectory: URL? = nil) {
+        self.customModelsDirectory = customModelsDirectory
+    }
+
+    /// Loads the ASR models and initialises the manager; returns at once if already loaded.
+    /// - Throws: Errors from `AsrModels.downloadAndLoad` or `AsrManager.initialize`.
+    func loadModel() async throws {
+        guard !isModelLoaded else { return }
+
+        let asrConfig = ASRConfig(
+            maxSymbolsPerFrame: 3,
+            realtimeMode: true,
+            chunkSizeMs: 1500,
+            tdtConfig: TdtConfig(
+                durations: [0, 1, 2, 3, 4],
+                maxSymbolsPerStep: 3
+            )
+        )
+
+        // Fetch the models first so a failed download never leaves a manager behind.
+        let models: AsrModels
+        if let customDirectory = customModelsDirectory {
+            models = try await AsrModels.downloadAndLoad(to: customDirectory)
+        } else {
+            models = try await AsrModels.downloadAndLoad()
+        }
+
+        asrManager = AsrManager(config: asrConfig)
+        do {
+            try await asrManager?.initialize(models: models)
+        } catch {
+            // Drop the manager that failed to initialise before reporting the error.
+            asrManager = nil
+            throw error
+        }
+        isModelLoaded = true
+    }
+
+    /// Transcribes the audio file at `audioURL`.
+    /// The model is loaded on demand and always torn down afterwards. The text is passed
+    /// through `WhisperTextFormatter` unless "IsTextFormattingEnabled" is set to false.
+    /// - Returns: The transcript, or an empty string after a failure (the error is shown
+    ///   as a notification instead of being rethrown).
+    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
+        do {
+            defer {
+                asrManager?.cleanup()
+                self.asrManager = nil
+                self.isModelLoaded = false
+            }
+
+            if !isModelLoaded {
+                try await loadModel()
+            }
+
+            guard let asrManager = asrManager else {
+                throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."])
+            }
+
+            let audioSamples = try readAudioSamples(from: audioURL)
+            let result = try await asrManager.transcribe(audioSamples)
+
+            if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
+                return WhisperTextFormatter.format(result.text)
+            }
+            return result.text
+        } catch {
+            let errorMessage = error.localizedDescription
+            await MainActor.run {
+                NotificationManager.shared.showNotification(
+                    title: "Transcription Failed: \(errorMessage)",
+                    type: .error
+                )
+            }
+            return ""
+        }
+    }
+
+    /// Converts the file's sample data to floats in -1.0...1.0.
+    /// Bytes after the header are read as little-endian 16-bit samples; a trailing odd
+    /// byte is ignored rather than read past the end of the buffer.
+    private func readAudioSamples(from url: URL) throws -> [Float] {
+        let data = try Data(contentsOf: url)
+        // NOTE(review): 44 is presumably the canonical WAV header length — confirm against the recorder's output.
+        let headerSize = 44
+        // A basic check, assuming a more robust check happens elsewhere.
+        guard data.count > headerSize else { return [] }
+
+        // Stop one byte early so each offset always has a second byte to pair with.
+        return stride(from: headerSize, to: data.count - 1, by: 2).map { offset -> Float in
+            // Build the sample byte by byte: explicit little-endian, no alignment requirement.
+            let bits = UInt16(data[offset]) | (UInt16(data[offset + 1]) << 8)
+            let sample = Int16(bitPattern: bits)
+            return max(-1.0, min(Float(sample) / 32767.0, 1.0))
+        }
+    }
+}
diff --git a/VoiceInk/SoundManager.swift b/VoiceInk/SoundManager.swift index f0768d9..9b20dd9 100644 --- a/VoiceInk/SoundManager.swift +++ b/VoiceInk/SoundManager.swift @@ -20,7 +20,7 @@ class SoundManager { // Try loading directly from the main bundle if let startSoundURL = Bundle.main.url(forResource: "recstart", withExtension: "mp3"), - let stopSoundURL = Bundle.main.url(forResource: "pastes", withExtension: "mp3"), + let stopSoundURL = Bundle.main.url(forResource: "recstop", withExtension: "mp3"), let escSoundURL = Bundle.main.url(forResource: "esc", withExtension: "wav") { print("Found sounds in main bundle") try? loadSounds(start: startSoundURL, stop: stopSoundURL, esc: escSoundURL) @@ -49,8 +49,8 @@ class SoundManager { escSound = try AVAudioPlayer(contentsOf: escURL) // Set lower volume for all sounds - startSound?.volume = 0.7 - stopSound?.volume = 0.7 + startSound?.volume = 0.4 + stopSound?.volume = 0.4 escSound?.volume = 0.3 // Prepare sounds for instant playback diff --git a/VoiceInk/Views/AI Models/ModelCardRowView.swift b/VoiceInk/Views/AI Models/ModelCardRowView.swift index 8a3c7c1..efa4ff8 100644 --- a/VoiceInk/Views/AI Models/ModelCardRowView.swift +++ b/VoiceInk/Views/AI Models/ModelCardRowView.swift @@ -3,6 +3,7 @@ import AppKit struct ModelCardRowView: View { let model: any TranscriptionModel + @ObservedObject var whisperState: WhisperState let isDownloaded: Bool let isCurrent: Bool let downloadProgress: [String: Double] @@ -30,6 +31,13 @@ struct ModelCardRowView: View { downloadAction: downloadAction ) } + case .parakeet: + if let parakeetModel = model as? 
ParakeetModel { + ParakeetModelCardRowView( + model: parakeetModel, + whisperState: whisperState + ) + } case .nativeApple: if let nativeAppleModel = model as? NativeAppleModel { NativeAppleModelCardView( diff --git a/VoiceInk/Views/AI Models/ModelManagementView.swift b/VoiceInk/Views/AI Models/ModelManagementView.swift index 25c4db2..387b1b0 100644 --- a/VoiceInk/Views/AI Models/ModelManagementView.swift +++ b/VoiceInk/Views/AI Models/ModelManagementView.swift @@ -118,6 +118,7 @@ struct ModelManagementView: View { ForEach(filteredModels, id: \.id) { model in ModelCardRowView( model: model, + whisperState: whisperState, isDownloaded: whisperState.availableModels.contains { $0.name == model.name }, isCurrent: whisperState.currentTranscriptionModel?.name == model.name, downloadProgress: whisperState.downloadProgress, @@ -190,7 +191,7 @@ struct ModelManagementView: View { return index1 < index2 } case .local: - return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple } + return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple || $0.provider == .parakeet } case .cloud: let cloudProviders: [ModelProvider] = [.groq, .elevenLabs, .deepgram, .mistral] return whisperState.allAvailableModels.filter { cloudProviders.contains($0.provider) }
diff --git a/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift b/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift
new file mode 100644
index 0000000..695b49d
--- /dev/null
+++ b/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift
@@ -0,0 +1,185 @@
+import SwiftUI
+import Combine
+import AppKit
+
+/// Row for the Parakeet model in the model list. Shows name, badges, metadata and
+/// description, and routes download / set-default / delete / reveal actions to `WhisperState`.
+struct ParakeetModelCardRowView: View {
+    let model: ParakeetModel
+    @ObservedObject var whisperState: WhisperState
+
+    /// Whether this model's name matches the current transcription model.
+    var isCurrent: Bool {
+        whisperState.currentTranscriptionModel?.name == model.name
+    }
+
+    var isDownloaded: Bool {
+        whisperState.isParakeetModelDownloaded
+    }
+
+    var isDownloading: Bool {
+        whisperState.isDownloadingParakeet
+    }
+
+    var body: some View {
+        HStack(alignment: .top, spacing: 16) {
+            VStack(alignment: .leading, spacing: 6) {
+                headerSection
+                metadataSection
+                descriptionSection
+                progressSection
+            }
+            .frame(maxWidth: .infinity, alignment: .leading)
+
+            actionSection
+        }
+        .padding(16)
+        .background(CardBackground(isSelected: isCurrent, useAccentGradientWhenSelected: isCurrent))
+    }
+
+    // MARK: - Header
+
+    private var headerSection: some View {
+        HStack(alignment: .firstTextBaseline) {
+            Text(model.displayName)
+                .font(.system(size: 13, weight: .semibold))
+                .foregroundColor(Color(.labelColor))
+
+            capsuleBadge("Experimental", fill: Color.orange.opacity(0.8), textColor: .white)
+
+            statusBadge
+            Spacer()
+        }
+    }
+
+    /// "Default" when current, otherwise "Downloaded" when downloaded, otherwise nothing.
+    @ViewBuilder
+    private var statusBadge: some View {
+        if isCurrent {
+            capsuleBadge("Default", fill: Color.accentColor, textColor: .white)
+        } else if isDownloaded {
+            capsuleBadge("Downloaded", fill: Color(.quaternaryLabelColor), textColor: Color(.labelColor))
+        }
+    }
+
+    /// Capsule-shaped text badge; all header badges share this styling.
+    private func capsuleBadge(_ title: LocalizedStringKey, fill: Color, textColor: Color) -> some View {
+        Text(title)
+            .font(.system(size: 11, weight: .medium))
+            .padding(.horizontal, 6)
+            .padding(.vertical, 2)
+            .background(Capsule().fill(fill))
+            .foregroundColor(textColor)
+    }
+
+    // MARK: - Details
+
+    private var metadataSection: some View {
+        HStack(spacing: 12) {
+            Label(model.language, systemImage: "globe")
+            Label(model.size, systemImage: "internaldrive")
+            ratingRow("Speed", score: model.speed)
+            ratingRow("Accuracy", score: model.accuracy)
+        }
+        .font(.system(size: 11))
+        .foregroundColor(Color(.secondaryLabelColor))
+        .lineLimit(1)
+    }
+
+    /// Title followed by `progressDotsWithNumber` for the score multiplied by 10.
+    private func ratingRow(_ title: LocalizedStringKey, score: Double) -> some View {
+        HStack(spacing: 3) {
+            Text(title)
+            progressDotsWithNumber(value: score * 10)
+        }
+    }
+
+    private var descriptionSection: some View {
+        Text(model.description)
+            .font(.system(size: 11))
+            .foregroundColor(Color(.secondaryLabelColor))
+            .lineLimit(2)
+            .fixedSize(horizontal: false, vertical: true)
+            .padding(.top, 4)
+    }
+
+    @ViewBuilder
+    private var progressSection: some View {
+        if isDownloading {
+            ProgressView() // Indeterminate for now
+                .progressViewStyle(LinearProgressViewStyle())
+                .frame(maxWidth: 200)
+                .padding(.top, 8)
+        }
+    }
+
+    // MARK: - Actions
+
+    private var actionSection: some View {
+        HStack(spacing: 8) {
+            primaryAction
+
+            if isDownloaded {
+                optionsMenu
+            }
+        }
+    }
+
+    /// Status text when current, "Set as Default" when downloaded, otherwise the download button.
+    @ViewBuilder
+    private var primaryAction: some View {
+        if isCurrent {
+            Text("Default Model")
+                .font(.system(size: 12))
+                .foregroundColor(Color(.secondaryLabelColor))
+        } else if isDownloaded {
+            Button {
+                Task { await whisperState.setDefaultTranscriptionModel(model) }
+            } label: {
+                Text("Set as Default")
+                    .font(.system(size: 12))
+            }
+            .buttonStyle(.bordered)
+            .controlSize(.small)
+        } else {
+            Button {
+                Task { await whisperState.downloadParakeetModel() }
+            } label: {
+                HStack(spacing: 4) {
+                    Text(isDownloading ? "Downloading..." : "Download")
+                    Image(systemName: "arrow.down.circle")
+                }
+                .font(.system(size: 12, weight: .medium))
+                .foregroundColor(.white)
+                .padding(.horizontal, 12)
+                .padding(.vertical, 6)
+                .background(Capsule().fill(Color.accentColor))
+            }
+            .buttonStyle(.plain)
+            .disabled(isDownloading)
+        }
+    }
+
+    /// Overflow menu with the delete and reveal-in-Finder actions.
+    private var optionsMenu: some View {
+        Menu {
+            Button {
+                whisperState.deleteParakeetModel()
+            } label: {
+                Label("Delete Model", systemImage: "trash")
+            }
+
+            Button {
+                whisperState.showParakeetModelInFinder()
+            } label: {
+                Label("Show in Finder", systemImage: "folder")
+            }
+        } label: {
+            Image(systemName: "ellipsis.circle")
+                .font(.system(size: 14))
+        }
+        .menuStyle(.borderlessButton)
+        .menuIndicator(.hidden)
+        .frame(width: 20, height: 20)
+    }
+}
diff --git a/VoiceInk/Whisper/WhisperState+ModelQueries.swift b/VoiceInk/Whisper/WhisperState+ModelQueries.swift index 63bf707..f38d39a 100644 --- a/VoiceInk/Whisper/WhisperState+ModelQueries.swift +++ b/VoiceInk/Whisper/WhisperState+ModelQueries.swift @@ -6,6 +6,8 @@ extension WhisperState { switch model.provider { case .local: return availableModels.contains { $0.name == model.name } + case 
.parakeet: + return isParakeetModelDownloaded case .nativeApple: if #available(macOS 26, *) { return true
diff --git a/VoiceInk/Whisper/WhisperState+Parakeet.swift b/VoiceInk/Whisper/WhisperState+Parakeet.swift
new file mode 100644
index 0000000..9746a49
--- /dev/null
+++ b/VoiceInk/Whisper/WhisperState+Parakeet.swift
@@ -0,0 +1,94 @@
+import Foundation
+import FluidAudio
+import AppKit
+import os
+
+/// Parakeet model management for `WhisperState`: download state, download, deletion, Finder reveal.
+extension WhisperState {
+    /// Key of the Parakeet entry in `downloadProgress`.
+    private static let parakeetProgressKey = "parakeet-tdt-0.6b"
+    /// Name of the model folder looked up when deleting or revealing the model.
+    private static let parakeetModelFolderName = "parakeet-tdt-0.6b-v2-coreml"
+
+    /// Download flag persisted in UserDefaults under "ParakeetModelDownloaded".
+    var isParakeetModelDownloaded: Bool {
+        get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") }
+        set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
+    }
+
+    /// Alias for `isDownloadingParakeet`.
+    var isParakeetModelDownloading: Bool {
+        get { isDownloadingParakeet }
+        set { isDownloadingParakeet = newValue }
+    }
+
+    /// Locations searched for the model folder, in order: the app's own folder inside
+    /// Application Support, then Application Support itself.
+    private var parakeetModelDirectoryCandidates: [URL] {
+        let applicationSupport = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
+        return [
+            applicationSupport
+                .appendingPathComponent("com.prakashjoshipax.VoiceInk")
+                .appendingPathComponent(Self.parakeetModelFolderName),
+            applicationSupport.appendingPathComponent(Self.parakeetModelFolderName)
+        ]
+    }
+
+    /// First candidate location that exists on disk, or `nil` when none does.
+    private var existingParakeetModelDirectory: URL? {
+        parakeetModelDirectoryCandidates.first { FileManager.default.fileExists(atPath: $0.path) }
+    }
+
+    /// Downloads the model via `AsrModels.downloadAndLoad(to: parakeetModelsDirectory)` and
+    /// records the outcome. Does nothing if the model is already downloaded or a download
+    /// is already running.
+    @MainActor
+    func downloadParakeetModel() async {
+        guard !isParakeetModelDownloaded, !isDownloadingParakeet else { return }
+
+        isDownloadingParakeet = true
+        downloadProgress[Self.parakeetProgressKey] = 0.0
+
+        do {
+            _ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
+            isParakeetModelDownloaded = true
+        } catch {
+            isParakeetModelDownloaded = false
+            logger.error("❌ Parakeet model download failed: \(error.localizedDescription)")
+        }
+
+        isDownloadingParakeet = false
+        downloadProgress[Self.parakeetProgressKey] = nil
+
+        refreshAllAvailableModels()
+    }
+
+    /// Deletes the model folder and clears the downloaded flag. If Parakeet is the current
+    /// transcription model, that selection is cleared first. When removal fails the flag
+    /// is left unchanged and the error is logged.
+    @MainActor
+    func deleteParakeetModel() {
+        if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet {
+            currentTranscriptionModel = nil
+            UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
+        }
+
+        do {
+            if let modelDirectory = existingParakeetModelDirectory {
+                try FileManager.default.removeItem(at: modelDirectory)
+            }
+            isParakeetModelDownloaded = false
+        } catch {
+            logger.error("❌ Failed to delete Parakeet model: \(error.localizedDescription)")
+        }
+
+        refreshAllAvailableModels()
+    }
+
+    /// Reveals the model folder in Finder, checking the same locations as `deleteParakeetModel()`.
+    @MainActor
+    func showParakeetModelInFinder() {
+        guard let modelDirectory = existingParakeetModelDirectory else { return }
+        NSWorkspace.shared.selectFile(modelDirectory.path, inFileViewerRootedAtPath: "")
+    }
+}
diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index 5a2f387..6b4b6c5 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -62,6 +62,7 @@ class WhisperState: NSObject, ObservableObject { private var localTranscriptionService: LocalTranscriptionService! private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() + private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory) private var modelUrl: URL? { let possibleURLs = [ @@ -84,6 +85,7 @@ class WhisperState: NSObject, ObservableObject { let modelsDirectory: URL let recordingsDirectory: URL + let parakeetModelsDirectory: URL let enhancementService: AIEnhancementService? 
var licenseViewModel: LicenseViewModel let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") @@ -92,6 +94,7 @@ class WhisperState: NSObject, ObservableObject { // For model progress tracking @Published var downloadProgress: [String: Double] = [:] + @Published var isDownloadingParakeet = false init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) { self.modelContext = modelContext @@ -100,6 +103,7 @@ class WhisperState: NSObject, ObservableObject { self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels") self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings") + self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels") self.enhancementService = enhancementService self.licenseViewModel = LicenseViewModel() @@ -167,10 +171,11 @@ class WhisperState: NSObject, ObservableObject { await MainActor.run { self.recordingState = .recording + SoundManager.shared.playStartSound() } await ActiveWindowService.shared.applyConfigurationForCurrentApp() - + // Only load model if it's a local model and not already loaded if let model = self.currentTranscriptionModel, model.provider == .local { if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }), @@ -181,6 +186,8 @@ class WhisperState: NSObject, ObservableObject { self.logger.error("❌ Model loading failed: \(error.localizedDescription)") } } + } else if let model = self.currentTranscriptionModel, model.provider == .parakeet { + try? 
await parakeetTranscriptionService.loadModel() } if let enhancementService = self.enhancementService, @@ -239,6 +246,8 @@ class WhisperState: NSObject, ObservableObject { switch model.provider { case .local: transcriptionService = localTranscriptionService + case .parakeet: + transcriptionService = parakeetTranscriptionService case .nativeApple: transcriptionService = nativeAppleTranscriptionService default: @@ -332,7 +341,6 @@ class WhisperState: NSObject, ObservableObject { if await checkCancellationAndCleanup() { return } - SoundManager.shared.playStopSound() DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) { CursorPaster.pasteAtCursor(text, shouldPreserveClipboard: !self.isAutoCopyEnabled)