From a0e4dd136754dd6976e8ff13ff31bd315c8db3e1 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sun, 19 Oct 2025 14:01:31 +0545 Subject: [PATCH] Added support for V2 and V3 models --- .../xcshareddata/swiftpm/Package.resolved | 2 +- VoiceInk/Models/PredefinedModels.swift | 20 ++- .../AudioFileTranscriptionManager.swift | 2 +- .../AudioFileTranscriptionService.swift | 2 +- .../ParakeetTranscriptionService.swift | 101 +++++++------- .../AI Models/ParakeetModelCardRowView.swift | 12 +- .../Whisper/WhisperState+ModelQueries.swift | 2 +- VoiceInk/Whisper/WhisperState+Parakeet.swift | 123 +++++++++--------- VoiceInk/Whisper/WhisperState.swift | 10 +- 9 files changed, 143 insertions(+), 131 deletions(-) diff --git a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 8d8746b..5f675a5 100644 --- a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -7,7 +7,7 @@ "location" : "https://github.com/FluidInference/FluidAudio", "state" : { "branch" : "main", - "revision" : "328036d255ef76b8d661eacc16ac108eb45f9218" + "revision" : "a8f3bc7a3be7a93d7d5d412fdf71ae7922e92d09" } }, { diff --git a/VoiceInk/Models/PredefinedModels.swift b/VoiceInk/Models/PredefinedModels.swift index d3d8b7a..2fee46d 100644 --- a/VoiceInk/Models/PredefinedModels.swift +++ b/VoiceInk/Models/PredefinedModels.swift @@ -87,12 +87,22 @@ import Foundation supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple) ), - // Parakeet Model + // Parakeet Models ParakeetModel( - name: "parakeet-tdt-0.6b", + name: "parakeet-tdt-0.6b-v2", + displayName: "Parakeet V2", + description: "NVIDIA's Parakeet V2 model optimized for lightning-fast English-only transcription.", + size: "474 MB", + speed: 0.99, + accuracy: 0.94, + ramUsage: 0.8, + supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet) + ), + ParakeetModel( + name: "parakeet-tdt-0.6b-v3", displayName: "Parakeet V3", - description: "NVIDIA's ASR model V3 for lightning-fast transcription with multi-lingual(English + European) support.", - size: "630 MB", + description: "NVIDIA's Parakeet V3 model with multilingual support across English and 25 European languages.", + size: "494 MB", speed: 0.99, accuracy: 0.94, ramUsage: 0.8, @@ -360,4 +370,4 @@ import Foundation "yue": "Cantonese", "zh": "Chinese", ] - } \ No newline at end of file + } diff --git a/VoiceInk/Services/AudioFileTranscriptionManager.swift b/VoiceInk/Services/AudioFileTranscriptionManager.swift index c6d32c5..1bfaaff 100644 --- a/VoiceInk/Services/AudioFileTranscriptionManager.swift +++ b/VoiceInk/Services/AudioFileTranscriptionManager.swift @@ -72,7 +72,7 @@ class AudioTranscriptionManager: ObservableObject { // Initialize parakeet transcription service if needed if parakeetTranscriptionService == nil { - parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) + parakeetTranscriptionService = ParakeetTranscriptionService() } // Process audio file diff --git a/VoiceInk/Services/AudioFileTranscriptionService.swift b/VoiceInk/Services/AudioFileTranscriptionService.swift index 2238a48..ff04ae9 100644 --- a/VoiceInk/Services/AudioFileTranscriptionService.swift +++ b/VoiceInk/Services/AudioFileTranscriptionService.swift @@ -19,7 +19,7 @@ class AudioTranscriptionService: ObservableObject { private let localTranscriptionService: LocalTranscriptionService private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() - private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) + private lazy var parakeetTranscriptionService = ParakeetTranscriptionService() enum TranscriptionError: Error { case noAudioFile diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index 042f53c..583ba65 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -4,67 +4,71 @@ import AVFoundation import FluidAudio import os.log -class ParakeetTranscriptionService: TranscriptionService { - private var asrManager: AsrManager? - private var vadManager: VadManager? - private let customModelsDirectory: URL? - private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink.parakeet", category: "ParakeetTranscriptionService") - - init(customModelsDirectory: URL? = nil) { - self.customModelsDirectory = customModelsDirectory - } - - func loadModel() async throws { - guard asrManager == nil else { - logger.notice("🦜 Parakeet models already loaded, skipping") - return - } +class ParakeetTranscriptionService: TranscriptionService { + private var asrManager: AsrManager? + private var vadManager: VadManager? + private var activeVersion: AsrModelVersion? + private let logger = Logger( + subsystem: "com.prakashjoshipax.voiceink.parakeet", + category: "ParakeetTranscriptionService" + ) + + private func version(for model: any TranscriptionModel) -> AsrModelVersion { + model.name.lowercased().contains("v2") ? .v2 : .v3 + } - let manager = AsrManager(config: .default) - let models: AsrModels - - if let customModelsDirectory = customModelsDirectory { - models = try await AsrModels.load(from: customModelsDirectory) - } else { - // Fallback to FluidAudio's default directory - logger.notice("🦜 Loading Parakeet models from FluidAudio default directory") - models = try await AsrModels.downloadAndLoad() + private func ensureModelsLoaded(for version: AsrModelVersion) async throws { + if let manager = asrManager, activeVersion == version { + logger.notice("🦜 Parakeet \(version == .v2 ? "V2" : "V3") models already loaded, skipping") + return } - - try await manager.initialize(models: models) - self.asrManager = manager + cleanup() + + let manager = AsrManager(config: .default) + let cacheDirectory = AsrModels.defaultCacheDirectory(for: version) + logger.notice("🦜 Loading Parakeet models from \(cacheDirectory.path)") + let models = try await AsrModels.load( + from: cacheDirectory, + configuration: nil, + version: version + ) + try await manager.initialize(models: models) + self.asrManager = manager + self.activeVersion = version + } + + func loadModel(for model: ParakeetModel) async throws { + try await ensureModelsLoaded(for: version(for: model)) } - func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { - try await loadModel() - + func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { + let targetVersion = version(for: model) + try await ensureModelsLoaded(for: targetVersion) + guard let asrManager = asrManager else { logger.notice("🦜 ASR manager not initialized, cannot transcribe") throw ASRError.notInitialized - } + } - let audioSamples = try readAudioSamples(from: audioURL) + let audioSamples = try readAudioSamples(from: audioURL) - let durationSeconds = Double(audioSamples.count) / 16000.0 - let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true + let durationSeconds = Double(audioSamples.count) / 16000.0 + let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true - let speechAudio: [Float] - if durationSeconds < 20.0 || !isVADEnabled { - speechAudio = audioSamples - } else { - let vadConfig = VadConfig(threshold: 0.7) - if vadManager == nil, let customModelsDirectory { - do { - vadManager = try await VadManager( - config: vadConfig, - modelDirectory: customModelsDirectory.deletingLastPathComponent() - ) + let speechAudio: [Float] + if durationSeconds < 20.0 || !isVADEnabled { + speechAudio = audioSamples + } else { + let vadConfig = VadConfig(threshold: 0.7) + if vadManager == nil { + do { + vadManager = try await VadManager(config: vadConfig) } catch { logger.notice("🦜 VAD initialization failed, using full audio: \(error.localizedDescription)") - } - } - + } + } + do { if let vadManager { let segments = try await vadManager.segmentSpeechAudio(audioSamples) @@ -109,5 +113,6 @@ class ParakeetTranscriptionService: TranscriptionService { asrManager?.cleanup() asrManager = nil vadManager = nil + activeVersion = nil } } diff --git a/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift b/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift index 784ff22..8a86e45 100644 --- a/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift +++ b/VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift @@ -11,11 +11,11 @@ struct ParakeetModelCardRowView: View { } var isDownloaded: Bool { - whisperState.isParakeetModelDownloaded + whisperState.isParakeetModelDownloaded(model) } var isDownloading: Bool { - whisperState.isDownloadingParakeet + whisperState.isParakeetModelDownloading(model) } var body: some View { @@ -104,7 +104,7 @@ struct ParakeetModelCardRowView: View { private var progressSection: some View { Group { if isDownloading { - let progress = whisperState.downloadProgress["parakeet-tdt-0.6b"] ?? 0.0 + let progress = whisperState.downloadProgress[model.name] ?? 0.0 ProgressView(value: progress) .progressViewStyle(LinearProgressViewStyle()) .frame(maxWidth: .infinity, alignment: .leading) @@ -133,7 +133,7 @@ struct ParakeetModelCardRowView: View { } else { Button(action: { Task { - await whisperState.downloadParakeetModel() + await whisperState.downloadParakeetModel(model) } }) { HStack(spacing: 4) { @@ -153,13 +153,13 @@ struct ParakeetModelCardRowView: View { if isDownloaded { Menu { Button(action: { - whisperState.deleteParakeetModel() + whisperState.deleteParakeetModel(model) }) { Label("Delete Model", systemImage: "trash") } Button { - whisperState.showParakeetModelInFinder() + whisperState.showParakeetModelInFinder(model) } label: { Label("Show in Finder", systemImage: "folder") } diff --git a/VoiceInk/Whisper/WhisperState+ModelQueries.swift b/VoiceInk/Whisper/WhisperState+ModelQueries.swift index 8ecb840..5c76b0e 100644 --- a/VoiceInk/Whisper/WhisperState+ModelQueries.swift +++ b/VoiceInk/Whisper/WhisperState+ModelQueries.swift @@ -7,7 +7,7 @@ extension WhisperState { case .local: return availableModels.contains { $0.name == model.name } case .parakeet: - return isParakeetModelDownloaded + return isParakeetModelDownloaded(named: model.name) case .nativeApple: if #available(macOS 26, *) { return true diff --git a/VoiceInk/Whisper/WhisperState+Parakeet.swift b/VoiceInk/Whisper/WhisperState+Parakeet.swift index 2c19991..d0e15af 100644 --- a/VoiceInk/Whisper/WhisperState+Parakeet.swift +++ b/VoiceInk/Whisper/WhisperState+Parakeet.swift @@ -3,99 +3,98 @@ import FluidAudio import AppKit extension WhisperState { - var isParakeetModelDownloaded: Bool { - get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") } - set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") } + private func parakeetDefaultsKey(for modelName: String) -> String { + "ParakeetModelDownloaded_\(modelName)" } - var isParakeetModelDownloading: Bool { - get { isDownloadingParakeet } - set { isDownloadingParakeet = newValue } + private func parakeetVersion(for modelName: String) -> AsrModelVersion { + modelName.lowercased().contains("v2") ? .v2 : .v3 + } + + private func parakeetCacheDirectory(for version: AsrModelVersion) -> URL { + AsrModels.defaultCacheDirectory(for: version) + } + + func isParakeetModelDownloaded(named modelName: String) -> Bool { + UserDefaults.standard.bool(forKey: parakeetDefaultsKey(for: modelName)) + } + + func isParakeetModelDownloaded(_ model: ParakeetModel) -> Bool { + isParakeetModelDownloaded(named: model.name) + } + + func isParakeetModelDownloading(_ model: ParakeetModel) -> Bool { + parakeetDownloadStates[model.name] ?? false } @MainActor - func downloadParakeetModel() async { - if isParakeetModelDownloaded { + func downloadParakeetModel(_ model: ParakeetModel) async { + if isParakeetModelDownloaded(model) { return } - isDownloadingParakeet = true - downloadProgress["parakeet-tdt-0.6b"] = 0.0 + let modelName = model.name + parakeetDownloadStates[modelName] = true + downloadProgress[modelName] = 0.0 - // Start progress simulation let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in Task { @MainActor in - if let currentProgress = self.downloadProgress["parakeet-tdt-0.6b"], currentProgress < 0.9 { - self.downloadProgress["parakeet-tdt-0.6b"] = currentProgress + 0.0125 + if let currentProgress = self.downloadProgress[modelName], currentProgress < 0.9 { + self.downloadProgress[modelName] = currentProgress + 0.005 } } } - do { - _ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory) + let version = parakeetVersion(for: modelName) - // Also download VAD model into the same parent directory as ASR models - let parentDir = parakeetModelsDirectory.deletingLastPathComponent() - _ = try await DownloadUtils.loadModels( - .vad, - modelNames: Array(ModelNames.VAD.requiredModels), - directory: parentDir - ) - self.isParakeetModelDownloaded = true - downloadProgress["parakeet-tdt-0.6b"] = 1.0 + do { + _ = try await AsrModels.downloadAndLoad(version: version) + + _ = try await VadManager() + + UserDefaults.standard.set(true, forKey: parakeetDefaultsKey(for: modelName)) + downloadProgress[modelName] = 1.0 } catch { - self.isParakeetModelDownloaded = false + UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: modelName)) } - + timer.invalidate() - isDownloadingParakeet = false - downloadProgress["parakeet-tdt-0.6b"] = nil - + parakeetDownloadStates[modelName] = false + downloadProgress[modelName] = nil + refreshAllAvailableModels() } - + @MainActor - func deleteParakeetModel() { - if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet { + func deleteParakeetModel(_ model: ParakeetModel) { + if let currentModel = currentTranscriptionModel, + currentModel.provider == .parakeet, + currentModel.name == model.name { currentTranscriptionModel = nil UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel") } - + + let version = parakeetVersion(for: model.name) + let cacheDirectory = parakeetCacheDirectory(for: version) + do { - // First try: app support directory + bundle path - let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] - .appendingPathComponent("com.prakashjoshipax.VoiceInk") - let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml") - - if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) { - try FileManager.default.removeItem(at: parakeetModelDirectory) - } else { - // Second try: root of application support directory - let rootAppSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] - let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml") - - if FileManager.default.fileExists(atPath: rootParakeetModelDirectory.path) { - try FileManager.default.removeItem(at: rootParakeetModelDirectory) - } + if FileManager.default.fileExists(atPath: cacheDirectory.path) { + try FileManager.default.removeItem(at: cacheDirectory) } - - self.isParakeetModelDownloaded = false - + UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: model.name)) } catch { - // Silently fail + // Silently ignore removal errors } - + refreshAllAvailableModels() } - + @MainActor - func showParakeetModelInFinder() { - let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] - .appendingPathComponent("com.prakashjoshipax.VoiceInk") - let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml") - - if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) { - NSWorkspace.shared.selectFile(parakeetModelDirectory.path, inFileViewerRootedAtPath: "") + func showParakeetModelInFinder(_ model: ParakeetModel) { + let cacheDirectory = parakeetCacheDirectory(for: parakeetVersion(for: model.name)) + + if FileManager.default.fileExists(atPath: cacheDirectory.path) { + NSWorkspace.shared.selectFile(cacheDirectory.path, inFileViewerRootedAtPath: "") } } -} +} diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index af7ba3b..4689eca 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -59,7 +59,7 @@ class WhisperState: NSObject, ObservableObject { private var localTranscriptionService: LocalTranscriptionService! private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() - internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory) + internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService() private var modelUrl: URL? { let possibleURLs = [ @@ -82,7 +82,6 @@ class WhisperState: NSObject, ObservableObject { let modelsDirectory: URL let recordingsDirectory: URL - let parakeetModelsDirectory: URL let enhancementService: AIEnhancementService? var licenseViewModel: LicenseViewModel let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") @@ -91,7 +90,7 @@ class WhisperState: NSObject, ObservableObject { // For model progress tracking @Published var downloadProgress: [String: Double] = [:] - @Published var isDownloadingParakeet = false + @Published var parakeetDownloadStates: [String: Bool] = [:] init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) { self.modelContext = modelContext @@ -100,7 +99,6 @@ class WhisperState: NSObject, ObservableObject { self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels") self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings") - self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels") self.enhancementService = enhancementService self.licenseViewModel = LicenseViewModel() @@ -200,8 +198,8 @@ class WhisperState: NSObject, ObservableObject { self.logger.error("❌ Model loading failed: \(error.localizedDescription)") } } - } else if let model = self.currentTranscriptionModel, model.provider == .parakeet { - try? await self.parakeetTranscriptionService.loadModel() + } else if let parakeetModel = self.currentTranscriptionModel as? ParakeetModel { + try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel) } if let enhancementService = self.enhancementService {