Added support for V2 and V3 models

This commit is contained in:
Beingpax 2025-10-19 14:01:31 +05:45
parent 14d52a9c08
commit a0e4dd1367
9 changed files with 143 additions and 131 deletions

View File

@ -7,7 +7,7 @@
"location" : "https://github.com/FluidInference/FluidAudio",
"state" : {
"branch" : "main",
"revision" : "328036d255ef76b8d661eacc16ac108eb45f9218"
"revision" : "a8f3bc7a3be7a93d7d5d412fdf71ae7922e92d09"
}
},
{

View File

@ -87,12 +87,22 @@ import Foundation
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple)
),
// Parakeet Model
// Parakeet Models
ParakeetModel(
name: "parakeet-tdt-0.6b",
name: "parakeet-tdt-0.6b-v2",
displayName: "Parakeet V2",
description: "NVIDIA's Parakeet V2 model optimized for lightning-fast English-only transcription.",
size: "474 MB",
speed: 0.99,
accuracy: 0.94,
ramUsage: 0.8,
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
),
ParakeetModel(
name: "parakeet-tdt-0.6b-v3",
displayName: "Parakeet V3",
description: "NVIDIA's ASR model V3 for lightning-fast transcription with multi-lingual(English + European) support.",
size: "630 MB",
description: "NVIDIA's Parakeet V3 model with multilingual support across English and 25 European languages.",
size: "494 MB",
speed: 0.99,
accuracy: 0.94,
ramUsage: 0.8,
@ -360,4 +370,4 @@ import Foundation
"yue": "Cantonese",
"zh": "Chinese",
]
}
}

View File

@ -72,7 +72,7 @@ class AudioTranscriptionManager: ObservableObject {
// Initialize parakeet transcription service if needed
if parakeetTranscriptionService == nil {
parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
parakeetTranscriptionService = ParakeetTranscriptionService()
}
// Process audio file

View File

@ -19,7 +19,7 @@ class AudioTranscriptionService: ObservableObject {
private let localTranscriptionService: LocalTranscriptionService
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
enum TranscriptionError: Error {
case noAudioFile

View File

@ -4,67 +4,71 @@ import AVFoundation
import FluidAudio
import os.log
class ParakeetTranscriptionService: TranscriptionService {
private var asrManager: AsrManager?
private var vadManager: VadManager?
private let customModelsDirectory: URL?
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink.parakeet", category: "ParakeetTranscriptionService")
init(customModelsDirectory: URL? = nil) {
self.customModelsDirectory = customModelsDirectory
}
func loadModel() async throws {
guard asrManager == nil else {
logger.notice("🦜 Parakeet models already loaded, skipping")
return
}
class ParakeetTranscriptionService: TranscriptionService {
private var asrManager: AsrManager?
private var vadManager: VadManager?
private var activeVersion: AsrModelVersion?
private let logger = Logger(
subsystem: "com.prakashjoshipax.voiceink.parakeet",
category: "ParakeetTranscriptionService"
)
/// Maps a transcription model to the Parakeet ASR model version it needs.
///
/// A model whose name contains "v2" (compared case-insensitively) resolves to `.v2`;
/// every other name resolves to `.v3`.
///
/// - Parameter model: The transcription model whose `name` is inspected.
/// - Returns: The ASR model version matching the model name.
private func version(for model: any TranscriptionModel) -> AsrModelVersion {
    let normalizedName = model.name.lowercased()
    if normalizedName.contains("v2") {
        return .v2
    }
    return .v3
}
let manager = AsrManager(config: .default)
let models: AsrModels
if let customModelsDirectory = customModelsDirectory {
models = try await AsrModels.load(from: customModelsDirectory)
} else {
// Fallback to FluidAudio's default directory
logger.notice("🦜 Loading Parakeet models from FluidAudio default directory")
models = try await AsrModels.downloadAndLoad()
/// Makes sure the ASR models for `version` are loaded before transcription starts.
///
/// Returns immediately when a manager is already present for the same version.
/// Otherwise the current state is discarded through `cleanup()` and the models are
/// loaded from the default cache directory of the requested version.
///
/// - Parameter version: The Parakeet model version that must be active.
/// - Throws: Whatever `AsrModels.load` or `AsrManager.initialize` throws; the new
///   manager and version are stored only after both calls have succeeded.
private func ensureModelsLoaded(for version: AsrModelVersion) async throws {
    // Only the presence of a manager matters here, so test for nil instead of
    // binding a value that is never used.
    if asrManager != nil, activeVersion == version {
        logger.notice("🦜 Parakeet \(version == .v2 ? "V2" : "V3") models already loaded, skipping")
        return
    }

    // Discard whatever is currently loaded before switching versions.
    cleanup()

    let manager = AsrManager(config: .default)
    let cacheDirectory = AsrModels.defaultCacheDirectory(for: version)
    logger.notice("🦜 Loading Parakeet models from \(cacheDirectory.path)")
    let models = try await AsrModels.load(
        from: cacheDirectory,
        configuration: nil,
        version: version
    )

    try await manager.initialize(models: models)
    // Record the manager and its version together so the early-return check above stays consistent.
    asrManager = manager
    activeVersion = version
}
/// Loads the Parakeet models that `model` requires, unless they are already active.
///
/// - Parameter model: The Parakeet model whose name selects the version to load.
/// - Throws: Any error thrown by `ensureModelsLoaded(for:)`.
func loadModel(for model: ParakeetModel) async throws {
    let requiredVersion = version(for: model)
    try await ensureModelsLoaded(for: requiredVersion)
}
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
try await loadModel()
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
let targetVersion = version(for: model)
try await ensureModelsLoaded(for: targetVersion)
guard let asrManager = asrManager else {
logger.notice("🦜 ASR manager not initialized, cannot transcribe")
throw ASRError.notInitialized
}
}
let audioSamples = try readAudioSamples(from: audioURL)
let audioSamples = try readAudioSamples(from: audioURL)
let durationSeconds = Double(audioSamples.count) / 16000.0
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
let durationSeconds = Double(audioSamples.count) / 16000.0
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
let speechAudio: [Float]
if durationSeconds < 20.0 || !isVADEnabled {
speechAudio = audioSamples
} else {
let vadConfig = VadConfig(threshold: 0.7)
if vadManager == nil, let customModelsDirectory {
do {
vadManager = try await VadManager(
config: vadConfig,
modelDirectory: customModelsDirectory.deletingLastPathComponent()
)
let speechAudio: [Float]
if durationSeconds < 20.0 || !isVADEnabled {
speechAudio = audioSamples
} else {
let vadConfig = VadConfig(threshold: 0.7)
if vadManager == nil {
do {
vadManager = try await VadManager(config: vadConfig)
} catch {
logger.notice("🦜 VAD initialization failed, using full audio: \(error.localizedDescription)")
}
}
}
}
do {
if let vadManager {
let segments = try await vadManager.segmentSpeechAudio(audioSamples)
@ -109,5 +113,6 @@ class ParakeetTranscriptionService: TranscriptionService {
asrManager?.cleanup()
asrManager = nil
vadManager = nil
activeVersion = nil
}
}

View File

@ -11,11 +11,11 @@ struct ParakeetModelCardRowView: View {
}
var isDownloaded: Bool {
whisperState.isParakeetModelDownloaded
whisperState.isParakeetModelDownloaded(model)
}
var isDownloading: Bool {
whisperState.isDownloadingParakeet
whisperState.isParakeetModelDownloading(model)
}
var body: some View {
@ -104,7 +104,7 @@ struct ParakeetModelCardRowView: View {
private var progressSection: some View {
Group {
if isDownloading {
let progress = whisperState.downloadProgress["parakeet-tdt-0.6b"] ?? 0.0
let progress = whisperState.downloadProgress[model.name] ?? 0.0
ProgressView(value: progress)
.progressViewStyle(LinearProgressViewStyle())
.frame(maxWidth: .infinity, alignment: .leading)
@ -133,7 +133,7 @@ struct ParakeetModelCardRowView: View {
} else {
Button(action: {
Task {
await whisperState.downloadParakeetModel()
await whisperState.downloadParakeetModel(model)
}
}) {
HStack(spacing: 4) {
@ -153,13 +153,13 @@ struct ParakeetModelCardRowView: View {
if isDownloaded {
Menu {
Button(action: {
whisperState.deleteParakeetModel()
whisperState.deleteParakeetModel(model)
}) {
Label("Delete Model", systemImage: "trash")
}
Button {
whisperState.showParakeetModelInFinder()
whisperState.showParakeetModelInFinder(model)
} label: {
Label("Show in Finder", systemImage: "folder")
}

View File

@ -7,7 +7,7 @@ extension WhisperState {
case .local:
return availableModels.contains { $0.name == model.name }
case .parakeet:
return isParakeetModelDownloaded
return isParakeetModelDownloaded(named: model.name)
case .nativeApple:
if #available(macOS 26, *) {
return true

View File

@ -3,99 +3,98 @@ import FluidAudio
import AppKit
extension WhisperState {
var isParakeetModelDownloaded: Bool {
get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") }
set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
/// Builds the UserDefaults key under which the downloaded flag of a model is stored.
///
/// - Parameter modelName: The name of the Parakeet model.
/// - Returns: The model name prefixed with `ParakeetModelDownloaded_`.
private func parakeetDefaultsKey(for modelName: String) -> String {
    let key = "ParakeetModelDownloaded_\(modelName)"
    return key
}
var isParakeetModelDownloading: Bool {
get { isDownloadingParakeet }
set { isDownloadingParakeet = newValue }
/// Derives the ASR model version from a Parakeet model name.
///
/// - Parameter modelName: The model name; it is compared case-insensitively.
/// - Returns: `.v2` when the name contains "v2", otherwise `.v3`.
private func parakeetVersion(for modelName: String) -> AsrModelVersion {
    let mentionsV2 = modelName.lowercased().contains("v2")
    return mentionsV2 ? .v2 : .v3
}
/// Returns the cache directory that `AsrModels.defaultCacheDirectory(for:)` reports for `version`.
///
/// - Parameter version: The Parakeet model version.
/// - Returns: The URL of that version's default cache directory.
private func parakeetCacheDirectory(for version: AsrModelVersion) -> URL {
    let directory = AsrModels.defaultCacheDirectory(for: version)
    return directory
}
/// Reports whether the model with the given name is flagged as downloaded.
///
/// The answer comes from the flag stored in `UserDefaults.standard`; the file
/// system is not consulted here.
///
/// - Parameter modelName: The name of the Parakeet model.
/// - Returns: The stored flag, or `false` when no flag has been stored.
func isParakeetModelDownloaded(named modelName: String) -> Bool {
    let defaultsKey = parakeetDefaultsKey(for: modelName)
    return UserDefaults.standard.bool(forKey: defaultsKey)
}
/// Reports whether `model` is flagged as downloaded.
///
/// Convenience overload that forwards the model's name to `isParakeetModelDownloaded(named:)`.
///
/// - Parameter model: The Parakeet model to look up.
/// - Returns: `true` when the model's downloaded flag is set.
func isParakeetModelDownloaded(_ model: ParakeetModel) -> Bool {
    let modelName = model.name
    return isParakeetModelDownloaded(named: modelName)
}
/// Reports whether a download is currently recorded for `model`.
///
/// - Parameter model: The Parakeet model to look up in `parakeetDownloadStates`.
/// - Returns: The recorded state for the model's name, or `false` when there is no entry.
func isParakeetModelDownloading(_ model: ParakeetModel) -> Bool {
    return parakeetDownloadStates[model.name, default: false]
}
/// Downloads the Parakeet models for `model` and records the outcome in UserDefaults.
///
/// Returns immediately when the model is already flagged as downloaded. While the
/// download runs, `parakeetDownloadStates[model.name]` is `true` and
/// `downloadProgress[model.name]` holds a simulated progress value; both are
/// cleared again before the method returns.
///
/// - Parameter model: The Parakeet model whose name selects the version to download.
@MainActor
func downloadParakeetModel() async {
if isParakeetModelDownloaded {
func downloadParakeetModel(_ model: ParakeetModel) async {
if isParakeetModelDownloaded(model) {
return
}
isDownloadingParakeet = true
downloadProgress["parakeet-tdt-0.6b"] = 0.0
let modelName = model.name
parakeetDownloadStates[modelName] = true
downloadProgress[modelName] = 0.0
// Start progress simulation
// The repeating 1.2 s timer nudges the stored progress upward while it is still
// below 0.9; it is not driven by the actual download.
let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in
Task { @MainActor in
if let currentProgress = self.downloadProgress["parakeet-tdt-0.6b"], currentProgress < 0.9 {
self.downloadProgress["parakeet-tdt-0.6b"] = currentProgress + 0.0125
if let currentProgress = self.downloadProgress[modelName], currentProgress < 0.9 {
self.downloadProgress[modelName] = currentProgress + 0.005
}
}
}
do {
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
let version = parakeetVersion(for: modelName)
// Also download VAD model into the same parent directory as ASR models
let parentDir = parakeetModelsDirectory.deletingLastPathComponent()
_ = try await DownloadUtils.loadModels(
.vad,
modelNames: Array(ModelNames.VAD.requiredModels),
directory: parentDir
)
self.isParakeetModelDownloaded = true
downloadProgress["parakeet-tdt-0.6b"] = 1.0
do {
_ = try await AsrModels.downloadAndLoad(version: version)
// NOTE(review): presumably constructing a VadManager also fetches the VAD model —
// confirm against the FluidAudio API.
_ = try await VadManager()
UserDefaults.standard.set(true, forKey: parakeetDefaultsKey(for: modelName))
downloadProgress[modelName] = 1.0
} catch {
self.isParakeetModelDownloaded = false
// The error is not surfaced to the caller; the model is only flagged as not downloaded.
// NOTE(review): consider logging `error` so failed downloads can be diagnosed.
UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: modelName))
}
// Runs after success and failure alike: stop the simulated progress and clear
// the per-model download state.
timer.invalidate()
isDownloadingParakeet = false
downloadProgress["parakeet-tdt-0.6b"] = nil
parakeetDownloadStates[modelName] = false
downloadProgress[modelName] = nil
refreshAllAvailableModels()
}
/// Deletes the cached files of `model` and clears its downloaded flag.
///
/// When `model` is the currently selected transcription model, the selection and
/// its "CurrentTranscriptionModel" entry in UserDefaults are cleared first.
///
/// - Parameter model: The Parakeet model to delete.
@MainActor
func deleteParakeetModel() {
if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet {
func deleteParakeetModel(_ model: ParakeetModel) {
if let currentModel = currentTranscriptionModel,
currentModel.provider == .parakeet,
currentModel.name == model.name {
currentTranscriptionModel = nil
UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
}
let version = parakeetVersion(for: model.name)
let cacheDirectory = parakeetCacheDirectory(for: version)
do {
// First try: app support directory + bundle path
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
try FileManager.default.removeItem(at: parakeetModelDirectory)
} else {
// Second try: root of application support directory
let rootAppSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
if FileManager.default.fileExists(atPath: rootParakeetModelDirectory.path) {
try FileManager.default.removeItem(at: rootParakeetModelDirectory)
}
// Remove the version's cache directory when it exists.
if FileManager.default.fileExists(atPath: cacheDirectory.path) {
try FileManager.default.removeItem(at: cacheDirectory)
}
self.isParakeetModelDownloaded = false
// Only reached when the removal above did not throw.
UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: model.name))
} catch {
// Silently fail
// Silently ignore removal errors
// NOTE(review): a failed removal leaves the downloaded flag unchanged and is not
// reported anywhere — consider logging it.
}
refreshAllAvailableModels()
}
/// Reveals the cache directory for `model`'s version in Finder.
///
/// Nothing happens when that directory does not exist on disk.
///
/// - Parameter model: The Parakeet model whose cache directory should be shown.
@MainActor
func showParakeetModelInFinder() {
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
NSWorkspace.shared.selectFile(parakeetModelDirectory.path, inFileViewerRootedAtPath: "")
func showParakeetModelInFinder(_ model: ParakeetModel) {
// The directory is derived from the model's version, not from a fixed path.
let cacheDirectory = parakeetCacheDirectory(for: parakeetVersion(for: model.name))
if FileManager.default.fileExists(atPath: cacheDirectory.path) {
NSWorkspace.shared.selectFile(cacheDirectory.path, inFileViewerRootedAtPath: "")
}
}
}
}

View File

@ -59,7 +59,7 @@ class WhisperState: NSObject, ObservableObject {
private var localTranscriptionService: LocalTranscriptionService!
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory)
internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
private var modelUrl: URL? {
let possibleURLs = [
@ -82,7 +82,6 @@ class WhisperState: NSObject, ObservableObject {
let modelsDirectory: URL
let recordingsDirectory: URL
let parakeetModelsDirectory: URL
let enhancementService: AIEnhancementService?
var licenseViewModel: LicenseViewModel
let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
@ -91,7 +90,7 @@ class WhisperState: NSObject, ObservableObject {
// For model progress tracking
@Published var downloadProgress: [String: Double] = [:]
@Published var isDownloadingParakeet = false
@Published var parakeetDownloadStates: [String: Bool] = [:]
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
self.modelContext = modelContext
@ -100,7 +99,6 @@ class WhisperState: NSObject, ObservableObject {
self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels")
self.enhancementService = enhancementService
self.licenseViewModel = LicenseViewModel()
@ -200,8 +198,8 @@ class WhisperState: NSObject, ObservableObject {
self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
}
}
} else if let model = self.currentTranscriptionModel, model.provider == .parakeet {
try? await self.parakeetTranscriptionService.loadModel()
} else if let parakeetModel = self.currentTranscriptionModel as? ParakeetModel {
try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel)
}
if let enhancementService = self.enhancementService {