Added support for V2 and V3 models

This commit is contained in:
Beingpax 2025-10-19 14:01:31 +05:45
parent 14d52a9c08
commit a0e4dd1367
9 changed files with 143 additions and 131 deletions

View File

@ -7,7 +7,7 @@
"location" : "https://github.com/FluidInference/FluidAudio", "location" : "https://github.com/FluidInference/FluidAudio",
"state" : { "state" : {
"branch" : "main", "branch" : "main",
"revision" : "328036d255ef76b8d661eacc16ac108eb45f9218" "revision" : "a8f3bc7a3be7a93d7d5d412fdf71ae7922e92d09"
} }
}, },
{ {

View File

@ -87,12 +87,22 @@ import Foundation
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple) supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple)
), ),
// Parakeet Model // Parakeet Models
ParakeetModel( ParakeetModel(
name: "parakeet-tdt-0.6b", name: "parakeet-tdt-0.6b-v2",
displayName: "Parakeet V2",
description: "NVIDIA's Parakeet V2 model optimized for lightning-fast English-only transcription.",
size: "474 MB",
speed: 0.99,
accuracy: 0.94,
ramUsage: 0.8,
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
),
ParakeetModel(
name: "parakeet-tdt-0.6b-v3",
displayName: "Parakeet V3", displayName: "Parakeet V3",
description: "NVIDIA's ASR model V3 for lightning-fast transcription with multi-lingual(English + European) support.", description: "NVIDIA's Parakeet V3 model with multilingual support across English and 25 European languages.",
size: "630 MB", size: "494 MB",
speed: 0.99, speed: 0.99,
accuracy: 0.94, accuracy: 0.94,
ramUsage: 0.8, ramUsage: 0.8,
@ -360,4 +370,4 @@ import Foundation
"yue": "Cantonese", "yue": "Cantonese",
"zh": "Chinese", "zh": "Chinese",
] ]
} }

View File

@ -72,7 +72,7 @@ class AudioTranscriptionManager: ObservableObject {
// Initialize parakeet transcription service if needed // Initialize parakeet transcription service if needed
if parakeetTranscriptionService == nil { if parakeetTranscriptionService == nil {
parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) parakeetTranscriptionService = ParakeetTranscriptionService()
} }
// Process audio file // Process audio file

View File

@ -19,7 +19,7 @@ class AudioTranscriptionService: ObservableObject {
private let localTranscriptionService: LocalTranscriptionService private let localTranscriptionService: LocalTranscriptionService
private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory) private lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
enum TranscriptionError: Error { enum TranscriptionError: Error {
case noAudioFile case noAudioFile

View File

@ -4,67 +4,71 @@ import AVFoundation
import FluidAudio import FluidAudio
import os.log import os.log
class ParakeetTranscriptionService: TranscriptionService { class ParakeetTranscriptionService: TranscriptionService {
private var asrManager: AsrManager? private var asrManager: AsrManager?
private var vadManager: VadManager? private var vadManager: VadManager?
private let customModelsDirectory: URL? private var activeVersion: AsrModelVersion?
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink.parakeet", category: "ParakeetTranscriptionService") private let logger = Logger(
subsystem: "com.prakashjoshipax.voiceink.parakeet",
init(customModelsDirectory: URL? = nil) { category: "ParakeetTranscriptionService"
self.customModelsDirectory = customModelsDirectory )
}
private func version(for model: any TranscriptionModel) -> AsrModelVersion {
func loadModel() async throws { model.name.lowercased().contains("v2") ? .v2 : .v3
guard asrManager == nil else { }
logger.notice("🦜 Parakeet models already loaded, skipping")
return
}
let manager = AsrManager(config: .default) private func ensureModelsLoaded(for version: AsrModelVersion) async throws {
let models: AsrModels if let manager = asrManager, activeVersion == version {
logger.notice("🦜 Parakeet \(version == .v2 ? "V2" : "V3") models already loaded, skipping")
if let customModelsDirectory = customModelsDirectory { return
models = try await AsrModels.load(from: customModelsDirectory)
} else {
// Fallback to FluidAudio's default directory
logger.notice("🦜 Loading Parakeet models from FluidAudio default directory")
models = try await AsrModels.downloadAndLoad()
} }
try await manager.initialize(models: models)
self.asrManager = manager cleanup()
let manager = AsrManager(config: .default)
let cacheDirectory = AsrModels.defaultCacheDirectory(for: version)
logger.notice("🦜 Loading Parakeet models from \(cacheDirectory.path)")
let models = try await AsrModels.load(
from: cacheDirectory,
configuration: nil,
version: version
)
try await manager.initialize(models: models)
self.asrManager = manager
self.activeVersion = version
}
func loadModel(for model: ParakeetModel) async throws {
try await ensureModelsLoaded(for: version(for: model))
} }
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
try await loadModel() let targetVersion = version(for: model)
try await ensureModelsLoaded(for: targetVersion)
guard let asrManager = asrManager else { guard let asrManager = asrManager else {
logger.notice("🦜 ASR manager not initialized, cannot transcribe") logger.notice("🦜 ASR manager not initialized, cannot transcribe")
throw ASRError.notInitialized throw ASRError.notInitialized
} }
let audioSamples = try readAudioSamples(from: audioURL) let audioSamples = try readAudioSamples(from: audioURL)
let durationSeconds = Double(audioSamples.count) / 16000.0 let durationSeconds = Double(audioSamples.count) / 16000.0
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
let speechAudio: [Float] let speechAudio: [Float]
if durationSeconds < 20.0 || !isVADEnabled { if durationSeconds < 20.0 || !isVADEnabled {
speechAudio = audioSamples speechAudio = audioSamples
} else { } else {
let vadConfig = VadConfig(threshold: 0.7) let vadConfig = VadConfig(threshold: 0.7)
if vadManager == nil, let customModelsDirectory { if vadManager == nil {
do { do {
vadManager = try await VadManager( vadManager = try await VadManager(config: vadConfig)
config: vadConfig,
modelDirectory: customModelsDirectory.deletingLastPathComponent()
)
} catch { } catch {
logger.notice("🦜 VAD initialization failed, using full audio: \(error.localizedDescription)") logger.notice("🦜 VAD initialization failed, using full audio: \(error.localizedDescription)")
} }
} }
do { do {
if let vadManager { if let vadManager {
let segments = try await vadManager.segmentSpeechAudio(audioSamples) let segments = try await vadManager.segmentSpeechAudio(audioSamples)
@ -109,5 +113,6 @@ class ParakeetTranscriptionService: TranscriptionService {
asrManager?.cleanup() asrManager?.cleanup()
asrManager = nil asrManager = nil
vadManager = nil vadManager = nil
activeVersion = nil
} }
} }

View File

@ -11,11 +11,11 @@ struct ParakeetModelCardRowView: View {
} }
var isDownloaded: Bool { var isDownloaded: Bool {
whisperState.isParakeetModelDownloaded whisperState.isParakeetModelDownloaded(model)
} }
var isDownloading: Bool { var isDownloading: Bool {
whisperState.isDownloadingParakeet whisperState.isParakeetModelDownloading(model)
} }
var body: some View { var body: some View {
@ -104,7 +104,7 @@ struct ParakeetModelCardRowView: View {
private var progressSection: some View { private var progressSection: some View {
Group { Group {
if isDownloading { if isDownloading {
let progress = whisperState.downloadProgress["parakeet-tdt-0.6b"] ?? 0.0 let progress = whisperState.downloadProgress[model.name] ?? 0.0
ProgressView(value: progress) ProgressView(value: progress)
.progressViewStyle(LinearProgressViewStyle()) .progressViewStyle(LinearProgressViewStyle())
.frame(maxWidth: .infinity, alignment: .leading) .frame(maxWidth: .infinity, alignment: .leading)
@ -133,7 +133,7 @@ struct ParakeetModelCardRowView: View {
} else { } else {
Button(action: { Button(action: {
Task { Task {
await whisperState.downloadParakeetModel() await whisperState.downloadParakeetModel(model)
} }
}) { }) {
HStack(spacing: 4) { HStack(spacing: 4) {
@ -153,13 +153,13 @@ struct ParakeetModelCardRowView: View {
if isDownloaded { if isDownloaded {
Menu { Menu {
Button(action: { Button(action: {
whisperState.deleteParakeetModel() whisperState.deleteParakeetModel(model)
}) { }) {
Label("Delete Model", systemImage: "trash") Label("Delete Model", systemImage: "trash")
} }
Button { Button {
whisperState.showParakeetModelInFinder() whisperState.showParakeetModelInFinder(model)
} label: { } label: {
Label("Show in Finder", systemImage: "folder") Label("Show in Finder", systemImage: "folder")
} }

View File

@ -7,7 +7,7 @@ extension WhisperState {
case .local: case .local:
return availableModels.contains { $0.name == model.name } return availableModels.contains { $0.name == model.name }
case .parakeet: case .parakeet:
return isParakeetModelDownloaded return isParakeetModelDownloaded(named: model.name)
case .nativeApple: case .nativeApple:
if #available(macOS 26, *) { if #available(macOS 26, *) {
return true return true

View File

@ -3,99 +3,98 @@ import FluidAudio
import AppKit import AppKit
extension WhisperState { extension WhisperState {
var isParakeetModelDownloaded: Bool { private func parakeetDefaultsKey(for modelName: String) -> String {
get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") } "ParakeetModelDownloaded_\(modelName)"
set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
} }
var isParakeetModelDownloading: Bool { private func parakeetVersion(for modelName: String) -> AsrModelVersion {
get { isDownloadingParakeet } modelName.lowercased().contains("v2") ? .v2 : .v3
set { isDownloadingParakeet = newValue } }
private func parakeetCacheDirectory(for version: AsrModelVersion) -> URL {
AsrModels.defaultCacheDirectory(for: version)
}
func isParakeetModelDownloaded(named modelName: String) -> Bool {
UserDefaults.standard.bool(forKey: parakeetDefaultsKey(for: modelName))
}
func isParakeetModelDownloaded(_ model: ParakeetModel) -> Bool {
isParakeetModelDownloaded(named: model.name)
}
func isParakeetModelDownloading(_ model: ParakeetModel) -> Bool {
parakeetDownloadStates[model.name] ?? false
} }
@MainActor @MainActor
func downloadParakeetModel() async { func downloadParakeetModel(_ model: ParakeetModel) async {
if isParakeetModelDownloaded { if isParakeetModelDownloaded(model) {
return return
} }
isDownloadingParakeet = true let modelName = model.name
downloadProgress["parakeet-tdt-0.6b"] = 0.0 parakeetDownloadStates[modelName] = true
downloadProgress[modelName] = 0.0
// Start progress simulation
let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in
Task { @MainActor in Task { @MainActor in
if let currentProgress = self.downloadProgress["parakeet-tdt-0.6b"], currentProgress < 0.9 { if let currentProgress = self.downloadProgress[modelName], currentProgress < 0.9 {
self.downloadProgress["parakeet-tdt-0.6b"] = currentProgress + 0.0125 self.downloadProgress[modelName] = currentProgress + 0.005
} }
} }
} }
do { let version = parakeetVersion(for: modelName)
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
// Also download VAD model into the same parent directory as ASR models do {
let parentDir = parakeetModelsDirectory.deletingLastPathComponent() _ = try await AsrModels.downloadAndLoad(version: version)
_ = try await DownloadUtils.loadModels(
.vad, _ = try await VadManager()
modelNames: Array(ModelNames.VAD.requiredModels),
directory: parentDir UserDefaults.standard.set(true, forKey: parakeetDefaultsKey(for: modelName))
) downloadProgress[modelName] = 1.0
self.isParakeetModelDownloaded = true
downloadProgress["parakeet-tdt-0.6b"] = 1.0
} catch { } catch {
self.isParakeetModelDownloaded = false UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: modelName))
} }
timer.invalidate() timer.invalidate()
isDownloadingParakeet = false parakeetDownloadStates[modelName] = false
downloadProgress["parakeet-tdt-0.6b"] = nil downloadProgress[modelName] = nil
refreshAllAvailableModels() refreshAllAvailableModels()
} }
@MainActor @MainActor
func deleteParakeetModel() { func deleteParakeetModel(_ model: ParakeetModel) {
if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet { if let currentModel = currentTranscriptionModel,
currentModel.provider == .parakeet,
currentModel.name == model.name {
currentTranscriptionModel = nil currentTranscriptionModel = nil
UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel") UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
} }
let version = parakeetVersion(for: model.name)
let cacheDirectory = parakeetCacheDirectory(for: version)
do { do {
// First try: app support directory + bundle path if FileManager.default.fileExists(atPath: cacheDirectory.path) {
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] try FileManager.default.removeItem(at: cacheDirectory)
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
try FileManager.default.removeItem(at: parakeetModelDirectory)
} else {
// Second try: root of application support directory
let rootAppSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
if FileManager.default.fileExists(atPath: rootParakeetModelDirectory.path) {
try FileManager.default.removeItem(at: rootParakeetModelDirectory)
}
} }
UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: model.name))
self.isParakeetModelDownloaded = false
} catch { } catch {
// Silently fail // Silently ignore removal errors
} }
refreshAllAvailableModels() refreshAllAvailableModels()
} }
@MainActor @MainActor
func showParakeetModelInFinder() { func showParakeetModelInFinder(_ model: ParakeetModel) {
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] let cacheDirectory = parakeetCacheDirectory(for: parakeetVersion(for: model.name))
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml") if FileManager.default.fileExists(atPath: cacheDirectory.path) {
NSWorkspace.shared.selectFile(cacheDirectory.path, inFileViewerRootedAtPath: "")
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
NSWorkspace.shared.selectFile(parakeetModelDirectory.path, inFileViewerRootedAtPath: "")
} }
} }
} }

View File

@ -59,7 +59,7 @@ class WhisperState: NSObject, ObservableObject {
private var localTranscriptionService: LocalTranscriptionService! private var localTranscriptionService: LocalTranscriptionService!
private lazy var cloudTranscriptionService = CloudTranscriptionService() private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory) internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
private var modelUrl: URL? { private var modelUrl: URL? {
let possibleURLs = [ let possibleURLs = [
@ -82,7 +82,6 @@ class WhisperState: NSObject, ObservableObject {
let modelsDirectory: URL let modelsDirectory: URL
let recordingsDirectory: URL let recordingsDirectory: URL
let parakeetModelsDirectory: URL
let enhancementService: AIEnhancementService? let enhancementService: AIEnhancementService?
var licenseViewModel: LicenseViewModel var licenseViewModel: LicenseViewModel
let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
@ -91,7 +90,7 @@ class WhisperState: NSObject, ObservableObject {
// For model progress tracking // For model progress tracking
@Published var downloadProgress: [String: Double] = [:] @Published var downloadProgress: [String: Double] = [:]
@Published var isDownloadingParakeet = false @Published var parakeetDownloadStates: [String: Bool] = [:]
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) { init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
self.modelContext = modelContext self.modelContext = modelContext
@ -100,7 +99,6 @@ class WhisperState: NSObject, ObservableObject {
self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels") self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings") self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels")
self.enhancementService = enhancementService self.enhancementService = enhancementService
self.licenseViewModel = LicenseViewModel() self.licenseViewModel = LicenseViewModel()
@ -200,8 +198,8 @@ class WhisperState: NSObject, ObservableObject {
self.logger.error("❌ Model loading failed: \(error.localizedDescription)") self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
} }
} }
} else if let model = self.currentTranscriptionModel, model.provider == .parakeet { } else if let parakeetModel = self.currentTranscriptionModel as? ParakeetModel {
try? await self.parakeetTranscriptionService.loadModel() try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel)
} }
if let enhancementService = self.enhancementService { if let enhancementService = self.enhancementService {