Added support for V2 and V3 models
This commit is contained in:
parent
14d52a9c08
commit
a0e4dd1367
@ -7,7 +7,7 @@
|
||||
"location" : "https://github.com/FluidInference/FluidAudio",
|
||||
"state" : {
|
||||
"branch" : "main",
|
||||
"revision" : "328036d255ef76b8d661eacc16ac108eb45f9218"
|
||||
"revision" : "a8f3bc7a3be7a93d7d5d412fdf71ae7922e92d09"
|
||||
}
|
||||
},
|
||||
{
|
||||
|
||||
@ -87,12 +87,22 @@ import Foundation
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple)
|
||||
),
|
||||
|
||||
// Parakeet Model
|
||||
// Parakeet Models
|
||||
ParakeetModel(
|
||||
name: "parakeet-tdt-0.6b",
|
||||
name: "parakeet-tdt-0.6b-v2",
|
||||
displayName: "Parakeet V2",
|
||||
description: "NVIDIA's Parakeet V2 model optimized for lightning-fast English-only transcription.",
|
||||
size: "474 MB",
|
||||
speed: 0.99,
|
||||
accuracy: 0.94,
|
||||
ramUsage: 0.8,
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
|
||||
),
|
||||
ParakeetModel(
|
||||
name: "parakeet-tdt-0.6b-v3",
|
||||
displayName: "Parakeet V3",
|
||||
description: "NVIDIA's ASR model V3 for lightning-fast transcription with multi-lingual(English + European) support.",
|
||||
size: "630 MB",
|
||||
description: "NVIDIA's Parakeet V3 model with multilingual support across English and 25 European languages.",
|
||||
size: "494 MB",
|
||||
speed: 0.99,
|
||||
accuracy: 0.94,
|
||||
ramUsage: 0.8,
|
||||
@ -360,4 +370,4 @@ import Foundation
|
||||
"yue": "Cantonese",
|
||||
"zh": "Chinese",
|
||||
]
|
||||
}
|
||||
}
|
||||
|
||||
@ -72,7 +72,7 @@ class AudioTranscriptionManager: ObservableObject {
|
||||
|
||||
// Initialize parakeet transcription service if needed
|
||||
if parakeetTranscriptionService == nil {
|
||||
parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
|
||||
parakeetTranscriptionService = ParakeetTranscriptionService()
|
||||
}
|
||||
|
||||
// Process audio file
|
||||
|
||||
@ -19,7 +19,7 @@ class AudioTranscriptionService: ObservableObject {
|
||||
private let localTranscriptionService: LocalTranscriptionService
|
||||
private lazy var cloudTranscriptionService = CloudTranscriptionService()
|
||||
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
|
||||
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
|
||||
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
|
||||
|
||||
enum TranscriptionError: Error {
|
||||
case noAudioFile
|
||||
|
||||
@ -4,67 +4,71 @@ import AVFoundation
|
||||
import FluidAudio
|
||||
import os.log
|
||||
|
||||
class ParakeetTranscriptionService: TranscriptionService {
|
||||
private var asrManager: AsrManager?
|
||||
private var vadManager: VadManager?
|
||||
private let customModelsDirectory: URL?
|
||||
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink.parakeet", category: "ParakeetTranscriptionService")
|
||||
|
||||
init(customModelsDirectory: URL? = nil) {
|
||||
self.customModelsDirectory = customModelsDirectory
|
||||
}
|
||||
|
||||
func loadModel() async throws {
|
||||
guard asrManager == nil else {
|
||||
logger.notice("🦜 Parakeet models already loaded, skipping")
|
||||
return
|
||||
}
|
||||
class ParakeetTranscriptionService: TranscriptionService {
|
||||
private var asrManager: AsrManager?
|
||||
private var vadManager: VadManager?
|
||||
private var activeVersion: AsrModelVersion?
|
||||
private let logger = Logger(
|
||||
subsystem: "com.prakashjoshipax.voiceink.parakeet",
|
||||
category: "ParakeetTranscriptionService"
|
||||
)
|
||||
|
||||
/// Picks the Parakeet model version that corresponds to a transcription model.
///
/// The decision is made from the model's name: if the lowercased name contains
/// "v2" the result is `.v2`; every other name yields `.v3`.
///
/// - Parameter model: The transcription model whose `name` is inspected.
/// - Returns: `.v2` or `.v3`.
private func version(for model: any TranscriptionModel) -> AsrModelVersion {
    let normalizedName = model.name.lowercased()
    if normalizedName.contains("v2") {
        return .v2
    }
    return .v3
}
|
||||
|
||||
let manager = AsrManager(config: .default)
|
||||
let models: AsrModels
|
||||
|
||||
if let customModelsDirectory = customModelsDirectory {
|
||||
models = try await AsrModels.load(from: customModelsDirectory)
|
||||
} else {
|
||||
// Fallback to FluidAudio's default directory
|
||||
logger.notice("🦜 Loading Parakeet models from FluidAudio default directory")
|
||||
models = try await AsrModels.downloadAndLoad()
|
||||
/// Makes sure the ASR models for `version` are loaded, reloading when a
/// different version (or nothing) is currently active.
///
/// When a manager already exists and `activeVersion` equals `version`, the call
/// only logs and returns. Otherwise `cleanup()` is invoked, a fresh `AsrManager`
/// is created, the models are loaded from `AsrModels.defaultCacheDirectory(for:)`
/// and the manager is initialized with them.
///
/// - Parameter version: The Parakeet model version that must be active on return.
/// - Throws: Any error thrown by `AsrModels.load(from:configuration:version:)`
///   or `AsrManager.initialize(models:)`. In that case `asrManager` and
///   `activeVersion` are not assigned by this method.
private func ensureModelsLoaded(for version: AsrModelVersion) async throws {
    // Only the presence of a manager matters here, so test for nil instead of
    // binding a value that would never be read.
    if asrManager != nil, activeVersion == version {
        logger.notice("🦜 Parakeet \(version == .v2 ? "V2" : "V3") models already loaded, skipping")
        return
    }

    // Reset existing state before loading the requested version.
    cleanup()

    let manager = AsrManager(config: .default)
    let cacheDirectory = AsrModels.defaultCacheDirectory(for: version)
    logger.notice("🦜 Loading Parakeet models from \(cacheDirectory.path)")
    let models = try await AsrModels.load(
        from: cacheDirectory,
        configuration: nil,
        version: version
    )
    try await manager.initialize(models: models)

    // Publish the manager and its version only after initialization succeeded.
    self.asrManager = manager
    self.activeVersion = version
}
|
||||
|
||||
/// Loads the Parakeet models that match the given model.
///
/// Resolves the model to a version via `version(for:)` and delegates to
/// `ensureModelsLoaded(for:)`.
///
/// - Parameter model: The Parakeet model to prepare for transcription.
/// - Throws: Whatever `ensureModelsLoaded(for:)` throws.
func loadModel(for model: ParakeetModel) async throws {
    let requestedVersion = version(for: model)
    try await ensureModelsLoaded(for: requestedVersion)
}
|
||||
|
||||
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
||||
try await loadModel()
|
||||
|
||||
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
||||
let targetVersion = version(for: model)
|
||||
try await ensureModelsLoaded(for: targetVersion)
|
||||
|
||||
guard let asrManager = asrManager else {
|
||||
logger.notice("🦜 ASR manager not initialized, cannot transcribe")
|
||||
throw ASRError.notInitialized
|
||||
}
|
||||
}
|
||||
|
||||
let audioSamples = try readAudioSamples(from: audioURL)
|
||||
let audioSamples = try readAudioSamples(from: audioURL)
|
||||
|
||||
let durationSeconds = Double(audioSamples.count) / 16000.0
|
||||
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
|
||||
let durationSeconds = Double(audioSamples.count) / 16000.0
|
||||
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
|
||||
|
||||
let speechAudio: [Float]
|
||||
if durationSeconds < 20.0 || !isVADEnabled {
|
||||
speechAudio = audioSamples
|
||||
} else {
|
||||
let vadConfig = VadConfig(threshold: 0.7)
|
||||
if vadManager == nil, let customModelsDirectory {
|
||||
do {
|
||||
vadManager = try await VadManager(
|
||||
config: vadConfig,
|
||||
modelDirectory: customModelsDirectory.deletingLastPathComponent()
|
||||
)
|
||||
let speechAudio: [Float]
|
||||
if durationSeconds < 20.0 || !isVADEnabled {
|
||||
speechAudio = audioSamples
|
||||
} else {
|
||||
let vadConfig = VadConfig(threshold: 0.7)
|
||||
if vadManager == nil {
|
||||
do {
|
||||
vadManager = try await VadManager(config: vadConfig)
|
||||
} catch {
|
||||
logger.notice("🦜 VAD initialization failed, using full audio: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
if let vadManager {
|
||||
let segments = try await vadManager.segmentSpeechAudio(audioSamples)
|
||||
@ -109,5 +113,6 @@ class ParakeetTranscriptionService: TranscriptionService {
|
||||
asrManager?.cleanup()
|
||||
asrManager = nil
|
||||
vadManager = nil
|
||||
activeVersion = nil
|
||||
}
|
||||
}
|
||||
|
||||
@ -11,11 +11,11 @@ struct ParakeetModelCardRowView: View {
|
||||
}
|
||||
|
||||
var isDownloaded: Bool {
|
||||
whisperState.isParakeetModelDownloaded
|
||||
whisperState.isParakeetModelDownloaded(model)
|
||||
}
|
||||
|
||||
var isDownloading: Bool {
|
||||
whisperState.isDownloadingParakeet
|
||||
whisperState.isParakeetModelDownloading(model)
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
@ -104,7 +104,7 @@ struct ParakeetModelCardRowView: View {
|
||||
private var progressSection: some View {
|
||||
Group {
|
||||
if isDownloading {
|
||||
let progress = whisperState.downloadProgress["parakeet-tdt-0.6b"] ?? 0.0
|
||||
let progress = whisperState.downloadProgress[model.name] ?? 0.0
|
||||
ProgressView(value: progress)
|
||||
.progressViewStyle(LinearProgressViewStyle())
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
@ -133,7 +133,7 @@ struct ParakeetModelCardRowView: View {
|
||||
} else {
|
||||
Button(action: {
|
||||
Task {
|
||||
await whisperState.downloadParakeetModel()
|
||||
await whisperState.downloadParakeetModel(model)
|
||||
}
|
||||
}) {
|
||||
HStack(spacing: 4) {
|
||||
@ -153,13 +153,13 @@ struct ParakeetModelCardRowView: View {
|
||||
if isDownloaded {
|
||||
Menu {
|
||||
Button(action: {
|
||||
whisperState.deleteParakeetModel()
|
||||
whisperState.deleteParakeetModel(model)
|
||||
}) {
|
||||
Label("Delete Model", systemImage: "trash")
|
||||
}
|
||||
|
||||
Button {
|
||||
whisperState.showParakeetModelInFinder()
|
||||
whisperState.showParakeetModelInFinder(model)
|
||||
} label: {
|
||||
Label("Show in Finder", systemImage: "folder")
|
||||
}
|
||||
|
||||
@ -7,7 +7,7 @@ extension WhisperState {
|
||||
case .local:
|
||||
return availableModels.contains { $0.name == model.name }
|
||||
case .parakeet:
|
||||
return isParakeetModelDownloaded
|
||||
return isParakeetModelDownloaded(named: model.name)
|
||||
case .nativeApple:
|
||||
if #available(macOS 26, *) {
|
||||
return true
|
||||
|
||||
@ -3,99 +3,98 @@ import FluidAudio
|
||||
import AppKit
|
||||
|
||||
extension WhisperState {
|
||||
var isParakeetModelDownloaded: Bool {
|
||||
get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") }
|
||||
set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
|
||||
/// Builds the `UserDefaults` key under which the "downloaded" flag for a
/// Parakeet model is stored.
///
/// - Parameter modelName: The model's name; appended verbatim after the prefix.
/// - Returns: `"ParakeetModelDownloaded_"` followed by `modelName`.
private func parakeetDefaultsKey(for modelName: String) -> String {
    let defaultsKey = "ParakeetModelDownloaded_\(modelName)"
    return defaultsKey
}
|
||||
|
||||
var isParakeetModelDownloading: Bool {
|
||||
get { isDownloadingParakeet }
|
||||
set { isDownloadingParakeet = newValue }
|
||||
/// Derives the Parakeet model version from a model name.
///
/// - Parameter modelName: The model name to inspect (compared case-insensitively).
/// - Returns: `.v2` when the lowercased name contains "v2", otherwise `.v3`.
private func parakeetVersion(for modelName: String) -> AsrModelVersion {
    let normalizedName = modelName.lowercased()
    guard normalizedName.contains("v2") else {
        return .v3
    }
    return .v2
}
|
||||
|
||||
/// Returns the directory that `AsrModels.defaultCacheDirectory(for:)` reports
/// for the given model version.
///
/// - Parameter version: The Parakeet model version to look up.
/// - Returns: The cache directory URL for that version.
private func parakeetCacheDirectory(for version: AsrModelVersion) -> URL {
    let directory = AsrModels.defaultCacheDirectory(for: version)
    return directory
}
|
||||
|
||||
/// Reports whether the Parakeet model with the given name is flagged as downloaded.
///
/// The flag is read from `UserDefaults.standard` under the key produced by
/// `parakeetDefaultsKey(for:)`.
///
/// - Parameter modelName: Name of the model to look up.
/// - Returns: The stored Boolean for that model's key.
func isParakeetModelDownloaded(named modelName: String) -> Bool {
    let defaultsKey = parakeetDefaultsKey(for: modelName)
    return UserDefaults.standard.bool(forKey: defaultsKey)
}
|
||||
|
||||
/// Convenience overload that checks the downloaded flag using the model's name.
///
/// - Parameter model: The Parakeet model to check.
/// - Returns: The result of `isParakeetModelDownloaded(named:)` for `model.name`.
func isParakeetModelDownloaded(_ model: ParakeetModel) -> Bool {
    let modelName = model.name
    return isParakeetModelDownloaded(named: modelName)
}
|
||||
|
||||
/// Reports whether a download is currently recorded for the given model.
///
/// Looks the model's name up in `parakeetDownloadStates`; a missing entry is
/// treated as "not downloading".
///
/// - Parameter model: The Parakeet model to check.
/// - Returns: The recorded state, or `false` when there is no entry.
func isParakeetModelDownloading(_ model: ParakeetModel) -> Bool {
    guard let isDownloading = parakeetDownloadStates[model.name] else {
        return false
    }
    return isDownloading
}
|
||||
|
||||
@MainActor
|
||||
func downloadParakeetModel() async {
|
||||
if isParakeetModelDownloaded {
|
||||
func downloadParakeetModel(_ model: ParakeetModel) async {
|
||||
if isParakeetModelDownloaded(model) {
|
||||
return
|
||||
}
|
||||
|
||||
isDownloadingParakeet = true
|
||||
downloadProgress["parakeet-tdt-0.6b"] = 0.0
|
||||
let modelName = model.name
|
||||
parakeetDownloadStates[modelName] = true
|
||||
downloadProgress[modelName] = 0.0
|
||||
|
||||
// Start progress simulation
|
||||
let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in
|
||||
Task { @MainActor in
|
||||
if let currentProgress = self.downloadProgress["parakeet-tdt-0.6b"], currentProgress < 0.9 {
|
||||
self.downloadProgress["parakeet-tdt-0.6b"] = currentProgress + 0.0125
|
||||
if let currentProgress = self.downloadProgress[modelName], currentProgress < 0.9 {
|
||||
self.downloadProgress[modelName] = currentProgress + 0.005
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
|
||||
let version = parakeetVersion(for: modelName)
|
||||
|
||||
// Also download VAD model into the same parent directory as ASR models
|
||||
let parentDir = parakeetModelsDirectory.deletingLastPathComponent()
|
||||
_ = try await DownloadUtils.loadModels(
|
||||
.vad,
|
||||
modelNames: Array(ModelNames.VAD.requiredModels),
|
||||
directory: parentDir
|
||||
)
|
||||
self.isParakeetModelDownloaded = true
|
||||
downloadProgress["parakeet-tdt-0.6b"] = 1.0
|
||||
do {
|
||||
_ = try await AsrModels.downloadAndLoad(version: version)
|
||||
|
||||
_ = try await VadManager()
|
||||
|
||||
UserDefaults.standard.set(true, forKey: parakeetDefaultsKey(for: modelName))
|
||||
downloadProgress[modelName] = 1.0
|
||||
} catch {
|
||||
self.isParakeetModelDownloaded = false
|
||||
UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: modelName))
|
||||
}
|
||||
|
||||
|
||||
timer.invalidate()
|
||||
isDownloadingParakeet = false
|
||||
downloadProgress["parakeet-tdt-0.6b"] = nil
|
||||
|
||||
parakeetDownloadStates[modelName] = false
|
||||
downloadProgress[modelName] = nil
|
||||
|
||||
refreshAllAvailableModels()
|
||||
}
|
||||
|
||||
|
||||
@MainActor
|
||||
func deleteParakeetModel() {
|
||||
if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet {
|
||||
func deleteParakeetModel(_ model: ParakeetModel) {
|
||||
if let currentModel = currentTranscriptionModel,
|
||||
currentModel.provider == .parakeet,
|
||||
currentModel.name == model.name {
|
||||
currentTranscriptionModel = nil
|
||||
UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
|
||||
}
|
||||
|
||||
|
||||
let version = parakeetVersion(for: model.name)
|
||||
let cacheDirectory = parakeetCacheDirectory(for: version)
|
||||
|
||||
do {
|
||||
// First try: app support directory + bundle path
|
||||
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
|
||||
try FileManager.default.removeItem(at: parakeetModelDirectory)
|
||||
} else {
|
||||
// Second try: root of application support directory
|
||||
let rootAppSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: rootParakeetModelDirectory.path) {
|
||||
try FileManager.default.removeItem(at: rootParakeetModelDirectory)
|
||||
}
|
||||
if FileManager.default.fileExists(atPath: cacheDirectory.path) {
|
||||
try FileManager.default.removeItem(at: cacheDirectory)
|
||||
}
|
||||
|
||||
self.isParakeetModelDownloaded = false
|
||||
|
||||
UserDefaults.standard.set(false, forKey: parakeetDefaultsKey(for: model.name))
|
||||
} catch {
|
||||
// Silently fail
|
||||
// Silently ignore removal errors
|
||||
}
|
||||
|
||||
|
||||
refreshAllAvailableModels()
|
||||
}
|
||||
|
||||
|
||||
@MainActor
|
||||
func showParakeetModelInFinder() {
|
||||
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
|
||||
NSWorkspace.shared.selectFile(parakeetModelDirectory.path, inFileViewerRootedAtPath: "")
|
||||
/// Reveals the model's cache directory in Finder, if that directory exists.
///
/// The directory is resolved from the model's name via `parakeetVersion(for:)`
/// and `parakeetCacheDirectory(for:)`. Nothing happens when no item exists at
/// that path.
///
/// - Parameter model: The Parakeet model whose files should be shown.
func showParakeetModelInFinder(_ model: ParakeetModel) {
    let modelVersion = parakeetVersion(for: model.name)
    let modelDirectory = parakeetCacheDirectory(for: modelVersion)

    guard FileManager.default.fileExists(atPath: modelDirectory.path) else {
        return
    }
    NSWorkspace.shared.selectFile(modelDirectory.path, inFileViewerRootedAtPath: "")
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -59,7 +59,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
private var localTranscriptionService: LocalTranscriptionService!
|
||||
private lazy var cloudTranscriptionService = CloudTranscriptionService()
|
||||
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
|
||||
internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory)
|
||||
internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
|
||||
|
||||
private var modelUrl: URL? {
|
||||
let possibleURLs = [
|
||||
@ -82,7 +82,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
let modelsDirectory: URL
|
||||
let recordingsDirectory: URL
|
||||
let parakeetModelsDirectory: URL
|
||||
let enhancementService: AIEnhancementService?
|
||||
var licenseViewModel: LicenseViewModel
|
||||
let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
|
||||
@ -91,7 +90,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
// For model progress tracking
|
||||
@Published var downloadProgress: [String: Double] = [:]
|
||||
@Published var isDownloadingParakeet = false
|
||||
@Published var parakeetDownloadStates: [String: Bool] = [:]
|
||||
|
||||
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
|
||||
self.modelContext = modelContext
|
||||
@ -100,7 +99,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
|
||||
self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
|
||||
self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels")
|
||||
|
||||
self.enhancementService = enhancementService
|
||||
self.licenseViewModel = LicenseViewModel()
|
||||
@ -200,8 +198,8 @@ class WhisperState: NSObject, ObservableObject {
|
||||
self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
} else if let model = self.currentTranscriptionModel, model.provider == .parakeet {
|
||||
try? await self.parakeetTranscriptionService.loadModel()
|
||||
} else if let parakeetModel = self.currentTranscriptionModel as? ParakeetModel {
|
||||
try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel)
|
||||
}
|
||||
|
||||
if let enhancementService = self.enhancementService {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user