Update app to support Parakeet V3 model
This commit is contained in:
parent
2708cc502a
commit
6a308b81bf
@ -7,12 +7,12 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E10F06082E3F390600F7FBDC /* FluidAudio */; };
|
||||
E17382402E4C7D0E001BAEBE /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; };
|
||||
E17382412E4C7D0E001BAEBE /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
|
||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
|
||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
|
||||
E1C550882E5C391D00823A34 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1C550872E5C391D00823A34 /* FluidAudio */; };
|
||||
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; };
|
||||
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
|
||||
E1ECEC162E44591300DFFBA8 /* Zip in Frameworks */ = {isa = PBXBuildFile; productRef = E1ECEC152E44591300DFFBA8 /* Zip */; };
|
||||
@ -83,9 +83,9 @@
|
||||
files = (
|
||||
E1ECEC162E44591300DFFBA8 /* Zip in Frameworks */,
|
||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
|
||||
E1C550882E5C391D00823A34 /* FluidAudio in Frameworks */,
|
||||
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
|
||||
E17382402E4C7D0E001BAEBE /* whisper.xcframework in Frameworks */,
|
||||
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */,
|
||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
|
||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
|
||||
);
|
||||
@ -163,8 +163,8 @@
|
||||
E1ADD4592CC5352A00303ECB /* LaunchAtLogin */,
|
||||
E1ADD45E2CC544F100303ECB /* Sparkle */,
|
||||
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
|
||||
E10F06082E3F390600F7FBDC /* FluidAudio */,
|
||||
E1ECEC152E44591300DFFBA8 /* Zip */,
|
||||
E1C550872E5C391D00823A34 /* FluidAudio */,
|
||||
);
|
||||
productName = VoiceInk;
|
||||
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
|
||||
@ -253,8 +253,8 @@
|
||||
E1ADD4582CC5352A00303ECB /* XCRemoteSwiftPackageReference "LaunchAtLogin-Modern" */,
|
||||
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
|
||||
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
|
||||
E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */,
|
||||
E1ECEC142E44590200DFFBA8 /* XCRemoteSwiftPackageReference "Zip" */,
|
||||
E1C550862E5C391D00823A34 /* XCLocalSwiftPackageReference "../FluidAudio" */,
|
||||
);
|
||||
preferredProjectObjectVersion = 77;
|
||||
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
|
||||
@ -626,15 +626,14 @@
|
||||
};
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/FluidInference/FluidAudio";
|
||||
requirement = {
|
||||
branch = main;
|
||||
kind = branch;
|
||||
};
|
||||
/* Begin XCLocalSwiftPackageReference section */
|
||||
E1C550862E5C391D00823A34 /* XCLocalSwiftPackageReference "../FluidAudio" */ = {
|
||||
isa = XCLocalSwiftPackageReference;
|
||||
relativePath = ../FluidAudio;
|
||||
};
|
||||
/* End XCLocalSwiftPackageReference section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
|
||||
@ -678,11 +677,6 @@
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
E10F06082E3F390600F7FBDC /* FluidAudio */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */;
|
||||
productName = FluidAudio;
|
||||
};
|
||||
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */;
|
||||
@ -698,6 +692,10 @@
|
||||
package = E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */;
|
||||
productName = Sparkle;
|
||||
};
|
||||
E1C550872E5C391D00823A34 /* FluidAudio */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
productName = FluidAudio;
|
||||
};
|
||||
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */;
|
||||
|
||||
@ -1,15 +1,6 @@
|
||||
{
|
||||
"originHash" : "0b9379abd19d2f53581c233273d09235e935a8d2b1180cf253dd69baa2784b39",
|
||||
"originHash" : "9616310154c7e55deebbc79c5d81e757c482b0338de08ed6e0c7d6522a9d34e9",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "fluidaudio",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/FluidInference/FluidAudio",
|
||||
"state" : {
|
||||
"branch" : "main",
|
||||
"revision" : "2a3d6a948cb332b3fd8ae479a9942e33ade2cc9e"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "keyboardshortcuts",
|
||||
"kind" : "remoteSourceControl",
|
||||
|
||||
@ -90,13 +90,13 @@ import Foundation
|
||||
// Parakeet Model
|
||||
ParakeetModel(
|
||||
name: "parakeet-tdt-0.6b",
|
||||
displayName: "Parakeet",
|
||||
description: "NVIDIA's ASR model for lightning-fast english transcription.",
|
||||
size: "600 MB",
|
||||
displayName: "Parakeet V3",
|
||||
description: "NVIDIA's ASR model V3 for lightning-fast multilingual transcription with multi-lingual(English + European) support.",
|
||||
size: "500 MB",
|
||||
speed: 0.99,
|
||||
accuracy: 0.94,
|
||||
ramUsage: 0.8,
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .parakeet)
|
||||
),
|
||||
|
||||
// Local Models
|
||||
|
||||
@ -41,6 +41,15 @@ struct ConfigurationView: View {
|
||||
// State for prompt editing (similar to EnhancementSettingsView)
|
||||
@State private var isEditingPrompt = false
|
||||
@State private var selectedPromptForEdit: CustomPrompt?
|
||||
|
||||
/// Reports whether manual language selection is disabled for the effective model.
///
/// Resolves the model whose `name` equals `effectiveModelName` from
/// `whisperState.allAvailableModels`.
///
/// - Returns: `true` when the resolved model's provider is `.parakeet` or
///   `.gemini`; `false` otherwise, including when no effective model name is
///   set or no available model carries that name.
private func languageSelectionDisabled() -> Bool {
    // Without a model name there is nothing to look up.
    guard let targetName = effectiveModelName else { return false }

    let resolved = whisperState.allAvailableModels.first { candidate in
        candidate.name == targetName
    }
    guard let match = resolved else { return false }

    let provider = match.provider
    if provider == .parakeet {
        return true
    }
    return provider == .gemini
}
|
||||
|
||||
// Whisper state for model selection
|
||||
@EnvironmentObject private var whisperState: WhisperState
|
||||
@ -376,9 +385,21 @@ struct ConfigurationView: View {
|
||||
}
|
||||
}
|
||||
|
||||
if let selectedModel = effectiveModelName,
|
||||
let modelInfo = whisperState.allAvailableModels.first(where: { $0.name == selectedModel }),
|
||||
modelInfo.isMultilingualModel {
|
||||
if languageSelectionDisabled() {
|
||||
HStack {
|
||||
Text("Language")
|
||||
.font(.subheadline)
|
||||
.foregroundColor(.secondary)
|
||||
|
||||
Text("Autodetected")
|
||||
.font(.subheadline)
|
||||
.foregroundColor(.secondary)
|
||||
|
||||
Spacer()
|
||||
}
|
||||
} else if let selectedModel = effectiveModelName,
|
||||
let modelInfo = whisperState.allAvailableModels.first(where: { $0.name == selectedModel }),
|
||||
modelInfo.isMultilingualModel {
|
||||
|
||||
let languageBinding = Binding<String?>(
|
||||
get: {
|
||||
|
||||
@ -26,15 +26,8 @@ class ParakeetTranscriptionService: TranscriptionService {
|
||||
logger.notice("🦜 Starting Parakeet model loading")
|
||||
|
||||
do {
|
||||
let asrConfig = ASRConfig(
|
||||
maxSymbolsPerFrame: 3,
|
||||
realtimeMode: true,
|
||||
chunkSizeMs: 1500,
|
||||
tdtConfig: TdtConfig(
|
||||
durations: [0, 1, 2, 3, 4],
|
||||
maxSymbolsPerStep: 3
|
||||
)
|
||||
)
|
||||
let tdtConfig = TdtConfig(maxSymbolsPerStep: 3, durationBins: [0, 1, 2, 3, 4])
|
||||
let asrConfig = ASRConfig(tdtConfig: tdtConfig)
|
||||
asrManager = AsrManager(config: asrConfig)
|
||||
|
||||
let models: AsrModels
|
||||
@ -78,6 +71,9 @@ class ParakeetTranscriptionService: TranscriptionService {
|
||||
throw ASRError.notInitialized
|
||||
}
|
||||
|
||||
// Reset the decoder state before each transcription to ensure no state leaks from previous runs
|
||||
try await asrManager.resetDecoderState(for: .microphone)
|
||||
|
||||
let audioSamples = try readAudioSamples(from: audioURL)
|
||||
|
||||
// Validate audio data before transcription
|
||||
@ -110,7 +106,7 @@ class ParakeetTranscriptionService: TranscriptionService {
|
||||
let data = try Data(contentsOf: url)
|
||||
|
||||
// Check minimum file size for valid WAV header
|
||||
guard data.count > 44 else {
|
||||
guard data.count > 44 else {
|
||||
logger.notice("🦜 Audio file too small (\(data.count) bytes), expected > 44 bytes")
|
||||
throw ASRError.invalidAudioData
|
||||
}
|
||||
@ -129,4 +125,4 @@ class ParakeetTranscriptionService: TranscriptionService {
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@ -33,6 +33,13 @@ struct LanguageSelectionView: View {
|
||||
return currentModel.isMultilingualModel
|
||||
}
|
||||
|
||||
/// Reports whether manual language selection is disabled for the current
/// transcription model.
///
/// - Returns: `true` when `whisperState.currentTranscriptionModel` exists and
///   its provider is `.parakeet` or `.gemini`; `false` otherwise, including
///   when no current model is set.
private func languageSelectionDisabled() -> Bool {
    if let activeProvider = whisperState.currentTranscriptionModel?.provider {
        return activeProvider == .parakeet || activeProvider == .gemini
    }
    // No current model: selection is not disabled.
    return false
}
|
||||
|
||||
// Function to get current model's supported languages
|
||||
private func getCurrentModelLanguages() -> [String: String] {
|
||||
guard let currentModel = whisperState.currentTranscriptionModel else {
|
||||
@ -69,7 +76,22 @@ struct LanguageSelectionView: View {
|
||||
|
||||
if let currentModel = whisperState.currentTranscriptionModel
|
||||
{
|
||||
if isMultilingualModel() {
|
||||
if languageSelectionDisabled() {
|
||||
VStack(alignment: .leading, spacing: 8) {
|
||||
Text("Language: Autodetected")
|
||||
.font(.subheadline)
|
||||
.foregroundColor(.primary)
|
||||
|
||||
Text("Current model: \(currentModel.displayName)")
|
||||
.font(.caption)
|
||||
.foregroundColor(.secondary)
|
||||
|
||||
Text("The transcription language is automatically detected by the model.")
|
||||
.font(.caption)
|
||||
.foregroundColor(.secondary)
|
||||
}
|
||||
.disabled(true)
|
||||
} else if isMultilingualModel() {
|
||||
VStack(alignment: .leading, spacing: 8) {
|
||||
Picker("Select Language", selection: $selectedLanguage) {
|
||||
ForEach(
|
||||
@ -134,7 +156,15 @@ struct LanguageSelectionView: View {
|
||||
// New compact view for menu bar
|
||||
private var menuItemView: some View {
|
||||
Group {
|
||||
if isMultilingualModel() {
|
||||
if languageSelectionDisabled() {
|
||||
Button {
|
||||
// Do nothing, just showing info
|
||||
} label: {
|
||||
Text("Language: Autodetected")
|
||||
.foregroundColor(.secondary)
|
||||
}
|
||||
.disabled(true)
|
||||
} else if isMultilingualModel() {
|
||||
Menu {
|
||||
ForEach(
|
||||
getCurrentModelLanguages().sorted(by: {
|
||||
|
||||
@ -104,7 +104,8 @@ struct ParakeetModelCardRowView: View {
|
||||
private var progressSection: some View {
|
||||
Group {
|
||||
if isDownloading {
|
||||
ProgressView() // Indeterminate for now
|
||||
let progress = whisperState.downloadProgress["parakeet-tdt-0.6b"] ?? 0.0
|
||||
ProgressView(value: progress)
|
||||
.progressViewStyle(LinearProgressViewStyle())
|
||||
.frame(maxWidth: .infinity, alignment: .leading)
|
||||
.padding(.top, 8)
|
||||
|
||||
@ -22,13 +22,24 @@ extension WhisperState {
|
||||
isDownloadingParakeet = true
|
||||
downloadProgress["parakeet-tdt-0.6b"] = 0.0
|
||||
|
||||
// Start progress simulation
|
||||
let timer = Timer.scheduledTimer(withTimeInterval: 1.2, repeats: true) { timer in
|
||||
Task { @MainActor in
|
||||
if let currentProgress = self.downloadProgress["parakeet-tdt-0.6b"], currentProgress < 0.9 {
|
||||
self.downloadProgress["parakeet-tdt-0.6b"] = currentProgress + 0.0125
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
do {
|
||||
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
|
||||
self.isParakeetModelDownloaded = true
|
||||
downloadProgress["parakeet-tdt-0.6b"] = 1.0
|
||||
} catch {
|
||||
self.isParakeetModelDownloaded = false
|
||||
}
|
||||
|
||||
timer.invalidate()
|
||||
isDownloadingParakeet = false
|
||||
downloadProgress["parakeet-tdt-0.6b"] = nil
|
||||
|
||||
@ -46,14 +57,14 @@ extension WhisperState {
|
||||
// First try: app support directory + bundle path
|
||||
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v2-coreml")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
|
||||
try FileManager.default.removeItem(at: parakeetModelDirectory)
|
||||
} else {
|
||||
// Second try: root of application support directory
|
||||
let rootAppSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v2-coreml")
|
||||
let rootParakeetModelDirectory = rootAppSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: rootParakeetModelDirectory.path) {
|
||||
try FileManager.default.removeItem(at: rootParakeetModelDirectory)
|
||||
@ -73,7 +84,7 @@ extension WhisperState {
|
||||
func showParakeetModelInFinder() {
|
||||
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v2-coreml")
|
||||
let parakeetModelDirectory = appSupportDirectory.appendingPathComponent("parakeet-tdt-0.6b-v3-coreml")
|
||||
|
||||
if FileManager.default.fileExists(atPath: parakeetModelDirectory.path) {
|
||||
NSWorkspace.shared.selectFile(parakeetModelDirectory.path, inFileViewerRootedAtPath: "")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user