Experimental new models

This commit is contained in:
Beingpax 2025-08-01 17:26:08 +05:45
parent b42ae5049a
commit d09a9fba7f
19 changed files with 448 additions and 23 deletions

View File

@ -7,13 +7,14 @@
objects = {
/* Begin PBXBuildFile section */
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1304F732E3B9E8A0001F9E2 /* FluidAudio */; };
E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; };
E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; };
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; };
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */ = {isa = PBXBuildFile; productRef = E1F5FA792DA6CBF900B1FD8A /* Zip */; };
/* End PBXBuildFile section */
@ -41,7 +42,7 @@
dstPath = "";
dstSubfolderSpec = 10;
files = (
E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */,
E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */,
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */,
);
name = "Embed Frameworks";
@ -53,6 +54,7 @@
E11473B02CBE0F0A00318EE4 /* VoiceInk.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = VoiceInk.app; sourceTree = BUILT_PRODUCTS_DIR; };
E11473C32CBE0F0B00318EE4 /* VoiceInkTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
E11473CD2CBE0F0B00318EE4 /* VoiceInkUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../Downloads/build-apple/whisper.xcframework"; sourceTree = "<group>"; };
E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../whisper.cpp/build-apple/whisper.xcframework"; sourceTree = "<group>"; };
/* End PBXFileReference section */
@ -81,9 +83,10 @@
files = (
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */,
E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */,
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */,
E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */,
);
runOnlyForDeploymentPostprocessing = 0;
@ -129,6 +132,7 @@
E114741C2CBE1DE200318EE4 /* Frameworks */ = {
isa = PBXGroup;
children = (
E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */,
E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */,
);
name = Frameworks;
@ -160,6 +164,7 @@
E1ADD45E2CC544F100303ECB /* Sparkle */,
E1F5FA792DA6CBF900B1FD8A /* Zip */,
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
E1304F732E3B9E8A0001F9E2 /* FluidAudio */,
);
productName = VoiceInk;
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
@ -249,6 +254,7 @@
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */,
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */,
);
preferredProjectObjectVersion = 77;
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
@ -618,6 +624,14 @@
/* End XCConfigurationList section */
/* Begin XCRemoteSwiftPackageReference section */
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/FluidInference/FluidAudio";
requirement = {
branch = main;
kind = branch;
};
};
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
@ -661,6 +675,11 @@
/* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */
E1304F732E3B9E8A0001F9E2 /* FluidAudio */ = {
isa = XCSwiftPackageProductDependency;
package = E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */;
productName = FluidAudio;
};
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {
isa = XCSwiftPackageProductDependency;
package = E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */;

View File

@ -1,6 +1,15 @@
{
"originHash" : "ef9c2994fdcb030d4d27f817e99251821e662f56f62355a728a019e924262633",
"originHash" : "b78069b2535604c42957e4e3be638514547280f6779f44a2b633aab9602881d9",
"pins" : [
{
"identity" : "fluidaudio",
"kind" : "remoteSourceControl",
"location" : "https://github.com/FluidInference/FluidAudio",
"state" : {
"branch" : "main",
"revision" : "2de87c32c320e2f28839c3a9682bc7bd0ea45be7"
}
},
{
"identity" : "keyboardshortcuts",
"kind" : "remoteSourceControl",

View File

@ -87,8 +87,8 @@ import Foundation
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple)
),
// Fluid Audio Model
FluidAudioModel(
// Parakeet Model
ParakeetModel(
name: "parakeet-tdt-0.6b",
displayName: "Parakeet",
description: "NVIDIA's insanely fast Parakeet model for lightning-fast transcription.",
@ -96,7 +96,7 @@ import Foundation
speed: 0.99,
accuracy: 0.94,
ramUsage: 0.8,
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .fluidAudio)
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
),
// Local Models

View File

@ -3,6 +3,7 @@ import Foundation
// Enum to differentiate between model providers
enum ModelProvider: String, Codable, Hashable, CaseIterable {
case local = "Local"
case parakeet = "Parakeet"
case groq = "Groq"
case elevenLabs = "ElevenLabs"
case deepgram = "Deepgram"
@ -46,6 +47,23 @@ struct NativeAppleModel: TranscriptionModel {
let supportedLanguages: [String: String]
}
// A new struct for Parakeet models (routed to the Parakeet transcription backend).
struct ParakeetModel: TranscriptionModel {
    // Fresh identity per instance; lookups elsewhere match on `name`, not `id`.
    let id = UUID()
    let name: String
    let displayName: String
    let description: String
    // Fixed provider tag so services/UI can switch on `.parakeet`.
    let provider: ModelProvider = .parakeet
    let size: String
    let speed: Double
    let accuracy: Double
    let ramUsage: Double
    // Derived: multilingual when more than one language is advertised.
    var isMultilingualModel: Bool {
        supportedLanguages.count > 1
    }
    let supportedLanguages: [String: String]
}
// A new struct for cloud models
struct CloudModel: TranscriptionModel {
let id: UUID

View File

@ -126,25 +126,23 @@ class ActiveWindowService: ObservableObject {
// Set the new model as default. This works for both local and cloud models.
await whisperState.setDefaultTranscriptionModel(selectedModel)
// The cleanup and load cycle is only necessary for local models.
if selectedModel.provider == ModelProvider.local {
// Unload any previously loaded model to free up memory.
switch selectedModel.provider {
case .local:
await whisperState.cleanupModelResources()
// Load the new local model into memory.
if let localModel = await whisperState.availableModels.first(where: { $0.name == selectedModel.name }) {
do {
try await whisperState.loadModel(localModel)
logger.info("✅ Power Mode: Successfully loaded local model '\(localModel.name)'.")
} catch {
logger.error("❌ Power Mode: Failed to load local model '\(localModel.name)': \(error.localizedDescription)")
}
}
} else {
// For cloud models, no in-memory loading is needed, but we should still
// clean up if the *previous* model was a local one.
case .parakeet:
await whisperState.cleanupModelResources()
default:
await whisperState.cleanupModelResources()
logger.info("✅ Power Mode: Switched to cloud model '\(selectedModel.name)'. No local load needed.")
}
}
}

View File

@ -150,9 +150,16 @@ class Recorder: ObservableObject {
recorder?.stop()
recorder = nil
audioMeter = AudioMeter(averagePower: 0, peakPower: 0)
Task {
// Complete system audio operations first
await mediaController.unmuteSystemAudio()
await playbackController.resumeMedia()
// Then play stop sound on main thread after audio operations are complete
await MainActor.run {
SoundManager.shared.playStopSound()
}
}
deviceManager.isRecordingActive = false
}

Binary file not shown.

View File

@ -21,6 +21,7 @@ class AudioTranscriptionManager: ObservableObject {
private var localTranscriptionService: LocalTranscriptionService?
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private var parakeetTranscriptionService: ParakeetTranscriptionService?
enum ProcessingPhase {
case idle
@ -69,6 +70,11 @@ class AudioTranscriptionManager: ObservableObject {
localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState)
}
// Initialize parakeet transcription service if needed
if parakeetTranscriptionService == nil {
parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
}
// Process audio file
processingPhase = .processingAudio
let samples = try await audioProcessor.processAudioToSamples(url)
@ -96,6 +102,8 @@ class AudioTranscriptionManager: ObservableObject {
switch currentModel.provider {
case .local:
text = try await localTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel)
case .parakeet:
text = try await parakeetTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel)
case .nativeApple:
text = try await nativeAppleTranscriptionService.transcribe(audioURL: permanentURL, model: currentModel)
default: // Cloud models

View File

@ -18,6 +18,7 @@ class AudioTranscriptionService: ObservableObject {
private let localTranscriptionService: LocalTranscriptionService
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
enum TranscriptionError: Error {
case noAudioFile
@ -50,6 +51,8 @@ class AudioTranscriptionService: ObservableObject {
switch model.provider {
case .local:
text = try await localTranscriptionService.transcribe(audioURL: url, model: model)
case .parakeet:
text = try await parakeetTranscriptionService.transcribe(audioURL: url, model: model)
case .nativeApple:
text = try await nativeAppleTranscriptionService.transcribe(audioURL: url, model: model)
default: // Cloud models

View File

@ -0,0 +1,89 @@
import Foundation
import AVFoundation
import FluidAudio
/// Transcription backend for the NVIDIA Parakeet model, driven by FluidAudio's
/// `AsrManager`. The model is loaded on demand and deliberately torn down after
/// every transcription (see the `defer` in `transcribe`) so memory is reclaimed.
class ParakeetTranscriptionService: TranscriptionService {
    private var asrManager: AsrManager?
    /// Optional override for where FluidAudio downloads/loads the model files.
    private let customModelsDirectory: URL?
    /// Observable load state; flips back to `false` after each transcription.
    @Published var isModelLoaded = false

    init(customModelsDirectory: URL? = nil) {
        self.customModelsDirectory = customModelsDirectory
    }

    /// Downloads (if needed) and initializes the Parakeet ASR models.
    /// - Throws: Any error from FluidAudio's download or initialization.
    func loadModel() async throws {
        if isModelLoaded {
            return
        }
        let asrConfig = ASRConfig(
            maxSymbolsPerFrame: 3,
            realtimeMode: true,
            chunkSizeMs: 1500,
            tdtConfig: TdtConfig(
                durations: [0, 1, 2, 3, 4],
                maxSymbolsPerStep: 3
            )
        )
        asrManager = AsrManager(config: asrConfig)
        let models: AsrModels
        if let customDirectory = customModelsDirectory {
            models = try await AsrModels.downloadAndLoad(to: customDirectory)
        } else {
            models = try await AsrModels.downloadAndLoad()
        }
        try await asrManager?.initialize(models: models)
        isModelLoaded = true
    }

    /// Transcribes the audio file at `audioURL`.
    /// On failure a user-facing notification is posted and "" is returned —
    /// errors are NOT rethrown; callers rely on the empty-string contract.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        do {
            // Always release the ASR manager when we're done, success or failure.
            defer {
                asrManager?.cleanup()
                self.asrManager = nil
                self.isModelLoaded = false
            }
            if !isModelLoaded {
                try await loadModel()
            }
            guard let asrManager = asrManager else {
                throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."])
            }
            let audioSamples = try readAudioSamples(from: audioURL)
            let result = try await asrManager.transcribe(audioSamples)
            // Text formatting is on by default; an explicit user setting disables it.
            if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
                return WhisperTextFormatter.format(result.text)
            }
            return result.text
        } catch {
            let errorMessage = error.localizedDescription
            await MainActor.run {
                NotificationManager.shared.showNotification(
                    title: "Transcription Failed: \(errorMessage)",
                    type: .error
                )
            }
            return ""
        }
    }

    /// Decodes `url` as a WAV with a fixed 44-byte header followed by 16-bit
    /// little-endian PCM, mapping samples to floats in [-1, 1].
    /// NOTE(review): assumes the canonical 44-byte header layout — confirm the
    /// upstream recorder always produces that format.
    ///
    /// Fixes over the previous implementation:
    /// - An odd-sized payload no longer reads past the end of the buffer
    ///   (the old `stride`/`$0..<$0 + 2` slice crashed on a trailing byte).
    /// - Int16 values are assembled byte-by-byte instead of `load(as:)`,
    ///   which requires 2-byte alignment the sliced data could not guarantee.
    private func readAudioSamples(from url: URL) throws -> [Float] {
        let data = try Data(contentsOf: url)
        let headerSize = 44
        // A basic check, assuming a more robust check happens elsewhere.
        guard data.count > headerSize else { return [] }
        // Clamp to an even number of payload bytes so every read is a full sample.
        let end = headerSize + ((data.count - headerSize) / 2) * 2
        var samples: [Float] = []
        samples.reserveCapacity((end - headerSize) / 2)
        data.withUnsafeBytes { (raw: UnsafeRawBufferPointer) in
            var offset = headerSize
            while offset < end {
                let low = UInt16(raw[offset])
                let high = UInt16(raw[offset + 1])
                let sample = Int16(bitPattern: (high << 8) | low)
                // Divide by Int16.max and clamp so Int16.min maps to exactly -1.0.
                samples.append(max(-1.0, min(Float(sample) / 32767.0, 1.0)))
                offset += 2
            }
        }
        return samples
    }
}

View File

@ -20,7 +20,7 @@ class SoundManager {
// Try loading directly from the main bundle
if let startSoundURL = Bundle.main.url(forResource: "recstart", withExtension: "mp3"),
let stopSoundURL = Bundle.main.url(forResource: "pastes", withExtension: "mp3"),
let stopSoundURL = Bundle.main.url(forResource: "recstop", withExtension: "mp3"),
let escSoundURL = Bundle.main.url(forResource: "esc", withExtension: "wav") {
print("Found sounds in main bundle")
try? loadSounds(start: startSoundURL, stop: stopSoundURL, esc: escSoundURL)
@ -49,8 +49,8 @@ class SoundManager {
escSound = try AVAudioPlayer(contentsOf: escURL)
// Set lower volume for all sounds
startSound?.volume = 0.7
stopSound?.volume = 0.7
startSound?.volume = 0.4
stopSound?.volume = 0.4
escSound?.volume = 0.3
// Prepare sounds for instant playback

View File

@ -3,6 +3,7 @@ import AppKit
struct ModelCardRowView: View {
let model: any TranscriptionModel
@ObservedObject var whisperState: WhisperState
let isDownloaded: Bool
let isCurrent: Bool
let downloadProgress: [String: Double]
@ -30,6 +31,13 @@ struct ModelCardRowView: View {
downloadAction: downloadAction
)
}
case .parakeet:
if let parakeetModel = model as? ParakeetModel {
ParakeetModelCardRowView(
model: parakeetModel,
whisperState: whisperState
)
}
case .nativeApple:
if let nativeAppleModel = model as? NativeAppleModel {
NativeAppleModelCardView(

View File

@ -118,6 +118,7 @@ struct ModelManagementView: View {
ForEach(filteredModels, id: \.id) { model in
ModelCardRowView(
model: model,
whisperState: whisperState,
isDownloaded: whisperState.availableModels.contains { $0.name == model.name },
isCurrent: whisperState.currentTranscriptionModel?.name == model.name,
downloadProgress: whisperState.downloadProgress,
@ -190,7 +191,7 @@ struct ModelManagementView: View {
return index1 < index2
}
case .local:
return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple }
return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple || $0.provider == .parakeet }
case .cloud:
let cloudProviders: [ModelProvider] = [.groq, .elevenLabs, .deepgram, .mistral]
return whisperState.allAvailableModels.filter { cloudProviders.contains($0.provider) }

View File

@ -0,0 +1,173 @@
import SwiftUI
import Combine
import AppKit
/// Model-management row for the (experimental) Parakeet model. Unlike the
/// generic model cards, download/current state is read straight off
/// `WhisperState`'s Parakeet-specific members rather than passed in.
struct ParakeetModelCardRowView: View {
    let model: ParakeetModel
    @ObservedObject var whisperState: WhisperState

    // True when this model is the selected default (matched by name).
    var isCurrent: Bool {
        whisperState.currentTranscriptionModel?.name == model.name
    }
    // Download state is tracked globally on WhisperState for Parakeet.
    var isDownloaded: Bool {
        whisperState.isParakeetModelDownloaded
    }
    var isDownloading: Bool {
        whisperState.isDownloadingParakeet
    }

    var body: some View {
        HStack(alignment: .top, spacing: 16) {
            VStack(alignment: .leading, spacing: 6) {
                headerSection
                metadataSection
                descriptionSection
                progressSection
            }
            .frame(maxWidth: .infinity, alignment: .leading)
            actionSection
        }
        .padding(16)
        .background(CardBackground(isSelected: isCurrent, useAccentGradientWhenSelected: isCurrent))
    }

    // Display name, fixed "Experimental" tag, and the status badge.
    private var headerSection: some View {
        HStack(alignment: .firstTextBaseline) {
            Text(model.displayName)
                .font(.system(size: 13, weight: .semibold))
                .foregroundColor(Color(.labelColor))
            Text("Experimental")
                .font(.system(size: 11, weight: .medium))
                .padding(.horizontal, 6)
                .padding(.vertical, 2)
                .background(Capsule().fill(Color.orange.opacity(0.8)))
                .foregroundColor(.white)
            statusBadge
            Spacer()
        }
    }

    // "Default" takes precedence over "Downloaded"; nothing shown otherwise.
    private var statusBadge: some View {
        Group {
            if isCurrent {
                Text("Default")
                    .font(.system(size: 11, weight: .medium))
                    .padding(.horizontal, 6)
                    .padding(.vertical, 2)
                    .background(Capsule().fill(Color.accentColor))
                    .foregroundColor(.white)
            } else if isDownloaded {
                Text("Downloaded")
                    .font(.system(size: 11, weight: .medium))
                    .padding(.horizontal, 6)
                    .padding(.vertical, 2)
                    .background(Capsule().fill(Color(.quaternaryLabelColor)))
                    .foregroundColor(Color(.labelColor))
            }
        }
    }

    // Language, size, and 0–10 dot ratings derived from the model's 0–1 scores.
    private var metadataSection: some View {
        HStack(spacing: 12) {
            Label(model.language, systemImage: "globe")
            Label(model.size, systemImage: "internaldrive")
            HStack(spacing: 3) {
                Text("Speed")
                progressDotsWithNumber(value: model.speed * 10)
            }
            HStack(spacing: 3) {
                Text("Accuracy")
                progressDotsWithNumber(value: model.accuracy * 10)
            }
        }
        .font(.system(size: 11))
        .foregroundColor(Color(.secondaryLabelColor))
        .lineLimit(1)
    }

    private var descriptionSection: some View {
        Text(model.description)
            .font(.system(size: 11))
            .foregroundColor(Color(.secondaryLabelColor))
            .lineLimit(2)
            .fixedSize(horizontal: false, vertical: true)
            .padding(.top, 4)
    }

    // Shown only mid-download; no per-byte progress is available yet.
    private var progressSection: some View {
        Group {
            if isDownloading {
                ProgressView() // Indeterminate for now
                    .progressViewStyle(LinearProgressViewStyle())
                    .frame(maxWidth: 200)
                    .padding(.top, 8)
            }
        }
    }

    // Trailing controls: set-default / download button plus an overflow menu
    // (delete, reveal in Finder) once the model is on disk.
    private var actionSection: some View {
        HStack(spacing: 8) {
            if isCurrent {
                Text("Default Model")
                    .font(.system(size: 12))
                    .foregroundColor(Color(.secondaryLabelColor))
            } else if isDownloaded {
                Button(action: {
                    Task {
                        await whisperState.setDefaultTranscriptionModel(model)
                    }
                }) {
                    Text("Set as Default")
                        .font(.system(size: 12))
                }
                .buttonStyle(.bordered)
                .controlSize(.small)
            } else {
                Button(action: {
                    Task {
                        await whisperState.downloadParakeetModel()
                    }
                }) {
                    HStack(spacing: 4) {
                        Text(isDownloading ? "Downloading..." : "Download")
                        Image(systemName: "arrow.down.circle")
                    }
                    .font(.system(size: 12, weight: .medium))
                    .foregroundColor(.white)
                    .padding(.horizontal, 12)
                    .padding(.vertical, 6)
                    .background(Capsule().fill(Color.accentColor))
                }
                .buttonStyle(.plain)
                .disabled(isDownloading)
            }
            if isDownloaded {
                Menu {
                    Button(action: {
                        whisperState.deleteParakeetModel()
                    }) {
                        Label("Delete Model", systemImage: "trash")
                    }
                    Button {
                        whisperState.showParakeetModelInFinder()
                    } label: {
                        Label("Show in Finder", systemImage: "folder")
                    }
                } label: {
                    Image(systemName: "ellipsis.circle")
                        .font(.system(size: 14))
                }
                .menuStyle(.borderlessButton)
                .menuIndicator(.hidden)
                .frame(width: 20, height: 20)
            }
        }
    }
}

View File

@ -6,6 +6,8 @@ extension WhisperState {
switch model.provider {
case .local:
return availableModels.contains { $0.name == model.name }
case .parakeet:
return isParakeetModelDownloaded
case .nativeApple:
if #available(macOS 26, *) {
return true

View File

@ -0,0 +1,82 @@
import Foundation
import FluidAudio
import AppKit
// MARK: - Parakeet model lifecycle (download / delete / reveal)
extension WhisperState {
    /// Persisted download flag, backed by UserDefaults so it survives relaunches.
    var isParakeetModelDownloaded: Bool {
        get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") }
        set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
    }

    /// Alias for callers that prefer the "-ing" spelling; forwards to the
    /// published `isDownloadingParakeet` property.
    var isParakeetModelDownloading: Bool {
        get { isDownloadingParakeet }
        set { isDownloadingParakeet = newValue }
    }

    /// Every on-disk location the Parakeet model may occupy: the directory we
    /// actually download into (`parakeetModelsDirectory`) plus the two legacy
    /// Application Support locations the old code checked.
    private var parakeetModelCandidateDirectories: [URL] {
        let appSupport = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
        return [
            parakeetModelsDirectory,
            appSupport.appendingPathComponent("com.prakashjoshipax.VoiceInk")
                .appendingPathComponent("parakeet-tdt-0.6b-v2-coreml"),
            appSupport.appendingPathComponent("parakeet-tdt-0.6b-v2-coreml")
        ]
    }

    /// Downloads the Parakeet model into `parakeetModelsDirectory`, updating
    /// the persisted flag and progress map. No-op if already downloaded.
    @MainActor
    func downloadParakeetModel() async {
        if isParakeetModelDownloaded {
            return
        }
        isDownloadingParakeet = true
        downloadProgress["parakeet-tdt-0.6b"] = 0.0
        do {
            _ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
            self.isParakeetModelDownloaded = true
        } catch {
            self.isParakeetModelDownloaded = false
            // Surface the failure instead of swallowing it silently.
            logger.error("❌ Parakeet model download failed: \(error.localizedDescription)")
        }
        isDownloadingParakeet = false
        downloadProgress["parakeet-tdt-0.6b"] = nil
        refreshAllAvailableModels()
    }

    /// Deletes the Parakeet model from disk, clearing it as the default model
    /// first if it is currently selected.
    @MainActor
    func deleteParakeetModel() {
        if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet {
            currentTranscriptionModel = nil
            UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
        }
        do {
            // Remove every known location — crucially including
            // `parakeetModelsDirectory`, which is where `downloadParakeetModel`
            // puts the files; the previous code never cleaned that directory up.
            for directory in parakeetModelCandidateDirectories
            where FileManager.default.fileExists(atPath: directory.path) {
                try FileManager.default.removeItem(at: directory)
            }
            self.isParakeetModelDownloaded = false
        } catch {
            // Best-effort delete, but leave a trace instead of failing silently.
            logger.error("❌ Parakeet model deletion failed: \(error.localizedDescription)")
        }
        refreshAllAvailableModels()
    }

    /// Reveals the downloaded model in Finder, checking each known location
    /// (download target first, then legacy paths).
    @MainActor
    func showParakeetModelInFinder() {
        for directory in parakeetModelCandidateDirectories
        where FileManager.default.fileExists(atPath: directory.path) {
            NSWorkspace.shared.selectFile(directory.path, inFileViewerRootedAtPath: "")
            return
        }
    }
}

View File

@ -62,6 +62,7 @@ class WhisperState: NSObject, ObservableObject {
private var localTranscriptionService: LocalTranscriptionService!
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory)
private var modelUrl: URL? {
let possibleURLs = [
@ -84,6 +85,7 @@ class WhisperState: NSObject, ObservableObject {
let modelsDirectory: URL
let recordingsDirectory: URL
let parakeetModelsDirectory: URL
let enhancementService: AIEnhancementService?
var licenseViewModel: LicenseViewModel
let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
@ -92,6 +94,7 @@ class WhisperState: NSObject, ObservableObject {
// For model progress tracking
@Published var downloadProgress: [String: Double] = [:]
@Published var isDownloadingParakeet = false
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
self.modelContext = modelContext
@ -100,6 +103,7 @@ class WhisperState: NSObject, ObservableObject {
self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels")
self.enhancementService = enhancementService
self.licenseViewModel = LicenseViewModel()
@ -167,10 +171,11 @@ class WhisperState: NSObject, ObservableObject {
await MainActor.run {
self.recordingState = .recording
SoundManager.shared.playStartSound()
}
await ActiveWindowService.shared.applyConfigurationForCurrentApp()
// Only load model if it's a local model and not already loaded
if let model = self.currentTranscriptionModel, model.provider == .local {
if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }),
@ -181,6 +186,8 @@ class WhisperState: NSObject, ObservableObject {
self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
}
}
} else if let model = self.currentTranscriptionModel, model.provider == .parakeet {
try? await parakeetTranscriptionService.loadModel()
}
if let enhancementService = self.enhancementService,
@ -239,6 +246,8 @@ class WhisperState: NSObject, ObservableObject {
switch model.provider {
case .local:
transcriptionService = localTranscriptionService
case .parakeet:
transcriptionService = parakeetTranscriptionService
case .nativeApple:
transcriptionService = nativeAppleTranscriptionService
default:
@ -332,7 +341,6 @@ class WhisperState: NSObject, ObservableObject {
if await checkCancellationAndCleanup() { return }
SoundManager.shared.playStopSound()
DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
CursorPaster.pasteAtCursor(text, shouldPreserveClipboard: !self.isAutoCopyEnabled)