Experimental new models
This commit is contained in:
parent
b42ae5049a
commit
d09a9fba7f
@ -7,13 +7,14 @@
|
||||
objects = {
|
||||
|
||||
/* Begin PBXBuildFile section */
|
||||
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1304F732E3B9E8A0001F9E2 /* FluidAudio */; };
|
||||
E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; };
|
||||
E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
|
||||
E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; };
|
||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
|
||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
|
||||
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; };
|
||||
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
|
||||
E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
|
||||
E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */ = {isa = PBXBuildFile; productRef = E1F5FA792DA6CBF900B1FD8A /* Zip */; };
|
||||
/* End PBXBuildFile section */
|
||||
|
||||
@ -41,7 +42,7 @@
|
||||
dstPath = "";
|
||||
dstSubfolderSpec = 10;
|
||||
files = (
|
||||
E1E0B9622E3133EF00C10E20 /* whisper.xcframework in Embed Frameworks */,
|
||||
E1304F852E3BB2FF0001F9E2 /* whisper.xcframework in Embed Frameworks */,
|
||||
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */,
|
||||
);
|
||||
name = "Embed Frameworks";
|
||||
@ -53,6 +54,7 @@
|
||||
E11473B02CBE0F0A00318EE4 /* VoiceInk.app */ = {isa = PBXFileReference; explicitFileType = wrapper.application; includeInIndex = 0; path = VoiceInk.app; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
E11473C32CBE0F0B00318EE4 /* VoiceInkTests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkTests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
E11473CD2CBE0F0B00318EE4 /* VoiceInkUITests.xctest */ = {isa = PBXFileReference; explicitFileType = wrapper.cfbundle; includeInIndex = 0; path = VoiceInkUITests.xctest; sourceTree = BUILT_PRODUCTS_DIR; };
|
||||
E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../Downloads/build-apple/whisper.xcframework"; sourceTree = "<group>"; };
|
||||
E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */ = {isa = PBXFileReference; lastKnownFileType = wrapper.xcframework; name = whisper.xcframework; path = "../whisper.cpp/build-apple/whisper.xcframework"; sourceTree = "<group>"; };
|
||||
/* End PBXFileReference section */
|
||||
|
||||
@ -81,9 +83,10 @@
|
||||
files = (
|
||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
|
||||
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
|
||||
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */,
|
||||
E1304F842E3BB2FF0001F9E2 /* whisper.xcframework in Frameworks */,
|
||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
|
||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
|
||||
E1A8C8CB2E1257B7003E58EC /* whisper.xcframework in Frameworks */,
|
||||
E1F5FA7A2DA6CBF900B1FD8A /* Zip in Frameworks */,
|
||||
);
|
||||
runOnlyForDeploymentPostprocessing = 0;
|
||||
@ -129,6 +132,7 @@
|
||||
E114741C2CBE1DE200318EE4 /* Frameworks */ = {
|
||||
isa = PBXGroup;
|
||||
children = (
|
||||
E1304F832E3BB2FF0001F9E2 /* whisper.xcframework */,
|
||||
E1A8C8CA2E1257B7003E58EC /* whisper.xcframework */,
|
||||
);
|
||||
name = Frameworks;
|
||||
@ -160,6 +164,7 @@
|
||||
E1ADD45E2CC544F100303ECB /* Sparkle */,
|
||||
E1F5FA792DA6CBF900B1FD8A /* Zip */,
|
||||
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
|
||||
E1304F732E3B9E8A0001F9E2 /* FluidAudio */,
|
||||
);
|
||||
productName = VoiceInk;
|
||||
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
|
||||
@ -249,6 +254,7 @@
|
||||
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
|
||||
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */,
|
||||
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
|
||||
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */,
|
||||
);
|
||||
preferredProjectObjectVersion = 77;
|
||||
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
|
||||
@ -618,6 +624,14 @@
|
||||
/* End XCConfigurationList section */
|
||||
|
||||
/* Begin XCRemoteSwiftPackageReference section */
|
||||
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/FluidInference/FluidAudio";
|
||||
requirement = {
|
||||
branch = main;
|
||||
kind = branch;
|
||||
};
|
||||
};
|
||||
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
|
||||
isa = XCRemoteSwiftPackageReference;
|
||||
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
|
||||
@ -661,6 +675,11 @@
|
||||
/* End XCRemoteSwiftPackageReference section */
|
||||
|
||||
/* Begin XCSwiftPackageProductDependency section */
|
||||
E1304F732E3B9E8A0001F9E2 /* FluidAudio */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */;
|
||||
productName = FluidAudio;
|
||||
};
|
||||
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {
|
||||
isa = XCSwiftPackageProductDependency;
|
||||
package = E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */;
|
||||
|
||||
@ -1,6 +1,15 @@
|
||||
{
|
||||
"originHash" : "ef9c2994fdcb030d4d27f817e99251821e662f56f62355a728a019e924262633",
|
||||
"originHash" : "b78069b2535604c42957e4e3be638514547280f6779f44a2b633aab9602881d9",
|
||||
"pins" : [
|
||||
{
|
||||
"identity" : "fluidaudio",
|
||||
"kind" : "remoteSourceControl",
|
||||
"location" : "https://github.com/FluidInference/FluidAudio",
|
||||
"state" : {
|
||||
"branch" : "main",
|
||||
"revision" : "2de87c32c320e2f28839c3a9682bc7bd0ea45be7"
|
||||
}
|
||||
},
|
||||
{
|
||||
"identity" : "keyboardshortcuts",
|
||||
"kind" : "remoteSourceControl",
|
||||
|
||||
@ -87,8 +87,8 @@ import Foundation
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .nativeApple)
|
||||
),
|
||||
|
||||
// Fluid Audio Model
|
||||
FluidAudioModel(
|
||||
// Parakeet Model
|
||||
ParakeetModel(
|
||||
name: "parakeet-tdt-0.6b",
|
||||
displayName: "Parakeet",
|
||||
description: "NVIDIA's insanely fast Parakeet model for lightning-fast transcription.",
|
||||
@ -96,7 +96,7 @@ import Foundation
|
||||
speed: 0.99,
|
||||
accuracy: 0.94,
|
||||
ramUsage: 0.8,
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .fluidAudio)
|
||||
supportedLanguages: getLanguageDictionary(isMultilingual: false, provider: .parakeet)
|
||||
),
|
||||
|
||||
// Local Models
|
||||
|
||||
@ -3,6 +3,7 @@ import Foundation
|
||||
// Enum to differentiate between model providers
|
||||
enum ModelProvider: String, Codable, Hashable, CaseIterable {
|
||||
case local = "Local"
|
||||
case parakeet = "Parakeet"
|
||||
case groq = "Groq"
|
||||
case elevenLabs = "ElevenLabs"
|
||||
case deepgram = "Deepgram"
|
||||
@ -46,6 +47,23 @@ struct NativeAppleModel: TranscriptionModel {
|
||||
let supportedLanguages: [String: String]
|
||||
}
|
||||
|
||||
// A new struct for Parakeet models
|
||||
/// Describes a Parakeet transcription model entry.
///
/// The provider is fixed to `.parakeet` and every instance receives a fresh
/// `UUID`, so neither value is part of the memberwise initializer.
struct ParakeetModel: TranscriptionModel {
    /// Identifier generated when the value is created.
    let id: UUID = UUID()
    /// Internal model name (e.g. used to compare against the current model).
    let name: String
    /// Name shown to the user.
    let displayName: String
    /// Short user-facing description of the model.
    let description: String
    /// Always `.parakeet` for this model type.
    let provider: ModelProvider = .parakeet
    /// Human-readable size string.
    let size: String
    /// Relative speed rating — presumably on a 0...1 scale; confirm against callers.
    let speed: Double
    /// Relative accuracy rating — presumably on a 0...1 scale; confirm against callers.
    let accuracy: Double
    /// RAM usage figure; unit is not visible here.
    let ramUsage: Double
    /// Languages supported by the model, as a string-to-string dictionary.
    let supportedLanguages: [String: String]

    /// `true` when more than one supported language is listed.
    var isMultilingualModel: Bool {
        return supportedLanguages.count > 1
    }
}
|
||||
|
||||
// A new struct for cloud models
|
||||
struct CloudModel: TranscriptionModel {
|
||||
let id: UUID
|
||||
|
||||
@ -126,25 +126,23 @@ class ActiveWindowService: ObservableObject {
|
||||
// Set the new model as default. This works for both local and cloud models.
|
||||
await whisperState.setDefaultTranscriptionModel(selectedModel)
|
||||
|
||||
// The cleanup and load cycle is only necessary for local models.
|
||||
if selectedModel.provider == ModelProvider.local {
|
||||
// Unload any previously loaded model to free up memory.
|
||||
switch selectedModel.provider {
|
||||
case .local:
|
||||
await whisperState.cleanupModelResources()
|
||||
|
||||
// Load the new local model into memory.
|
||||
if let localModel = await whisperState.availableModels.first(where: { $0.name == selectedModel.name }) {
|
||||
do {
|
||||
try await whisperState.loadModel(localModel)
|
||||
logger.info("✅ Power Mode: Successfully loaded local model '\(localModel.name)'.")
|
||||
} catch {
|
||||
logger.error("❌ Power Mode: Failed to load local model '\(localModel.name)': \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
} else {
|
||||
// For cloud models, no in-memory loading is needed, but we should still
|
||||
// clean up if the *previous* model was a local one.
|
||||
|
||||
case .parakeet:
|
||||
await whisperState.cleanupModelResources()
|
||||
|
||||
default:
|
||||
await whisperState.cleanupModelResources()
|
||||
logger.info("✅ Power Mode: Switched to cloud model '\(selectedModel.name)'. No local load needed.")
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -150,9 +150,16 @@ class Recorder: ObservableObject {
|
||||
recorder?.stop()
|
||||
recorder = nil
|
||||
audioMeter = AudioMeter(averagePower: 0, peakPower: 0)
|
||||
|
||||
Task {
|
||||
// Complete system audio operations first
|
||||
await mediaController.unmuteSystemAudio()
|
||||
await playbackController.resumeMedia()
|
||||
|
||||
// Then play stop sound on main thread after audio operations are complete
|
||||
await MainActor.run {
|
||||
SoundManager.shared.playStopSound()
|
||||
}
|
||||
}
|
||||
deviceManager.isRecordingActive = false
|
||||
}
|
||||
|
||||
Binary file not shown.
BIN
VoiceInk/Resources/Sounds/recstop.mp3
Executable file
BIN
VoiceInk/Resources/Sounds/recstop.mp3
Executable file
Binary file not shown.
@ -21,6 +21,7 @@ class AudioTranscriptionManager: ObservableObject {
|
||||
private var localTranscriptionService: LocalTranscriptionService?
|
||||
private lazy var cloudTranscriptionService = CloudTranscriptionService()
|
||||
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
|
||||
private var parakeetTranscriptionService: ParakeetTranscriptionService?
|
||||
|
||||
enum ProcessingPhase {
|
||||
case idle
|
||||
@ -69,6 +70,11 @@ class AudioTranscriptionManager: ObservableObject {
|
||||
localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState)
|
||||
}
|
||||
|
||||
// Initialize parakeet transcription service if needed
|
||||
if parakeetTranscriptionService == nil {
|
||||
parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
|
||||
}
|
||||
|
||||
// Process audio file
|
||||
processingPhase = .processingAudio
|
||||
let samples = try await audioProcessor.processAudioToSamples(url)
|
||||
@ -96,6 +102,8 @@ class AudioTranscriptionManager: ObservableObject {
|
||||
switch currentModel.provider {
|
||||
case .local:
|
||||
text = try await localTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel)
|
||||
case .parakeet:
|
||||
text = try await parakeetTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel)
|
||||
case .nativeApple:
|
||||
text = try await nativeAppleTranscriptionService.transcribe(audioURL: permanentURL, model: currentModel)
|
||||
default: // Cloud models
|
||||
|
||||
@ -18,6 +18,7 @@ class AudioTranscriptionService: ObservableObject {
|
||||
private let localTranscriptionService: LocalTranscriptionService
|
||||
private lazy var cloudTranscriptionService = CloudTranscriptionService()
|
||||
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
|
||||
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: whisperState.parakeetModelsDirectory)
|
||||
|
||||
enum TranscriptionError: Error {
|
||||
case noAudioFile
|
||||
@ -50,6 +51,8 @@ class AudioTranscriptionService: ObservableObject {
|
||||
switch model.provider {
|
||||
case .local:
|
||||
text = try await localTranscriptionService.transcribe(audioURL: url, model: model)
|
||||
case .parakeet:
|
||||
text = try await parakeetTranscriptionService.transcribe(audioURL: url, model: model)
|
||||
case .nativeApple:
|
||||
text = try await nativeAppleTranscriptionService.transcribe(audioURL: url, model: model)
|
||||
default: // Cloud models
|
||||
|
||||
89
VoiceInk/Services/ParakeetTranscriptionService.swift
Normal file
89
VoiceInk/Services/ParakeetTranscriptionService.swift
Normal file
@ -0,0 +1,89 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
import FluidAudio
|
||||
|
||||
|
||||
|
||||
/// Transcription service that runs audio through FluidAudio's ASR manager.
///
/// The manager is created lazily by `loadModel()` and is released again every
/// time `transcribe(audioURL:model:)` finishes, whether it succeeded or not.
class ParakeetTranscriptionService: TranscriptionService {
    /// Number of leading bytes skipped before PCM samples are read.
    /// NOTE(review): assumes a canonical 44-byte WAV header — confirm the recorder always writes one.
    private static let wavHeaderSize = 44

    private var asrManager: AsrManager?
    private let customModelsDirectory: URL?
    @Published var isModelLoaded = false

    /// - Parameter customModelsDirectory: Optional directory handed to
    ///   `AsrModels.downloadAndLoad(to:)`; when `nil` the library default is used.
    init(customModelsDirectory: URL? = nil) {
        self.customModelsDirectory = customModelsDirectory
    }

    /// Downloads (if needed) and loads the ASR models, then initialises the manager.
    ///
    /// Does nothing when a model is already loaded.
    /// - Throws: Whatever `AsrModels.downloadAndLoad` or `AsrManager.initialize` throws.
    func loadModel() async throws {
        guard !isModelLoaded else { return }

        let asrConfig = ASRConfig(
            maxSymbolsPerFrame: 3,
            realtimeMode: true,
            chunkSizeMs: 1500,
            tdtConfig: TdtConfig(
                durations: [0, 1, 2, 3, 4],
                maxSymbolsPerStep: 3
            )
        )
        let manager = AsrManager(config: asrConfig)

        let models: AsrModels
        if let customDirectory = customModelsDirectory {
            models = try await AsrModels.downloadAndLoad(to: customDirectory)
        } else {
            models = try await AsrModels.downloadAndLoad()
        }
        try await manager.initialize(models: models)

        // Only publish the manager once it is fully initialised, so a failed
        // load never leaves a half-configured manager behind.
        asrManager = manager
        isModelLoaded = true
    }

    /// Transcribes the audio file at `audioURL`.
    ///
    /// Errors raised while loading or transcribing are caught here: they are
    /// shown through `NotificationManager` and an empty string is returned, so
    /// in practice this method does not propagate errors to the caller.
    /// - Parameters:
    ///   - audioURL: Location of the audio file to transcribe.
    ///   - model: The selected transcription model (not read by this implementation).
    /// - Returns: The transcribed text (formatted when the `IsTextFormattingEnabled`
    ///   default is unset or `true`), or `""` on failure.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        do {
            // Runs when the `do` scope exits, before any `catch` body executes.
            defer {
                asrManager?.cleanup()
                self.asrManager = nil
                self.isModelLoaded = false
            }

            if !isModelLoaded {
                try await loadModel()
            }

            guard let manager = asrManager else {
                throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."])
            }

            let audioSamples = try readAudioSamples(from: audioURL)
            let result = try await manager.transcribe(audioSamples)

            if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
                return WhisperTextFormatter.format(result.text)
            }
            return result.text
        } catch {
            let errorMessage = error.localizedDescription
            await MainActor.run {
                NotificationManager.shared.showNotification(
                    title: "Transcription Failed: \(errorMessage)",
                    type: .error
                )
            }
            return ""
        }
    }

    /// Reads the file at `url`, skips the header and converts the remaining
    /// bytes from 16-bit little-endian integers to floats clamped to `-1...1`.
    ///
    /// A trailing odd byte (an incomplete sample) is ignored rather than read
    /// past the end of the buffer.
    /// - Returns: The decoded samples, or an empty array when the file holds no
    ///   complete sample after the header.
    /// - Throws: Any error from reading the file contents.
    private func readAudioSamples(from url: URL) throws -> [Float] {
        let data = try Data(contentsOf: url)
        // A basic check, assuming a more robust check happens elsewhere.
        guard data.count > Self.wavHeaderSize else { return [] }

        let payload = data.dropFirst(Self.wavHeaderSize)
        let sampleCount = payload.count / 2

        return payload.withUnsafeBytes { rawBytes in
            (0..<sampleCount).map { index in
                // Assemble the sample byte by byte: no alignment requirement
                // and no dependence on host endianness.
                let low = UInt16(rawBytes[2 * index])
                let high = UInt16(rawBytes[2 * index + 1])
                let sample = Int16(bitPattern: (high << 8) | low)
                return max(-1.0, min(Float(sample) / 32767.0, 1.0))
            }
        }
    }
}
|
||||
@ -20,7 +20,7 @@ class SoundManager {
|
||||
|
||||
// Try loading directly from the main bundle
|
||||
if let startSoundURL = Bundle.main.url(forResource: "recstart", withExtension: "mp3"),
|
||||
let stopSoundURL = Bundle.main.url(forResource: "pastes", withExtension: "mp3"),
|
||||
let stopSoundURL = Bundle.main.url(forResource: "recstop", withExtension: "mp3"),
|
||||
let escSoundURL = Bundle.main.url(forResource: "esc", withExtension: "wav") {
|
||||
print("Found sounds in main bundle")
|
||||
try? loadSounds(start: startSoundURL, stop: stopSoundURL, esc: escSoundURL)
|
||||
@ -49,8 +49,8 @@ class SoundManager {
|
||||
escSound = try AVAudioPlayer(contentsOf: escURL)
|
||||
|
||||
// Set lower volume for all sounds
|
||||
startSound?.volume = 0.7
|
||||
stopSound?.volume = 0.7
|
||||
startSound?.volume = 0.4
|
||||
stopSound?.volume = 0.4
|
||||
escSound?.volume = 0.3
|
||||
|
||||
// Prepare sounds for instant playback
|
||||
|
||||
@ -3,6 +3,7 @@ import AppKit
|
||||
|
||||
struct ModelCardRowView: View {
|
||||
let model: any TranscriptionModel
|
||||
@ObservedObject var whisperState: WhisperState
|
||||
let isDownloaded: Bool
|
||||
let isCurrent: Bool
|
||||
let downloadProgress: [String: Double]
|
||||
@ -30,6 +31,13 @@ struct ModelCardRowView: View {
|
||||
downloadAction: downloadAction
|
||||
)
|
||||
}
|
||||
case .parakeet:
|
||||
if let parakeetModel = model as? ParakeetModel {
|
||||
ParakeetModelCardRowView(
|
||||
model: parakeetModel,
|
||||
whisperState: whisperState
|
||||
)
|
||||
}
|
||||
case .nativeApple:
|
||||
if let nativeAppleModel = model as? NativeAppleModel {
|
||||
NativeAppleModelCardView(
|
||||
|
||||
@ -118,6 +118,7 @@ struct ModelManagementView: View {
|
||||
ForEach(filteredModels, id: \.id) { model in
|
||||
ModelCardRowView(
|
||||
model: model,
|
||||
whisperState: whisperState,
|
||||
isDownloaded: whisperState.availableModels.contains { $0.name == model.name },
|
||||
isCurrent: whisperState.currentTranscriptionModel?.name == model.name,
|
||||
downloadProgress: whisperState.downloadProgress,
|
||||
@ -190,7 +191,7 @@ struct ModelManagementView: View {
|
||||
return index1 < index2
|
||||
}
|
||||
case .local:
|
||||
return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple }
|
||||
return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple || $0.provider == .parakeet }
|
||||
case .cloud:
|
||||
let cloudProviders: [ModelProvider] = [.groq, .elevenLabs, .deepgram, .mistral]
|
||||
return whisperState.allAvailableModels.filter { cloudProviders.contains($0.provider) }
|
||||
|
||||
173
VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift
Normal file
173
VoiceInk/Views/AI Models/ParakeetModelCardRowView.swift
Normal file
@ -0,0 +1,173 @@
|
||||
import SwiftUI
|
||||
import Combine
|
||||
import AppKit
|
||||
|
||||
/// Card row for a Parakeet model: shows its metadata and offers download,
/// set-as-default, delete and reveal-in-Finder actions through `WhisperState`.
struct ParakeetModelCardRowView: View {
    let model: ParakeetModel
    @ObservedObject var whisperState: WhisperState

    /// Whether this model is the currently selected transcription model.
    var isCurrent: Bool { whisperState.currentTranscriptionModel?.name == model.name }

    /// Whether `WhisperState` reports the Parakeet model as downloaded.
    var isDownloaded: Bool { whisperState.isParakeetModelDownloaded }

    /// Whether a Parakeet download is currently in flight.
    var isDownloading: Bool { whisperState.isDownloadingParakeet }

    var body: some View {
        HStack(alignment: .top, spacing: 16) {
            VStack(alignment: .leading, spacing: 6) {
                headerSection
                metadataSection
                descriptionSection
                progressSection
            }
            .frame(maxWidth: .infinity, alignment: .leading)

            actionSection
        }
        .padding(16)
        .background(CardBackground(isSelected: isCurrent, useAccentGradientWhenSelected: isCurrent))
    }

    // MARK: - Header

    /// Model name followed by the "Experimental" tag and the status badge.
    private var headerSection: some View {
        HStack(alignment: .firstTextBaseline) {
            Text(model.displayName)
                .font(.system(size: 13, weight: .semibold))
                .foregroundColor(Color(.labelColor))

            capsuleBadge("Experimental", fill: Color.orange.opacity(0.8), textColor: .white)

            statusBadge
            Spacer()
        }
    }

    /// "Default" when selected, otherwise "Downloaded" when available, otherwise nothing.
    private var statusBadge: some View {
        Group {
            if isCurrent {
                capsuleBadge("Default", fill: Color.accentColor, textColor: .white)
            } else if isDownloaded {
                capsuleBadge("Downloaded", fill: Color(.quaternaryLabelColor), textColor: Color(.labelColor))
            }
        }
    }

    /// Small capsule-shaped label shared by every badge in the header.
    private func capsuleBadge(_ title: LocalizedStringKey, fill: Color, textColor: Color) -> some View {
        Text(title)
            .font(.system(size: 11, weight: .medium))
            .padding(.horizontal, 6)
            .padding(.vertical, 2)
            .background(Capsule().fill(fill))
            .foregroundColor(textColor)
    }

    // MARK: - Details

    /// Language, size and the speed / accuracy ratings on one line.
    private var metadataSection: some View {
        HStack(spacing: 12) {
            Label(model.language, systemImage: "globe")
            Label(model.size, systemImage: "internaldrive")
            ratingRow("Speed", rating: model.speed)
            ratingRow("Accuracy", rating: model.accuracy)
        }
        .font(.system(size: 11))
        .foregroundColor(Color(.secondaryLabelColor))
        .lineLimit(1)
    }

    /// Caption plus progress dots; the rating is multiplied by ten before display.
    private func ratingRow(_ title: LocalizedStringKey, rating: Double) -> some View {
        HStack(spacing: 3) {
            Text(title)
            progressDotsWithNumber(value: rating * 10)
        }
    }

    /// Model description, limited to two lines.
    private var descriptionSection: some View {
        Text(model.description)
            .font(.system(size: 11))
            .foregroundColor(Color(.secondaryLabelColor))
            .lineLimit(2)
            .fixedSize(horizontal: false, vertical: true)
            .padding(.top, 4)
    }

    /// Linear progress bar shown only while a download is running.
    private var progressSection: some View {
        Group {
            if isDownloading {
                ProgressView() // Indeterminate for now
                    .progressViewStyle(LinearProgressViewStyle())
                    .frame(maxWidth: 200)
                    .padding(.top, 8)
            }
        }
    }

    // MARK: - Actions

    /// Primary action for the current state, plus an overflow menu once downloaded.
    private var actionSection: some View {
        HStack(spacing: 8) {
            primaryAction

            if isDownloaded {
                overflowMenu
            }
        }
    }

    /// Static label when selected, "Set as Default" when downloaded, else the download button.
    @ViewBuilder
    private var primaryAction: some View {
        if isCurrent {
            Text("Default Model")
                .font(.system(size: 12))
                .foregroundColor(Color(.secondaryLabelColor))
        } else if isDownloaded {
            Button {
                Task {
                    await whisperState.setDefaultTranscriptionModel(model)
                }
            } label: {
                Text("Set as Default")
                    .font(.system(size: 12))
            }
            .buttonStyle(.bordered)
            .controlSize(.small)
        } else {
            Button {
                Task {
                    await whisperState.downloadParakeetModel()
                }
            } label: {
                HStack(spacing: 4) {
                    Text(isDownloading ? "Downloading..." : "Download")
                    Image(systemName: "arrow.down.circle")
                }
                .font(.system(size: 12, weight: .medium))
                .foregroundColor(.white)
                .padding(.horizontal, 12)
                .padding(.vertical, 6)
                .background(Capsule().fill(Color.accentColor))
            }
            .buttonStyle(.plain)
            .disabled(isDownloading)
        }
    }

    /// Ellipsis menu offering "Delete Model" and "Show in Finder".
    private var overflowMenu: some View {
        Menu {
            Button {
                whisperState.deleteParakeetModel()
            } label: {
                Label("Delete Model", systemImage: "trash")
            }

            Button {
                whisperState.showParakeetModelInFinder()
            } label: {
                Label("Show in Finder", systemImage: "folder")
            }
        } label: {
            Image(systemName: "ellipsis.circle")
                .font(.system(size: 14))
        }
        .menuStyle(.borderlessButton)
        .menuIndicator(.hidden)
        .frame(width: 20, height: 20)
    }
}
|
||||
@ -6,6 +6,8 @@ extension WhisperState {
|
||||
switch model.provider {
|
||||
case .local:
|
||||
return availableModels.contains { $0.name == model.name }
|
||||
case .parakeet:
|
||||
return isParakeetModelDownloaded
|
||||
case .nativeApple:
|
||||
if #available(macOS 26, *) {
|
||||
return true
|
||||
|
||||
82
VoiceInk/Whisper/WhisperState+Parakeet.swift
Normal file
82
VoiceInk/Whisper/WhisperState+Parakeet.swift
Normal file
@ -0,0 +1,82 @@
|
||||
import Foundation
import FluidAudio
import AppKit
import os
|
||||
|
||||
/// Parakeet model management: download state, download, deletion and
/// revealing the model folder in Finder.
extension WhisperState {
    /// Key used for the Parakeet entry in `downloadProgress`.
    private static let parakeetProgressKey = "parakeet-tdt-0.6b"

    /// Folder name looked up when deleting or revealing the model.
    private static let parakeetModelFolderName = "parakeet-tdt-0.6b-v2-coreml"

    /// Persisted flag recording whether the Parakeet model has been downloaded.
    var isParakeetModelDownloaded: Bool {
        get { UserDefaults.standard.bool(forKey: "ParakeetModelDownloaded") }
        set { UserDefaults.standard.set(newValue, forKey: "ParakeetModelDownloaded") }
    }

    /// Alias for `isDownloadingParakeet`.
    var isParakeetModelDownloading: Bool {
        get { isDownloadingParakeet }
        set { isDownloadingParakeet = newValue }
    }

    /// Locations where the model folder may live, in lookup order: inside the
    /// app's Application Support subfolder first, then directly in
    /// Application Support.
    private var parakeetModelDirectoryCandidates: [URL] {
        guard let applicationSupport = FileManager.default
            .urls(for: .applicationSupportDirectory, in: .userDomainMask)
            .first
        else {
            return []
        }
        return [
            applicationSupport
                .appendingPathComponent("com.prakashjoshipax.VoiceInk")
                .appendingPathComponent(WhisperState.parakeetModelFolderName),
            applicationSupport
                .appendingPathComponent(WhisperState.parakeetModelFolderName)
        ]
    }

    /// First candidate location that currently exists on disk, if any.
    private var existingParakeetModelDirectory: URL? {
        parakeetModelDirectoryCandidates.first {
            FileManager.default.fileExists(atPath: $0.path)
        }
    }

    /// Downloads the Parakeet model unless it is already downloaded or a
    /// download is already running, updating the download flags and
    /// refreshing the model list afterwards.
    @MainActor
    func downloadParakeetModel() async {
        // Ignore repeated requests so two downloads never run side by side.
        guard !isParakeetModelDownloaded, !isDownloadingParakeet else { return }

        isDownloadingParakeet = true
        downloadProgress[WhisperState.parakeetProgressKey] = 0.0

        do {
            _ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
            isParakeetModelDownloaded = true
        } catch {
            isParakeetModelDownloaded = false
            logger.error("❌ Parakeet model download failed: \(error.localizedDescription)")
        }

        isDownloadingParakeet = false
        downloadProgress[WhisperState.parakeetProgressKey] = nil

        refreshAllAvailableModels()
    }

    /// Removes the Parakeet model folder (if found) and clears the downloaded
    /// flag. When Parakeet is the current transcription model, the selection
    /// is cleared first. A removal failure is logged and leaves the flag as is.
    @MainActor
    func deleteParakeetModel() {
        if let currentModel = currentTranscriptionModel, currentModel.provider == .parakeet {
            currentTranscriptionModel = nil
            UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
        }

        do {
            if let directory = existingParakeetModelDirectory {
                try FileManager.default.removeItem(at: directory)
            }
            isParakeetModelDownloaded = false
        } catch {
            logger.error("❌ Failed to delete Parakeet model: \(error.localizedDescription)")
        }

        refreshAllAvailableModels()
    }

    /// Selects the Parakeet model folder in Finder when it exists in one of
    /// the known locations; otherwise does nothing.
    @MainActor
    func showParakeetModelInFinder() {
        guard let directory = existingParakeetModelDirectory else { return }
        NSWorkspace.shared.selectFile(directory.path, inFileViewerRootedAtPath: "")
    }
}
|
||||
@ -62,6 +62,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
private var localTranscriptionService: LocalTranscriptionService!
|
||||
private lazy var cloudTranscriptionService = CloudTranscriptionService()
|
||||
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
|
||||
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService(customModelsDirectory: parakeetModelsDirectory)
|
||||
|
||||
private var modelUrl: URL? {
|
||||
let possibleURLs = [
|
||||
@ -84,6 +85,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
let modelsDirectory: URL
|
||||
let recordingsDirectory: URL
|
||||
let parakeetModelsDirectory: URL
|
||||
let enhancementService: AIEnhancementService?
|
||||
var licenseViewModel: LicenseViewModel
|
||||
let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
|
||||
@ -92,6 +94,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
// For model progress tracking
|
||||
@Published var downloadProgress: [String: Double] = [:]
|
||||
@Published var isDownloadingParakeet = false
|
||||
|
||||
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
|
||||
self.modelContext = modelContext
|
||||
@ -100,6 +103,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
|
||||
self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
|
||||
self.parakeetModelsDirectory = appSupportDirectory.appendingPathComponent("ParakeetModels")
|
||||
|
||||
self.enhancementService = enhancementService
|
||||
self.licenseViewModel = LicenseViewModel()
|
||||
@ -167,10 +171,11 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
await MainActor.run {
|
||||
self.recordingState = .recording
|
||||
SoundManager.shared.playStartSound()
|
||||
}
|
||||
|
||||
await ActiveWindowService.shared.applyConfigurationForCurrentApp()
|
||||
|
||||
|
||||
// Only load model if it's a local model and not already loaded
|
||||
if let model = self.currentTranscriptionModel, model.provider == .local {
|
||||
if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }),
|
||||
@ -181,6 +186,8 @@ class WhisperState: NSObject, ObservableObject {
|
||||
self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
|
||||
}
|
||||
}
|
||||
} else if let model = self.currentTranscriptionModel, model.provider == .parakeet {
|
||||
try? await parakeetTranscriptionService.loadModel()
|
||||
}
|
||||
|
||||
if let enhancementService = self.enhancementService,
|
||||
@ -239,6 +246,8 @@ class WhisperState: NSObject, ObservableObject {
|
||||
switch model.provider {
|
||||
case .local:
|
||||
transcriptionService = localTranscriptionService
|
||||
case .parakeet:
|
||||
transcriptionService = parakeetTranscriptionService
|
||||
case .nativeApple:
|
||||
transcriptionService = nativeAppleTranscriptionService
|
||||
default:
|
||||
@ -332,7 +341,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
SoundManager.shared.playStopSound()
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
|
||||
|
||||
CursorPaster.pasteAtCursor(text, shouldPreserveClipboard: !self.isAutoCopyEnabled)
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user