Added support for importing fine-tuned local Whisper models.

This commit is contained in:
Beingpax 2025-08-13 14:04:34 +05:45
parent a3c302b50b
commit a8db010900
8 changed files with 243 additions and 28 deletions

View File

@ -8,13 +8,13 @@
/* Begin PBXBuildFile section */
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E10F06082E3F390600F7FBDC /* FluidAudio */; };
E17382402E4C7D0E001BAEBE /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; };
E17382412E4C7D0E001BAEBE /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; };
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */ = {isa = PBXBuildFile; productRef = E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */; settings = {ATTRIBUTES = (CodeSignOnCopy, ); }; };
E1ECEC102E44551F00DFFBA8 /* whisper.xcframework in Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; };
E1ECEC112E44551F00DFFBA8 /* whisper.xcframework in Embed Frameworks */ = {isa = PBXBuildFile; fileRef = E1B2DCAA2E3DE70A008DFD68 /* whisper.xcframework */; settings = {ATTRIBUTES = (CodeSignOnCopy, RemoveHeadersOnCopy, ); }; };
E1ECEC162E44591300DFFBA8 /* Zip in Frameworks */ = {isa = PBXBuildFile; productRef = E1ECEC152E44591300DFFBA8 /* Zip */; };
/* End PBXBuildFile section */
@ -42,7 +42,7 @@
dstPath = "";
dstSubfolderSpec = 10;
files = (
E1ECEC112E44551F00DFFBA8 /* whisper.xcframework in Embed Frameworks */,
E17382412E4C7D0E001BAEBE /* whisper.xcframework in Embed Frameworks */,
E1D7EF9A2E35E19B00640029 /* MediaRemoteAdapter in Embed Frameworks */,
);
name = "Embed Frameworks";
@ -84,8 +84,8 @@
E1ECEC162E44591300DFFBA8 /* Zip in Frameworks */,
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
E17382402E4C7D0E001BAEBE /* whisper.xcframework in Frameworks */,
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */,
E1ECEC102E44551F00DFFBA8 /* whisper.xcframework in Frameworks */,
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
);

View File

@ -138,4 +138,23 @@ struct LocalModel: TranscriptionModel {
var isMultilingualModel: Bool {
supportedLanguages.count > 1
}
}
}
/// A local model supplied by the user rather than taken from the predefined list.
///
/// The only input is a file base name, which is used for both `name` and
/// `displayName`; every other field receives a fixed default.
struct ImportedLocalModel: TranscriptionModel {
    /// Fresh identifier generated every time an instance is created.
    let id = UUID()
    /// Internal name; equal to the base name passed to `init(fileBaseName:)`.
    let name: String
    /// Name shown in the UI; identical to `name`.
    let displayName: String
    /// Fixed descriptive text assigned in the initializer.
    let description: String
    /// Imported models always use the local provider.
    let provider: ModelProvider = .local
    /// Always `true`; the initializer does not inspect the model file.
    let isMultilingualModel: Bool
    /// Language table obtained from `PredefinedModels` for a multilingual local model.
    let supportedLanguages: [String: String]

    /// Creates the entry for an imported model.
    /// - Parameter fileBaseName: Base name of the model file; stored as both
    ///   `name` and `displayName`.
    init(fileBaseName: String) {
        let treatAsMultilingual = true
        name = fileBaseName
        displayName = fileBaseName
        description = "Imported local model"
        isMultilingualModel = treatAsMultilingual
        supportedLanguages = PredefinedModels.getLanguageDictionary(
            isMultilingual: treatAsMultilingual,
            provider: .local
        )
    }
}

View File

@ -15,36 +15,36 @@ class LocalTranscriptionService: TranscriptionService {
}
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
guard let localModel = model as? LocalModel else {
guard model.provider == .local else {
throw WhisperStateError.modelLoadFailed
}
logger.notice("Initiating local transcription for model: \(localModel.displayName)")
logger.notice("Initiating local transcription for model: \(model.displayName)")
// Check if the required model is already loaded in WhisperState
if let whisperState = whisperState,
await whisperState.isModelLoaded,
let loadedContext = await whisperState.whisperContext,
let currentModel = await whisperState.currentTranscriptionModel,
currentModel.provider == .local,
currentModel.name == localModel.name {
let currentModel = await whisperState.currentTranscriptionModel,
currentModel.provider == .local,
currentModel.name == model.name {
logger.notice("✅ Using already loaded model: \(localModel.name)")
logger.notice("✅ Using already loaded model: \(model.name)")
whisperContext = loadedContext
} else {
// Model not loaded or wrong model loaded, proceed with loading
let modelURL = modelsDirectory.appendingPathComponent(localModel.filename)
guard FileManager.default.fileExists(atPath: modelURL.path) else {
logger.error("Model file not found at path: \(modelURL.path)")
// Resolve the on-disk URL using WhisperState.availableModels (covers imports)
let resolvedURL: URL? = await whisperState?.availableModels.first(where: { $0.name == model.name })?.url
guard let modelURL = resolvedURL, FileManager.default.fileExists(atPath: modelURL.path) else {
logger.error("Model file not found for: \(model.name)")
throw WhisperStateError.modelLoadFailed
}
logger.notice("Loading model: \(localModel.name)")
logger.notice("Loading model: \(model.name)")
do {
whisperContext = try await WhisperContext.createContext(path: modelURL.path)
} catch {
logger.error("Failed to load model: \(localModel.name) - \(error.localizedDescription)")
logger.error("Failed to load model: \(model.name) - \(error.localizedDescription)")
throw WhisperStateError.modelLoadFailed
}
}

View File

@ -186,6 +186,91 @@ struct LocalModelCardView: View {
}
}
// MARK: - Imported Local Model (minimal UI)
/// Minimal card for a user-imported local model.
///
/// Shows the model's display name, a status badge ("Default" or "Imported"),
/// its description, a "Set as Default" button when it is not the current
/// model, and an overflow menu with delete / reveal-in-Finder actions.
struct ImportedLocalModelCardView: View {
    /// Model whose name and description are rendered.
    let model: ImportedLocalModel
    /// Whether the model file is present; gates the badge, button and menu.
    let isDownloaded: Bool
    /// Whether this model is the current default.
    let isCurrent: Bool
    /// On-disk location used by "Show in Finder"; may be `nil`.
    let modelURL: URL?
    /// Invoked when the user picks "Delete Model".
    var deleteAction: () -> Void
    /// Invoked when the user presses "Set as Default".
    var setDefaultAction: () -> Void

    var body: some View {
        HStack(alignment: .top, spacing: 16) {
            VStack(alignment: .leading, spacing: 6) {
                HStack(alignment: .firstTextBaseline) {
                    Text(model.displayName)
                        .font(.system(size: 13, weight: .semibold))
                        .foregroundColor(Color(.labelColor))
                    if isCurrent {
                        badge("Default", fill: Color.accentColor, foreground: .white)
                    } else if isDownloaded {
                        badge(
                            "Imported",
                            fill: Color(.quaternaryLabelColor),
                            foreground: Color(.labelColor)
                        )
                    }
                    Spacer()
                }
                // Read the text from the model instead of repeating the literal,
                // so the card cannot drift from the model's own description.
                Text(model.description)
                    .font(.system(size: 11))
                    .foregroundColor(Color(.secondaryLabelColor))
                    .lineLimit(2)
                    .fixedSize(horizontal: false, vertical: true)
                    .padding(.top, 4)
            }
            .frame(maxWidth: .infinity, alignment: .leading)
            HStack(spacing: 8) {
                if isCurrent {
                    Text("Default Model")
                        .font(.system(size: 12))
                        .foregroundColor(Color(.secondaryLabelColor))
                } else if isDownloaded {
                    Button(action: setDefaultAction) {
                        Text("Set as Default")
                            .font(.system(size: 12))
                    }
                    .buttonStyle(.bordered)
                    .controlSize(.small)
                }
                if isDownloaded {
                    Menu {
                        Button(action: deleteAction) {
                            Label("Delete Model", systemImage: "trash")
                        }
                        Button {
                            if let modelURL = modelURL {
                                NSWorkspace.shared.selectFile(modelURL.path, inFileViewerRootedAtPath: "")
                            }
                        } label: {
                            Label("Show in Finder", systemImage: "folder")
                        }
                        // Without a URL the action is a no-op, so show it as unavailable.
                        .disabled(modelURL == nil)
                    } label: {
                        Image(systemName: "ellipsis.circle")
                            .font(.system(size: 14))
                    }
                    .menuStyle(.borderlessButton)
                    .menuIndicator(.hidden)
                    .frame(width: 20, height: 20)
                }
            }
        }
        .padding(16)
        .background(CardBackground(isSelected: isCurrent, useAccentGradientWhenSelected: isCurrent))
    }

    /// Builds the small capsule-shaped status pill shown next to the model name.
    private func badge(_ title: String, fill: Color, foreground: Color) -> some View {
        Text(title)
            .font(.system(size: 11, weight: .medium))
            .padding(.horizontal, 6)
            .padding(.vertical, 2)
            .background(Capsule().fill(fill))
            .foregroundColor(foreground)
    }
}
// MARK: - Helper Views and Functions

View File

@ -30,6 +30,15 @@ struct ModelCardRowView: View {
setDefaultAction: setDefaultAction,
downloadAction: downloadAction
)
} else if let importedModel = model as? ImportedLocalModel {
ImportedLocalModelCardView(
model: importedModel,
isDownloaded: isDownloaded,
isCurrent: isCurrent,
modelURL: modelURL,
deleteAction: deleteAction,
setDefaultAction: setDefaultAction
)
}
case .parakeet:
if let parakeetModel = model as? ParakeetModel {

View File

@ -1,5 +1,7 @@
import SwiftUI
import SwiftData
import AppKit
import UniformTypeIdentifiers
enum ModelFilter: String, CaseIterable, Identifiable {
case recommended = "Recommended"
@ -150,9 +152,7 @@ struct ModelManagementView: View {
},
downloadAction: {
if let localModel = model as? LocalModel {
Task {
await whisperState.downloadModel(localModel)
}
Task { await whisperState.downloadModel(localModel) }
}
},
editAction: model.provider == .custom ? { customModel in
@ -161,6 +161,31 @@ struct ModelManagementView: View {
)
}
// Import button as a card at the end of the Local list
if selectedFilter == .local {
HStack(spacing: 8) {
Button(action: { presentImportPanel() }) {
HStack(spacing: 8) {
Image(systemName: "square.and.arrow.down")
Text("Import Local Model…")
.font(.system(size: 12, weight: .semibold))
}
.frame(maxWidth: .infinity)
.padding(16)
.background(CardBackground(isSelected: false))
.cornerRadius(10)
}
.buttonStyle(.plain)
InfoTip(
title: "Import local Whisper models",
message: "Add a custom fine-tuned whisper model to use with VoiceInk. Select the downloaded .bin file.",
learnMoreURL: "https://tryvoiceink.com/docs/custom-local-whisper-models"
)
.help("Read more about custom local models")
}
}
if selectedFilter == .custom {
// Add Custom Model Card at the bottom
AddCustomModelCardView(
@ -199,4 +224,19 @@ struct ModelManagementView: View {
return whisperState.allAvailableModels.filter { $0.provider == .custom }
}
}
// MARK: - Import Panel
/// Presents a modal open panel for choosing a single ggml Whisper `.bin` file
/// and, if the user confirms, hands the chosen URL to
/// `whisperState.importLocalModel(from:)`.
private func presentImportPanel() {
    let panel = NSOpenPanel()
    // `UTType(filenameExtension:)` is failable, so bind it instead of
    // force-unwrapping. If no type is produced the panel is left unfiltered.
    if let binType = UTType(filenameExtension: "bin") {
        panel.allowedContentTypes = [binType]
    }
    panel.allowsMultipleSelection = false
    panel.canChooseDirectories = false
    panel.resolvesAliases = true
    panel.title = "Select a Whisper ggml .bin model"

    guard panel.runModal() == .OK, let url = panel.url else { return }
    Task { @MainActor in
        await whisperState.importLocalModel(from: url)
    }
}
}

View File

@ -304,6 +304,11 @@ extension WhisperState {
} catch {
logError("Error deleting model: \(model.name)", error)
}
// Ensure UI reflects removal of imported models as well
await MainActor.run {
self.refreshAllAvailableModels()
}
}
func unloadModel() {
@ -343,6 +348,55 @@ extension WhisperState {
/// Writes `message` followed by the error's localized description to the logger at error level.
/// - Parameters:
///   - message: Context describing what failed.
///   - error: The error whose `localizedDescription` is appended.
private func logError(_ message: String, _ error: Error) {
    let reason = error.localizedDescription
    logger.error("\(message): \(reason)")
}
// MARK: - Import Local Model (User-provided .bin)
/// Copies a user-selected `.bin` model file into `modelsDirectory` and registers
/// it in the in-memory model lists.
///
/// The user is notified of every outcome: unsupported file extension, name
/// collision with an existing file, successful import, or a copy failure.
/// Nothing is thrown; failures are logged and surfaced as notifications.
///
/// - Parameter sourceURL: Location of the file chosen by the user.
@MainActor
func importLocalModel(from sourceURL: URL) async {
    // Accept only .bin files for ggml Whisper models, and tell the user why
    // nothing happened instead of returning silently.
    guard sourceURL.pathExtension.lowercased() == "bin" else {
        await NotificationManager.shared.showNotification(
            title: "Only .bin Whisper model files can be imported",
            type: .warning,
            duration: 4.0
        )
        return
    }

    // Build a destination URL inside the app-managed models directory.
    let baseName = sourceURL.deletingPathExtension().lastPathComponent
    let destinationURL = modelsDirectory.appendingPathComponent("\(baseName).bin")

    // Do not rename on collision; simply notify the user and abort.
    guard !FileManager.default.fileExists(atPath: destinationURL.path) else {
        await NotificationManager.shared.showNotification(
            title: "A model named \(baseName).bin already exists",
            type: .warning,
            duration: 4.0
        )
        return
    }

    do {
        try FileManager.default.createDirectory(at: modelsDirectory, withIntermediateDirectories: true)
        try FileManager.default.copyItem(at: sourceURL, to: destinationURL)

        // Append ONLY the newly imported model to in-memory lists (no full rescan).
        availableModels.append(WhisperModel(name: baseName, url: destinationURL))
        // If an entry with this name is already listed, the copied file is
        // associated with that existing entry and no new one is added.
        if !allAvailableModels.contains(where: { $0.name == baseName }) {
            allAvailableModels.append(ImportedLocalModel(fileBaseName: baseName))
        }

        await NotificationManager.shared.showNotification(
            title: "Imported \(destinationURL.lastPathComponent)",
            type: .success,
            duration: 3.0
        )
    } catch {
        logError("Failed to import local model", error)
        await NotificationManager.shared.showNotification(
            title: "Failed to import model: \(error.localizedDescription)",
            type: .error,
            duration: 5.0
        )
    }
}
}
// MARK: - Download Progress View

View File

@ -31,14 +31,22 @@ extension WhisperState {
}
func refreshAllAvailableModels() {
let currentModelId = currentTranscriptionModel?.id
allAvailableModels = PredefinedModels.models
// If there was a current default model, find its new version in the refreshed list and update it.
// This handles cases where the default model was edited.
if let currentId = currentModelId,
let updatedModel = allAvailableModels.first(where: { $0.id == currentId })
{
let currentModelName = currentTranscriptionModel?.name
var models = PredefinedModels.models
// Append dynamically discovered local models (imported .bin files) with minimal metadata
for whisperModel in availableModels {
if !models.contains(where: { $0.name == whisperModel.name }) {
let importedModel = ImportedLocalModel(fileBaseName: whisperModel.name)
models.append(importedModel)
}
}
allAvailableModels = models
// Preserve current selection by name (IDs may change for dynamic models)
if let currentName = currentModelName,
let updatedModel = allAvailableModels.first(where: { $0.name == currentName }) {
setDefaultTranscriptionModel(updatedModel)
}
}