Added output format customization for the Whisper Model

This commit is contained in:
Beingpax 2025-05-14 11:52:26 +05:45
parent f03b0f3a18
commit c2ac1a8510
11 changed files with 165 additions and 17 deletions

View File

@ -448,7 +448,7 @@
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 125;
CURRENT_PROJECT_VERSION = 128;
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
DEVELOPMENT_TEAM = V6J6A3VWY2;
ENABLE_HARDENED_RUNTIME = YES;
@ -463,7 +463,7 @@
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 14.0;
MARKETING_VERSION = 1.25;
MARKETING_VERSION = 1.28;
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;
@ -481,7 +481,7 @@
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
CODE_SIGN_STYLE = Automatic;
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 125;
CURRENT_PROJECT_VERSION = 128;
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
DEVELOPMENT_TEAM = V6J6A3VWY2;
ENABLE_HARDENED_RUNTIME = YES;
@ -496,7 +496,7 @@
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 14.0;
MARKETING_VERSION = 1.25;
MARKETING_VERSION = 1.28;
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_EMIT_LOC_STRINGS = YES;

View File

@ -162,10 +162,10 @@ import Foundation
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"ga": "Irish",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"haw": "Hawaiian",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
@ -228,7 +228,6 @@ import Foundation
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"ug": "Uyghur",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",

View File

@ -11,11 +11,15 @@ struct LanguageSelectionView: View {
@AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
// Add display mode parameter with full as the default
var displayMode: LanguageDisplayMode = .full
@ObservedObject var whisperPrompt: WhisperPrompt
/// Switches the transcription language to `language`.
///
/// - Parameter language: Language identifier (for example `"en"`) to store
///   under the `SelectedLanguage` app-storage key.
private func updateLanguage(_ language: String) {
    // Writing through @AppStorage persists the value, so the prompt rebuild
    // on the next line runs after the new selection has been stored.
    self.selectedLanguage = language
    self.whisperPrompt.updateTranscriptionPrompt()

    // Let any listeners know that the language changed.
    let center = NotificationCenter.default
    center.post(name: .languageDidChange, object: nil)
}
@ -57,6 +61,15 @@ struct LanguageSelectionView: View {
/// Full layout for the settings page: the language selection section with
/// the prompt customization panel underneath it.
private var fullView: some View {
    let stack = VStack(alignment: .leading, spacing: 16) {
        languageSelectionSection
        PromptCustomizationView(whisperPrompt: whisperPrompt)
    }
    return stack
}
private var languageSelectionSection: some View {
VStack(alignment: .leading, spacing: 16) {
Text("Transcription Language")
.font(.headline)

View File

@ -10,6 +10,7 @@ struct MenuBarView: View {
@EnvironmentObject var aiService: AIService
@State private var launchAtLoginEnabled = LaunchAtLogin.isEnabled
@State private var menuRefreshTrigger = false // Added to force menu updates
@State private var isHovered = false
var body: some View {
VStack {
@ -112,7 +113,7 @@ struct MenuBarView: View {
}
}
LanguageSelectionView(whisperState: whisperState, displayMode: .menuItem)
LanguageSelectionView(whisperState: whisperState, displayMode: .menuItem, whisperPrompt: whisperState.whisperPrompt)
Toggle("Use Clipboard Context", isOn: $enhancementService.useClipboardContext)
.disabled(!enhancementService.isEnhancementEnabled)

View File

@ -7,6 +7,7 @@ struct ModelManagementView: View {
@StateObject private var aiService = AIService()
@EnvironmentObject private var enhancementService: AIEnhancementService
@Environment(\.modelContext) private var modelContext
@StateObject private var whisperPrompt = WhisperPrompt()
var body: some View {
ScrollView {
@ -51,7 +52,7 @@ struct ModelManagementView: View {
}
private var languageSelectionSection: some View {
LanguageSelectionView(whisperState: whisperState, displayMode: .full)
LanguageSelectionView(whisperState: whisperState, displayMode: .full, whisperPrompt: whisperPrompt)
}
private var availableModelsSection: some View {

View File

@ -0,0 +1,69 @@
import SwiftUI
/// Shows the "Output Format" prompt for the selected language and lets the
/// user edit it and save a customized version.
struct PromptCustomizationView: View {
    /// Supplies the per-language prompt and receives saved edits.
    @ObservedObject var whisperPrompt: WhisperPrompt
    /// Language whose prompt is displayed; bound to the `SelectedLanguage` key.
    @AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
    /// Working copy of the prompt while the editor is open.
    @State private var customPrompt: String = ""
    /// `true` while the text editor replaces the read-only preview.
    @State private var isEditing: Bool = false

    var body: some View {
        VStack(alignment: .leading, spacing: 12) {
            header
            if isEditing {
                promptEditor
            } else {
                promptPreview
            }
        }
        .padding()
        .background(Color(NSColor.controlBackgroundColor))
        .cornerRadius(10)
        // Reset the editor when the language changes. The two-parameter
        // closure replaces the deprecated single-parameter `onChange`, and
        // the delivered new value is used instead of re-reading the property
        // so the editor can never be refilled from the previous language.
        .onChange(of: selectedLanguage) { _, newLanguage in
            if isEditing {
                customPrompt = whisperPrompt.getLanguagePrompt(for: newLanguage)
            }
        }
    }

    /// Title row with the Edit/Save button.
    private var header: some View {
        HStack {
            Text("Output Format")
                .font(.headline)
            Spacer()
            Button(action: toggleEditing) {
                Text(isEditing ? "Save" : "Edit")
                    .font(.caption)
            }
        }
    }

    /// Editable text area bound to the working copy of the prompt.
    private var promptEditor: some View {
        TextEditor(text: $customPrompt)
            .font(.system(size: 12))
            .padding(8)
            .frame(height: 80)
            .overlay(border)
    }

    /// Read-only rendering of the prompt currently in effect.
    private var promptPreview: some View {
        Text(whisperPrompt.getLanguagePrompt(for: selectedLanguage))
            .font(.system(size: 12))
            .foregroundColor(.secondary)
            .padding(8)
            .frame(maxWidth: .infinity, alignment: .leading)
            .background(
                RoundedRectangle(cornerRadius: 6)
                    .fill(Color(.windowBackgroundColor).opacity(0.4))
            )
            .overlay(border)
    }

    /// Thin rounded outline shared by the editor and the preview.
    private var border: some View {
        RoundedRectangle(cornerRadius: 6)
            .stroke(Color.secondary.opacity(0.2), lineWidth: 1)
    }

    /// Saves the working copy when leaving edit mode, or loads the current
    /// prompt into the working copy when entering it.
    private func toggleEditing() {
        if isEditing {
            whisperPrompt.setCustomPrompt(customPrompt, for: selectedLanguage)
        } else {
            customPrompt = whisperPrompt.getLanguagePrompt(for: selectedLanguage)
        }
        isEditing.toggle()
    }
}

View File

@ -53,7 +53,6 @@ actor WhisperContext {
logger.notice("🌐 Using auto language detection")
}
// Use prompt for all languages
if prompt != nil {
promptCString = Array(prompt!.utf8CString)
params.initial_prompt = promptCString?.withUnsafeBufferPointer { ptr in
@ -64,7 +63,7 @@ actor WhisperContext {
promptCString = nil
params.initial_prompt = nil
}
// Adapted from whisper.objc
params.print_realtime = true
params.print_progress = false
params.print_timestamps = true

View File

@ -2,6 +2,7 @@ import Foundation
/// Notification names used by the language and prompt settings code.
extension Notification.Name {
    /// Name with the raw value `"languageDidChange"`.
    static let languageDidChange = Notification.Name(rawValue: "languageDidChange")
    /// Name with the raw value `"promptDidChange"`.
    static let promptDidChange = Notification.Name(rawValue: "promptDidChange")
}
@MainActor
@ -10,6 +11,10 @@ class WhisperPrompt: ObservableObject {
private var dictionaryWords: [String] = []
private let saveKey = "CustomDictionaryItems"
private let customPromptsKey = "CustomLanguagePrompts"
// Store user-customized prompts
private var customPrompts: [String: String] = [:]
// Language-specific base prompts
private let languagePrompts: [String: String] = [
@ -55,6 +60,7 @@ class WhisperPrompt: ObservableObject {
init() {
loadDictionaryItems()
loadCustomPrompts()
updateTranscriptionPrompt()
// Setup notification observer
@ -84,27 +90,62 @@ class WhisperPrompt: ObservableObject {
}
}
/// Restores the user-customized prompts from UserDefaults. The in-memory
/// table is left untouched when nothing usable is stored under the key.
private func loadCustomPrompts() {
    let stored = UserDefaults.standard.dictionary(forKey: customPromptsKey)
    guard let restored = stored as? [String: String] else { return }
    customPrompts = restored
}
/// Persists the user-customized prompts under `customPromptsKey`.
private func saveCustomPrompts() {
    // No explicit `synchronize()` call: a value set on UserDefaults is
    // visible to later reads in this process right away, and Apple documents
    // `synchronize()` as unnecessary.
    UserDefaults.standard.set(customPrompts, forKey: customPromptsKey)
}
/// Replaces the stored dictionary words and rebuilds the transcription prompt.
///
/// - Parameter words: The new word list to keep in `dictionaryWords`.
func updateDictionaryWords(_ words: [String]) {
    self.dictionaryWords = words
    self.updateTranscriptionPrompt()
}
private func updateTranscriptionPrompt() {
func updateTranscriptionPrompt() {
// Get the currently selected language from UserDefaults
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "en"
// Get the appropriate base prompt for the selected language
let basePrompt = languagePrompts[selectedLanguage] ?? languagePrompts["default"]!
// Get the prompt for the selected language (custom if available, otherwise default)
let basePrompt = getLanguagePrompt(for: selectedLanguage)
var prompt = basePrompt
// Always include VoiceInk in the prompt
var prompt = basePrompt + "\nVoiceInk, "
// Add dictionary words directly, without any prefix
// Add dictionary words if available
if !dictionaryWords.isEmpty {
prompt += "\n VoiceInk, " + dictionaryWords.joined(separator: ", ")
prompt += dictionaryWords.joined(separator: ", ")
}
transcriptionPrompt = prompt
UserDefaults.standard.set(prompt, forKey: "TranscriptionPrompt")
UserDefaults.standard.synchronize() // Force immediate synchronization
// Notify that the prompt has changed
NotificationCenter.default.post(name: .promptDidChange, object: nil)
}
/// Returns the prompt to use for `language`.
///
/// A non-empty user-customized prompt takes precedence; otherwise the
/// built-in prompt for the language is returned, falling back to the
/// `"default"` entry of the built-in table.
///
/// - Parameter language: Language identifier used as the lookup key.
/// - Returns: The custom prompt when one is stored and non-empty, otherwise
///   the built-in prompt.
func getLanguagePrompt(for language: String) -> String {
    // More than one WhisperPrompt instance can be alive (the model
    // management screen creates its own), and the saved prompts are
    // otherwise only read in `init`. Re-read them here so a prompt saved
    // through another instance is not ignored by this one.
    loadCustomPrompts()

    if let customPrompt = customPrompts[language], !customPrompt.isEmpty {
        return customPrompt
    }

    // NOTE(review): relies on the built-in table containing a "default"
    // entry, as the original code did — confirm against `languagePrompts`.
    return languagePrompts[language] ?? languagePrompts["default"]!
}
/// Stores `prompt` as the custom prompt for `language`, persists the change
/// and rebuilds the active transcription prompt.
///
/// A prompt that is empty or consists only of whitespace removes the
/// customization, so the built-in prompt for the language applies again
/// instead of a blank one.
///
/// - Parameters:
///   - prompt: The text entered by the user.
///   - language: Language identifier the prompt belongs to.
func setCustomPrompt(_ prompt: String, for language: String) {
    // Announce the change before mutating, as `objectWillChange` is meant
    // to fire ahead of the update so observing views refresh.
    objectWillChange.send()

    let isBlank = prompt.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    if isBlank {
        customPrompts.removeValue(forKey: language)
    } else {
        customPrompts[language] = prompt
    }

    saveCustomPrompts()
    updateTranscriptionPrompt()
}
func saveDictionaryItems(_ items: [DictionaryItem]) async {

View File

@ -38,6 +38,11 @@ extension WhisperState {
do {
whisperContext = try await WhisperContext.createContext(path: model.url.path)
// Set the prompt from UserDefaults to ensure we have the latest
let currentPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt") ?? whisperPrompt.transcriptionPrompt
await whisperContext?.setPrompt(currentPrompt)
isModelLoaded = true
currentModel = model
} catch {

View File

@ -95,6 +95,7 @@ extension WhisperState {
/// Registers this object with the default notification center for the
/// mini-recorder toggle, license status and prompt change notifications.
func setupNotifications() {
    let registrations: [(Selector, Notification.Name)] = [
        (#selector(handleToggleMiniRecorder), .toggleMiniRecorder),
        (#selector(handleLicenseStatusChanged), .licenseStatusChanged),
        (#selector(handlePromptChange), .promptDidChange),
    ]

    let center = NotificationCenter.default
    for (action, name) in registrations {
        center.addObserver(self, selector: action, name: name, object: nil)
    }
}
@objc public func handleToggleMiniRecorder() {
@ -106,4 +107,20 @@ extension WhisperState {
/// Selector target that replaces `licenseViewModel` with a newly created
/// `LicenseViewModel`.
@objc func handleLicenseStatusChanged() {
    licenseViewModel = LicenseViewModel()
}
/// Selector target for prompt changes: forwards the latest prompt to the
/// Whisper context.
@objc func handlePromptChange() {
    // A selector target cannot be async, so the update runs in its own task.
    Task { await updateContextPrompt() }
}
/// Sends the current transcription prompt to the Whisper context, if one
/// exists; does nothing otherwise.
private func updateContextPrompt() async {
    guard let context = whisperContext else { return }

    // Prefer the value persisted under "TranscriptionPrompt"; fall back to
    // the in-memory prompt only when nothing is stored.
    let storedPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt")
    await context.setPrompt(storedPrompt ?? whisperPrompt.transcriptionPrompt)
}
}

View File

@ -288,7 +288,10 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
let actualDuration = CMTimeGetSeconds(audioAsset.duration)
logger.notice("📊 Audio file duration: \(actualDuration) seconds")
await whisperContext.setPrompt(whisperPrompt.transcriptionPrompt)
// Ensure we're using the most recent prompt from UserDefaults
let currentPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt") ?? whisperPrompt.transcriptionPrompt
await whisperContext.setPrompt(currentPrompt)
if shouldCancelRecording { return }
await whisperContext.fullTranscribe(samples: data)
if shouldCancelRecording { return }