Added output format customization for the Whisper Model
This commit is contained in:
parent
f03b0f3a18
commit
c2ac1a8510
@ -448,7 +448,7 @@
|
||||
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
CURRENT_PROJECT_VERSION = 125;
|
||||
CURRENT_PROJECT_VERSION = 128;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = V6J6A3VWY2;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
@ -463,7 +463,7 @@
|
||||
"@executable_path/../Frameworks",
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 14.0;
|
||||
MARKETING_VERSION = 1.25;
|
||||
MARKETING_VERSION = 1.28;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
@ -481,7 +481,7 @@
|
||||
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
CURRENT_PROJECT_VERSION = 125;
|
||||
CURRENT_PROJECT_VERSION = 128;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = V6J6A3VWY2;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
@ -496,7 +496,7 @@
|
||||
"@executable_path/../Frameworks",
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 14.0;
|
||||
MARKETING_VERSION = 1.25;
|
||||
MARKETING_VERSION = 1.28;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
|
||||
@ -162,10 +162,10 @@ import Foundation
|
||||
"fi": "Finnish",
|
||||
"fo": "Faroese",
|
||||
"fr": "French",
|
||||
"ga": "Irish",
|
||||
"gl": "Galician",
|
||||
"gu": "Gujarati",
|
||||
"ha": "Hausa",
|
||||
"haw": "Hawaiian",
|
||||
"he": "Hebrew",
|
||||
"hi": "Hindi",
|
||||
"hr": "Croatian",
|
||||
@ -228,7 +228,6 @@ import Foundation
|
||||
"tl": "Tagalog",
|
||||
"tr": "Turkish",
|
||||
"tt": "Tatar",
|
||||
"ug": "Uyghur",
|
||||
"uk": "Ukrainian",
|
||||
"ur": "Urdu",
|
||||
"uz": "Uzbek",
|
||||
|
||||
@ -11,11 +11,15 @@ struct LanguageSelectionView: View {
|
||||
@AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
|
||||
// Add display mode parameter with full as the default
|
||||
var displayMode: LanguageDisplayMode = .full
|
||||
@ObservedObject var whisperPrompt: WhisperPrompt
|
||||
|
||||
/// Applies a newly chosen transcription language.
///
/// - Parameter language: The language code to select.
private func updateLanguage(_ language: String) {
    // Assigning to the @AppStorage-backed property also persists the choice to UserDefaults.
    selectedLanguage = language

    // Rebuild the transcription prompt so it reflects the language just selected.
    whisperPrompt.updateTranscriptionPrompt()

    // Tell observers that the language changed.
    let center = NotificationCenter.default
    center.post(name: .languageDidChange, object: nil)
}
|
||||
@ -57,6 +61,15 @@ struct LanguageSelectionView: View {
|
||||
|
||||
// The original full view layout for settings page
|
||||
/// Full layout for the settings page: the language picker with the prompt editor beneath it.
private var fullView: some View {
    VStack(alignment: .leading, spacing: 16) {
        languageSelectionSection

        // Prompt (output format) customization sits directly under the language picker.
        PromptCustomizationView(whisperPrompt: whisperPrompt)
    }
}
|
||||
|
||||
private var languageSelectionSection: some View {
|
||||
VStack(alignment: .leading, spacing: 16) {
|
||||
Text("Transcription Language")
|
||||
.font(.headline)
|
||||
|
||||
@ -10,6 +10,7 @@ struct MenuBarView: View {
|
||||
@EnvironmentObject var aiService: AIService
|
||||
@State private var launchAtLoginEnabled = LaunchAtLogin.isEnabled
|
||||
@State private var menuRefreshTrigger = false // Added to force menu updates
|
||||
@State private var isHovered = false
|
||||
|
||||
var body: some View {
|
||||
VStack {
|
||||
@ -112,7 +113,7 @@ struct MenuBarView: View {
|
||||
}
|
||||
}
|
||||
|
||||
LanguageSelectionView(whisperState: whisperState, displayMode: .menuItem)
|
||||
LanguageSelectionView(whisperState: whisperState, displayMode: .menuItem, whisperPrompt: whisperState.whisperPrompt)
|
||||
|
||||
Toggle("Use Clipboard Context", isOn: $enhancementService.useClipboardContext)
|
||||
.disabled(!enhancementService.isEnhancementEnabled)
|
||||
|
||||
@ -7,6 +7,7 @@ struct ModelManagementView: View {
|
||||
@StateObject private var aiService = AIService()
|
||||
@EnvironmentObject private var enhancementService: AIEnhancementService
|
||||
@Environment(\.modelContext) private var modelContext
|
||||
@StateObject private var whisperPrompt = WhisperPrompt()
|
||||
|
||||
var body: some View {
|
||||
ScrollView {
|
||||
@ -51,7 +52,7 @@ struct ModelManagementView: View {
|
||||
}
|
||||
|
||||
private var languageSelectionSection: some View {
|
||||
LanguageSelectionView(whisperState: whisperState, displayMode: .full)
|
||||
LanguageSelectionView(whisperState: whisperState, displayMode: .full, whisperPrompt: whisperPrompt)
|
||||
}
|
||||
|
||||
private var availableModelsSection: some View {
|
||||
|
||||
69
VoiceInk/Views/PromptCustomizationView.swift
Normal file
69
VoiceInk/Views/PromptCustomizationView.swift
Normal file
@ -0,0 +1,69 @@
|
||||
import SwiftUI
|
||||
|
||||
/// Lets the user view and edit the Whisper output-format prompt for the selected language.
///
/// In read mode the prompt returned by `WhisperPrompt.getLanguagePrompt(for:)` is shown as
/// static text. In edit mode the text is editable, and pressing "Save" stores it through
/// `WhisperPrompt.setCustomPrompt(_:for:)`.
struct PromptCustomizationView: View {
    /// Supplies the per-language prompts and receives saved edits.
    @ObservedObject var whisperPrompt: WhisperPrompt
    /// Language code stored under the "SelectedLanguage" defaults key.
    @AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
    /// Working copy of the prompt while the editor is open.
    @State private var customPrompt: String = ""
    /// `true` while the text editor is shown instead of the read-only preview.
    @State private var isEditing: Bool = false

    var body: some View {
        VStack(alignment: .leading, spacing: 12) {
            header

            if isEditing {
                promptEditor
            } else {
                promptPreview
            }
        }
        .padding()
        .background(Color(NSColor.controlBackgroundColor))
        .cornerRadius(10)
        // Reset the editor when the language changes mid-edit.
        // The closure's argument is used instead of re-reading `selectedLanguage`: values
        // captured by this closure may still be the pre-change ones, whereas the argument
        // is always the language that was just selected.
        .onChange(of: selectedLanguage) { newLanguage in
            if isEditing {
                customPrompt = whisperPrompt.getLanguagePrompt(for: newLanguage)
            }
        }
    }

    /// Title row with the Edit/Save toggle button.
    private var header: some View {
        HStack {
            Text("Output Format")
                .font(.headline)

            Spacer()

            Button(action: toggleEditing) {
                Text(isEditing ? "Save" : "Edit")
                    .font(.caption)
            }
        }
    }

    /// Editable text field bound to the working copy of the prompt.
    private var promptEditor: some View {
        TextEditor(text: $customPrompt)
            .font(.system(size: 12))
            .padding(8)
            .frame(height: 80)
            .overlay(fieldBorder)
    }

    /// Read-only rendering of the prompt for the selected language.
    private var promptPreview: some View {
        Text(whisperPrompt.getLanguagePrompt(for: selectedLanguage))
            .font(.system(size: 12))
            .foregroundColor(.secondary)
            .padding(8)
            .frame(maxWidth: .infinity, alignment: .leading)
            .background(
                RoundedRectangle(cornerRadius: 6)
                    .fill(Color(.windowBackgroundColor).opacity(0.4))
            )
            .overlay(fieldBorder)
    }

    /// Thin rounded outline shared by the editor and the preview.
    private var fieldBorder: some View {
        RoundedRectangle(cornerRadius: 6)
            .stroke(Color.secondary.opacity(0.2), lineWidth: 1)
    }

    /// Saves the working copy when leaving edit mode; loads the current prompt when entering it.
    private func toggleEditing() {
        if isEditing {
            // Save changes
            whisperPrompt.setCustomPrompt(customPrompt, for: selectedLanguage)
        } else {
            // Enter edit mode with the prompt currently shown in the preview
            customPrompt = whisperPrompt.getLanguagePrompt(for: selectedLanguage)
        }
        isEditing.toggle()
    }
}
|
||||
@ -53,7 +53,6 @@ actor WhisperContext {
|
||||
logger.notice("🌐 Using auto language detection")
|
||||
}
|
||||
|
||||
// Use prompt for all languages
|
||||
if prompt != nil {
|
||||
promptCString = Array(prompt!.utf8CString)
|
||||
params.initial_prompt = promptCString?.withUnsafeBufferPointer { ptr in
|
||||
@ -64,7 +63,7 @@ actor WhisperContext {
|
||||
promptCString = nil
|
||||
params.initial_prompt = nil
|
||||
}
|
||||
// Adapted from whisper.objc
|
||||
|
||||
params.print_realtime = true
|
||||
params.print_progress = false
|
||||
params.print_timestamps = true
|
||||
|
||||
@ -2,6 +2,7 @@ import Foundation
|
||||
|
||||
/// Notification names used to broadcast transcription-setting changes.
extension Notification.Name {
    /// Posted when the selected transcription language changes.
    static let languageDidChange = Notification.Name(rawValue: "languageDidChange")
    /// Posted when the transcription prompt has been rebuilt.
    static let promptDidChange = Notification.Name(rawValue: "promptDidChange")
}
|
||||
|
||||
@MainActor
|
||||
@ -10,6 +11,10 @@ class WhisperPrompt: ObservableObject {
|
||||
|
||||
private var dictionaryWords: [String] = []
|
||||
private let saveKey = "CustomDictionaryItems"
|
||||
private let customPromptsKey = "CustomLanguagePrompts"
|
||||
|
||||
// Store user-customized prompts
|
||||
private var customPrompts: [String: String] = [:]
|
||||
|
||||
// Language-specific base prompts
|
||||
private let languagePrompts: [String: String] = [
|
||||
@ -55,6 +60,7 @@ class WhisperPrompt: ObservableObject {
|
||||
|
||||
init() {
|
||||
loadDictionaryItems()
|
||||
loadCustomPrompts()
|
||||
updateTranscriptionPrompt()
|
||||
|
||||
// Setup notification observer
|
||||
@ -84,27 +90,62 @@ class WhisperPrompt: ObservableObject {
|
||||
}
|
||||
}
|
||||
|
||||
/// Restores previously saved per-language custom prompts from UserDefaults.
///
/// Leaves `customPrompts` untouched when nothing is stored under `customPromptsKey`
/// or the stored value is not a `[String: String]` dictionary.
private func loadCustomPrompts() {
    let stored = UserDefaults.standard.dictionary(forKey: customPromptsKey)
    guard let savedPrompts = stored as? [String: String] else { return }
    customPrompts = savedPrompts
}
|
||||
|
||||
/// Persists the per-language custom prompts to UserDefaults under `customPromptsKey`.
private func saveCustomPrompts() {
    // Values written with `set(_:forKey:)` are visible to in-process readers right away.
    // Apple documents `synchronize()` as unnecessary, so it is not called here.
    UserDefaults.standard.set(customPrompts, forKey: customPromptsKey)
}
|
||||
|
||||
/// Replaces the dictionary word list and rebuilds the transcription prompt from it.
///
/// - Parameter words: The words that replace the current list.
func updateDictionaryWords(_ words: [String]) {
    // Run the rebuild on exit so it always sees the freshly assigned list.
    defer { updateTranscriptionPrompt() }
    dictionaryWords = words
}
|
||||
|
||||
private func updateTranscriptionPrompt() {
|
||||
func updateTranscriptionPrompt() {
|
||||
// Get the currently selected language from UserDefaults
|
||||
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "en"
|
||||
|
||||
// Get the appropriate base prompt for the selected language
|
||||
let basePrompt = languagePrompts[selectedLanguage] ?? languagePrompts["default"]!
|
||||
// Get the prompt for the selected language (custom if available, otherwise default)
|
||||
let basePrompt = getLanguagePrompt(for: selectedLanguage)
|
||||
|
||||
var prompt = basePrompt
|
||||
// Always include VoiceInk in the prompt
|
||||
var prompt = basePrompt + "\nVoiceInk, "
|
||||
|
||||
// Add dictionary words directly, without any prefix
|
||||
// Add dictionary words if available
|
||||
if !dictionaryWords.isEmpty {
|
||||
prompt += "\n VoiceInk, " + dictionaryWords.joined(separator: ", ")
|
||||
prompt += dictionaryWords.joined(separator: ", ")
|
||||
}
|
||||
|
||||
transcriptionPrompt = prompt
|
||||
UserDefaults.standard.set(prompt, forKey: "TranscriptionPrompt")
|
||||
UserDefaults.standard.synchronize() // Force immediate synchronization
|
||||
|
||||
// Notify that the prompt has changed
|
||||
NotificationCenter.default.post(name: .promptDidChange, object: nil)
|
||||
}
|
||||
|
||||
/// Returns the prompt to use for `language`.
///
/// Lookup order: a non-empty user-customized prompt, then the built-in prompt for the
/// language, then the built-in "default" prompt.
///
/// - Parameter language: Language code used as the lookup key.
/// - Returns: The resolved prompt, or an empty string if no built-in "default" entry exists.
func getLanguagePrompt(for language: String) -> String {
    // An empty custom prompt is treated the same as no customization.
    if let customPrompt = customPrompts[language], !customPrompt.isEmpty {
        return customPrompt
    }

    // Fall back through the built-in prompts without force-unwrapping, so a missing
    // "default" entry yields an empty prompt instead of a crash.
    return languagePrompts[language] ?? languagePrompts["default"] ?? ""
}
|
||||
|
||||
/// Stores a user-customized prompt for `language`, persists it, and rebuilds the
/// transcription prompt.
///
/// A prompt that is empty or contains only whitespace/newlines removes the customization,
/// so the language falls back to its built-in prompt.
///
/// - Parameters:
///   - prompt: The custom prompt text.
///   - language: Language code the prompt applies to.
func setCustomPrompt(_ prompt: String, for language: String) {
    // `customPrompts` is a plain property, so observers have to be told by hand. Per the
    // `ObservableObject` contract the announcement goes out before the state changes.
    objectWillChange.send()

    // Assigning nil through the subscript removes the entry for a blank prompt.
    let isBlank = prompt.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty
    customPrompts[language] = isBlank ? nil : prompt

    saveCustomPrompts()
    updateTranscriptionPrompt()
}
|
||||
|
||||
func saveDictionaryItems(_ items: [DictionaryItem]) async {
|
||||
|
||||
@ -38,6 +38,11 @@ extension WhisperState {
|
||||
|
||||
do {
|
||||
whisperContext = try await WhisperContext.createContext(path: model.url.path)
|
||||
|
||||
// Set the prompt from UserDefaults to ensure we have the latest
|
||||
let currentPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt") ?? whisperPrompt.transcriptionPrompt
|
||||
await whisperContext?.setPrompt(currentPrompt)
|
||||
|
||||
isModelLoaded = true
|
||||
currentModel = model
|
||||
} catch {
|
||||
|
||||
@ -95,6 +95,7 @@ extension WhisperState {
|
||||
/// Registers this object for the notifications it reacts to: mini-recorder toggling,
/// license status changes, and transcription prompt changes.
func setupNotifications() {
    let center = NotificationCenter.default

    center.addObserver(
        self,
        selector: #selector(handleToggleMiniRecorder),
        name: .toggleMiniRecorder,
        object: nil
    )
    center.addObserver(
        self,
        selector: #selector(handleLicenseStatusChanged),
        name: .licenseStatusChanged,
        object: nil
    )
    center.addObserver(
        self,
        selector: #selector(handlePromptChange),
        name: .promptDidChange,
        object: nil
    )
}
|
||||
|
||||
@objc public func handleToggleMiniRecorder() {
|
||||
@ -106,4 +107,20 @@ extension WhisperState {
|
||||
/// Selector target for `.licenseStatusChanged`: replaces the license view model with a new instance.
@objc func handleLicenseStatusChanged() {
    licenseViewModel = LicenseViewModel()
}
|
||||
|
||||
/// Selector target for `.promptDidChange`: forwards the new prompt to the whisper context.
@objc func handlePromptChange() {
    // The selector is synchronous, so the async update runs in its own task.
    Task { await updateContextPrompt() }
}
|
||||
|
||||
/// Pushes the current transcription prompt into the whisper context, if one exists.
///
/// The prompt is read from the "TranscriptionPrompt" defaults key; when nothing is stored
/// there, `whisperPrompt.transcriptionPrompt` is used instead.
private func updateContextPrompt() async {
    let storedPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt")
    let currentPrompt = storedPrompt ?? whisperPrompt.transcriptionPrompt

    // No context yet means there is nothing to update.
    await whisperContext?.setPrompt(currentPrompt)
}
|
||||
}
|
||||
@ -288,7 +288,10 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
||||
let actualDuration = CMTimeGetSeconds(audioAsset.duration)
|
||||
logger.notice("📊 Audio file duration: \(actualDuration) seconds")
|
||||
|
||||
await whisperContext.setPrompt(whisperPrompt.transcriptionPrompt)
|
||||
// Ensure we're using the most recent prompt from UserDefaults
|
||||
let currentPrompt = UserDefaults.standard.string(forKey: "TranscriptionPrompt") ?? whisperPrompt.transcriptionPrompt
|
||||
await whisperContext.setPrompt(currentPrompt)
|
||||
|
||||
if shouldCancelRecording { return }
|
||||
await whisperContext.fullTranscribe(samples: data)
|
||||
if shouldCancelRecording { return }
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user