From 8341dda787c7c9ae1d59ca11b33e78b075c74a2a Mon Sep 17 00:00:00 2001 From: Beingpax Date: Thu, 27 Mar 2025 20:06:51 +0545 Subject: [PATCH] Improve AI prompts and enhancement service handling --- VoiceInk/Models/PredefinedPrompts.swift | 35 ++------------ VoiceInk/Services/AIEnhancementService.swift | 51 +++++++++++++++----- VoiceInk/Services/AIPrompts.swift | 37 +++++++------- 3 files changed, 60 insertions(+), 63 deletions(-) diff --git a/VoiceInk/Models/PredefinedPrompts.swift b/VoiceInk/Models/PredefinedPrompts.swift index 17c40be..aded8d3 100644 --- a/VoiceInk/Models/PredefinedPrompts.swift +++ b/VoiceInk/Models/PredefinedPrompts.swift @@ -1,11 +1,12 @@ import Foundation +import SwiftUI // Import to ensure we have access to SwiftUI types if needed enum PredefinedPrompts { private static let predefinedPromptsKey = "PredefinedPrompts" // Static UUIDs for predefined prompts private static let defaultPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000001")! - private static let assistantPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000002")! + static let assistantPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000002")! static var all: [CustomPrompt] { // Always return the latest predefined prompts from source code @@ -65,41 +66,15 @@ enum PredefinedPrompts { Here is the transcribed text: """, icon: .sealedFill, - description: "Defeault mode to improved clarity and accuracy of the transcription", + description: "Default mode to improved clarity and accuracy of the transcription", isPredefined: true ), CustomPrompt( id: assistantPromptId, title: "Assistant", - promptText: """ - Provide a direct clear, and concise reply to the user's query. Use the available context if directly related to the user's query. - Remember to: - 1. Be helpful and informative - 2. Be accurate and precise - 3. Don't add meta commentary or anything extra other than the actual answer - 6. Maintain a friendly, casual tone - - Use the following information if provided: - 1. Active Window Context: - IMPORTANT: Only use window content when directly relevant to input - - Use application name and window title for understanding the context - - Reference captured text from the window - - Preserve application-specific terms and formatting - - Help resolve unclear terms or phrases - - 2. Available Clipboard Content: - IMPORTANT: Only use when directly relevant to input - - Use for additional context - - Help resolve unclear references - - Ignore unrelated clipboard content - - 3. Examples: - - Follow the correction patterns shown in examples - - Match the formatting style of similar texts - - Use consistent terminology with examples - - Learn from previous corrections - """, + // Combine assistant mode prompt with context instructions + promptText: AIPrompts.assistantMode + "\n\n" + AIPrompts.contextInstructions, icon: .chatFill, description: "AI assistant that provides direct answers to queries", isPredefined: true diff --git a/VoiceInk/Services/AIEnhancementService.swift b/VoiceInk/Services/AIEnhancementService.swift index 144654b..ccd91cf 100644 --- a/VoiceInk/Services/AIEnhancementService.swift +++ b/VoiceInk/Services/AIEnhancementService.swift @@ -173,7 +173,6 @@ class AIEnhancementService: ObservableObject { !clipboardText.isEmpty { """ - Context Awareness Available Clipboard Context: \(clipboardText) """ } else { @@ -192,16 +191,39 @@ class AIEnhancementService: ObservableObject { "" } + // Format all context information together with instructions + let contextSection = if !clipboardContext.isEmpty || !screenCaptureContext.isEmpty { + """ + + \(AIPrompts.contextInstructions) + + + \(clipboardContext) + \(screenCaptureContext) + + """ + } else { + "" + } + switch mode { case .transcriptionEnhancement: - // Always use activePrompt since we've removed the toggle + // Check if the active prompt is the Assistant prompt + if let activePrompt = activePrompt, + activePrompt.id == PredefinedPrompts.assistantPromptId { + // For the Assistant predefined prompt, use the assistant mode prompt directly + // This ensures proper system message formatting + return AIPrompts.assistantMode + contextSection + } + + // For all other prompts, use the custom prompt template var systemMessage = String(format: AIPrompts.customPromptTemplate, activePrompt!.promptText) - systemMessage += "\n\n" + AIPrompts.contextInstructions - systemMessage += clipboardContext + screenCaptureContext + systemMessage += contextSection return systemMessage case .aiAssistant: - return AIPrompts.assistantMode + clipboardContext + screenCaptureContext + // For AI assistant mode, use the assistant mode prompt directly + return AIPrompts.assistantMode + contextSection } } @@ -216,6 +238,9 @@ class AIEnhancementService: ObservableObject { throw EnhancementError.emptyText } + // Format transcript with boundary markers + let formattedText = "\n\(text)\n" + // Determine mode and get system message let mode = determineMode(text: text) let systemMessage = getSystemMessage(for: mode) @@ -224,9 +249,9 @@ class AIEnhancementService: ObservableObject { if aiService.selectedProvider == .ollama { logger.notice("📤 Request to Ollama") logger.notice("🤖 System: \(systemMessage, privacy: .public)") - logger.notice("📝 Sending: \(text, privacy: .public)") + logger.notice("📝 Sending: \(formattedText, privacy: .public)") do { - let result = try await aiService.enhanceWithOllama(text: text, systemPrompt: systemMessage) + let result = try await aiService.enhanceWithOllama(text: formattedText, systemPrompt: systemMessage) logger.notice("✅ Ollama enhancement successful") logger.notice("📝 Received: \(result, privacy: .public)") return result @@ -274,7 +299,7 @@ class AIEnhancementService: ObservableObject { [ "parts": [ ["text": systemMessage], - ["text": "Transcript:\n\(text)"] + ["text": formattedText] ] ] ], @@ -288,7 +313,7 @@ class AIEnhancementService: ObservableObject { do { logger.notice("📤 Request to Gemini") logger.notice("🤖 System: \(systemMessage, privacy: .public)") - logger.notice("📝 Sending: \(text, privacy: .public)") + logger.notice("📝 Sending: \(formattedText, privacy: .public)") let (data, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -349,7 +374,7 @@ class AIEnhancementService: ObservableObject { "max_tokens": 1024, "system": systemMessage, "messages": [ - ["role": "user", "content": text] + ["role": "user", "content": formattedText] ] ] @@ -367,7 +392,7 @@ class AIEnhancementService: ObservableObject { do { logger.notice("📤 Request to Anthropic") logger.notice("🤖 System: \(systemMessage, privacy: .public)") - logger.notice("📝 Sending: \(text, privacy: .public)") + logger.notice("📝 Sending: \(formattedText, privacy: .public)") let (data, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { @@ -435,7 +460,7 @@ class AIEnhancementService: ObservableObject { let messages: [[String: Any]] = [ ["role": "system", "content": systemMessage], - ["role": "user", "content": "Transcript:\n\(text)"] + ["role": "user", "content": formattedText] ] logger.info("Making request to \(self.aiService.selectedProvider.rawValue) with text length: \(text.count) characters") @@ -454,7 +479,7 @@ class AIEnhancementService: ObservableObject { do { logger.notice("📤 Request to \(self.aiService.selectedProvider.rawValue, privacy: .public)") logger.notice("🤖 System: \(systemMessage, privacy: .public)") - logger.notice("📝 Sending: \(text, privacy: .public)") + logger.notice("📝 Sending: \(formattedText, privacy: .public)") let (data, response) = try await URLSession.shared.data(for: request) guard let httpResponse = response as? HTTPURLResponse else { diff --git a/VoiceInk/Services/AIPrompts.swift b/VoiceInk/Services/AIPrompts.swift index d04b06f..5d22fd9 100644 --- a/VoiceInk/Services/AIPrompts.swift +++ b/VoiceInk/Services/AIPrompts.swift @@ -1,11 +1,18 @@ enum AIPrompts { static let customPromptTemplate = """ - Reformat the input message according to the given guidelines: + + Your task is to reformat and enhance the text provided within tags according to the following guidelines: %@ + + IMPORTANT: The input will be wrapped in tags to identify what needs enhancement. + Your response should ONLY be to enhance text WITHOUT any tags. + DO NOT include tags in your response. + """ static let assistantMode = """ + Provide a direct clear, and concise reply to the user's query. Use the available context if directly related to the user's query. Remember to: 1. Be helpful and informative @@ -14,27 +21,17 @@ enum AIPrompts { 4. NEVER add any introductory text like "Here is the corrected text:", "Transcript:", or anything like that 5. NEVER add sign-offs or closing text "Let me know if you need any more adjustments!", or anything like that except the actual answer. 6. Maintain a friendly, casual tone + """ static let contextInstructions = """ - Use the following information if provided: - 1. Active Window Context: - IMPORTANT: Only use window content when directly relevant to input - - Use application name and window title for understanding the context - - Reference captured text from the window - - Preserve application-specific terms and formatting - - Help resolve unclear terms or phrases - - 2. Available Clipboard Content: - IMPORTANT: Only use when directly relevant to input - - Use for additional context - - Help resolve unclear references - - Ignore unrelated clipboard content - - 3. Examples: - - Follow the correction patterns shown in examples - - Match the formatting style of similar texts - - Use consistent terminology with examples - - Learn from previous corrections + + Your task is to work ONLY with the content within the tags. + + IMPORTANT: The information in section is ONLY for reference. + - NEVER include the context directly in your output + - Context should only help you better understand the user's query + + """ }