Improve AI prompts and enhancement service handling

2025-03-27 20:06:51 +05:45 · 2025-03-27 20:06:51 +05:45 · 8341dda787
commit 8341dda787
parent 9a3fc048b1
3 changed files with 60 additions and 63 deletions
--- a/VoiceInk/Models/PredefinedPrompts.swift
+++ b/VoiceInk/Models/PredefinedPrompts.swift
@@ -1,11 +1,12 @@
 import Foundation
+import SwiftUI    // Makes SwiftUI types available in this file (none are used in the visible changes; TODO confirm the import is needed)

 enum PredefinedPrompts {
    private static let predefinedPromptsKey = "PredefinedPrompts"
    
    // Static UUIDs for predefined prompts
    private static let defaultPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000001")!
-    private static let assistantPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000002")!
+    static let assistantPromptId = UUID(uuidString: "00000000-0000-0000-0000-000000000002")!
    
    static var all: [CustomPrompt] {
        // Always return the latest predefined prompts from source code
@@ -65,41 +66,15 @@ enum PredefinedPrompts {
                Here is the transcribed text: 
                """,
                icon: .sealedFill,
-                description: "Defeault mode to improved clarity and accuracy of the transcription",
+                description: "Default mode to improved clarity and accuracy of the transcription",
                isPredefined: true
            ),
            
            CustomPrompt(
                id: assistantPromptId,
                title: "Assistant",
-                promptText: """
-                Provide a direct clear, and concise reply to the user's query. Use the available context if directly related to the user's query. 
-                Remember to:
-                1. Be helpful and informative
-                2. Be accurate and precise
-                3. Don't add  meta commentary or anything extra other than the actual answer
-                6. Maintain a friendly, casual tone
-
-                Use the following information if provided:
-                1. Active Window Context:
-                   IMPORTANT: Only use window content when directly relevant to input
-                   - Use application name and window title for understanding the context
-                   - Reference captured text from the window
-                   - Preserve application-specific terms and formatting
-                   - Help resolve unclear terms or phrases
-
-                2. Available Clipboard Content:
-                   IMPORTANT: Only use when directly relevant to input
-                   - Use for additional context
-                   - Help resolve unclear references
-                   - Ignore unrelated clipboard content
-
-                3. Examples:
-                   - Follow the correction patterns shown in examples
-                   - Match the formatting style of similar texts
-                   - Use consistent terminology with examples
-                   - Learn from previous corrections
-                """,
+                // Combine assistant mode prompt with context instructions
+                promptText: AIPrompts.assistantMode + "\n\n" + AIPrompts.contextInstructions,
                icon: .chatFill,
                description: "AI assistant that provides direct answers to queries",
                isPredefined: true
--- a/VoiceInk/Services/AIEnhancementService.swift
+++ b/VoiceInk/Services/AIEnhancementService.swift
@@ -173,7 +173,6 @@ class AIEnhancementService: ObservableObject {
                              !clipboardText.isEmpty {
            """
            
-            Context Awareness
            Available Clipboard Context: \(clipboardText)
            """
        } else {
@@ -192,16 +191,39 @@ class AIEnhancementService: ObservableObject {
            ""
        }
        
+        // Format all context information together with instructions
+        let contextSection = if !clipboardContext.isEmpty || !screenCaptureContext.isEmpty {
+            """
+            
+            \(AIPrompts.contextInstructions)
+            
+            <CONTEXT_INFORMATION>
+            \(clipboardContext)
+            \(screenCaptureContext)
+            </CONTEXT_INFORMATION>
+            """
+        } else {
+            ""
+        }
+        
        switch mode {
        case .transcriptionEnhancement:
-            // Always use activePrompt since we've removed the toggle
+            // Check if the active prompt is the Assistant prompt
+            if let activePrompt = activePrompt,
+               activePrompt.id == PredefinedPrompts.assistantPromptId {
+                // The predefined Assistant prompt bypasses customPromptTemplate:
+                // its system message is AIPrompts.assistantMode followed by the context section
+                return AIPrompts.assistantMode + contextSection
+            }
+            
+            // For all other prompts, use the custom prompt template
            var systemMessage = String(format: AIPrompts.customPromptTemplate, activePrompt!.promptText)
-            systemMessage += "\n\n" + AIPrompts.contextInstructions
-            systemMessage += clipboardContext + screenCaptureContext
+            systemMessage += contextSection
            return systemMessage

        case .aiAssistant:
-            return AIPrompts.assistantMode + clipboardContext + screenCaptureContext
+            // For AI assistant mode, use the assistant mode prompt directly
+            return AIPrompts.assistantMode + contextSection
        }
    }
    
@@ -216,6 +238,9 @@ class AIEnhancementService: ObservableObject {
            throw EnhancementError.emptyText
        }
        
+        // Format transcript with boundary markers
+        let formattedText = "<TRANSCRIPT>\n\(text)\n</TRANSCRIPT>"
+        
        // Determine mode and get system message
        let mode = determineMode(text: text)
        let systemMessage = getSystemMessage(for: mode)
@@ -224,9 +249,9 @@ class AIEnhancementService: ObservableObject {
        if aiService.selectedProvider == .ollama {
            logger.notice("📤 Request to Ollama")
            logger.notice("🤖 System: \(systemMessage, privacy: .public)")
-            logger.notice("📝 Sending: \(text, privacy: .public)")
+            logger.notice("📝 Sending: \(formattedText, privacy: .public)")
            do {
-                let result = try await aiService.enhanceWithOllama(text: text, systemPrompt: systemMessage)
+                let result = try await aiService.enhanceWithOllama(text: formattedText, systemPrompt: systemMessage)
                logger.notice("✅ Ollama enhancement successful")
                logger.notice("📝 Received: \(result, privacy: .public)")
                return result
@@ -274,7 +299,7 @@ class AIEnhancementService: ObservableObject {
                    [
                        "parts": [
                            ["text": systemMessage],
-                            ["text": "Transcript:\n\(text)"]
+                            ["text": formattedText]
                        ]
                    ]
                ],
@@ -288,7 +313,7 @@ class AIEnhancementService: ObservableObject {
            do {
                logger.notice("📤 Request to Gemini")
                logger.notice("🤖 System: \(systemMessage, privacy: .public)")
-                logger.notice("📝 Sending: \(text, privacy: .public)")
+                logger.notice("📝 Sending: \(formattedText, privacy: .public)")
                let (data, response) = try await URLSession.shared.data(for: request)
                
                guard let httpResponse = response as? HTTPURLResponse else {
@@ -349,7 +374,7 @@ class AIEnhancementService: ObservableObject {
                "max_tokens": 1024,
                "system": systemMessage,
                "messages": [
-                    ["role": "user", "content": text]
+                    ["role": "user", "content": formattedText]
                ]
            ]
            
@@ -367,7 +392,7 @@ class AIEnhancementService: ObservableObject {
            do {
                logger.notice("📤 Request to Anthropic")
                logger.notice("🤖 System: \(systemMessage, privacy: .public)")
-                logger.notice("📝 Sending: \(text, privacy: .public)")
+                logger.notice("📝 Sending: \(formattedText, privacy: .public)")
                let (data, response) = try await URLSession.shared.data(for: request)
                
                guard let httpResponse = response as? HTTPURLResponse else {
@@ -435,7 +460,7 @@ class AIEnhancementService: ObservableObject {
            
            let messages: [[String: Any]] = [
                ["role": "system", "content": systemMessage],
-                ["role": "user", "content": "Transcript:\n\(text)"]
+                ["role": "user", "content": formattedText]
            ]
            
            logger.info("Making request to \(self.aiService.selectedProvider.rawValue) with text length: \(text.count) characters")
@@ -454,7 +479,7 @@ class AIEnhancementService: ObservableObject {
            do {
                logger.notice("📤 Request to \(self.aiService.selectedProvider.rawValue, privacy: .public)")
                logger.notice("🤖 System: \(systemMessage, privacy: .public)")
-                logger.notice("📝 Sending: \(text, privacy: .public)")
+                logger.notice("📝 Sending: \(formattedText, privacy: .public)")
                let (data, response) = try await URLSession.shared.data(for: request)
                
                guard let httpResponse = response as? HTTPURLResponse else {
--- a/VoiceInk/Services/AIPrompts.swift
+++ b/VoiceInk/Services/AIPrompts.swift
@@ -1,11 +1,18 @@
 enum AIPrompts {
    static let customPromptTemplate = """
-    Reformat the input message according to the given guidelines:
+    <SYSTEM_INSTRUCTIONS>
+    Your task is to reformat and enhance the text provided within <TRANSCRIPT> tags according to the following guidelines:

    %@
+
+    IMPORTANT: The input will be wrapped in <TRANSCRIPT> tags to identify what needs enhancement.
+    Your response should ONLY be to enhance text WITHOUT any tags.
+    DO NOT include <TRANSCRIPT> tags in your response.
+    </SYSTEM_INSTRUCTIONS>
    """
    
    static let assistantMode = """
+    <SYSTEM_INSTRUCTIONS>
    Provide a direct clear, and concise reply to the user's query. Use the available context if directly related to the user's query. 
    Remember to:
    1. Be helpful and informative
@@ -14,27 +21,17 @@ enum AIPrompts {
    4. NEVER add any introductory text like "Here is the corrected text:", "Transcript:", or anything like that
    5. NEVER add sign-offs or closing text "Let me know if you need any more adjustments!", or anything like that except the actual answer.
    6. Maintain a friendly, casual tone
+    </SYSTEM_INSTRUCTIONS>
    """
    
    static let contextInstructions = """
-    Use the following information if provided:
-    1. Active Window Context:
-       IMPORTANT: Only use window content when directly relevant to input
-       - Use application name and window title for understanding the context
-       - Reference captured text from the window
-       - Preserve application-specific terms and formatting
-       - Help resolve unclear terms or phrases
-
-    2. Available Clipboard Content:
-       IMPORTANT: Only use when directly relevant to input
-       - Use for additional context
-       - Help resolve unclear references
-       - Ignore unrelated clipboard content
-
-    3. Examples:
-       - Follow the correction patterns shown in examples
-       - Match the formatting style of similar texts
-       - Use consistent terminology with examples
-       - Learn from previous corrections
+    <CONTEXT_USAGE_INSTRUCTIONS>
+    Your task is to work ONLY with the content within the <TRANSCRIPT> tags.
+    
+    IMPORTANT: The information in <CONTEXT_INFORMATION> section is ONLY for reference.
+    - NEVER include the context directly in your output
+    - Context should only help you better understand the user's query
+    
+    </CONTEXT_USAGE_INSTRUCTIONS>
    """
 }