Improved the recording/transcription flow

2025-09-28 20:54:05 +05:45 · 2025-09-28 20:54:05 +05:45 · a37b95620c
commit a37b95620c
parent 63db449355
2 changed files with 100 additions and 118 deletions
--- a/VoiceInk/Models/Transcription.swift
+++ b/VoiceInk/Models/Transcription.swift
@ -1,6 +1,12 @@
 import Foundation
 import SwiftData

+enum TranscriptionStatus: String, Codable {
+    case pending
+    case completed
+    case failed
+}
+
@Model
 final class Transcription {
    var id: UUID
@ -18,6 +24,7 @@ final class Transcription {
    var aiRequestUserMessage: String?
    var powerModeName: String?
    var powerModeEmoji: String?
+    var transcriptionStatus: String?

    init(text: String,
         duration: TimeInterval,
@ -31,7 +38,8 @@ final class Transcription {
         aiRequestSystemMessage: String? = nil,
         aiRequestUserMessage: String? = nil,
         powerModeName: String? = nil,
-         powerModeEmoji: String? = nil) {
+         powerModeEmoji: String? = nil,
+         transcriptionStatus: TranscriptionStatus = .pending) {
        self.id = UUID()
        self.text = text
        self.enhancedText = enhancedText
@ -47,5 +55,6 @@ final class Transcription {
        self.aiRequestUserMessage = aiRequestUserMessage
        self.powerModeName = powerModeName
        self.powerModeEmoji = powerModeEmoji
+        self.transcriptionStatus = transcriptionStatus.rawValue
    }
 }
--- a/VoiceInk/Whisper/WhisperState.swift
+++ b/VoiceInk/Whisper/WhisperState.swift
@ -136,7 +136,20 @@ class WhisperState: NSObject, ObservableObject {
            await recorder.stopRecording()
            if let recordedFile {
                if !shouldCancelRecording {
-                    await transcribeAudio(recordedFile)
+                    let audioAsset = AVURLAsset(url: recordedFile)
+                    let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
+
+                    let transcription = Transcription(
+                        text: "",
+                        duration: duration,
+                        audioFileURL: recordedFile.absoluteString,
+                        transcriptionStatus: .pending
+                    )
+                    modelContext.insert(transcription)
+                    try? modelContext.save()
+                    NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
+
+                    await transcribeAudio(on: transcription)
                } else {
                    await MainActor.run {
                        recordingState = .idle
@ -215,7 +228,18 @@ class WhisperState: NSObject, ObservableObject {
        response(true)
    }
    
-    private func transcribeAudio(_ url: URL) async {
+    private func transcribeAudio(on transcription: Transcription) async {
+        guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else {
+            logger.error("❌ Invalid audio file URL in transcription object.")
+            await MainActor.run {
+                recordingState = .idle
+            }
+            transcription.text = "Transcription Failed: Invalid audio file URL"
+            transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
+            try? modelContext.save()
+            return
+        }
+
        if shouldCancelRecording {
            await MainActor.run {
                recordingState = .idle
@ -223,11 +247,11 @@ class WhisperState: NSObject, ObservableObject {
            await cleanupModelResources()
            return
        }
-        
+
        await MainActor.run {
            recordingState = .transcribing
        }
-        
+
        // Play stop sound when transcription starts with a small delay
        Task {
            let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled")
@ -238,7 +262,7 @@ class WhisperState: NSObject, ObservableObject {
                SoundManager.shared.playStopSound()
            }
        }
-        
+
        defer {
            if shouldCancelRecording {
                Task {
@ -246,9 +270,12 @@ class WhisperState: NSObject, ObservableObject {
                }
            }
        }
-        
+
        logger.notice("🔄 Starting transcription...")
        
+        var finalPastedText: String?
+        var promptDetectionResult: PromptDetectionService.PromptDetectionResult?
+
        do {
            guard let model = currentTranscriptionModel else {
                throw WhisperStateError.transcriptionFailed
@ -275,9 +302,9 @@ class WhisperState: NSObject, ObservableObject {
            let activePowerModeConfig = powerModeManager.currentActiveConfiguration
            let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
            let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
-            
+
            if await checkCancellationAndCleanup() { return }
-            
+
            text = text.trimmingCharacters(in: .whitespacesAndNewlines)

            if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
@ -287,62 +314,43 @@ class WhisperState: NSObject, ObservableObject {
            if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") {
                text = WordReplacementService.shared.applyReplacements(to: text)
            }
-            
+
            let audioAsset = AVURLAsset(url: url)
            let actualDuration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
-            var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil
-            let originalText = text
+            
+            transcription.text = text
+            transcription.duration = actualDuration
+            transcription.transcriptionModelName = model.displayName
+            transcription.transcriptionDuration = transcriptionDuration
+            transcription.powerModeName = powerModeName
+            transcription.powerModeEmoji = powerModeEmoji
+            finalPastedText = text
            
            if let enhancementService = enhancementService, enhancementService.isConfigured {
                let detectionResult = promptDetectionService.analyzeText(text, with: enhancementService)
                promptDetectionResult = detectionResult
                await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService)
            }
-            
+
            if let enhancementService = enhancementService,
               enhancementService.isEnhancementEnabled,
               enhancementService.isConfigured {
-                do {
-                    if await checkCancellationAndCleanup() { return }
+                if await checkCancellationAndCleanup() { return }

-                    await MainActor.run { self.recordingState = .enhancing }
-                    let textForAI = promptDetectionResult?.processedText ?? text
+                await MainActor.run { self.recordingState = .enhancing }
+                let textForAI = promptDetectionResult?.processedText ?? text
+                
+                do {
                    let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
-                    let newTranscription = Transcription(
-                        text: originalText,
-                        duration: actualDuration,
-                        enhancedText: enhancedText,
-                        audioFileURL: url.absoluteString,
-                        transcriptionModelName: model.displayName,
-                        aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
-                        promptName: promptName,
-                        transcriptionDuration: transcriptionDuration,
-                        enhancementDuration: enhancementDuration,
-                        aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
-                        aiRequestUserMessage: enhancementService.lastUserMessageSent,
-                        powerModeName: powerModeName,
-                        powerModeEmoji: powerModeEmoji
-                    )
-                    modelContext.insert(newTranscription)
-                    try? modelContext.save()
-                    NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
-                    text = enhancedText
+                    transcription.enhancedText = enhancedText
+                    transcription.aiEnhancementModelName = enhancementService.getAIService()?.currentModel
+                    transcription.promptName = promptName
+                    transcription.enhancementDuration = enhancementDuration
+                    transcription.aiRequestSystemMessage = enhancementService.lastSystemMessageSent
+                    transcription.aiRequestUserMessage = enhancementService.lastUserMessageSent
+                    finalPastedText = enhancedText
                } catch {
-                    let newTranscription = Transcription(
-                        text: originalText,
-                        duration: actualDuration,
-                        enhancedText: "Enhancement failed: \(error)",
-                        audioFileURL: url.absoluteString,
-                        transcriptionModelName: model.displayName,
-                        promptName: nil,
-                        transcriptionDuration: transcriptionDuration,
-                        powerModeName: powerModeName,
-                        powerModeEmoji: powerModeEmoji
-                    )
-                    modelContext.insert(newTranscription)
-                    try? modelContext.save()
-                    NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
-                    
+                    transcription.enhancedText = "Enhancement failed: \(error)"
                    await MainActor.run {
                        NotificationManager.shared.showNotification(
                            title: "AI enhancement failed",
@ -350,38 +358,40 @@ class WhisperState: NSObject, ObservableObject {
                        )
                    }
                }
-            } else {
-                let newTranscription = Transcription(
-                    text: originalText,
-                    duration: actualDuration,
-                    audioFileURL: url.absoluteString,
-                    transcriptionModelName: model.displayName,
-                    promptName: nil,
-                    transcriptionDuration: transcriptionDuration,
-                    powerModeName: powerModeName,
-                    powerModeEmoji: powerModeEmoji
-                )
-                modelContext.insert(newTranscription)
-                try? modelContext.save()
-                NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
            }
-            
+
+            transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue
+
+        } catch {
+            let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
+            let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
+            let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
+
+            transcription.text = "Transcription Failed: \(fullErrorText)"
+            transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
+        }
+
+        // --- Finalize and save ---
+        try? modelContext.save()
+        NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
+        
+        if var textToPaste = finalPastedText, transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue {
            if case .trialExpired = licenseViewModel.licenseState {
-                text = """
+                textToPaste = """
                    Your trial has expired. Upgrade to VoiceInk Pro at tryvoiceink.com/buy
-                    \n\(text)
+                    \n\(textToPaste)
                    """
            }
-
+            
            let shouldAddSpace = UserDefaults.standard.object(forKey: "AppendTrailingSpace") as? Bool ?? true
            if shouldAddSpace {
-                text += " "
+                textToPaste += " "
            }
-
+            
            if await checkCancellationAndCleanup() { return }
-
+            
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
-                CursorPaster.pasteAtCursor(text)
+                CursorPaster.pasteAtCursor(textToPaste)

                let powerMode = PowerModeManager.shared
                if let activeConfig = powerMode.currentActiveConfiguration, activeConfig.isAutoSendEnabled {
@ -391,52 +401,15 @@ class WhisperState: NSObject, ObservableObject {
                    }
                }
            }
-            
-            if let result = promptDetectionResult,
-               let enhancementService = enhancementService,
-               result.shouldEnableAI {
-                await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
-            }
-            
-            await self.dismissMiniRecorder()
-            
-        } catch {
-            do {
-                let audioAsset = AVURLAsset(url: url)
-                let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
-                
-                await MainActor.run {
-                    let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
-                    let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
-                    let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
-                    
-                    let failedTranscription = Transcription(
-                        text: "Transcription Failed: \(fullErrorText)",
-                        duration: duration,
-                        enhancedText: nil,
-                        audioFileURL: url.absoluteString,
-                        promptName: nil,
-                        powerModeName: nil,
-                        powerModeEmoji: nil
-                    )
-                    
-                    modelContext.insert(failedTranscription)
-                    try? modelContext.save()
-                    NotificationCenter.default.post(name: .transcriptionCreated, object: failedTranscription)
-                }
-            } catch {
-                logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)")
-            }
-            
-            await MainActor.run {
-                NotificationManager.shared.showNotification(
-                    title: "Transcription Failed",
-                    type: .error
-                )
-            }
-            
-            await self.dismissMiniRecorder()
        }
+        
+        if let result = promptDetectionResult,
+           let enhancementService = enhancementService,
+           result.shouldEnableAI {
+            await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
+        }
+        
+        await self.dismissMiniRecorder()
    }

    func getEnhancementService() -> AIEnhancementService? {