diff --git a/VoiceInk/Models/Transcription.swift b/VoiceInk/Models/Transcription.swift index 3b76994..257da10 100644 --- a/VoiceInk/Models/Transcription.swift +++ b/VoiceInk/Models/Transcription.swift @@ -1,6 +1,12 @@ import Foundation import SwiftData +enum TranscriptionStatus: String, Codable { + case pending + case completed + case failed +} + @Model final class Transcription { var id: UUID @@ -18,6 +24,7 @@ final class Transcription { var aiRequestUserMessage: String? var powerModeName: String? var powerModeEmoji: String? + var transcriptionStatus: String? init(text: String, duration: TimeInterval, @@ -31,7 +38,8 @@ final class Transcription { aiRequestSystemMessage: String? = nil, aiRequestUserMessage: String? = nil, powerModeName: String? = nil, - powerModeEmoji: String? = nil) { + powerModeEmoji: String? = nil, + transcriptionStatus: TranscriptionStatus = .pending) { self.id = UUID() self.text = text self.enhancedText = enhancedText @@ -47,5 +55,6 @@ final class Transcription { self.aiRequestUserMessage = aiRequestUserMessage self.powerModeName = powerModeName self.powerModeEmoji = powerModeEmoji + self.transcriptionStatus = transcriptionStatus.rawValue } } diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index 8671b74..d9ff183 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -136,7 +136,20 @@ class WhisperState: NSObject, ObservableObject { await recorder.stopRecording() if let recordedFile { if !shouldCancelRecording { - await transcribeAudio(recordedFile) + let audioAsset = AVURLAsset(url: recordedFile) + let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0 + + let transcription = Transcription( + text: "", + duration: duration, + audioFileURL: recordedFile.absoluteString, + transcriptionStatus: .pending + ) + modelContext.insert(transcription) + try? modelContext.save() + NotificationCenter.default.post(name: .transcriptionCreated, object: transcription) + + await transcribeAudio(on: transcription) } else { await MainActor.run { recordingState = .idle @@ -215,7 +228,18 @@ class WhisperState: NSObject, ObservableObject { response(true) } - private func transcribeAudio(_ url: URL) async { + private func transcribeAudio(on transcription: Transcription) async { + guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else { + logger.error("❌ Invalid audio file URL in transcription object.") + await MainActor.run { + recordingState = .idle + } + transcription.text = "Transcription Failed: Invalid audio file URL" + transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue + try? modelContext.save() + return + } + if shouldCancelRecording { await MainActor.run { recordingState = .idle @@ -223,11 +247,11 @@ class WhisperState: NSObject, ObservableObject { await cleanupModelResources() return } - + await MainActor.run { recordingState = .transcribing } - + // Play stop sound when transcription starts with a small delay Task { let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled") @@ -238,7 +262,7 @@ class WhisperState: NSObject, ObservableObject { SoundManager.shared.playStopSound() } } - + defer { if shouldCancelRecording { Task { @@ -246,9 +270,12 @@ class WhisperState: NSObject, ObservableObject { } } } - + logger.notice("🔄 Starting transcription...") + var finalPastedText: String? + var promptDetectionResult: PromptDetectionService.PromptDetectionResult? + do { guard let model = currentTranscriptionModel else { throw WhisperStateError.transcriptionFailed @@ -275,9 +302,9 @@ class WhisperState: NSObject, ObservableObject { let activePowerModeConfig = powerModeManager.currentActiveConfiguration let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil - + if await checkCancellationAndCleanup() { return } - + text = text.trimmingCharacters(in: .whitespacesAndNewlines) if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true { @@ -287,62 +314,43 @@ class WhisperState: NSObject, ObservableObject { if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") { text = WordReplacementService.shared.applyReplacements(to: text) } - + let audioAsset = AVURLAsset(url: url) let actualDuration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0 - var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil - let originalText = text + + transcription.text = text + transcription.duration = actualDuration + transcription.transcriptionModelName = model.displayName + transcription.transcriptionDuration = transcriptionDuration + transcription.powerModeName = powerModeName + transcription.powerModeEmoji = powerModeEmoji + finalPastedText = text if let enhancementService = enhancementService, enhancementService.isConfigured { let detectionResult = promptDetectionService.analyzeText(text, with: enhancementService) promptDetectionResult = detectionResult await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService) } - + if let enhancementService = enhancementService, enhancementService.isEnhancementEnabled, enhancementService.isConfigured { - do { - if await checkCancellationAndCleanup() { return } + if await checkCancellationAndCleanup() { return } - await MainActor.run { self.recordingState = .enhancing } - let textForAI = promptDetectionResult?.processedText ?? text + await MainActor.run { self.recordingState = .enhancing } + let textForAI = promptDetectionResult?.processedText ?? text + + do { let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI) - let newTranscription = Transcription( - text: originalText, - duration: actualDuration, - enhancedText: enhancedText, - audioFileURL: url.absoluteString, - transcriptionModelName: model.displayName, - aiEnhancementModelName: enhancementService.getAIService()?.currentModel, - promptName: promptName, - transcriptionDuration: transcriptionDuration, - enhancementDuration: enhancementDuration, - aiRequestSystemMessage: enhancementService.lastSystemMessageSent, - aiRequestUserMessage: enhancementService.lastUserMessageSent, - powerModeName: powerModeName, - powerModeEmoji: powerModeEmoji - ) - modelContext.insert(newTranscription) - try? modelContext.save() - NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription) - text = enhancedText + transcription.enhancedText = enhancedText + transcription.aiEnhancementModelName = enhancementService.getAIService()?.currentModel + transcription.promptName = promptName + transcription.enhancementDuration = enhancementDuration + transcription.aiRequestSystemMessage = enhancementService.lastSystemMessageSent + transcription.aiRequestUserMessage = enhancementService.lastUserMessageSent + finalPastedText = enhancedText } catch { - let newTranscription = Transcription( - text: originalText, - duration: actualDuration, - enhancedText: "Enhancement failed: \(error)", - audioFileURL: url.absoluteString, - transcriptionModelName: model.displayName, - promptName: nil, - transcriptionDuration: transcriptionDuration, - powerModeName: powerModeName, - powerModeEmoji: powerModeEmoji - ) - modelContext.insert(newTranscription) - try? modelContext.save() - NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription) - + transcription.enhancedText = "Enhancement failed: \(error)" await MainActor.run { NotificationManager.shared.showNotification( title: "AI enhancement failed", @@ -350,38 +358,40 @@ class WhisperState: NSObject, ObservableObject { ) } } - } else { - let newTranscription = Transcription( - text: originalText, - duration: actualDuration, - audioFileURL: url.absoluteString, - transcriptionModelName: model.displayName, - promptName: nil, - transcriptionDuration: transcriptionDuration, - powerModeName: powerModeName, - powerModeEmoji: powerModeEmoji - ) - modelContext.insert(newTranscription) - try? modelContext.save() - NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription) } - + + transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue + + } catch { + let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription + let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? "" + let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)" + + transcription.text = "Transcription Failed: \(fullErrorText)" + transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue + } + + // --- Finalize and save --- + try? modelContext.save() + NotificationCenter.default.post(name: .transcriptionCreated, object: transcription) + + if var textToPaste = finalPastedText, transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue { if case .trialExpired = licenseViewModel.licenseState { - text = """ + textToPaste = """ Your trial has expired. Upgrade to VoiceInk Pro at tryvoiceink.com/buy - \n\(text) + \n\(textToPaste) """ } - + let shouldAddSpace = UserDefaults.standard.object(forKey: "AppendTrailingSpace") as? Bool ?? true if shouldAddSpace { - text += " " + textToPaste += " " } - + if await checkCancellationAndCleanup() { return } - + DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) { - CursorPaster.pasteAtCursor(text) + CursorPaster.pasteAtCursor(textToPaste) let powerMode = PowerModeManager.shared if let activeConfig = powerMode.currentActiveConfiguration, activeConfig.isAutoSendEnabled { @@ -391,52 +401,15 @@ class WhisperState: NSObject, ObservableObject { } } } - - if let result = promptDetectionResult, - let enhancementService = enhancementService, - result.shouldEnableAI { - await promptDetectionService.restoreOriginalSettings(result, to: enhancementService) - } - - await self.dismissMiniRecorder() - - } catch { - do { - let audioAsset = AVURLAsset(url: url) - let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0 - - await MainActor.run { - let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription - let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? "" - let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)" - - let failedTranscription = Transcription( - text: "Transcription Failed: \(fullErrorText)", - duration: duration, - enhancedText: nil, - audioFileURL: url.absoluteString, - promptName: nil, - powerModeName: nil, - powerModeEmoji: nil - ) - - modelContext.insert(failedTranscription) - try? modelContext.save() - NotificationCenter.default.post(name: .transcriptionCreated, object: failedTranscription) - } - } catch { - logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)") - } - - await MainActor.run { - NotificationManager.shared.showNotification( - title: "Transcription Failed", - type: .error - ) - } - - await self.dismissMiniRecorder() } + + if let result = promptDetectionResult, + let enhancementService = enhancementService, + result.shouldEnableAI { + await promptDetectionService.restoreOriginalSettings(result, to: enhancementService) + } + + await self.dismissMiniRecorder() } func getEnhancementService() -> AIEnhancementService? {