import Foundation import SwiftUI import AVFoundation import SwiftData import os @MainActor class AudioTranscriptionService: ObservableObject { @Published var isTranscribing = false @Published var currentError: TranscriptionError? private let modelContext: ModelContext private let enhancementService: AIEnhancementService? private let whisperState: WhisperState private let promptDetectionService = PromptDetectionService() private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionService") private let serviceRegistry: TranscriptionServiceRegistry enum TranscriptionError: Error { case noAudioFile case transcriptionFailed case modelNotLoaded case invalidAudioFormat } init(modelContext: ModelContext, whisperState: WhisperState) { self.modelContext = modelContext self.whisperState = whisperState self.enhancementService = whisperState.enhancementService self.serviceRegistry = TranscriptionServiceRegistry(whisperState: whisperState, modelsDirectory: whisperState.modelsDirectory) } func retranscribeAudio(from url: URL, using model: any TranscriptionModel) async throws -> Transcription { guard FileManager.default.fileExists(atPath: url.path) else { throw TranscriptionError.noAudioFile } await MainActor.run { isTranscribing = true } do { let transcriptionStart = Date() var text = try await serviceRegistry.transcribe(audioURL: url, model: model) let transcriptionDuration = Date().timeIntervalSince(transcriptionStart) text = TranscriptionOutputFilter.filter(text) text = text.trimmingCharacters(in: .whitespacesAndNewlines) let powerModeManager = PowerModeManager.shared let activePowerModeConfig = powerModeManager.currentActiveConfiguration let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true { text = WhisperTextFormatter.format(text) } text = WordReplacementService.shared.applyReplacements(to: text, using: modelContext) logger.notice("✅ Word replacements applied") let audioAsset = AVURLAsset(url: url) let duration = CMTimeGetSeconds(try await audioAsset.load(.duration)) let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] .appendingPathComponent("com.prakashjoshipax.VoiceInk") .appendingPathComponent("Recordings") let fileName = "retranscribed_\(UUID().uuidString).wav" let permanentURL = recordingsDirectory.appendingPathComponent(fileName) do { try FileManager.default.copyItem(at: url, to: permanentURL) } catch { logger.error("❌ Failed to create permanent copy of audio: \(error.localizedDescription)") isTranscribing = false throw error } let permanentURLString = permanentURL.absoluteString // Apply prompt detection for trigger words let originalText = text var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil if let enhancementService = enhancementService, enhancementService.isConfigured { let detectionResult = await promptDetectionService.analyzeText(text, with: enhancementService) promptDetectionResult = detectionResult await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService) } // Apply AI enhancement if enabled if let enhancementService = enhancementService, enhancementService.isEnhancementEnabled, enhancementService.isConfigured { do { let textForAI = promptDetectionResult?.processedText ?? text let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI) let newTranscription = Transcription( text: originalText, duration: duration, enhancedText: enhancedText, audioFileURL: permanentURLString, transcriptionModelName: model.displayName, aiEnhancementModelName: enhancementService.getAIService()?.currentModel, promptName: promptName, transcriptionDuration: transcriptionDuration, enhancementDuration: enhancementDuration, aiRequestSystemMessage: enhancementService.lastSystemMessageSent, aiRequestUserMessage: enhancementService.lastUserMessageSent, powerModeName: powerModeName, powerModeEmoji: powerModeEmoji ) modelContext.insert(newTranscription) do { try modelContext.save() NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription) } catch { logger.error("❌ Failed to save transcription: \(error.localizedDescription)") } // Restore original prompt settings if AI was temporarily enabled if let result = promptDetectionResult, result.shouldEnableAI { await promptDetectionService.restoreOriginalSettings(result, to: enhancementService) } await MainActor.run { isTranscribing = false } return newTranscription } catch { let newTranscription = Transcription( text: originalText, duration: duration, audioFileURL: permanentURLString, transcriptionModelName: model.displayName, promptName: nil, transcriptionDuration: transcriptionDuration, powerModeName: powerModeName, powerModeEmoji: powerModeEmoji ) modelContext.insert(newTranscription) do { try modelContext.save() NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription) } catch { logger.error("❌ Failed to save transcription: \(error.localizedDescription)") } await MainActor.run { isTranscribing = false } return newTranscription } } else { let newTranscription = Transcription( text: originalText, duration: duration, audioFileURL: permanentURLString, transcriptionModelName: model.displayName, promptName: nil, transcriptionDuration: transcriptionDuration, powerModeName: powerModeName, powerModeEmoji: powerModeEmoji ) modelContext.insert(newTranscription) do { try modelContext.save() } catch { logger.error("❌ Failed to save transcription: \(error.localizedDescription)") } await MainActor.run { isTranscribing = false } return newTranscription } } catch { logger.error("❌ Transcription failed: \(error.localizedDescription)") currentError = .transcriptionFailed isTranscribing = false throw error } } }