import Foundation
import SwiftUI
import AVFoundation
import SwiftData
import AppKit
import KeyboardShortcuts
import os

// MARK: - Recording State Machine

/// The lifecycle states of a dictation session. `Equatable` so views and
/// guards can compare directly (e.g. `recordingState == .recording`).
enum RecordingState: Equatable {
    case idle
    case recording
    case transcribing
    case enhancing
    case busy
}

/// Central coordinator for the record → transcribe → enhance → paste pipeline.
///
/// Owns the audio recorder, the currently selected transcription model, and the
/// persistence of `Transcription` records via SwiftData. Isolated to the main
/// actor; background work is dispatched through `Task` and `await`.
///
/// NOTE(review): several methods referenced here (`showRecorderPanel`,
/// `hideRecorderPanel`, `setupNotifications`, `createModelsDirectoryIfNeeded`,
/// `loadAvailableModels`, `loadCurrentTranscriptionModel`,
/// `refreshAllAvailableModels`, `loadModel`, `cleanupModelResources`,
/// `dismissMiniRecorder`) are not defined in this view of the file —
/// presumably provided by extensions elsewhere; confirm before refactoring.
@MainActor
class WhisperState: NSObject, ObservableObject {
    @Published var recordingState: RecordingState = .idle
    @Published var isModelLoaded = false
    @Published var loadedLocalModel: WhisperModel?
    @Published var currentTranscriptionModel: (any TranscriptionModel)?
    @Published var isModelLoading = false
    @Published var availableModels: [WhisperModel] = []
    @Published var allAvailableModels: [any TranscriptionModel] = PredefinedModels.models
    @Published var clipboardMessage = ""
    @Published var miniRecorderError: String?
    // Cooperative-cancellation flag: checked at multiple points during
    // stop/transcribe/enhance to abort the pipeline early.
    @Published var shouldCancelRecording = false

    // Persisted UI choice for which recorder UI to show ("mini" by default);
    // written back to UserDefaults on every change.
    @Published var recorderType: String = UserDefaults.standard.string(forKey: "RecorderType") ?? "mini" {
        didSet {
            UserDefaults.standard.set(recorderType, forKey: "RecorderType")
        }
    }

    // Visibility of the floating recorder panel; toggling drives the actual
    // show/hide of the panel window.
    @Published var isMiniRecorderVisible = false {
        didSet {
            if isMiniRecorderVisible {
                showRecorderPanel()
            } else {
                hideRecorderPanel()
            }
        }
    }

    var whisperContext: WhisperContext?
    let recorder = Recorder()
    // URL of the in-progress (or most recent) recording; nil when none.
    var recordedFile: URL? = nil
    let whisperPrompt = WhisperPrompt()

    // Prompt detection service for trigger word handling
    private let promptDetectionService = PromptDetectionService()

    let modelContext: ModelContext

    // Transcription Services
    // `localTranscriptionService` is an IUO because it needs `self` and is
    // assigned immediately after super.init() in the initializer below.
    private var localTranscriptionService: LocalTranscriptionService!
    private lazy var cloudTranscriptionService = CloudTranscriptionService()
    private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
    internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService()

    /// Locates the bundled base English whisper model, probing several
    /// plausible bundle locations; returns the first that exists on disk.
    private var modelUrl: URL? {
        let possibleURLs = [
            Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin", subdirectory: "Models"),
            Bundle.main.url(forResource: "ggml-base.en", withExtension: "bin"),
            Bundle.main.bundleURL.appendingPathComponent("Models/ggml-base.en.bin")
        ]
        for url in possibleURLs {
            if let url = url, FileManager.default.fileExists(atPath: url.path) {
                return url
            }
        }
        return nil
    }

    private enum LoadError: Error {
        case couldNotLocateModel
    }

    let modelsDirectory: URL
    let recordingsDirectory: URL
    let enhancementService: AIEnhancementService?
    var licenseViewModel: LicenseViewModel
    let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
    var notchWindowManager: NotchWindowManager?
    var miniWindowManager: MiniWindowManager?

    // For model progress tracking
    @Published var downloadProgress: [String: Double] = [:]
    @Published var parakeetDownloadStates: [String: Bool] = [:]

    /// Sets up storage directories under Application Support, wires the
    /// optional enhancement service into the power-mode session manager, and
    /// kicks off model discovery.
    /// - Parameters:
    ///   - modelContext: SwiftData context used to persist `Transcription`s.
    ///   - enhancementService: optional AI post-processing service.
    init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
        self.modelContext = modelContext
        let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0].appendingPathComponent("com.prakashjoshipax.VoiceInk")
        self.modelsDirectory = appSupportDirectory.appendingPathComponent("WhisperModels")
        self.recordingsDirectory = appSupportDirectory.appendingPathComponent("Recordings")
        self.enhancementService = enhancementService
        self.licenseViewModel = LicenseViewModel()
        super.init()
        // Configure the session manager
        if let enhancementService = enhancementService {
            PowerModeSessionManager.shared.configure(whisperState: self, enhancementService: enhancementService)
        }
        // Set the whisperState reference after super.init()
        self.localTranscriptionService = LocalTranscriptionService(modelsDirectory: self.modelsDirectory, whisperState: self)
        setupNotifications()
        createModelsDirectoryIfNeeded()
        createRecordingsDirectoryIfNeeded()
        loadAvailableModels()
        loadCurrentTranscriptionModel()
        refreshAllAvailableModels()
    }

    /// Creates the recordings directory (and intermediates) if missing.
    /// Failure is logged but non-fatal — recording will fail later instead.
    private func createRecordingsDirectoryIfNeeded() {
        do {
            try FileManager.default.createDirectory(at: recordingsDirectory, withIntermediateDirectories: true, attributes: nil)
        } catch {
            logger.error("Error creating recordings directory: \(error.localizedDescription)")
        }
    }

    /// Toggles the recording session.
    ///
    /// When currently recording: stops the recorder, persists a pending
    /// `Transcription` row for the captured file, then runs transcription —
    /// unless `shouldCancelRecording` is set, in which case state resets and
    /// model resources are released.
    ///
    /// When idle: verifies a model is selected, then (after the permission
    /// callback grants) starts recording into a permanent UUID-named .wav file
    /// and pre-loads the selected local/Parakeet model in the background.
    func toggleRecord() async {
        if recordingState == .recording {
            await recorder.stopRecording()
            if let recordedFile {
                if !shouldCancelRecording {
                    // Duration is best-effort; falls back to 0.0 if the asset
                    // cannot report it.
                    let audioAsset = AVURLAsset(url: recordedFile)
                    let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
                    let transcription = Transcription(
                        text: "",
                        duration: duration,
                        audioFileURL: recordedFile.absoluteString,
                        transcriptionStatus: .pending
                    )
                    // Persist the pending row first so observers (history UI,
                    // presumably) can show it before transcription finishes.
                    modelContext.insert(transcription)
                    try? modelContext.save()
                    NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
                    await transcribeAudio(on: transcription)
                } else {
                    // Cancelled mid-recording: discard the session.
                    await MainActor.run {
                        recordingState = .idle
                    }
                    await cleanupModelResources()
                }
            } else {
                logger.error("❌ No recorded file found after stopping recording")
                await MainActor.run {
                    recordingState = .idle
                }
            }
        } else {
            // Refuse to start without a selected transcription model.
            guard currentTranscriptionModel != nil else {
                await MainActor.run {
                    NotificationManager.shared.showNotification(
                        title: "No AI Model Selected",
                        type: .error
                    )
                }
                return
            }
            shouldCancelRecording = false
            requestRecordPermission { [self] granted in
                if granted {
                    Task {
                        do {
                            // --- Prepare permanent file URL ---
                            let fileName = "\(UUID().uuidString).wav"
                            let permanentURL = self.recordingsDirectory.appendingPathComponent(fileName)
                            self.recordedFile = permanentURL
                            try await self.recorder.startRecording(toOutputFile: permanentURL)
                            await MainActor.run {
                                self.recordingState = .recording
                            }
                            await ActiveWindowService.shared.applyConfigurationForCurrentApp()
                            // Only load model if it's a local model and not already loaded
                            if let model = self.currentTranscriptionModel, model.provider == .local {
                                if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }), self.whisperContext == nil {
                                    do {
                                        try await self.loadModel(localWhisperModel)
                                    } catch {
                                        // Load failure is logged, not fatal:
                                        // transcription will surface the error
                                        // later if the model is still missing.
                                        self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
                                    }
                                }
                            } else if let parakeetModel = self.currentTranscriptionModel as? ParakeetModel {
                                // Best-effort preload; errors deliberately ignored here.
                                try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel)
                            }
                            // Snapshot clipboard/screen context for the AI
                            // enhancement step while recording is in progress.
                            if let enhancementService = self.enhancementService {
                                enhancementService.captureClipboardContext()
                                await enhancementService.captureScreenContext()
                            }
                        } catch {
                            self.logger.error("❌ Failed to start recording: \(error.localizedDescription)")
                            await NotificationManager.shared.showNotification(title: "Recording failed to start", type: .error)
                            await self.dismissMiniRecorder()
                            // Do not remove the file on a failed start, to preserve all recordings.
                            self.recordedFile = nil
                        }
                    }
                } else {
                    logger.error("❌ Recording permission denied.")
                }
            }
        }
    }

    /// Invokes `response` with the microphone-permission decision.
    /// NOTE(review): currently always grants (`true`) — actual permission is
    /// presumably handled by the recorder/system prompt; confirm before
    /// relying on this gate.
    private func requestRecordPermission(response: @escaping (Bool) -> Void) {
        response(true)
    }

    /// Runs the full transcription pipeline for a persisted `Transcription`:
    /// validate URL → transcribe via the provider-matched service → filter,
    /// format and word-replace the text → optional AI enhancement → save,
    /// notify, and paste the final text at the cursor.
    ///
    /// Cancellation (`shouldCancelRecording`) is re-checked between stages via
    /// `checkCancellationAndCleanup()`; the `defer` guarantees model resources
    /// are released on any early return while cancelled.
    private func transcribeAudio(on transcription: Transcription) async {
        guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else {
            logger.error("❌ Invalid audio file URL in transcription object.")
            await MainActor.run {
                recordingState = .idle
            }
            transcription.text = "Transcription Failed: Invalid audio file URL"
            transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
            try? modelContext.save()
            return
        }
        if shouldCancelRecording {
            await MainActor.run {
                recordingState = .idle
            }
            await cleanupModelResources()
            return
        }
        await MainActor.run {
            recordingState = .transcribing
        }
        // Play stop sound when transcription starts with a small delay
        Task {
            let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled")
            if isSystemMuteEnabled {
                try? await Task.sleep(nanoseconds: 200_000_000) // 200 milliseconds delay
            }
            await MainActor.run {
                SoundManager.shared.playStopSound()
            }
        }
        // Runs on every exit path: if the user cancelled at any point,
        // release model resources.
        defer {
            if shouldCancelRecording {
                Task {
                    await cleanupModelResources()
                }
            }
        }
        logger.notice("🔄 Starting transcription...")
        var finalPastedText: String?
        var promptDetectionResult: PromptDetectionService.PromptDetectionResult?
        do {
            guard let model = currentTranscriptionModel else {
                throw WhisperStateError.transcriptionFailed
            }
            // Route to the service that matches the model's provider;
            // anything unrecognized falls through to the cloud service.
            let transcriptionService: TranscriptionService
            switch model.provider {
            case .local:
                transcriptionService = localTranscriptionService
            case .parakeet:
                transcriptionService = parakeetTranscriptionService
            case .nativeApple:
                transcriptionService = nativeAppleTranscriptionService
            default:
                transcriptionService = cloudTranscriptionService
            }
            let transcriptionStart = Date()
            var text = try await transcriptionService.transcribe(audioURL: url, model: model)
            logger.notice("📝 Raw transcript: \(text)")
            text = TranscriptionOutputFilter.filter(text)
            logger.notice("📝 Output filter result: \(text)")
            let transcriptionDuration = Date().timeIntervalSince(transcriptionStart)
            // Capture power-mode metadata only when a config is active+enabled.
            let powerModeManager = PowerModeManager.shared
            let activePowerModeConfig = powerModeManager.currentActiveConfiguration
            let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
            let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
            if await checkCancellationAndCleanup() {
                return
            }
            text = text.trimmingCharacters(in: .whitespacesAndNewlines)
            // Text formatting defaults to ON when the key was never set
            // (hence object(forKey:) rather than bool(forKey:)).
            if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
                text = WhisperTextFormatter.format(text)
                logger.notice("📝 Formatted transcript: \(text)")
            }
            if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") {
                text = WordReplacementService.shared.applyReplacements(to: text)
                logger.notice("📝 WordReplacement: \(text)")
            }
            // Re-measure duration from the file itself (best-effort).
            let audioAsset = AVURLAsset(url: url)
            let actualDuration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
            transcription.text = text
            transcription.duration = actualDuration
            transcription.transcriptionModelName = model.displayName
            transcription.transcriptionDuration = transcriptionDuration
            transcription.powerModeName = powerModeName
            transcription.powerModeEmoji = powerModeEmoji
            finalPastedText = text
            // Trigger-word detection may toggle enhancement settings; the
            // original settings are restored near the end of this method.
            if let enhancementService = enhancementService, enhancementService.isConfigured {
                let detectionResult = await promptDetectionService.analyzeText(text, with: enhancementService)
                promptDetectionResult = detectionResult
                await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService)
            }
            if let enhancementService = enhancementService, enhancementService.isEnhancementEnabled, enhancementService.isConfigured {
                if await checkCancellationAndCleanup() {
                    return
                }
                await MainActor.run {
                    self.recordingState = .enhancing
                }
                let textForAI = promptDetectionResult?.processedText ?? text
                do {
                    let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
                    logger.notice("📝 AI enhancement: \(enhancedText)")
                    transcription.enhancedText = enhancedText
                    transcription.aiEnhancementModelName = enhancementService.getAIService()?.currentModel
                    transcription.promptName = promptName
                    transcription.enhancementDuration = enhancementDuration
                    transcription.aiRequestSystemMessage = enhancementService.lastSystemMessageSent
                    transcription.aiRequestUserMessage = enhancementService.lastUserMessageSent
                    finalPastedText = enhancedText
                } catch {
                    // Enhancement failure is recorded but does not fail the
                    // transcription; the raw text is still pasted.
                    transcription.enhancedText = "Enhancement failed: \(error)"
                    if await checkCancellationAndCleanup() {
                        return
                    }
                }
            }
            transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue
        } catch {
            // Prefer LocalizedError details when available; fold the recovery
            // suggestion into the stored failure text.
            let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
            let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
            let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
            transcription.text = "Transcription Failed: \(fullErrorText)"
            transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
        }
        // --- Finalize and save ---
        try? modelContext.save()
        if transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue {
            NotificationCenter.default.post(name: .transcriptionCompleted, object: transcription)
        }
        if await checkCancellationAndCleanup() {
            return
        }
        // Paste only on success; trial-expired builds prepend an upgrade notice.
        if var textToPaste = finalPastedText, transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue {
            if case .trialExpired = licenseViewModel.licenseState {
                textToPaste = """
                Your trial has expired. Upgrade to VoiceInk Pro at tryvoiceink.com/buy
                \n\(textToPaste)
                """
            }
            // Trailing space defaults to ON when the key was never set.
            let shouldAddSpace = UserDefaults.standard.object(forKey: "AppendTrailingSpace") as? Bool ?? true
            if shouldAddSpace {
                textToPaste += " "
            }
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
                CursorPaster.pasteAtCursor(textToPaste)
                let powerMode = PowerModeManager.shared
                if let activeConfig = powerMode.currentActiveConfiguration, activeConfig.isAutoSendEnabled {
                    // Slight delay to ensure the paste operation completes
                    DispatchQueue.main.asyncAfter(deadline: .now() + 0.2) {
                        CursorPaster.pressEnter()
                    }
                }
            }
        }
        // Undo any settings the trigger-word detection changed earlier.
        if let result = promptDetectionResult, let enhancementService = enhancementService, result.shouldEnableAI {
            await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
        }
        await self.dismissMiniRecorder()
        shouldCancelRecording = false
    }

    /// Accessor for the optional enhancement service injected at init.
    func getEnhancementService() -> AIEnhancementService? {
        return enhancementService
    }

    /// Returns `true` (after releasing model resources) when the user has
    /// requested cancellation; callers treat `true` as "stop the pipeline".
    private func checkCancellationAndCleanup() async -> Bool {
        if shouldCancelRecording {
            await cleanupModelResources()
            return true
        }
        return false
    }

    /// Convenience wrapper around dismissing the mini recorder UI.
    private func cleanupAndDismiss() async {
        await dismissMiniRecorder()
    }
}