diff --git a/VoiceInk/Services/LocalTranscriptionService.swift b/VoiceInk/Services/LocalTranscriptionService.swift index 8c61860..f8dbe76 100644 --- a/VoiceInk/Services/LocalTranscriptionService.swift +++ b/VoiceInk/Services/LocalTranscriptionService.swift @@ -62,7 +62,13 @@ class LocalTranscriptionService: TranscriptionService { await whisperContext.setPrompt(currentPrompt) // Transcribe - await whisperContext.fullTranscribe(samples: data) + let success = await whisperContext.fullTranscribe(samples: data) + + guard success else { + logger.error("Core transcription engine failed (whisper_full).") + throw WhisperStateError.whisperCoreFailed + } + var text = await whisperContext.getTranscription() if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true { diff --git a/VoiceInk/Whisper/LibWhisper.swift b/VoiceInk/Whisper/LibWhisper.swift index c472c78..7ab6547 100644 --- a/VoiceInk/Whisper/LibWhisper.swift +++ b/VoiceInk/Whisper/LibWhisper.swift @@ -13,6 +13,7 @@ actor WhisperContext { private var languageCString: [CChar]? private var prompt: String? private var promptCString: [CChar]? + private var vadModelPath: String? private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperContext") private init() {} @@ -27,12 +28,13 @@ actor WhisperContext { } } - func fullTranscribe(samples: [Float]) async { - guard let context = context else { return } + func fullTranscribe(samples: [Float]) -> Bool { + guard let context = context else { return false } let maxThreads = max(1, min(8, cpuCount() - 2)) var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY) + // Read language directly from UserDefaults let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto" if selectedLanguage != "auto" { languageCString = Array(selectedLanguage.utf8CString) @@ -70,7 +72,7 @@ actor WhisperContext { whisper_reset_timings(context) - if let vadModelPath = await VADModelManager.shared.getModelPath() { + if let vadModelPath = self.vadModelPath { params.vad = true params.vad_model_path = (vadModelPath as NSString).utf8String @@ -86,14 +88,18 @@ actor WhisperContext { params.vad = false } + var success = true samples.withUnsafeBufferPointer { samplesBuffer in if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 { - self.logger.error("Failed to run whisper_full") + self.logger.error("Failed to run whisper_full. VAD enabled: \(params.vad, privacy: .public)") + success = false } } languageCString = nil promptCString = nil + + return success } func getTranscription() -> String { @@ -109,6 +115,13 @@ actor WhisperContext { static func createContext(path: String) async throws -> WhisperContext { let whisperContext = WhisperContext() try await whisperContext.initializeModel(path: path) + + // Asynchronously prepare VAD model path in the background + Task.detached(priority: .background) { + let path = await VADModelManager.shared.getModelPath() + await whisperContext.setVADModelPath(path) + } + return whisperContext } @@ -126,6 +139,10 @@ actor WhisperContext { throw WhisperStateError.modelLoadFailed } } + + private func setVADModelPath(_ path: String?) { + self.vadModelPath = path + } func releaseResources() { if let context = context { diff --git a/VoiceInk/Whisper/WhisperError.swift b/VoiceInk/Whisper/WhisperError.swift index 34fc74b..c96f422 100644 --- a/VoiceInk/Whisper/WhisperError.swift +++ b/VoiceInk/Whisper/WhisperError.swift @@ -3,6 +3,7 @@ import Foundation enum WhisperStateError: Error, Identifiable { case modelLoadFailed case transcriptionFailed + case whisperCoreFailed case unzipFailed case unknownError @@ -16,6 +17,8 @@ extension WhisperStateError: LocalizedError { return "Failed to load the transcription model." case .transcriptionFailed: return "Failed to transcribe the audio." + case .whisperCoreFailed: + return "The core transcription engine failed." case .unzipFailed: return "Failed to unzip the downloaded Core ML model." case .unknownError: @@ -29,6 +32,8 @@ extension WhisperStateError: LocalizedError { return "Try selecting a different model or redownloading the current model." case .transcriptionFailed: return "Check the default model try again. If the problem persists, try a different model." + case .whisperCoreFailed: + return "This can happen due to an issue with the audio recording or insufficient system resources. Please try again, or restart the app." case .unzipFailed: return "The downloaded Core ML model archive might be corrupted. Try deleting the model and downloading it again. Check available disk space." case .unknownError: