Fix: Revert async transcription to fix whisper failures

2025-07-09 08:45:56 +05:45 · 2025-07-09 08:45:56 +05:45 · f0a3383139
commit f0a3383139
parent 10a758e926
3 changed files with 33 additions and 5 deletions
--- a/VoiceInk/Services/LocalTranscriptionService.swift
+++ b/VoiceInk/Services/LocalTranscriptionService.swift
@ -62,7 +62,13 @@ class LocalTranscriptionService: TranscriptionService {
        await whisperContext.setPrompt(currentPrompt)
        
        // Transcribe
-        await whisperContext.fullTranscribe(samples: data)
+        let success = await whisperContext.fullTranscribe(samples: data)
+        
+        guard success else {
+            logger.error("Core transcription engine failed (whisper_full).")
+            throw WhisperStateError.whisperCoreFailed
+        }
+        
        var text = await whisperContext.getTranscription()
        
        if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
--- a/VoiceInk/Whisper/LibWhisper.swift
+++ b/VoiceInk/Whisper/LibWhisper.swift
@ -13,6 +13,7 @@ actor WhisperContext {
    private var languageCString: [CChar]?
    private var prompt: String?
    private var promptCString: [CChar]?
+    private var vadModelPath: String?
    private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperContext")

    private init() {}
@ -27,12 +28,13 @@ actor WhisperContext {
        }
    }

-    func fullTranscribe(samples: [Float]) async {
-        guard let context = context else { return }
+    func fullTranscribe(samples: [Float]) -> Bool {
+        guard let context = context else { return false }
        
        let maxThreads = max(1, min(8, cpuCount() - 2))
        var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
        
+        // Read language directly from UserDefaults
        let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
        if selectedLanguage != "auto" {
            languageCString = Array(selectedLanguage.utf8CString)
@ -70,7 +72,7 @@ actor WhisperContext {

        whisper_reset_timings(context)
        
-        if let vadModelPath = await VADModelManager.shared.getModelPath() {
+        if let vadModelPath = self.vadModelPath {
            params.vad = true
            params.vad_model_path = (vadModelPath as NSString).utf8String
            
@ -86,14 +88,18 @@ actor WhisperContext {
            params.vad = false
        }
        
+        var success = true
        samples.withUnsafeBufferPointer { samplesBuffer in
            if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 {
-                self.logger.error("Failed to run whisper_full")
+                self.logger.error("Failed to run whisper_full. VAD enabled: \(params.vad, privacy: .public)")
+                success = false
            }
        }
        
        languageCString = nil
        promptCString = nil
+        
+        return success
    }

    func getTranscription() -> String {
@ -109,6 +115,13 @@ actor WhisperContext {
    static func createContext(path: String) async throws -> WhisperContext {
        let whisperContext = WhisperContext()
        try await whisperContext.initializeModel(path: path)
+        
+        // Asynchronously prepare VAD model path in the background
+        Task.detached(priority: .background) {
+            let path = await VADModelManager.shared.getModelPath()
+            await whisperContext.setVADModelPath(path)
+        }
+        
        return whisperContext
    }
    
@ -126,6 +139,10 @@ actor WhisperContext {
            throw WhisperStateError.modelLoadFailed
        }
    }
+    
+    private func setVADModelPath(_ path: String?) {
+        self.vadModelPath = path
+    }

    func releaseResources() {
        if let context = context {
--- a/VoiceInk/Whisper/WhisperError.swift
+++ b/VoiceInk/Whisper/WhisperError.swift
@ -3,6 +3,7 @@ import Foundation
 enum WhisperStateError: Error, Identifiable {
    case modelLoadFailed
    case transcriptionFailed
+    case whisperCoreFailed
    case unzipFailed
    case unknownError
    
@ -16,6 +17,8 @@ extension WhisperStateError: LocalizedError {
            return "Failed to load the transcription model."
        case .transcriptionFailed:
            return "Failed to transcribe the audio."
+        case .whisperCoreFailed:
+            return "The core transcription engine failed."
        case .unzipFailed:
            return "Failed to unzip the downloaded Core ML model."
        case .unknownError:
@ -29,6 +32,8 @@ extension WhisperStateError: LocalizedError {
            return "Try selecting a different model or redownloading the current model."
        case .transcriptionFailed:
            return "Check the default model try again. If the problem persists, try a different model."
+        case .whisperCoreFailed:
+            return "This can happen due to an issue with the audio recording or insufficient system resources. Please try again, or restart the app."
        case .unzipFailed:
            return "The downloaded Core ML model archive might be corrupted. Try deleting the model and downloading it again. Check available disk space."
        case .unknownError: