Fix: Revert async transcription to fix whisper failures
This commit is contained in:
parent
10a758e926
commit
f0a3383139
@ -62,7 +62,13 @@ class LocalTranscriptionService: TranscriptionService {
|
|||||||
await whisperContext.setPrompt(currentPrompt)
|
await whisperContext.setPrompt(currentPrompt)
|
||||||
|
|
||||||
// Transcribe
|
// Transcribe
|
||||||
await whisperContext.fullTranscribe(samples: data)
|
let success = await whisperContext.fullTranscribe(samples: data)
|
||||||
|
|
||||||
|
guard success else {
|
||||||
|
logger.error("Core transcription engine failed (whisper_full).")
|
||||||
|
throw WhisperStateError.whisperCoreFailed
|
||||||
|
}
|
||||||
|
|
||||||
var text = await whisperContext.getTranscription()
|
var text = await whisperContext.getTranscription()
|
||||||
|
|
||||||
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
|
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
|
||||||
|
|||||||
@ -13,6 +13,7 @@ actor WhisperContext {
|
|||||||
private var languageCString: [CChar]?
|
private var languageCString: [CChar]?
|
||||||
private var prompt: String?
|
private var prompt: String?
|
||||||
private var promptCString: [CChar]?
|
private var promptCString: [CChar]?
|
||||||
|
private var vadModelPath: String?
|
||||||
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperContext")
|
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperContext")
|
||||||
|
|
||||||
private init() {}
|
private init() {}
|
||||||
@ -27,12 +28,13 @@ actor WhisperContext {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
func fullTranscribe(samples: [Float]) async {
|
func fullTranscribe(samples: [Float]) -> Bool {
|
||||||
guard let context = context else { return }
|
guard let context = context else { return false }
|
||||||
|
|
||||||
let maxThreads = max(1, min(8, cpuCount() - 2))
|
let maxThreads = max(1, min(8, cpuCount() - 2))
|
||||||
var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
|
var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY)
|
||||||
|
|
||||||
|
// Read language directly from UserDefaults
|
||||||
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
|
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
|
||||||
if selectedLanguage != "auto" {
|
if selectedLanguage != "auto" {
|
||||||
languageCString = Array(selectedLanguage.utf8CString)
|
languageCString = Array(selectedLanguage.utf8CString)
|
||||||
@ -70,7 +72,7 @@ actor WhisperContext {
|
|||||||
|
|
||||||
whisper_reset_timings(context)
|
whisper_reset_timings(context)
|
||||||
|
|
||||||
if let vadModelPath = await VADModelManager.shared.getModelPath() {
|
if let vadModelPath = self.vadModelPath {
|
||||||
params.vad = true
|
params.vad = true
|
||||||
params.vad_model_path = (vadModelPath as NSString).utf8String
|
params.vad_model_path = (vadModelPath as NSString).utf8String
|
||||||
|
|
||||||
@ -86,14 +88,18 @@ actor WhisperContext {
|
|||||||
params.vad = false
|
params.vad = false
|
||||||
}
|
}
|
||||||
|
|
||||||
|
var success = true
|
||||||
samples.withUnsafeBufferPointer { samplesBuffer in
|
samples.withUnsafeBufferPointer { samplesBuffer in
|
||||||
if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 {
|
if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 {
|
||||||
self.logger.error("Failed to run whisper_full")
|
self.logger.error("Failed to run whisper_full. VAD enabled: \(params.vad, privacy: .public)")
|
||||||
|
success = false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
languageCString = nil
|
languageCString = nil
|
||||||
promptCString = nil
|
promptCString = nil
|
||||||
|
|
||||||
|
return success
|
||||||
}
|
}
|
||||||
|
|
||||||
func getTranscription() -> String {
|
func getTranscription() -> String {
|
||||||
@ -109,6 +115,13 @@ actor WhisperContext {
|
|||||||
static func createContext(path: String) async throws -> WhisperContext {
|
static func createContext(path: String) async throws -> WhisperContext {
|
||||||
let whisperContext = WhisperContext()
|
let whisperContext = WhisperContext()
|
||||||
try await whisperContext.initializeModel(path: path)
|
try await whisperContext.initializeModel(path: path)
|
||||||
|
|
||||||
|
// Asynchronously prepare VAD model path in the background
|
||||||
|
Task.detached(priority: .background) {
|
||||||
|
let path = await VADModelManager.shared.getModelPath()
|
||||||
|
await whisperContext.setVADModelPath(path)
|
||||||
|
}
|
||||||
|
|
||||||
return whisperContext
|
return whisperContext
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -126,6 +139,10 @@ actor WhisperContext {
|
|||||||
throw WhisperStateError.modelLoadFailed
|
throw WhisperStateError.modelLoadFailed
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private func setVADModelPath(_ path: String?) {
|
||||||
|
self.vadModelPath = path
|
||||||
|
}
|
||||||
|
|
||||||
func releaseResources() {
|
func releaseResources() {
|
||||||
if let context = context {
|
if let context = context {
|
||||||
|
|||||||
@ -3,6 +3,7 @@ import Foundation
|
|||||||
enum WhisperStateError: Error, Identifiable {
|
enum WhisperStateError: Error, Identifiable {
|
||||||
case modelLoadFailed
|
case modelLoadFailed
|
||||||
case transcriptionFailed
|
case transcriptionFailed
|
||||||
|
case whisperCoreFailed
|
||||||
case unzipFailed
|
case unzipFailed
|
||||||
case unknownError
|
case unknownError
|
||||||
|
|
||||||
@ -16,6 +17,8 @@ extension WhisperStateError: LocalizedError {
|
|||||||
return "Failed to load the transcription model."
|
return "Failed to load the transcription model."
|
||||||
case .transcriptionFailed:
|
case .transcriptionFailed:
|
||||||
return "Failed to transcribe the audio."
|
return "Failed to transcribe the audio."
|
||||||
|
case .whisperCoreFailed:
|
||||||
|
return "The core transcription engine failed."
|
||||||
case .unzipFailed:
|
case .unzipFailed:
|
||||||
return "Failed to unzip the downloaded Core ML model."
|
return "Failed to unzip the downloaded Core ML model."
|
||||||
case .unknownError:
|
case .unknownError:
|
||||||
@ -29,6 +32,8 @@ extension WhisperStateError: LocalizedError {
|
|||||||
return "Try selecting a different model or redownloading the current model."
|
return "Try selecting a different model or redownloading the current model."
|
||||||
case .transcriptionFailed:
|
case .transcriptionFailed:
|
||||||
return "Check the default model try again. If the problem persists, try a different model."
|
return "Check the default model try again. If the problem persists, try a different model."
|
||||||
|
case .whisperCoreFailed:
|
||||||
|
return "This can happen due to an issue with the audio recording or insufficient system resources. Please try again, or restart the app."
|
||||||
case .unzipFailed:
|
case .unzipFailed:
|
||||||
return "The downloaded Core ML model archive might be corrupted. Try deleting the model and downloading it again. Check available disk space."
|
return "The downloaded Core ML model archive might be corrupted. Try deleting the model and downloading it again. Check available disk space."
|
||||||
case .unknownError:
|
case .unknownError:
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user