diff --git a/VoiceInk/Whisper/LibWhisper.swift b/VoiceInk/Whisper/LibWhisper.swift index 622d598..2703152 100644 --- a/VoiceInk/Whisper/LibWhisper.swift +++ b/VoiceInk/Whisper/LibWhisper.swift @@ -18,9 +18,7 @@ actor WhisperContext { private var promptCString: [CChar]? private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperContext") - private init() { - // Private initializer without context - } + private init() {} init(context: OpaquePointer) { self.context = context @@ -35,22 +33,18 @@ actor WhisperContext { func fullTranscribe(samples: [Float]) async { guard let context = context else { return } - // Leave 2 processors free (i.e. the high-efficiency cores). let maxThreads = max(1, min(8, cpuCount() - 2)) var params = whisper_full_default_params(WHISPER_SAMPLING_GREEDY) - // Read language directly from UserDefaults let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto" if selectedLanguage != "auto" { languageCString = Array(selectedLanguage.utf8CString) params.language = languageCString?.withUnsafeBufferPointer { ptr in ptr.baseAddress } - logger.notice("🌐 Using language: \(selectedLanguage)") } else { languageCString = nil params.language = nil - logger.notice("🌐 Using auto language detection") } if prompt != nil { @@ -58,53 +52,46 @@ actor WhisperContext { params.initial_prompt = promptCString?.withUnsafeBufferPointer { ptr in ptr.baseAddress } - logger.notice("💬 Using prompt for transcription in language: \(selectedLanguage)") } else { promptCString = nil params.initial_prompt = nil } - params.print_realtime = true - params.print_progress = false + params.print_realtime = true + params.print_progress = false params.print_timestamps = true - params.print_special = false - params.translate = false - params.n_threads = Int32(maxThreads) - params.offset_ms = 0 - params.no_context = true - params.single_segment = false + params.print_special = false + params.translate = false + params.n_threads = Int32(maxThreads) + params.offset_ms = 0 + params.no_context = true + params.single_segment = false + params.suppress_nst = true + params.entropy_thold = 2.0 + params.logprob_thold = -0.8 + params.no_speech_thold = 0.6 whisper_reset_timings(context) - logger.notice("⚙️ Starting whisper transcription with VAD: \(params.vad ? "ENABLED" : "DISABLED")") if let vadModelPath = await VADModelManager.shared.getModelPath() { - logger.notice("🎤 VAD is ENABLED - Successfully retrieved VAD model path: \(vadModelPath)") params.vad = true params.vad_model_path = (vadModelPath as NSString).utf8String var vadParams = whisper_vad_default_params() - vadParams.min_speech_duration_ms = 500 - vadParams.min_silence_duration_ms = 500 + vadParams.threshold = 0.50 + vadParams.min_speech_duration_ms = 250 + vadParams.min_silence_duration_ms = 100 + vadParams.max_speech_duration_s = Float.greatestFiniteMagnitude + vadParams.speech_pad_ms = 30 vadParams.samples_overlap = 0.1 params.vad_params = vadParams - - logger.notice("🎤 VAD configured with parameters: min_speech=500ms, min_silence=500ms, overlap=10%") - logger.notice("🎤 VAD will be used for voice activity detection during transcription") } else { - logger.notice("🎤 VAD is DISABLED - VAD model path not found, proceeding without VAD") params.vad = false - logger.notice("🎤 Transcription will process entire audio without voice activity detection") } samples.withUnsafeBufferPointer { samplesBuffer in if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 { self.logger.error("Failed to run whisper_full") - } else { - if params.vad { - self.logger.notice("✅ Whisper transcription completed successfully with VAD processing") - } else { - self.logger.notice("✅ Whisper transcription completed successfully without VAD") - } } } @@ -118,19 +105,13 @@ actor WhisperContext { for i in 0.. WhisperContext { - // Create empty context first let whisperContext = WhisperContext() - - // Initialize the context within the actor's isolated context try await whisperContext.initializeModel(path: path) - return whisperContext } @@ -138,7 +119,6 @@ actor WhisperContext { var params = whisper_context_default_params() #if targetEnvironment(simulator) params.use_gpu = false - logger.notice("🖥️ Running on simulator, using CPU") #endif let context = whisper_init_from_file_with_params(path, params) @@ -160,7 +140,6 @@ actor WhisperContext { func setPrompt(_ prompt: String?) { self.prompt = prompt - logger.notice("💬 Prompt set: \(prompt ?? "none")") } }