diff --git a/VoiceInk/Whisper/LibWhisper.swift b/VoiceInk/Whisper/LibWhisper.swift
index 0347a59..ff0830b 100644
--- a/VoiceInk/Whisper/LibWhisper.swift
+++ b/VoiceInk/Whisper/LibWhisper.swift
@@ -32,7 +32,7 @@ actor WhisperContext {
         }
     }
 
-    func fullTranscribe(samples: [Float]) {
+    func fullTranscribe(samples: [Float]) async {
         guard let context = context else { return }
 
         // Leave 2 processors free (i.e. the high-efficiency cores).
@@ -76,15 +76,32 @@ actor WhisperContext {
 
         whisper_reset_timings(context)
         logger.notice("⚙️ Starting whisper transcription")
-        samples.withUnsafeBufferPointer { samples in
-            if (whisper_full(context, params, samples.baseAddress, Int32(samples.count)) != 0) {
-                logger.error("❌ Failed to run whisper model")
-            } else {
-                // Print detected language info before timings
-                let langId = whisper_full_lang_id(context)
-                let detectedLang = String(cString: whisper_lang_str(langId))
-                logger.notice("✅ Transcription completed - Language: \(detectedLang)")
-
+
+        // whisper_full reads `vad_model_path` well after it is assigned, so the C string has to be
+        // owned here; `(path as NSString).utf8String` is only valid while the temporary NSString lives.
+        var vadModelPathCString: UnsafeMutablePointer<CChar>?
+        defer { free(vadModelPathCString) }
+
+        if let vadModelPath = await VADModelManager.shared.getModelPath() {
+            logger.notice("Successfully retrieved VAD model path.")
+            vadModelPathCString = strdup(vadModelPath)
+            params.vad = true
+            params.vad_model_path = UnsafePointer(vadModelPathCString)
+
+            var vadParams = whisper_vad_default_params()
+            vadParams.min_speech_duration_ms = 500
+            vadParams.min_silence_duration_ms = 500
+            vadParams.samples_overlap = 0.1
+            params.vad_params = vadParams
+
+            logger.notice("🎤 VAD configured: min_speech=500ms, min_silence=500ms, overlap=100ms")
+        } else {
+            logger.error("VAD model path not found, proceeding without VAD.")
+        }
+
+        samples.withUnsafeBufferPointer { samplesBuffer in
+            if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 {
+                self.logger.error("Failed to run whisper_full")
             }
         }
 
@@ -140,7 +157,7 @@ actor WhisperContext {
 
     func setPrompt(_ prompt: String?) {
         self.prompt = prompt
-        logger.debug("💬 Prompt set: \(prompt ?? "none")")
+        logger.notice("💬 Prompt set: \(prompt ?? "none")")
     }
 }
 
diff --git a/VoiceInk/Whisper/VADModelManager.swift b/VoiceInk/Whisper/VADModelManager.swift
new file mode 100644
index 0000000..b5460fd
--- /dev/null
+++ b/VoiceInk/Whisper/VADModelManager.swift
@@ -0,0 +1,104 @@
+import Foundation
+import OSLog
+
+/// Locates the Silero VAD model on disk, downloading it on first use.
+///
+/// The model is stored in the app's `WhisperModels` folder inside Application Support.
+final class VADModelManager {
+    static let shared = VADModelManager()
+    private let logger = Logger(subsystem: "VADModelManager", category: "ModelManagement")
+
+    private let modelURL = URL(string: "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")!
+
+    /// Destination of the VAD model file, or `nil` when Application Support cannot be resolved.
+    private var modelPath: URL? {
+        guard let appSupportDir = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first else {
+            return nil
+        }
+        // Using the same directory structure as WhisperState for consistency
+        let modelsDir = appSupportDir.appendingPathComponent("com.prakashjoshipax.VoiceInk/WhisperModels")
+        return modelsDir.appendingPathComponent("ggml-silero-v5.1.2.bin")
+    }
+
+    private init() {
+        guard let modelPath = modelPath else { return }
+        let directory = modelPath.deletingLastPathComponent()
+        guard !FileManager.default.fileExists(atPath: directory.path) else { return }
+        do {
+            try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true, attributes: nil)
+            logger.log("Created directory for VAD model at \(directory.path)")
+        } catch {
+            logger.error("Failed to create model directory: \(error.localizedDescription)")
+        }
+    }
+
+    /// Returns the file-system path of the VAD model, downloading the model if it is not present yet.
+    ///
+    /// - Returns: The model path, or `nil` when the path cannot be built or the download fails.
+    func getModelPath() async -> String? {
+        guard let modelPath = modelPath else {
+            logger.error("Could not construct VAD model path.")
+            return nil
+        }
+
+        if FileManager.default.fileExists(atPath: modelPath.path) {
+            logger.log("VAD model already exists at \(modelPath.path)")
+            return modelPath.path
+        }
+
+        logger.log("VAD model not found, downloading...")
+        return await downloadModel(to: modelPath)
+    }
+
+    /// Downloads the model and installs it at `path`.
+    ///
+    /// - Returns: `path.path` on success, `nil` on any failure.
+    private func downloadModel(to path: URL) async -> String? {
+        return await withCheckedContinuation { continuation in
+            let task = URLSession.shared.downloadTask(with: modelURL) { location, response, error in
+                // Install synchronously inside the handler: URLSession deletes the temporary
+                // file at `location` as soon as this completion handler returns, so the move
+                // must not be deferred to another queue.
+                let installedPath = self.installDownloadedModel(at: location, response: response, error: error, to: path)
+                continuation.resume(returning: installedPath)
+            }
+            task.resume()
+        }
+    }
+
+    /// Validates a finished download and moves the temporary file to `path`.
+    private func installDownloadedModel(at location: URL?, response: URLResponse?, error: Error?, to path: URL) -> String? {
+        if let error = error {
+            logger.error("Failed to download VAD model: \(error.localizedDescription)")
+            return nil
+        }
+
+        guard let location = location else {
+            logger.error("Download location is nil.")
+            return nil
+        }
+
+        // A 4xx/5xx response still yields a "successful" download whose body is an error page.
+        if let httpResponse = response as? HTTPURLResponse, !(200...299).contains(httpResponse.statusCode) {
+            logger.error("Failed to download VAD model: HTTP status \(httpResponse.statusCode)")
+            return nil
+        }
+
+        do {
+            let fileManager = FileManager.default
+            // Ensure the destination directory exists (a no-op when it already does)
+            try fileManager.createDirectory(at: path.deletingLastPathComponent(), withIntermediateDirectories: true, attributes: nil)
+            if fileManager.fileExists(atPath: path.path) {
+                // A concurrent request installed the model first; moveItem would fail, so reuse it.
+                logger.log("VAD model already exists at \(path.path)")
+                return path.path
+            }
+            try fileManager.moveItem(at: location, to: path)
+            logger.log("Successfully downloaded and moved VAD model to \(path.path)")
+            return path.path
+        } catch {
+            logger.error("Failed to move VAD model to destination: \(error.localizedDescription)")
+            return nil
+        }
+    }
+}
\ No newline at end of file