// Patch summary (unified git diff touching four paths):
//
//  1. VoiceInk/Resources/models/ggml-silero-v5.1.2.bin
//     Added as a new binary file (the diff only records that the binaries differ).
//
//  2. VoiceInk/Views/ModelSettingsView.swift
//     Adds an @AppStorage("IsVADEnabled") flag defaulting to true, plus a switch-style
//     Toggle labelled "Voice Activity Detection (VAD)" with an accompanying InfoTip.
//
//  3. VoiceInk/Whisper/LibWhisper.swift
//     - Reads "IsVADEnabled" from UserDefaults.standard, falling back to true when the
//       stored object is missing or is not a Bool, and only sets params.vad /
//       params.vad_model_path when that flag is true AND vadModelPath is non-nil.
//     - Where the WhisperContext is created, the detached background Task that fetched
//       the VAD path is replaced by awaiting VADModelManager.shared.getModelPath() and
//       setVADModelPath(_:) inline, before the context is returned.
//     - setVADModelPath(_:) now logs an info message when the path is non-nil.
//     - Two log messages change: the whisper_full failure message drops `self.` and the
//       `privacy: .public` annotation; the model-load failure message drops its emoji.
//
//  4. VoiceInk/Whisper/VADModelManager.swift
//     Removes the Hugging Face download URL, the Application Support model path, the
//     directory creation in init, and downloadModel(to:). getModelPath() now returns
//     the path of Bundle.main.url(forResource: "ggml-silero-v5.1.2", withExtension: "bin"),
//     or logs an error and returns nil when the resource is not found.
//
// NOTE(review): the new getModelPath() body contains no `await`, yet the signature stays
// `async` — presumably kept so existing `await` call sites compile unchanged; confirm.
// NOTE(review): the patch text below appears to have had its line breaks collapsed onto a
// few long lines; the hunk bodies likely need their original newlines restored before the
// patch can be applied — confirm against the source commit.
diff --git a/VoiceInk/Resources/models/ggml-silero-v5.1.2.bin b/VoiceInk/Resources/models/ggml-silero-v5.1.2.bin new file mode 100644 index 0000000..c5ddfb5 Binary files /dev/null and b/VoiceInk/Resources/models/ggml-silero-v5.1.2.bin differ diff --git a/VoiceInk/Views/ModelSettingsView.swift b/VoiceInk/Views/ModelSettingsView.swift index a94c003..3ba4bfe 100644 --- a/VoiceInk/Views/ModelSettingsView.swift +++ b/VoiceInk/Views/ModelSettingsView.swift @@ -4,6 +4,7 @@ struct ModelSettingsView: View { @ObservedObject var whisperPrompt: WhisperPrompt @AppStorage("SelectedLanguage") private var selectedLanguage: String = "en" @AppStorage("IsTextFormattingEnabled") private var isTextFormattingEnabled = true + @AppStorage("IsVADEnabled") private var isVADEnabled = true @State private var customPrompt: String = "" @State private var isEditing: Bool = false @@ -77,6 +78,18 @@ struct ModelSettingsView: View { ) } + HStack { + Toggle(isOn: $isVADEnabled) { + Text("Voice Activity Detection (VAD)") + } + .toggleStyle(.switch) + + InfoTip( + title: "Voice Activity Detection", + message: "Detects speech segments and filters out silence to reduce hallucinations in local Whisper models." + ) + } + } .padding() .background(Color(NSColor.controlBackgroundColor)) diff --git a/VoiceInk/Whisper/LibWhisper.swift b/VoiceInk/Whisper/LibWhisper.swift index 7ab6547..145ba05 100644 --- a/VoiceInk/Whisper/LibWhisper.swift +++ b/VoiceInk/Whisper/LibWhisper.swift @@ -72,7 +72,9 @@ actor WhisperContext { whisper_reset_timings(context) - if let vadModelPath = self.vadModelPath { + // Configure VAD if enabled by user and model is available + let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? 
true + if isVADEnabled, let vadModelPath = self.vadModelPath { params.vad = true params.vad_model_path = (vadModelPath as NSString).utf8String @@ -91,7 +93,7 @@ actor WhisperContext { var success = true samples.withUnsafeBufferPointer { samplesBuffer in if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 { - self.logger.error("Failed to run whisper_full. VAD enabled: \(params.vad, privacy: .public)") + logger.error("Failed to run whisper_full. VAD enabled: \(params.vad)") success = false } } @@ -116,11 +118,9 @@ actor WhisperContext { let whisperContext = WhisperContext() try await whisperContext.initializeModel(path: path) - // Asynchronously prepare VAD model path in the background - Task.detached(priority: .background) { - let path = await VADModelManager.shared.getModelPath() - await whisperContext.setVADModelPath(path) - } + // Load VAD model from bundle resources + let vadModelPath = await VADModelManager.shared.getModelPath() + await whisperContext.setVADModelPath(vadModelPath) return whisperContext } @@ -135,13 +135,16 @@ actor WhisperContext { if let context { self.context = context } else { - logger.error("❌ Couldn't load model at \(path)") + logger.error("Couldn't load model at \(path)") throw WhisperStateError.modelLoadFailed } } private func setVADModelPath(_ path: String?) { self.vadModelPath = path + if path != nil { + logger.info("VAD model loaded from bundle resources") + } } func releaseResources() { diff --git a/VoiceInk/Whisper/VADModelManager.swift b/VoiceInk/Whisper/VADModelManager.swift index b5460fd..afa5f38 100644 --- a/VoiceInk/Whisper/VADModelManager.swift +++ b/VoiceInk/Whisper/VADModelManager.swift @@ -5,77 +5,14 @@ class VADModelManager { static let shared = VADModelManager() private let logger = Logger(subsystem: "VADModelManager", category: "ModelManagement") - private let modelURL = URL(string: "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")! 
- private var modelPath: URL? { - guard let appSupportDir = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first else { - return nil - } - // Using the same directory structure as WhisperState for consistency - let modelsDir = appSupportDir.appendingPathComponent("com.prakashjoshipax.VoiceInk/WhisperModels") - return modelsDir.appendingPathComponent("ggml-silero-v5.1.2.bin") - } - - private init() { - if let modelPath = modelPath { - let directory = modelPath.deletingLastPathComponent() - if !FileManager.default.fileExists(atPath: directory.path) { - do { - try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true, attributes: nil) - logger.log("Created directory for VAD model at \(directory.path)") - } catch { - logger.error("Failed to create model directory: \(error.localizedDescription)") - } - } - } - } + private init() {} func getModelPath() async -> String? { - guard let modelPath = modelPath else { - logger.error("Could not construct VAD model path.") + guard let modelURL = Bundle.main.url(forResource: "ggml-silero-v5.1.2", withExtension: "bin") else { + logger.error("VAD model not found in bundle resources") return nil } - - if FileManager.default.fileExists(atPath: modelPath.path) { - logger.log("VAD model already exists at \(modelPath.path)") - return modelPath.path - } else { - logger.log("VAD model not found, downloading...") - return await downloadModel(to: modelPath) - } - } - - private func downloadModel(to path: URL) async -> String? 
{ - return await withCheckedContinuation { continuation in - let task = URLSession.shared.downloadTask(with: modelURL) { location, response, error in - DispatchQueue.main.async { - if let error = error { - self.logger.error("Failed to download VAD model: \(error.localizedDescription)") - continuation.resume(returning: nil) - return - } - - guard let location = location else { - self.logger.error("Download location is nil.") - continuation.resume(returning: nil) - return - } - - do { - // Ensure the destination directory exists - let directory = path.deletingLastPathComponent() - if !FileManager.default.fileExists(atPath: directory.path) { - try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true, attributes: nil) - } - try FileManager.default.moveItem(at: location, to: path) - self.logger.log("Successfully downloaded and moved VAD model to \(path.path)") - continuation.resume(returning: path.path) - } catch { - self.logger.error("Failed to move VAD model to destination: \(error.localizedDescription)") - continuation.resume(returning: nil) - } - } - } - task.resume() - } + + return modelURL.path } } \ No newline at end of file