Add initial VAD support

This commit is contained in:
Beingpax 2025-06-29 12:31:02 +05:45
parent a05ddfe887
commit 6f3cef665c
2 changed files with 103 additions and 11 deletions

View File

@ -32,7 +32,7 @@ actor WhisperContext {
}
}
func fullTranscribe(samples: [Float]) {
func fullTranscribe(samples: [Float]) async {
guard let context = context else { return }
// Leave 2 processors free (i.e. the high-efficiency cores).
@ -76,15 +76,26 @@ actor WhisperContext {
whisper_reset_timings(context)
logger.notice("⚙️ Starting whisper transcription")
samples.withUnsafeBufferPointer { samples in
if (whisper_full(context, params, samples.baseAddress, Int32(samples.count)) != 0) {
logger.error("❌ Failed to run whisper model")
} else {
// Print detected language info before timings
let langId = whisper_full_lang_id(context)
let detectedLang = String(cString: whisper_lang_str(langId))
logger.notice("✅ Transcription completed - Language: \(detectedLang)")
if let vadModelPath = await VADModelManager.shared.getModelPath() {
logger.notice("Successfully retrieved VAD model path.")
params.vad = true
params.vad_model_path = (vadModelPath as NSString).utf8String
var vadParams = whisper_vad_default_params()
vadParams.min_speech_duration_ms = 500
vadParams.min_silence_duration_ms = 500
vadParams.samples_overlap = 0.1
params.vad_params = vadParams
logger.notice("🎤 VAD configured: min_speech=500ms, min_silence=500ms, overlap=100ms")
} else {
logger.error("VAD model path not found, proceeding without VAD.")
}
samples.withUnsafeBufferPointer { samplesBuffer in
if whisper_full(context, params, samplesBuffer.baseAddress, Int32(samplesBuffer.count)) != 0 {
self.logger.error("Failed to run whisper_full")
}
}
@ -140,7 +151,7 @@ actor WhisperContext {
func setPrompt(_ prompt: String?) {
self.prompt = prompt
logger.debug("💬 Prompt set: \(prompt ?? "none")")
logger.notice("💬 Prompt set: \(prompt ?? "none")")
}
}

View File

@ -0,0 +1,81 @@
import Foundation
import OSLog
/// Locates the Silero VAD model on disk and downloads it on first use.
///
/// The model file lives in the `WhisperModels` folder inside the user's
/// Application Support directory. Access goes through `shared`.
class VADModelManager {
    static let shared = VADModelManager()

    private let logger = Logger(subsystem: "VADModelManager", category: "ModelManagement")
    private let modelURL = URL(string: "https://huggingface.co/ggml-org/whisper-vad/resolve/main/ggml-silero-v5.1.2.bin")!

    /// Destination of the model file, or `nil` when no Application Support
    /// directory can be resolved for the current user.
    private var modelPath: URL? {
        guard let appSupportDir = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask).first else {
            return nil
        }
        // Using the same directory structure as WhisperState for consistency
        let modelsDir = appSupportDir.appendingPathComponent("com.prakashjoshipax.VoiceInk/WhisperModels")
        return modelsDir.appendingPathComponent("ggml-silero-v5.1.2.bin")
    }

    /// Creates the model directory up front so a later download has somewhere to land.
    /// Failure is only logged; the download path retries the creation.
    private init() {
        guard let modelPath = modelPath else { return }
        let directory = modelPath.deletingLastPathComponent()
        guard !FileManager.default.fileExists(atPath: directory.path) else { return }
        do {
            try FileManager.default.createDirectory(at: directory, withIntermediateDirectories: true, attributes: nil)
            logger.log("Created directory for VAD model at \(directory.path)")
        } catch {
            logger.error("Failed to create model directory: \(error.localizedDescription)")
        }
    }

    /// Returns the file-system path of the VAD model, downloading it if it is not present yet.
    ///
    /// - Returns: The model path, or `nil` if the path cannot be constructed or the download fails.
    func getModelPath() async -> String? {
        guard let modelPath = modelPath else {
            logger.error("Could not construct VAD model path.")
            return nil
        }

        if FileManager.default.fileExists(atPath: modelPath.path) {
            logger.log("VAD model already exists at \(modelPath.path)")
            return modelPath.path
        } else {
            logger.log("VAD model not found, downloading...")
            return await downloadModel(to: modelPath)
        }
    }

    /// Downloads the model and installs it at `path`.
    ///
    /// - Parameter path: Final location of the model file.
    /// - Returns: `path.path` on success, `nil` on any failure.
    private func downloadModel(to path: URL) async -> String? {
        return await withCheckedContinuation { continuation in
            let task = URLSession.shared.downloadTask(with: modelURL) { location, response, error in
                // URLSession deletes the temporary file as soon as this handler returns,
                // so the file must be moved synchronously here rather than after hopping
                // to another queue.
                let installedPath = self.installDownloadedModel(from: location, response: response, error: error, to: path)
                continuation.resume(returning: installedPath)
            }
            task.resume()
        }
    }

    /// Validates a finished download and moves the temporary file into place.
    ///
    /// Must be called from inside the download completion handler, while the
    /// temporary file at `location` still exists.
    ///
    /// - Returns: `path.path` when the model is available at `path`, otherwise `nil`.
    private func installDownloadedModel(from location: URL?, response: URLResponse?, error: Error?, to path: URL) -> String? {
        if let error = error {
            logger.error("Failed to download VAD model: \(error.localizedDescription)")
            return nil
        }

        guard let location = location else {
            logger.error("Download location is nil.")
            return nil
        }

        // A non-2xx response still yields a file (the error body); never install that as the model.
        if let httpResponse = response as? HTTPURLResponse, !(200..<300).contains(httpResponse.statusCode) {
            logger.error("Failed to download VAD model: HTTP status \(httpResponse.statusCode)")
            return nil
        }

        let fileManager = FileManager.default
        do {
            // Ensure the destination directory exists
            let directory = path.deletingLastPathComponent()
            if !fileManager.fileExists(atPath: directory.path) {
                try fileManager.createDirectory(at: directory, withIntermediateDirectories: true, attributes: nil)
            }

            // A concurrent request may already have installed the model; moveItem would
            // throw on an existing destination, so treat that case as success.
            if fileManager.fileExists(atPath: path.path) {
                logger.log("VAD model already exists at \(path.path)")
                return path.path
            }

            try fileManager.moveItem(at: location, to: path)
            logger.log("Successfully downloaded and moved VAD model to \(path.path)")
            return path.path
        } catch {
            logger.error("Failed to move VAD model to destination: \(error.localizedDescription)")
            return nil
        }
    }
}