Feat: Respect VAD user setting in ParakeetTranscriptionService

This commit is contained in:
Beingpax 2025-09-06 08:57:32 +05:45
parent c0ed2dc78a
commit 5eacee467a

View File

@@ -79,34 +79,41 @@ class ParakeetTranscriptionService: TranscriptionService {
// Use VAD to get speech segments // Use VAD to get speech segments
var speechAudio: [Float] = [] var speechAudio: [Float] = []
if let modelPath = await VADModelManager.shared.getModelPath() { let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
if let vad = VoiceActivityDetector(modelPath: modelPath) {
let speechSegments = vad.process(audioSamples: audioSamples)
logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
let sampleRate = 16000 // Assuming 16kHz sample rate if isVADEnabled {
for segment in speechSegments { if let modelPath = await VADModelManager.shared.getModelPath() {
let startSample = Int(segment.start * Double(sampleRate)) if let vad = VoiceActivityDetector(modelPath: modelPath) {
var endSample = Int(segment.end * Double(sampleRate)) let speechSegments = vad.process(audioSamples: audioSamples)
logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
// Cap endSample to the audio buffer size let sampleRate = 16000 // Assuming 16kHz sample rate
if endSample > audioSamples.count { for segment in speechSegments {
endSample = audioSamples.count let startSample = Int(segment.start * Double(sampleRate))
} var endSample = Int(segment.end * Double(sampleRate))
if startSample < endSample { // Cap endSample to the audio buffer size
speechAudio.append(contentsOf: audioSamples[startSample..<endSample]) if endSample > audioSamples.count {
} else { endSample = audioSamples.count
logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.") }
if startSample < endSample {
speechAudio.append(contentsOf: audioSamples[startSample..<endSample])
} else {
logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.")
}
} }
logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
} else {
logger.warning("🦜 VAD could not be initialized. Transcribing original audio.")
speechAudio = audioSamples
} }
logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
} else { } else {
logger.warning("🦜 VAD could not be initialized. Transcribing original audio.") logger.warning("🦜 VAD model path not found. Transcribing original audio.")
speechAudio = audioSamples speechAudio = audioSamples
} }
} else { } else {
logger.warning("🦜 VAD model path not found. Transcribing original audio.") logger.notice("🦜 VAD is disabled by user setting. Transcribing original audio.")
speechAudio = audioSamples speechAudio = audioSamples
} }