Feat: Respect VAD user setting in ParakeetTranscriptionService

2025-09-06 08:57:32 +05:45 · 2025-09-06 08:57:32 +05:45 · 5eacee467a
commit 5eacee467a
parent c0ed2dc78a
1 changed file with 27 additions and 20 deletions
--- a/VoiceInk/Services/ParakeetTranscriptionService.swift
+++ b/VoiceInk/Services/ParakeetTranscriptionService.swift
@@ -79,34 +79,41 @@ class ParakeetTranscriptionService: TranscriptionService {
        // Use VAD to get speech segments
        var speechAudio: [Float] = []
-        if let modelPath = await VADModelManager.shared.getModelPath() {
+        let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
            if let vad = VoiceActivityDetector(modelPath: modelPath) {
                let speechSegments = vad.process(audioSamples: audioSamples)
                logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
-                let sampleRate = 16000 // Assuming 16kHz sample rate
+        if isVADEnabled {
-                for segment in speechSegments {
+            if let modelPath = await VADModelManager.shared.getModelPath() {
-                    let startSample = Int(segment.start * Double(sampleRate))
+                if let vad = VoiceActivityDetector(modelPath: modelPath) {
-                    var endSample = Int(segment.end * Double(sampleRate))
+                    let speechSegments = vad.process(audioSamples: audioSamples)
                    logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
-                    // Cap endSample to the audio buffer size
+                    let sampleRate = 16000 // Assuming 16kHz sample rate
-                    if endSample > audioSamples.count {
+                    for segment in speechSegments {
-                        endSample = audioSamples.count
+                        let startSample = Int(segment.start * Double(sampleRate))
-                    }
+                        var endSample = Int(segment.end * Double(sampleRate))
-
+
-                    if startSample < endSample {
+                        // Cap endSample to the audio buffer size
-                        speechAudio.append(contentsOf: audioSamples[startSample..<endSample])
+                        if endSample > audioSamples.count {
-                    } else {
+                            endSample = audioSamples.count
-                        logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.")
+                        }
                        if startSample < endSample {
                            speechAudio.append(contentsOf: audioSamples[startSample..<endSample])
                        } else {
                            logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.")
                        }
                    }
                    logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
                } else {
                    logger.warning("🦜 VAD could not be initialized. Transcribing original audio.")
                    speechAudio = audioSamples
                }
                logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
            } else {
-                logger.warning("🦜 VAD could not be initialized. Transcribing original audio.")
+                logger.warning("🦜 VAD model path not found. Transcribing original audio.")
                speechAudio = audioSamples
            }
        } else {
-            logger.warning("🦜 VAD model path not found. Transcribing original audio.")
+            logger.notice("🦜 VAD is disabled by user setting. Transcribing original audio.")
            speechAudio = audioSamples
        }