Feat: Respect VAD user setting in ParakeetTranscriptionService
This commit is contained in:
parent
c0ed2dc78a
commit
5eacee467a
@ -79,34 +79,41 @@ class ParakeetTranscriptionService: TranscriptionService {
|
|||||||
|
|
||||||
// Use VAD to get speech segments
|
// Use VAD to get speech segments
|
||||||
var speechAudio: [Float] = []
|
var speechAudio: [Float] = []
|
||||||
if let modelPath = await VADModelManager.shared.getModelPath() {
|
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
|
||||||
if let vad = VoiceActivityDetector(modelPath: modelPath) {
|
|
||||||
let speechSegments = vad.process(audioSamples: audioSamples)
|
|
||||||
logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
|
|
||||||
|
|
||||||
let sampleRate = 16000 // Assuming 16kHz sample rate
|
if isVADEnabled {
|
||||||
for segment in speechSegments {
|
if let modelPath = await VADModelManager.shared.getModelPath() {
|
||||||
let startSample = Int(segment.start * Double(sampleRate))
|
if let vad = VoiceActivityDetector(modelPath: modelPath) {
|
||||||
var endSample = Int(segment.end * Double(sampleRate))
|
let speechSegments = vad.process(audioSamples: audioSamples)
|
||||||
|
logger.notice("🦜 VAD detected \(speechSegments.count) speech segments.")
|
||||||
|
|
||||||
// Cap endSample to the audio buffer size
|
let sampleRate = 16000 // Assuming 16kHz sample rate
|
||||||
if endSample > audioSamples.count {
|
for segment in speechSegments {
|
||||||
endSample = audioSamples.count
|
let startSample = Int(segment.start * Double(sampleRate))
|
||||||
}
|
var endSample = Int(segment.end * Double(sampleRate))
|
||||||
|
|
||||||
if startSample < endSample {
|
// Cap endSample to the audio buffer size
|
||||||
speechAudio.append(contentsOf: audioSamples[startSample..<endSample])
|
if endSample > audioSamples.count {
|
||||||
} else {
|
endSample = audioSamples.count
|
||||||
logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.")
|
}
|
||||||
|
|
||||||
|
if startSample < endSample {
|
||||||
|
speechAudio.append(contentsOf: audioSamples[startSample..<endSample])
|
||||||
|
} else {
|
||||||
|
logger.warning("🦜 Invalid sample range for segment: start=\(startSample), end=\(endSample). Skipping.")
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
|
||||||
|
} else {
|
||||||
|
logger.warning("🦜 VAD could not be initialized. Transcribing original audio.")
|
||||||
|
speechAudio = audioSamples
|
||||||
}
|
}
|
||||||
logger.notice("🦜 Extracted \(speechAudio.count) samples from VAD segments.")
|
|
||||||
} else {
|
} else {
|
||||||
logger.warning("🦜 VAD could not be initialized. Transcribing original audio.")
|
logger.warning("🦜 VAD model path not found. Transcribing original audio.")
|
||||||
speechAudio = audioSamples
|
speechAudio = audioSamples
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
logger.warning("🦜 VAD model path not found. Transcribing original audio.")
|
logger.notice("🦜 VAD is disabled by user setting. Transcribing original audio.")
|
||||||
speechAudio = audioSamples
|
speechAudio = audioSamples
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user