Respect the VAD flag, download the VAD model alongside the ASR models, and update FluidAudio to the latest revision
This commit is contained in:
parent
91734bda45
commit
97c6234fb3
@ -7,7 +7,7 @@
|
|||||||
"location" : "https://github.com/FluidInference/FluidAudio",
|
"location" : "https://github.com/FluidInference/FluidAudio",
|
||||||
"state" : {
|
"state" : {
|
||||||
"branch" : "main",
|
"branch" : "main",
|
||||||
"revision" : "1416b2f8d6be50d7aa47f32a3baeeb8669c375e9"
|
"revision" : "328036d255ef76b8d661eacc16ac108eb45f9218"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
|
|||||||
Binary file not shown.
Binary file not shown.
@ -1,120 +0,0 @@
|
|||||||
[
|
|
||||||
{
|
|
||||||
"shortDescription" : "Silero VAD Unified Model 256ms (STFT + Encoder + Decoder) with noisy-OR aggregation",
|
|
||||||
"metadataOutputVersion" : "3.0",
|
|
||||||
"outputSchema" : [
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 1 × 1)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 1, 1]",
|
|
||||||
"name" : "vad_output",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 128)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 128]",
|
|
||||||
"name" : "new_hidden_state",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 128)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 128]",
|
|
||||||
"name" : "new_cell_state",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"version" : "6.0.0",
|
|
||||||
"modelParameters" : [
|
|
||||||
|
|
||||||
],
|
|
||||||
"author" : "Fluid Infernece + Silero Team",
|
|
||||||
"specificationVersion" : 6,
|
|
||||||
"storagePrecision" : "Mixed (Float16, Float32)",
|
|
||||||
"mlProgramOperationTypeHistogram" : {
|
|
||||||
"Concat" : 9,
|
|
||||||
"Lstm" : 8,
|
|
||||||
"SliceByIndex" : 41,
|
|
||||||
"Clip" : 32,
|
|
||||||
"Pow" : 16,
|
|
||||||
"Transpose" : 16,
|
|
||||||
"Sub" : 2,
|
|
||||||
"Relu" : 40,
|
|
||||||
"Squeeze" : 18,
|
|
||||||
"Cast" : 54,
|
|
||||||
"Sigmoid" : 8,
|
|
||||||
"Add" : 16,
|
|
||||||
"ExpandDims" : 26,
|
|
||||||
"Sqrt" : 8,
|
|
||||||
"Mul" : 7,
|
|
||||||
"Conv" : 48,
|
|
||||||
"Pad" : 8
|
|
||||||
},
|
|
||||||
"computePrecision" : "Mixed (Float16, Float32, Int32)",
|
|
||||||
"stateSchema" : [
|
|
||||||
|
|
||||||
],
|
|
||||||
"isUpdatable" : "0",
|
|
||||||
"availability" : {
|
|
||||||
"macOS" : "12.0",
|
|
||||||
"tvOS" : "15.0",
|
|
||||||
"visionOS" : "1.0",
|
|
||||||
"watchOS" : "8.0",
|
|
||||||
"iOS" : "15.0",
|
|
||||||
"macCatalyst" : "15.0"
|
|
||||||
},
|
|
||||||
"modelType" : {
|
|
||||||
"name" : "MLModelType_mlProgram"
|
|
||||||
},
|
|
||||||
"inputSchema" : [
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 4160)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 4160]",
|
|
||||||
"name" : "audio_input",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 128)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 128]",
|
|
||||||
"name" : "hidden_state",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
"hasShapeFlexibility" : "0",
|
|
||||||
"isOptional" : "0",
|
|
||||||
"dataType" : "Float32",
|
|
||||||
"formattedType" : "MultiArray (Float32 1 × 128)",
|
|
||||||
"shortDescription" : "",
|
|
||||||
"shape" : "[1, 128]",
|
|
||||||
"name" : "cell_state",
|
|
||||||
"type" : "MultiArray"
|
|
||||||
}
|
|
||||||
],
|
|
||||||
"userDefinedMetadata" : {
|
|
||||||
"com.github.apple.coremltools.conversion_date" : "2025-09-15",
|
|
||||||
"com.github.apple.coremltools.source" : "torch==2.7.0",
|
|
||||||
"com.github.apple.coremltools.version" : "9.0b1",
|
|
||||||
"com.github.apple.coremltools.source_dialect" : "TorchScript"
|
|
||||||
},
|
|
||||||
"generatedClassName" : "silero_vad_unified_256ms_v6_0_0",
|
|
||||||
"method" : "predict"
|
|
||||||
}
|
|
||||||
]
|
|
||||||
File diff suppressed because it is too large
Load Diff
Binary file not shown.
@ -22,29 +22,16 @@ class ParakeetTranscriptionService: TranscriptionService {
|
|||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
if let customModelsDirectory {
|
||||||
|
do {
|
||||||
asrManager = AsrManager(config: .default)
|
asrManager = AsrManager(config: .default)
|
||||||
let models: AsrModels
|
let models = try await AsrModels.load(from: customModelsDirectory)
|
||||||
if let customDirectory = customModelsDirectory {
|
try await asrManager?.initialize(models: models)
|
||||||
logger.notice("🦜 Loading Parakeet models from: \(customDirectory.path)")
|
isModelLoaded = true
|
||||||
models = try await AsrModels.load(from: customDirectory)
|
} catch {
|
||||||
} else {
|
isModelLoaded = false
|
||||||
logger.notice("🦜 Loading Parakeet models from default directory")
|
asrManager = nil
|
||||||
let defaultDir = AsrModels.defaultCacheDirectory()
|
}
|
||||||
models = try await AsrModels.load(from: defaultDir)
|
|
||||||
}
|
|
||||||
|
|
||||||
try await asrManager?.initialize(models: models)
|
|
||||||
isModelLoaded = true
|
|
||||||
logger.notice("🦜 Parakeet model loaded successfully")
|
|
||||||
|
|
||||||
} catch {
|
|
||||||
let description = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
|
|
||||||
logger.error("🦜 Failed to load Parakeet model: \(description)")
|
|
||||||
isModelLoaded = false
|
|
||||||
asrManager = nil
|
|
||||||
throw error
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -59,22 +46,23 @@ class ParakeetTranscriptionService: TranscriptionService {
|
|||||||
|
|
||||||
let audioSamples = try readAudioSamples(from: audioURL)
|
let audioSamples = try readAudioSamples(from: audioURL)
|
||||||
|
|
||||||
let sampleRate = 16000.0
|
let durationSeconds = Double(audioSamples.count) / 16000.0
|
||||||
let durationSeconds = Double(audioSamples.count) / sampleRate
|
|
||||||
|
let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true
|
||||||
|
|
||||||
let speechAudio: [Float]
|
let speechAudio: [Float]
|
||||||
if durationSeconds < 20.0 {
|
if durationSeconds < 20.0 || !isVADEnabled {
|
||||||
speechAudio = audioSamples
|
speechAudio = audioSamples
|
||||||
} else {
|
} else {
|
||||||
let vadConfig = VadConfig(threshold: 0.7)
|
let vadConfig = VadConfig(threshold: 0.7)
|
||||||
if vadManager == nil {
|
if vadManager == nil, let customModelsDirectory {
|
||||||
if let bundledVadURL = Bundle.main.url(forResource: ModelNames.VAD.sileroVad, withExtension: "mlmodelc") {
|
do {
|
||||||
do {
|
vadManager = try await VadManager(
|
||||||
let bundledModel = try MLModel(contentsOf: bundledVadURL)
|
config: vadConfig,
|
||||||
vadManager = VadManager(config: vadConfig, vadModel: bundledModel)
|
modelDirectory: customModelsDirectory.deletingLastPathComponent()
|
||||||
} catch {
|
)
|
||||||
}
|
} catch {
|
||||||
} else {
|
// Silent failure
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
@ -33,6 +33,14 @@ extension WhisperState {
|
|||||||
|
|
||||||
do {
|
do {
|
||||||
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
|
_ = try await AsrModels.downloadAndLoad(to: parakeetModelsDirectory)
|
||||||
|
|
||||||
|
// Also download VAD model into the same parent directory as ASR models
|
||||||
|
let parentDir = parakeetModelsDirectory.deletingLastPathComponent()
|
||||||
|
_ = try await DownloadUtils.loadModels(
|
||||||
|
.vad,
|
||||||
|
modelNames: Array(ModelNames.VAD.requiredModels),
|
||||||
|
directory: parentDir
|
||||||
|
)
|
||||||
self.isParakeetModelDownloaded = true
|
self.isParakeetModelDownloaded = true
|
||||||
downloadProgress["parakeet-tdt-0.6b"] = 1.0
|
downloadProgress["parakeet-tdt-0.6b"] = 1.0
|
||||||
} catch {
|
} catch {
|
||||||
|
|||||||
Loading…
x
Reference in New Issue
Block a user