diff --git a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 2c29478..405f4b6 100644 --- a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -7,7 +7,7 @@ "location" : "https://github.com/FluidInference/FluidAudio", "state" : { "branch" : "main", - "revision" : "052cbb27cf073a9407251d74ef3459ea258e41b3" + "revision" : "1416b2f8d6be50d7aa47f32a3baeeb8669c375e9" } }, { diff --git a/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/analytics/coremldata.bin b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/analytics/coremldata.bin new file mode 100644 index 0000000..6d7b1ce Binary files /dev/null and b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/analytics/coremldata.bin differ diff --git a/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/coremldata.bin b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/coremldata.bin new file mode 100644 index 0000000..b967672 Binary files /dev/null and b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/coremldata.bin differ diff --git a/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/metadata.json b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/metadata.json new file mode 100644 index 0000000..407b909 --- /dev/null +++ b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/metadata.json @@ -0,0 +1,120 @@ +[ + { + "shortDescription" : "Silero VAD Unified Model 256ms (STFT + Encoder + Decoder) with noisy-OR aggregation", + "metadataOutputVersion" : "3.0", + "outputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 1 × 1)", + "shortDescription" : "", + "shape" : "[1, 1, 1]", + "name" : "vad_output", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 128)", + "shortDescription" : "", + "shape" : "[1, 128]", + "name" : "new_hidden_state", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 128)", + "shortDescription" : "", + "shape" : "[1, 128]", + "name" : "new_cell_state", + "type" : "MultiArray" + } + ], + "version" : "6.0.0", + "modelParameters" : [ + + ], + "author" : "Fluid Infernece + Silero Team", + "specificationVersion" : 6, + "storagePrecision" : "Mixed (Float16, Float32)", + "mlProgramOperationTypeHistogram" : { + "Concat" : 9, + "Lstm" : 8, + "SliceByIndex" : 41, + "Clip" : 32, + "Pow" : 16, + "Transpose" : 16, + "Sub" : 2, + "Relu" : 40, + "Squeeze" : 18, + "Cast" : 54, + "Sigmoid" : 8, + "Add" : 16, + "ExpandDims" : 26, + "Sqrt" : 8, + "Mul" : 7, + "Conv" : 48, + "Pad" : 8 + }, + "computePrecision" : "Mixed (Float16, Float32, Int32)", + "stateSchema" : [ + + ], + "isUpdatable" : "0", + "availability" : { + "macOS" : "12.0", + "tvOS" : "15.0", + "visionOS" : "1.0", + "watchOS" : "8.0", + "iOS" : "15.0", + "macCatalyst" : "15.0" + }, + "modelType" : { + "name" : "MLModelType_mlProgram" + }, + "inputSchema" : [ + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 4160)", + "shortDescription" : "", + "shape" : "[1, 4160]", + "name" : "audio_input", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 128)", + "shortDescription" : "", + "shape" : "[1, 128]", + "name" : "hidden_state", + "type" : "MultiArray" + }, + { + "hasShapeFlexibility" : "0", + "isOptional" : "0", + "dataType" : "Float32", + "formattedType" : "MultiArray (Float32 1 × 128)", + "shortDescription" : "", + "shape" : "[1, 128]", + "name" : "cell_state", + "type" : "MultiArray" + } + ], + "userDefinedMetadata" : { + "com.github.apple.coremltools.conversion_date" : "2025-09-15", + "com.github.apple.coremltools.source" : "torch==2.7.0", + "com.github.apple.coremltools.version" : "9.0b1", + "com.github.apple.coremltools.source_dialect" : "TorchScript" + }, + "generatedClassName" : "silero_vad_unified_256ms_v6_0_0", + "method" : "predict" + } +] \ No newline at end of file diff --git a/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/model.mil b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/model.mil new file mode 100644 index 0000000..32323b0 --- /dev/null +++ b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/model.mil @@ -0,0 +1,1002 @@ +program(1.0) +[buildInfo = dict, tensor>({{"coremlc-component-MIL", "3500.14.1"}, {"coremlc-version", "3500.32.1"}, {"coremltools-component-torch", "2.7.0"}, {"coremltools-source-dialect", "TorchScript"}, {"coremltools-version", "9.0b1"}})] +{ + func main(tensor audio_input, tensor cell_state, tensor hidden_state) { + tensor initial_context_begin_0 = const()[name = tensor("initial_context_begin_0"), val = tensor([0, 0])]; + tensor initial_context_end_0 = const()[name = tensor("initial_context_end_0"), val = tensor([1, 64])]; + tensor initial_context_end_mask_0 = const()[name = tensor("initial_context_end_mask_0"), val = tensor([true, false])]; + tensor audio_input_to_fp16_dtype_0 = const()[name = tensor("audio_input_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor audio_input_to_fp16 = cast(dtype = audio_input_to_fp16_dtype_0, x = audio_input)[name = tensor("cast_53")]; + tensor initial_context_cast_fp16 = slice_by_index(begin = initial_context_begin_0, end = initial_context_end_0, end_mask = initial_context_end_mask_0, x = audio_input_to_fp16)[name = tensor("initial_context_cast_fp16")]; + tensor current_audio_begin_0 = const()[name = tensor("current_audio_begin_0"), val = tensor([0, 64])]; + tensor current_audio_end_0 = const()[name = tensor("current_audio_end_0"), val = tensor([1, 4160])]; + tensor current_audio_end_mask_0 = const()[name = tensor("current_audio_end_mask_0"), val = tensor([true, true])]; + tensor current_audio_cast_fp16 = slice_by_index(begin = current_audio_begin_0, end = current_audio_end_0, end_mask = current_audio_end_mask_0, x = audio_input_to_fp16)[name = tensor("current_audio_cast_fp16")]; + tensor chunk_1_begin_0 = const()[name = tensor("chunk_1_begin_0"), val = tensor([0, 0])]; + tensor chunk_1_end_0 = const()[name = tensor("chunk_1_end_0"), val = tensor([1, 512])]; + tensor chunk_1_end_mask_0 = const()[name = tensor("chunk_1_end_mask_0"), val = tensor([true, false])]; + tensor chunk_1_cast_fp16 = slice_by_index(begin = chunk_1_begin_0, end = chunk_1_end_0, end_mask = chunk_1_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_1_cast_fp16")]; + tensor var_38 = const()[name = tensor("op_38"), val = tensor(1)]; + tensor input_1_interleave_0 = const()[name = tensor("input_1_interleave_0"), val = tensor(false)]; + tensor input_1_cast_fp16 = concat(axis = var_38, interleave = input_1_interleave_0, values = (initial_context_cast_fp16, chunk_1_cast_fp16))[name = tensor("input_1_cast_fp16")]; + tensor context_1_begin_0 = const()[name = tensor("context_1_begin_0"), val = tensor([0, 448])]; + tensor context_1_end_0 = const()[name = tensor("context_1_end_0"), val = tensor([1, 512])]; + tensor context_1_end_mask_0 = const()[name = tensor("context_1_end_mask_0"), val = tensor([true, true])]; + tensor context_1_cast_fp16 = slice_by_index(begin = context_1_begin_0, end = context_1_end_0, end_mask = context_1_end_mask_0, x = chunk_1_cast_fp16)[name = tensor("context_1_cast_fp16")]; + tensor chunk_3_begin_0 = const()[name = tensor("chunk_3_begin_0"), val = tensor([0, 512])]; + tensor chunk_3_end_0 = const()[name = tensor("chunk_3_end_0"), val = tensor([1, 1024])]; + tensor chunk_3_end_mask_0 = const()[name = tensor("chunk_3_end_mask_0"), val = tensor([true, false])]; + tensor chunk_3_cast_fp16 = slice_by_index(begin = chunk_3_begin_0, end = chunk_3_end_0, end_mask = chunk_3_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_3_cast_fp16")]; + tensor var_61 = const()[name = tensor("op_61"), val = tensor(1)]; + tensor input_29_interleave_0 = const()[name = tensor("input_29_interleave_0"), val = tensor(false)]; + tensor input_29_cast_fp16 = concat(axis = var_61, interleave = input_29_interleave_0, values = (context_1_cast_fp16, chunk_3_cast_fp16))[name = tensor("input_29_cast_fp16")]; + tensor context_3_begin_0 = const()[name = tensor("context_3_begin_0"), val = tensor([0, 448])]; + tensor context_3_end_0 = const()[name = tensor("context_3_end_0"), val = tensor([1, 512])]; + tensor context_3_end_mask_0 = const()[name = tensor("context_3_end_mask_0"), val = tensor([true, true])]; + tensor context_3_cast_fp16 = slice_by_index(begin = context_3_begin_0, end = context_3_end_0, end_mask = context_3_end_mask_0, x = chunk_3_cast_fp16)[name = tensor("context_3_cast_fp16")]; + tensor chunk_5_begin_0 = const()[name = tensor("chunk_5_begin_0"), val = tensor([0, 1024])]; + tensor chunk_5_end_0 = const()[name = tensor("chunk_5_end_0"), val = tensor([1, 1536])]; + tensor chunk_5_end_mask_0 = const()[name = tensor("chunk_5_end_mask_0"), val = tensor([true, false])]; + tensor chunk_5_cast_fp16 = slice_by_index(begin = chunk_5_begin_0, end = chunk_5_end_0, end_mask = chunk_5_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_5_cast_fp16")]; + tensor var_84 = const()[name = tensor("op_84"), val = tensor(1)]; + tensor input_57_interleave_0 = const()[name = tensor("input_57_interleave_0"), val = tensor(false)]; + tensor input_57_cast_fp16 = concat(axis = var_84, interleave = input_57_interleave_0, values = (context_3_cast_fp16, chunk_5_cast_fp16))[name = tensor("input_57_cast_fp16")]; + tensor context_5_begin_0 = const()[name = tensor("context_5_begin_0"), val = tensor([0, 448])]; + tensor context_5_end_0 = const()[name = tensor("context_5_end_0"), val = tensor([1, 512])]; + tensor context_5_end_mask_0 = const()[name = tensor("context_5_end_mask_0"), val = tensor([true, true])]; + tensor context_5_cast_fp16 = slice_by_index(begin = context_5_begin_0, end = context_5_end_0, end_mask = context_5_end_mask_0, x = chunk_5_cast_fp16)[name = tensor("context_5_cast_fp16")]; + tensor chunk_7_begin_0 = const()[name = tensor("chunk_7_begin_0"), val = tensor([0, 1536])]; + tensor chunk_7_end_0 = const()[name = tensor("chunk_7_end_0"), val = tensor([1, 2048])]; + tensor chunk_7_end_mask_0 = const()[name = tensor("chunk_7_end_mask_0"), val = tensor([true, false])]; + tensor chunk_7_cast_fp16 = slice_by_index(begin = chunk_7_begin_0, end = chunk_7_end_0, end_mask = chunk_7_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_7_cast_fp16")]; + tensor var_107 = const()[name = tensor("op_107"), val = tensor(1)]; + tensor input_85_interleave_0 = const()[name = tensor("input_85_interleave_0"), val = tensor(false)]; + tensor input_85_cast_fp16 = concat(axis = var_107, interleave = input_85_interleave_0, values = (context_5_cast_fp16, chunk_7_cast_fp16))[name = tensor("input_85_cast_fp16")]; + tensor context_7_begin_0 = const()[name = tensor("context_7_begin_0"), val = tensor([0, 448])]; + tensor context_7_end_0 = const()[name = tensor("context_7_end_0"), val = tensor([1, 512])]; + tensor context_7_end_mask_0 = const()[name = tensor("context_7_end_mask_0"), val = tensor([true, true])]; + tensor context_7_cast_fp16 = slice_by_index(begin = context_7_begin_0, end = context_7_end_0, end_mask = context_7_end_mask_0, x = chunk_7_cast_fp16)[name = tensor("context_7_cast_fp16")]; + tensor chunk_9_begin_0 = const()[name = tensor("chunk_9_begin_0"), val = tensor([0, 2048])]; + tensor chunk_9_end_0 = const()[name = tensor("chunk_9_end_0"), val = tensor([1, 2560])]; + tensor chunk_9_end_mask_0 = const()[name = tensor("chunk_9_end_mask_0"), val = tensor([true, false])]; + tensor chunk_9_cast_fp16 = slice_by_index(begin = chunk_9_begin_0, end = chunk_9_end_0, end_mask = chunk_9_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_9_cast_fp16")]; + tensor var_130 = const()[name = tensor("op_130"), val = tensor(1)]; + tensor input_113_interleave_0 = const()[name = tensor("input_113_interleave_0"), val = tensor(false)]; + tensor input_113_cast_fp16 = concat(axis = var_130, interleave = input_113_interleave_0, values = (context_7_cast_fp16, chunk_9_cast_fp16))[name = tensor("input_113_cast_fp16")]; + tensor context_9_begin_0 = const()[name = tensor("context_9_begin_0"), val = tensor([0, 448])]; + tensor context_9_end_0 = const()[name = tensor("context_9_end_0"), val = tensor([1, 512])]; + tensor context_9_end_mask_0 = const()[name = tensor("context_9_end_mask_0"), val = tensor([true, true])]; + tensor context_9_cast_fp16 = slice_by_index(begin = context_9_begin_0, end = context_9_end_0, end_mask = context_9_end_mask_0, x = chunk_9_cast_fp16)[name = tensor("context_9_cast_fp16")]; + tensor chunk_11_begin_0 = const()[name = tensor("chunk_11_begin_0"), val = tensor([0, 2560])]; + tensor chunk_11_end_0 = const()[name = tensor("chunk_11_end_0"), val = tensor([1, 3072])]; + tensor chunk_11_end_mask_0 = const()[name = tensor("chunk_11_end_mask_0"), val = tensor([true, false])]; + tensor chunk_11_cast_fp16 = slice_by_index(begin = chunk_11_begin_0, end = chunk_11_end_0, end_mask = chunk_11_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_11_cast_fp16")]; + tensor var_153 = const()[name = tensor("op_153"), val = tensor(1)]; + tensor input_141_interleave_0 = const()[name = tensor("input_141_interleave_0"), val = tensor(false)]; + tensor input_141_cast_fp16 = concat(axis = var_153, interleave = input_141_interleave_0, values = (context_9_cast_fp16, chunk_11_cast_fp16))[name = tensor("input_141_cast_fp16")]; + tensor context_11_begin_0 = const()[name = tensor("context_11_begin_0"), val = tensor([0, 448])]; + tensor context_11_end_0 = const()[name = tensor("context_11_end_0"), val = tensor([1, 512])]; + tensor context_11_end_mask_0 = const()[name = tensor("context_11_end_mask_0"), val = tensor([true, true])]; + tensor context_11_cast_fp16 = slice_by_index(begin = context_11_begin_0, end = context_11_end_0, end_mask = context_11_end_mask_0, x = chunk_11_cast_fp16)[name = tensor("context_11_cast_fp16")]; + tensor chunk_13_begin_0 = const()[name = tensor("chunk_13_begin_0"), val = tensor([0, 3072])]; + tensor chunk_13_end_0 = const()[name = tensor("chunk_13_end_0"), val = tensor([1, 3584])]; + tensor chunk_13_end_mask_0 = const()[name = tensor("chunk_13_end_mask_0"), val = tensor([true, false])]; + tensor chunk_13_cast_fp16 = slice_by_index(begin = chunk_13_begin_0, end = chunk_13_end_0, end_mask = chunk_13_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_13_cast_fp16")]; + tensor var_176 = const()[name = tensor("op_176"), val = tensor(1)]; + tensor input_169_interleave_0 = const()[name = tensor("input_169_interleave_0"), val = tensor(false)]; + tensor input_169_cast_fp16 = concat(axis = var_176, interleave = input_169_interleave_0, values = (context_11_cast_fp16, chunk_13_cast_fp16))[name = tensor("input_169_cast_fp16")]; + tensor context_begin_0 = const()[name = tensor("context_begin_0"), val = tensor([0, 448])]; + tensor context_end_0 = const()[name = tensor("context_end_0"), val = tensor([1, 512])]; + tensor context_end_mask_0 = const()[name = tensor("context_end_mask_0"), val = tensor([true, true])]; + tensor context_cast_fp16 = slice_by_index(begin = context_begin_0, end = context_end_0, end_mask = context_end_mask_0, x = chunk_13_cast_fp16)[name = tensor("context_cast_fp16")]; + tensor chunk_begin_0 = const()[name = tensor("chunk_begin_0"), val = tensor([0, 3584])]; + tensor chunk_end_0 = const()[name = tensor("chunk_end_0"), val = tensor([1, 1])]; + tensor chunk_end_mask_0 = const()[name = tensor("chunk_end_mask_0"), val = tensor([true, true])]; + tensor chunk_cast_fp16 = slice_by_index(begin = chunk_begin_0, end = chunk_end_0, end_mask = chunk_end_mask_0, x = current_audio_cast_fp16)[name = tensor("chunk_cast_fp16")]; + tensor var_199 = const()[name = tensor("op_199"), val = tensor(1)]; + tensor input_197_interleave_0 = const()[name = tensor("input_197_interleave_0"), val = tensor(false)]; + tensor input_197_cast_fp16 = concat(axis = var_199, interleave = input_197_interleave_0, values = (context_cast_fp16, chunk_cast_fp16))[name = tensor("input_197_cast_fp16")]; + tensor x_1_pad_0 = const()[name = tensor("x_1_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_1_mode_0 = const()[name = tensor("x_1_mode_0"), val = tensor("reflect")]; + tensor const_0_to_fp16 = const()[name = tensor("const_0_to_fp16"), val = tensor(0x0p+0)]; + tensor x_1_cast_fp16 = pad(constant_val = const_0_to_fp16, mode = x_1_mode_0, pad = x_1_pad_0, x = input_1_cast_fp16)[name = tensor("x_1_cast_fp16")]; + tensor x_3_axes_0 = const()[name = tensor("x_3_axes_0"), val = tensor([1])]; + tensor x_3_cast_fp16 = expand_dims(axes = x_3_axes_0, x = x_1_cast_fp16)[name = tensor("x_3_cast_fp16")]; + tensor stft_out_1_pad_type_0 = const()[name = tensor("stft_out_1_pad_type_0"), val = tensor("valid")]; + tensor stft_out_1_strides_0 = const()[name = tensor("stft_out_1_strides_0"), val = tensor([128])]; + tensor stft_out_1_pad_0 = const()[name = tensor("stft_out_1_pad_0"), val = tensor([0, 0])]; + tensor stft_out_1_dilations_0 = const()[name = tensor("stft_out_1_dilations_0"), val = tensor([1])]; + tensor stft_out_1_groups_0 = const()[name = tensor("stft_out_1_groups_0"), val = tensor(1)]; + tensor stft_forward_basis_to_fp16 = const()[name = tensor("stft_forward_basis_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(64)))]; + tensor stft_out_1_cast_fp16 = conv(dilations = stft_out_1_dilations_0, groups = stft_out_1_groups_0, pad = stft_out_1_pad_0, pad_type = stft_out_1_pad_type_0, strides = stft_out_1_strides_0, weight = stft_forward_basis_to_fp16, x = x_3_cast_fp16)[name = tensor("stft_out_1_cast_fp16")]; + tensor var_222_begin_0 = const()[name = tensor("op_222_begin_0"), val = tensor([0, 0, 0])]; + tensor var_222_end_0 = const()[name = tensor("op_222_end_0"), val = tensor([1, 129, 4])]; + tensor var_222_end_mask_0 = const()[name = tensor("op_222_end_mask_0"), val = tensor([true, false, true])]; + tensor var_222_cast_fp16 = slice_by_index(begin = var_222_begin_0, end = var_222_end_0, end_mask = var_222_end_mask_0, x = stft_out_1_cast_fp16)[name = tensor("op_222_cast_fp16")]; + tensor var_225_begin_0 = const()[name = tensor("op_225_begin_0"), val = tensor([0, 129, 0])]; + tensor var_225_end_0 = const()[name = tensor("op_225_end_0"), val = tensor([1, 258, 4])]; + tensor var_225_end_mask_0 = const()[name = tensor("op_225_end_mask_0"), val = tensor([true, true, true])]; + tensor var_225_cast_fp16 = slice_by_index(begin = var_225_begin_0, end = var_225_end_0, end_mask = var_225_end_mask_0, x = stft_out_1_cast_fp16)[name = tensor("op_225_cast_fp16")]; + tensor var_201_promoted_to_fp16 = const()[name = tensor("op_201_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_227_cast_fp16 = pow(x = var_222_cast_fp16, y = var_201_promoted_to_fp16)[name = tensor("op_227_cast_fp16")]; + tensor var_201_promoted_1_to_fp16 = const()[name = tensor("op_201_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_228_cast_fp16 = pow(x = var_225_cast_fp16, y = var_201_promoted_1_to_fp16)[name = tensor("op_228_cast_fp16")]; + tensor var_229_cast_fp16 = add(x = var_227_cast_fp16, y = var_228_cast_fp16)[name = tensor("op_229_cast_fp16")]; + tensor var_230_to_fp16 = const()[name = tensor("op_230_to_fp16"), val = tensor(0x1p-24)]; + tensor var_231_cast_fp16 = add(x = var_229_cast_fp16, y = var_230_to_fp16)[name = tensor("op_231_cast_fp16")]; + tensor input_3_cast_fp16 = sqrt(x = var_231_cast_fp16)[name = tensor("input_3_cast_fp16")]; + tensor input_5_pad_type_0 = const()[name = tensor("input_5_pad_type_0"), val = tensor("custom")]; + tensor input_5_pad_0 = const()[name = tensor("input_5_pad_0"), val = tensor([1, 1])]; + tensor input_5_strides_0 = const()[name = tensor("input_5_strides_0"), val = tensor([1])]; + tensor input_5_dilations_0 = const()[name = tensor("input_5_dilations_0"), val = tensor([1])]; + tensor input_5_groups_0 = const()[name = tensor("input_5_groups_0"), val = tensor(1)]; + tensor encoder_layers_0_weight_to_fp16 = const()[name = tensor("encoder_layers_0_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(132224)))]; + tensor encoder_layers_0_bias_to_fp16 = const()[name = tensor("encoder_layers_0_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231360)))]; + tensor input_5_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_5_dilations_0, groups = input_5_groups_0, pad = input_5_pad_0, pad_type = input_5_pad_type_0, strides = input_5_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_3_cast_fp16)[name = tensor("input_5_cast_fp16")]; + tensor x_5_cast_fp16 = relu(x = input_5_cast_fp16)[name = tensor("x_5_cast_fp16")]; + tensor const_1_to_fp16 = const()[name = tensor("const_1_to_fp16"), val = tensor(-inf)]; + tensor var_234_to_fp16 = const()[name = tensor("op_234_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_0_cast_fp16 = clip(alpha = const_1_to_fp16, beta = var_234_to_fp16, x = x_5_cast_fp16)[name = tensor("clip_0_cast_fp16")]; + tensor input_9_pad_type_0 = const()[name = tensor("input_9_pad_type_0"), val = tensor("custom")]; + tensor input_9_pad_0 = const()[name = tensor("input_9_pad_0"), val = tensor([1, 1])]; + tensor input_9_strides_0 = const()[name = tensor("input_9_strides_0"), val = tensor([2])]; + tensor input_9_dilations_0 = const()[name = tensor("input_9_dilations_0"), val = tensor([1])]; + tensor input_9_groups_0 = const()[name = tensor("input_9_groups_0"), val = tensor(1)]; + tensor encoder_layers_2_weight_to_fp16 = const()[name = tensor("encoder_layers_2_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(231680)))]; + tensor encoder_layers_2_bias_to_fp16 = const()[name = tensor("encoder_layers_2_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(280896)))]; + tensor input_9_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_9_dilations_0, groups = input_9_groups_0, pad = input_9_pad_0, pad_type = input_9_pad_type_0, strides = input_9_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_0_cast_fp16)[name = tensor("input_9_cast_fp16")]; + tensor x_7_cast_fp16 = relu(x = input_9_cast_fp16)[name = tensor("x_7_cast_fp16")]; + tensor const_2_to_fp16 = const()[name = tensor("const_2_to_fp16"), val = tensor(-inf)]; + tensor clip_1_cast_fp16 = clip(alpha = const_2_to_fp16, beta = var_234_to_fp16, x = x_7_cast_fp16)[name = tensor("clip_1_cast_fp16")]; + tensor input_13_pad_type_0 = const()[name = tensor("input_13_pad_type_0"), val = tensor("custom")]; + tensor input_13_pad_0 = const()[name = tensor("input_13_pad_0"), val = tensor([1, 1])]; + tensor input_13_strides_0 = const()[name = tensor("input_13_strides_0"), val = tensor([2])]; + tensor input_13_dilations_0 = const()[name = tensor("input_13_dilations_0"), val = tensor([1])]; + tensor input_13_groups_0 = const()[name = tensor("input_13_groups_0"), val = tensor(1)]; + tensor encoder_layers_4_weight_to_fp16 = const()[name = tensor("encoder_layers_4_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(281088)))]; + tensor encoder_layers_4_bias_to_fp16 = const()[name = tensor("encoder_layers_4_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(305728)))]; + tensor input_13_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_13_dilations_0, groups = input_13_groups_0, pad = input_13_pad_0, pad_type = input_13_pad_type_0, strides = input_13_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_1_cast_fp16)[name = tensor("input_13_cast_fp16")]; + tensor x_9_cast_fp16 = relu(x = input_13_cast_fp16)[name = tensor("x_9_cast_fp16")]; + tensor const_3_to_fp16 = const()[name = tensor("const_3_to_fp16"), val = tensor(-inf)]; + tensor clip_2_cast_fp16 = clip(alpha = const_3_to_fp16, beta = var_234_to_fp16, x = x_9_cast_fp16)[name = tensor("clip_2_cast_fp16")]; + tensor input_17_pad_type_0 = const()[name = tensor("input_17_pad_type_0"), val = tensor("custom")]; + tensor input_17_pad_0 = const()[name = tensor("input_17_pad_0"), val = tensor([1, 1])]; + tensor input_17_strides_0 = const()[name = tensor("input_17_strides_0"), val = tensor([1])]; + tensor input_17_dilations_0 = const()[name = tensor("input_17_dilations_0"), val = tensor([1])]; + tensor input_17_groups_0 = const()[name = tensor("input_17_groups_0"), val = tensor(1)]; + tensor encoder_layers_6_weight_to_fp16 = const()[name = tensor("encoder_layers_6_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(305920)))]; + tensor encoder_layers_6_bias_to_fp16 = const()[name = tensor("encoder_layers_6_bias_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355136)))]; + tensor input_17_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_17_dilations_0, groups = input_17_groups_0, pad = input_17_pad_0, pad_type = input_17_pad_type_0, strides = input_17_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_2_cast_fp16)[name = tensor("input_17_cast_fp16")]; + tensor x_11_cast_fp16 = relu(x = input_17_cast_fp16)[name = tensor("x_11_cast_fp16")]; + tensor const_4_to_fp16 = const()[name = tensor("const_4_to_fp16"), val = tensor(-inf)]; + tensor clip_3_cast_fp16 = clip(alpha = const_4_to_fp16, beta = var_234_to_fp16, x = x_11_cast_fp16)[name = tensor("clip_3_cast_fp16")]; + tensor transpose_0_perm_0 = const()[name = tensor("transpose_0_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_0_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_0_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor hx_1_axes_0 = const()[name = tensor("hx_1_axes_0"), val = tensor([0])]; + tensor hidden_state_to_fp16_dtype_0 = const()[name = tensor("hidden_state_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hidden_state_to_fp16 = cast(dtype = hidden_state_to_fp16_dtype_0, x = hidden_state)[name = tensor("cast_51")]; + tensor hx_1_cast_fp16 = expand_dims(axes = hx_1_axes_0, x = hidden_state_to_fp16)[name = tensor("hx_1_cast_fp16")]; + tensor hx_3_axes_0 = const()[name = tensor("hx_3_axes_0"), val = tensor([0])]; + tensor cell_state_to_fp16_dtype_0 = const()[name = tensor("cell_state_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor cell_state_to_fp16 = cast(dtype = cell_state_to_fp16_dtype_0, x = cell_state)[name = tensor("cast_50")]; + tensor hx_3_cast_fp16 = expand_dims(axes = hx_3_axes_0, x = cell_state_to_fp16)[name = tensor("hx_3_cast_fp16")]; + tensor concat_0 = const()[name = tensor("concat_0"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(355456)))]; + tensor concat_1 = const()[name = tensor("concat_1"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(357568)))]; + tensor concat_2 = const()[name = tensor("concat_2"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(619776)))]; + tensor lstm_out_1_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_1_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_1_batch_first_lstm_h0_squeeze_axes_0, x = hx_1_cast_fp16)[name = tensor("lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_1_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_1_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_1_batch_first_lstm_c0_squeeze_axes_0, x = hx_3_cast_fp16)[name = tensor("lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_1_batch_first_direction_0 = const()[name = tensor("lstm_out_1_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_1_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_1_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_1_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_1_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_1_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_1_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_1_batch_first_activation_0 = const()[name = tensor("lstm_out_1_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_48")]; + tensor lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_49")]; + tensor transpose_0_cast_fp16 = transpose(perm = transpose_0_perm_0, x = clip_3_cast_fp16)[name = tensor("transpose_31")]; + tensor transpose_0_cast_fp16_to_fp32 = cast(dtype = transpose_0_cast_fp16_to_fp32_dtype_0, x = transpose_0_cast_fp16)[name = tensor("cast_52")]; + tensor lstm_out_1_batch_first_0, tensor lstm_out_1_batch_first_1, tensor lstm_out_1_batch_first_2 = lstm(activation = lstm_out_1_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_1_batch_first_cell_activation_0, direction = lstm_out_1_batch_first_direction_0, initial_c = lstm_out_1_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_1_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_1_batch_first_output_sequence_0, recurrent_activation = lstm_out_1_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_0_cast_fp16_to_fp32)[name = tensor("lstm_out_1_batch_first")]; + tensor transpose_1_perm_0 = const()[name = tensor("transpose_1_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_1_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_1_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_1_axes_0 = const()[name = tensor("hn_1_axes_0"), val = tensor([0])]; + tensor lstm_out_1_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_1_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_1_batch_first_1_to_fp16 = cast(dtype = lstm_out_1_batch_first_1_to_fp16_dtype_0, x = lstm_out_1_batch_first_1)[name = tensor("cast_46")]; + tensor hn_1_cast_fp16 = expand_dims(axes = hn_1_axes_0, x = lstm_out_1_batch_first_1_to_fp16)[name = tensor("hn_1_cast_fp16")]; + tensor cn_1_axes_0 = const()[name = tensor("cn_1_axes_0"), val = tensor([0])]; + tensor lstm_out_1_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_1_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_1_batch_first_2_to_fp16 = cast(dtype = lstm_out_1_batch_first_2_to_fp16_dtype_0, x = lstm_out_1_batch_first_2)[name = tensor("cast_45")]; + tensor cn_1_cast_fp16 = expand_dims(axes = cn_1_axes_0, x = lstm_out_1_batch_first_2_to_fp16)[name = tensor("cn_1_cast_fp16")]; + tensor lstm_out_1_batch_first_0_to_fp16 = cast(dtype = lstm_out_1_batch_first_0_to_fp16_dtype_0, x = lstm_out_1_batch_first_0)[name = tensor("cast_47")]; + tensor transpose_1_cast_fp16 = transpose(perm = transpose_1_perm_0, x = lstm_out_1_batch_first_0_to_fp16)[name = tensor("transpose_30")]; + tensor input_25_cast_fp16 = relu(x = transpose_1_cast_fp16)[name = tensor("input_25_cast_fp16")]; + tensor input_27_pad_type_0 = const()[name = tensor("input_27_pad_type_0"), val = tensor("valid")]; + tensor input_27_strides_0 = const()[name = tensor("input_27_strides_0"), val = tensor([1])]; + tensor input_27_pad_0 = const()[name = tensor("input_27_pad_0"), val = tensor([0, 0])]; + tensor input_27_dilations_0 = const()[name = tensor("input_27_dilations_0"), val = tensor([1])]; + tensor input_27_groups_0 = const()[name = tensor("input_27_groups_0"), val = tensor(1)]; + tensor decoder_final_conv_weight_to_fp16 = const()[name = tensor("decoder_final_conv_weight_to_fp16"), val = tensor(BLOBFILE(path = tensor("@model_path/weights/weight.bin"), offset = tensor(881984)))]; + tensor decoder_final_conv_bias_to_fp16 = const()[name = tensor("decoder_final_conv_bias_to_fp16"), val = tensor([0x1.dfp-5])]; + tensor input_27_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_27_dilations_0, groups = input_27_groups_0, pad = input_27_pad_0, pad_type = input_27_pad_type_0, strides = input_27_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_25_cast_fp16)[name = tensor("input_27_cast_fp16")]; + tensor var_318_cast_fp16 = sigmoid(x = input_27_cast_fp16)[name = tensor("op_318_cast_fp16")]; + tensor x_15_pad_0 = const()[name = tensor("x_15_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_15_mode_0 = const()[name = tensor("x_15_mode_0"), val = tensor("reflect")]; + tensor const_5_to_fp16 = const()[name = tensor("const_5_to_fp16"), val = tensor(0x0p+0)]; + tensor x_15_cast_fp16 = pad(constant_val = const_5_to_fp16, mode = x_15_mode_0, pad = x_15_pad_0, x = input_29_cast_fp16)[name = tensor("x_15_cast_fp16")]; + tensor x_17_axes_0 = const()[name = tensor("x_17_axes_0"), val = tensor([1])]; + tensor x_17_cast_fp16 = expand_dims(axes = x_17_axes_0, x = x_15_cast_fp16)[name = tensor("x_17_cast_fp16")]; + tensor stft_out_3_pad_type_0 = const()[name = tensor("stft_out_3_pad_type_0"), val = tensor("valid")]; + tensor stft_out_3_strides_0 = const()[name = tensor("stft_out_3_strides_0"), val = tensor([128])]; + tensor stft_out_3_pad_0 = const()[name = tensor("stft_out_3_pad_0"), val = tensor([0, 0])]; + tensor stft_out_3_dilations_0 = const()[name = tensor("stft_out_3_dilations_0"), val = tensor([1])]; + tensor stft_out_3_groups_0 = const()[name = tensor("stft_out_3_groups_0"), val = tensor(1)]; + tensor stft_out_3_cast_fp16 = conv(dilations = stft_out_3_dilations_0, groups = stft_out_3_groups_0, pad = stft_out_3_pad_0, pad_type = stft_out_3_pad_type_0, strides = stft_out_3_strides_0, weight = stft_forward_basis_to_fp16, x = x_17_cast_fp16)[name = tensor("stft_out_3_cast_fp16")]; + tensor var_346_begin_0 = const()[name = tensor("op_346_begin_0"), val = tensor([0, 0, 0])]; + tensor var_346_end_0 = const()[name = tensor("op_346_end_0"), val = tensor([1, 129, 4])]; + tensor var_346_end_mask_0 = const()[name = tensor("op_346_end_mask_0"), val = tensor([true, false, true])]; + tensor var_346_cast_fp16 = slice_by_index(begin = var_346_begin_0, end = var_346_end_0, end_mask = var_346_end_mask_0, x = stft_out_3_cast_fp16)[name = tensor("op_346_cast_fp16")]; + tensor var_349_begin_0 = const()[name = tensor("op_349_begin_0"), val = tensor([0, 129, 0])]; + tensor var_349_end_0 = const()[name = tensor("op_349_end_0"), val = tensor([1, 258, 4])]; + tensor var_349_end_mask_0 = const()[name = tensor("op_349_end_mask_0"), val = tensor([true, true, true])]; + tensor var_349_cast_fp16 = slice_by_index(begin = var_349_begin_0, end = var_349_end_0, end_mask = var_349_end_mask_0, x = stft_out_3_cast_fp16)[name = tensor("op_349_cast_fp16")]; + tensor var_325_promoted_to_fp16 = const()[name = tensor("op_325_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_351_cast_fp16 = pow(x = var_346_cast_fp16, y = var_325_promoted_to_fp16)[name = tensor("op_351_cast_fp16")]; + tensor var_325_promoted_1_to_fp16 = const()[name = tensor("op_325_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_352_cast_fp16 = pow(x = var_349_cast_fp16, y = var_325_promoted_1_to_fp16)[name = tensor("op_352_cast_fp16")]; + tensor var_353_cast_fp16 = add(x = var_351_cast_fp16, y = var_352_cast_fp16)[name = tensor("op_353_cast_fp16")]; + tensor var_354_to_fp16 = const()[name = tensor("op_354_to_fp16"), val = tensor(0x1p-24)]; + tensor var_355_cast_fp16 = add(x = var_353_cast_fp16, y = var_354_to_fp16)[name = tensor("op_355_cast_fp16")]; + tensor input_31_cast_fp16 = sqrt(x = var_355_cast_fp16)[name = tensor("input_31_cast_fp16")]; + tensor input_33_pad_type_0 = const()[name = tensor("input_33_pad_type_0"), val = tensor("custom")]; + tensor input_33_pad_0 = const()[name = tensor("input_33_pad_0"), val = tensor([1, 1])]; + tensor input_33_strides_0 = const()[name = tensor("input_33_strides_0"), val = tensor([1])]; + tensor input_33_dilations_0 = const()[name = tensor("input_33_dilations_0"), val = tensor([1])]; + tensor input_33_groups_0 = const()[name = tensor("input_33_groups_0"), val = tensor(1)]; + tensor input_33_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_33_dilations_0, groups = input_33_groups_0, pad = input_33_pad_0, pad_type = input_33_pad_type_0, strides = input_33_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_31_cast_fp16)[name = tensor("input_33_cast_fp16")]; + tensor x_19_cast_fp16 = relu(x = input_33_cast_fp16)[name = tensor("x_19_cast_fp16")]; + tensor const_6_to_fp16 = const()[name = tensor("const_6_to_fp16"), val = tensor(-inf)]; + tensor var_358_to_fp16 = const()[name = tensor("op_358_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_4_cast_fp16 = clip(alpha = const_6_to_fp16, beta = var_358_to_fp16, x = x_19_cast_fp16)[name = tensor("clip_4_cast_fp16")]; + tensor input_37_pad_type_0 = const()[name = tensor("input_37_pad_type_0"), val = tensor("custom")]; + tensor input_37_pad_0 = const()[name = tensor("input_37_pad_0"), val = tensor([1, 1])]; + tensor input_37_strides_0 = const()[name = tensor("input_37_strides_0"), val = tensor([2])]; + tensor input_37_dilations_0 = const()[name = tensor("input_37_dilations_0"), val = tensor([1])]; + tensor input_37_groups_0 = const()[name = tensor("input_37_groups_0"), val = tensor(1)]; + tensor input_37_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_37_dilations_0, groups = input_37_groups_0, pad = input_37_pad_0, pad_type = input_37_pad_type_0, strides = input_37_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_4_cast_fp16)[name = tensor("input_37_cast_fp16")]; + tensor x_21_cast_fp16 = relu(x = input_37_cast_fp16)[name = tensor("x_21_cast_fp16")]; + tensor const_7_to_fp16 = const()[name = tensor("const_7_to_fp16"), val = tensor(-inf)]; + tensor clip_5_cast_fp16 = clip(alpha = const_7_to_fp16, beta = var_358_to_fp16, x = x_21_cast_fp16)[name = tensor("clip_5_cast_fp16")]; + tensor input_41_pad_type_0 = const()[name = tensor("input_41_pad_type_0"), val = tensor("custom")]; + tensor input_41_pad_0 = const()[name = tensor("input_41_pad_0"), val = tensor([1, 1])]; + tensor input_41_strides_0 = const()[name = tensor("input_41_strides_0"), val = tensor([2])]; + tensor input_41_dilations_0 = const()[name = tensor("input_41_dilations_0"), val = tensor([1])]; + tensor input_41_groups_0 = const()[name = tensor("input_41_groups_0"), val = tensor(1)]; + tensor input_41_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_41_dilations_0, groups = input_41_groups_0, pad = input_41_pad_0, pad_type = input_41_pad_type_0, strides = input_41_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_5_cast_fp16)[name = tensor("input_41_cast_fp16")]; + tensor x_23_cast_fp16 = relu(x = input_41_cast_fp16)[name = tensor("x_23_cast_fp16")]; + tensor const_8_to_fp16 = const()[name = tensor("const_8_to_fp16"), val = tensor(-inf)]; + tensor clip_6_cast_fp16 = clip(alpha = const_8_to_fp16, beta = var_358_to_fp16, x = x_23_cast_fp16)[name = tensor("clip_6_cast_fp16")]; + tensor input_45_pad_type_0 = const()[name = tensor("input_45_pad_type_0"), val = tensor("custom")]; + tensor input_45_pad_0 = const()[name = tensor("input_45_pad_0"), val = tensor([1, 1])]; + tensor input_45_strides_0 = const()[name = tensor("input_45_strides_0"), val = tensor([1])]; + tensor input_45_dilations_0 = const()[name = tensor("input_45_dilations_0"), val = tensor([1])]; + tensor input_45_groups_0 = const()[name = tensor("input_45_groups_0"), val = tensor(1)]; + tensor input_45_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_45_dilations_0, groups = input_45_groups_0, pad = input_45_pad_0, pad_type = input_45_pad_type_0, strides = input_45_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_6_cast_fp16)[name = tensor("input_45_cast_fp16")]; + tensor x_25_cast_fp16 = relu(x = input_45_cast_fp16)[name = tensor("x_25_cast_fp16")]; + tensor const_9_to_fp16 = const()[name = tensor("const_9_to_fp16"), val = tensor(-inf)]; + tensor clip_7_cast_fp16 = clip(alpha = const_9_to_fp16, beta = var_358_to_fp16, x = x_25_cast_fp16)[name = tensor("clip_7_cast_fp16")]; + tensor transpose_2_perm_0 = const()[name = tensor("transpose_2_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_2_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_2_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_3_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_3_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_3_batch_first_lstm_h0_squeeze_axes_0, x = hn_1_cast_fp16)[name = tensor("lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_3_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_3_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_3_batch_first_lstm_c0_squeeze_axes_0, x = cn_1_cast_fp16)[name = tensor("lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_3_batch_first_direction_0 = const()[name = tensor("lstm_out_3_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_3_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_3_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_3_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_3_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_3_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_3_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_3_batch_first_activation_0 = const()[name = tensor("lstm_out_3_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_42")]; + tensor lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_43")]; + tensor transpose_2_cast_fp16 = transpose(perm = transpose_2_perm_0, x = clip_7_cast_fp16)[name = tensor("transpose_29")]; + tensor transpose_2_cast_fp16_to_fp32 = cast(dtype = transpose_2_cast_fp16_to_fp32_dtype_0, x = transpose_2_cast_fp16)[name = tensor("cast_44")]; + tensor lstm_out_3_batch_first_0, tensor lstm_out_3_batch_first_1, tensor lstm_out_3_batch_first_2 = lstm(activation = lstm_out_3_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_3_batch_first_cell_activation_0, direction = lstm_out_3_batch_first_direction_0, initial_c = lstm_out_3_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_3_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_3_batch_first_output_sequence_0, recurrent_activation = lstm_out_3_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_2_cast_fp16_to_fp32)[name = tensor("lstm_out_3_batch_first")]; + tensor transpose_3_perm_0 = const()[name = tensor("transpose_3_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_3_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_3_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_3_axes_0 = const()[name = tensor("hn_3_axes_0"), val = tensor([0])]; + tensor lstm_out_3_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_3_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_3_batch_first_1_to_fp16 = cast(dtype = lstm_out_3_batch_first_1_to_fp16_dtype_0, x = lstm_out_3_batch_first_1)[name = tensor("cast_40")]; + tensor hn_3_cast_fp16 = expand_dims(axes = hn_3_axes_0, x = lstm_out_3_batch_first_1_to_fp16)[name = tensor("hn_3_cast_fp16")]; + tensor cn_3_axes_0 = const()[name = tensor("cn_3_axes_0"), val = tensor([0])]; + tensor lstm_out_3_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_3_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_3_batch_first_2_to_fp16 = cast(dtype = lstm_out_3_batch_first_2_to_fp16_dtype_0, x = lstm_out_3_batch_first_2)[name = tensor("cast_39")]; + tensor cn_3_cast_fp16 = expand_dims(axes = cn_3_axes_0, x = lstm_out_3_batch_first_2_to_fp16)[name = tensor("cn_3_cast_fp16")]; + tensor lstm_out_3_batch_first_0_to_fp16 = cast(dtype = lstm_out_3_batch_first_0_to_fp16_dtype_0, x = lstm_out_3_batch_first_0)[name = tensor("cast_41")]; + tensor transpose_3_cast_fp16 = transpose(perm = transpose_3_perm_0, x = lstm_out_3_batch_first_0_to_fp16)[name = tensor("transpose_28")]; + tensor input_53_cast_fp16 = relu(x = transpose_3_cast_fp16)[name = tensor("input_53_cast_fp16")]; + tensor input_55_pad_type_0 = const()[name = tensor("input_55_pad_type_0"), val = tensor("valid")]; + tensor input_55_strides_0 = const()[name = tensor("input_55_strides_0"), val = tensor([1])]; + tensor input_55_pad_0 = const()[name = tensor("input_55_pad_0"), val = tensor([0, 0])]; + tensor input_55_dilations_0 = const()[name = tensor("input_55_dilations_0"), val = tensor([1])]; + tensor input_55_groups_0 = const()[name = tensor("input_55_groups_0"), val = tensor(1)]; + tensor input_55_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_55_dilations_0, groups = input_55_groups_0, pad = input_55_pad_0, pad_type = input_55_pad_type_0, strides = input_55_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_53_cast_fp16)[name = tensor("input_55_cast_fp16")]; + tensor var_442_cast_fp16 = sigmoid(x = input_55_cast_fp16)[name = tensor("op_442_cast_fp16")]; + tensor x_29_pad_0 = const()[name = tensor("x_29_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_29_mode_0 = const()[name = tensor("x_29_mode_0"), val = tensor("reflect")]; + tensor const_10_to_fp16 = const()[name = tensor("const_10_to_fp16"), val = tensor(0x0p+0)]; + tensor x_29_cast_fp16 = pad(constant_val = const_10_to_fp16, mode = x_29_mode_0, pad = x_29_pad_0, x = input_57_cast_fp16)[name = tensor("x_29_cast_fp16")]; + tensor x_31_axes_0 = const()[name = tensor("x_31_axes_0"), val = tensor([1])]; + tensor x_31_cast_fp16 = expand_dims(axes = x_31_axes_0, x = x_29_cast_fp16)[name = tensor("x_31_cast_fp16")]; + tensor stft_out_5_pad_type_0 = const()[name = tensor("stft_out_5_pad_type_0"), val = tensor("valid")]; + tensor stft_out_5_strides_0 = const()[name = tensor("stft_out_5_strides_0"), val = tensor([128])]; + tensor stft_out_5_pad_0 = const()[name = tensor("stft_out_5_pad_0"), val = tensor([0, 0])]; + tensor stft_out_5_dilations_0 = const()[name = tensor("stft_out_5_dilations_0"), val = tensor([1])]; + tensor stft_out_5_groups_0 = const()[name = tensor("stft_out_5_groups_0"), val = tensor(1)]; + tensor stft_out_5_cast_fp16 = conv(dilations = stft_out_5_dilations_0, groups = stft_out_5_groups_0, pad = stft_out_5_pad_0, pad_type = stft_out_5_pad_type_0, strides = stft_out_5_strides_0, weight = stft_forward_basis_to_fp16, x = x_31_cast_fp16)[name = tensor("stft_out_5_cast_fp16")]; + tensor var_470_begin_0 = const()[name = tensor("op_470_begin_0"), val = tensor([0, 0, 0])]; + tensor var_470_end_0 = const()[name = tensor("op_470_end_0"), val = tensor([1, 129, 4])]; + tensor var_470_end_mask_0 = const()[name = tensor("op_470_end_mask_0"), val = tensor([true, false, true])]; + tensor var_470_cast_fp16 = slice_by_index(begin = var_470_begin_0, end = var_470_end_0, end_mask = var_470_end_mask_0, x = stft_out_5_cast_fp16)[name = tensor("op_470_cast_fp16")]; + tensor var_473_begin_0 = const()[name = tensor("op_473_begin_0"), val = tensor([0, 129, 0])]; + tensor var_473_end_0 = const()[name = tensor("op_473_end_0"), val = tensor([1, 258, 4])]; + tensor var_473_end_mask_0 = const()[name = tensor("op_473_end_mask_0"), val = tensor([true, true, true])]; + tensor var_473_cast_fp16 = slice_by_index(begin = var_473_begin_0, end = var_473_end_0, end_mask = var_473_end_mask_0, x = stft_out_5_cast_fp16)[name = tensor("op_473_cast_fp16")]; + tensor var_449_promoted_to_fp16 = const()[name = tensor("op_449_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_475_cast_fp16 = pow(x = var_470_cast_fp16, y = var_449_promoted_to_fp16)[name = tensor("op_475_cast_fp16")]; + tensor var_449_promoted_1_to_fp16 = const()[name = tensor("op_449_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_476_cast_fp16 = pow(x = var_473_cast_fp16, y = var_449_promoted_1_to_fp16)[name = tensor("op_476_cast_fp16")]; + tensor var_477_cast_fp16 = add(x = var_475_cast_fp16, y = var_476_cast_fp16)[name = tensor("op_477_cast_fp16")]; + tensor var_478_to_fp16 = const()[name = tensor("op_478_to_fp16"), val = tensor(0x1p-24)]; + tensor var_479_cast_fp16 = add(x = var_477_cast_fp16, y = var_478_to_fp16)[name = tensor("op_479_cast_fp16")]; + tensor input_59_cast_fp16 = sqrt(x = var_479_cast_fp16)[name = tensor("input_59_cast_fp16")]; + tensor input_61_pad_type_0 = const()[name = tensor("input_61_pad_type_0"), val = tensor("custom")]; + tensor input_61_pad_0 = const()[name = tensor("input_61_pad_0"), val = tensor([1, 1])]; + tensor input_61_strides_0 = const()[name = tensor("input_61_strides_0"), val = tensor([1])]; + tensor input_61_dilations_0 = const()[name = tensor("input_61_dilations_0"), val = tensor([1])]; + tensor input_61_groups_0 = const()[name = tensor("input_61_groups_0"), val = tensor(1)]; + tensor input_61_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_61_dilations_0, groups = input_61_groups_0, pad = input_61_pad_0, pad_type = input_61_pad_type_0, strides = input_61_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_59_cast_fp16)[name = tensor("input_61_cast_fp16")]; + tensor x_33_cast_fp16 = relu(x = input_61_cast_fp16)[name = tensor("x_33_cast_fp16")]; + tensor const_11_to_fp16 = const()[name = tensor("const_11_to_fp16"), val = tensor(-inf)]; + tensor var_482_to_fp16 = const()[name = tensor("op_482_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_8_cast_fp16 = clip(alpha = const_11_to_fp16, beta = var_482_to_fp16, x = x_33_cast_fp16)[name = tensor("clip_8_cast_fp16")]; + tensor input_65_pad_type_0 = const()[name = tensor("input_65_pad_type_0"), val = tensor("custom")]; + tensor input_65_pad_0 = const()[name = tensor("input_65_pad_0"), val = tensor([1, 1])]; + tensor input_65_strides_0 = const()[name = tensor("input_65_strides_0"), val = tensor([2])]; + tensor input_65_dilations_0 = const()[name = tensor("input_65_dilations_0"), val = tensor([1])]; + tensor input_65_groups_0 = const()[name = tensor("input_65_groups_0"), val = tensor(1)]; + tensor input_65_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_65_dilations_0, groups = input_65_groups_0, pad = input_65_pad_0, pad_type = input_65_pad_type_0, strides = input_65_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_8_cast_fp16)[name = tensor("input_65_cast_fp16")]; + tensor x_35_cast_fp16 = relu(x = input_65_cast_fp16)[name = tensor("x_35_cast_fp16")]; + tensor const_12_to_fp16 = const()[name = tensor("const_12_to_fp16"), val = tensor(-inf)]; + tensor clip_9_cast_fp16 = clip(alpha = const_12_to_fp16, beta = var_482_to_fp16, x = x_35_cast_fp16)[name = tensor("clip_9_cast_fp16")]; + tensor input_69_pad_type_0 = const()[name = tensor("input_69_pad_type_0"), val = tensor("custom")]; + tensor input_69_pad_0 = const()[name = tensor("input_69_pad_0"), val = tensor([1, 1])]; + tensor input_69_strides_0 = const()[name = tensor("input_69_strides_0"), val = tensor([2])]; + tensor input_69_dilations_0 = const()[name = tensor("input_69_dilations_0"), val = tensor([1])]; + tensor input_69_groups_0 = const()[name = tensor("input_69_groups_0"), val = tensor(1)]; + tensor input_69_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_69_dilations_0, groups = input_69_groups_0, pad = input_69_pad_0, pad_type = input_69_pad_type_0, strides = input_69_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_9_cast_fp16)[name = tensor("input_69_cast_fp16")]; + tensor x_37_cast_fp16 = relu(x = input_69_cast_fp16)[name = tensor("x_37_cast_fp16")]; + tensor const_13_to_fp16 = const()[name = tensor("const_13_to_fp16"), val = tensor(-inf)]; + tensor clip_10_cast_fp16 = clip(alpha = const_13_to_fp16, beta = var_482_to_fp16, x = x_37_cast_fp16)[name = tensor("clip_10_cast_fp16")]; + tensor input_73_pad_type_0 = const()[name = tensor("input_73_pad_type_0"), val = tensor("custom")]; + tensor input_73_pad_0 = const()[name = tensor("input_73_pad_0"), val = tensor([1, 1])]; + tensor input_73_strides_0 = const()[name = tensor("input_73_strides_0"), val = tensor([1])]; + tensor input_73_dilations_0 = const()[name = tensor("input_73_dilations_0"), val = tensor([1])]; + tensor input_73_groups_0 = const()[name = tensor("input_73_groups_0"), val = tensor(1)]; + tensor input_73_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_73_dilations_0, groups = input_73_groups_0, pad = input_73_pad_0, pad_type = input_73_pad_type_0, strides = input_73_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_10_cast_fp16)[name = tensor("input_73_cast_fp16")]; + tensor x_39_cast_fp16 = relu(x = input_73_cast_fp16)[name = tensor("x_39_cast_fp16")]; + tensor const_14_to_fp16 = const()[name = tensor("const_14_to_fp16"), val = tensor(-inf)]; + tensor clip_11_cast_fp16 = clip(alpha = const_14_to_fp16, beta = var_482_to_fp16, x = x_39_cast_fp16)[name = tensor("clip_11_cast_fp16")]; + tensor transpose_4_perm_0 = const()[name = tensor("transpose_4_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_4_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_4_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_5_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_5_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_5_batch_first_lstm_h0_squeeze_axes_0, x = hn_3_cast_fp16)[name = tensor("lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_5_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_5_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_5_batch_first_lstm_c0_squeeze_axes_0, x = cn_3_cast_fp16)[name = tensor("lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_5_batch_first_direction_0 = const()[name = tensor("lstm_out_5_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_5_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_5_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_5_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_5_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_5_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_5_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_5_batch_first_activation_0 = const()[name = tensor("lstm_out_5_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_36")]; + tensor lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_37")]; + tensor transpose_4_cast_fp16 = transpose(perm = transpose_4_perm_0, x = clip_11_cast_fp16)[name = tensor("transpose_27")]; + tensor transpose_4_cast_fp16_to_fp32 = cast(dtype = transpose_4_cast_fp16_to_fp32_dtype_0, x = transpose_4_cast_fp16)[name = tensor("cast_38")]; + tensor lstm_out_5_batch_first_0, tensor lstm_out_5_batch_first_1, tensor lstm_out_5_batch_first_2 = lstm(activation = lstm_out_5_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_5_batch_first_cell_activation_0, direction = lstm_out_5_batch_first_direction_0, initial_c = lstm_out_5_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_5_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_5_batch_first_output_sequence_0, recurrent_activation = lstm_out_5_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_4_cast_fp16_to_fp32)[name = tensor("lstm_out_5_batch_first")]; + tensor transpose_5_perm_0 = const()[name = tensor("transpose_5_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_5_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_5_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_5_axes_0 = const()[name = tensor("hn_5_axes_0"), val = tensor([0])]; + tensor lstm_out_5_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_5_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_5_batch_first_1_to_fp16 = cast(dtype = lstm_out_5_batch_first_1_to_fp16_dtype_0, x = lstm_out_5_batch_first_1)[name = tensor("cast_34")]; + tensor hn_5_cast_fp16 = expand_dims(axes = hn_5_axes_0, x = lstm_out_5_batch_first_1_to_fp16)[name = tensor("hn_5_cast_fp16")]; + tensor cn_5_axes_0 = const()[name = tensor("cn_5_axes_0"), val = tensor([0])]; + tensor lstm_out_5_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_5_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_5_batch_first_2_to_fp16 = cast(dtype = lstm_out_5_batch_first_2_to_fp16_dtype_0, x = lstm_out_5_batch_first_2)[name = tensor("cast_33")]; + tensor cn_5_cast_fp16 = expand_dims(axes = cn_5_axes_0, x = lstm_out_5_batch_first_2_to_fp16)[name = tensor("cn_5_cast_fp16")]; + tensor lstm_out_5_batch_first_0_to_fp16 = cast(dtype = lstm_out_5_batch_first_0_to_fp16_dtype_0, x = lstm_out_5_batch_first_0)[name = tensor("cast_35")]; + tensor transpose_5_cast_fp16 = transpose(perm = transpose_5_perm_0, x = lstm_out_5_batch_first_0_to_fp16)[name = tensor("transpose_26")]; + tensor input_81_cast_fp16 = relu(x = transpose_5_cast_fp16)[name = tensor("input_81_cast_fp16")]; + tensor input_83_pad_type_0 = const()[name = tensor("input_83_pad_type_0"), val = tensor("valid")]; + tensor input_83_strides_0 = const()[name = tensor("input_83_strides_0"), val = tensor([1])]; + tensor input_83_pad_0 = const()[name = tensor("input_83_pad_0"), val = tensor([0, 0])]; + tensor input_83_dilations_0 = const()[name = tensor("input_83_dilations_0"), val = tensor([1])]; + tensor input_83_groups_0 = const()[name = tensor("input_83_groups_0"), val = tensor(1)]; + tensor input_83_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_83_dilations_0, groups = input_83_groups_0, pad = input_83_pad_0, pad_type = input_83_pad_type_0, strides = input_83_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_81_cast_fp16)[name = tensor("input_83_cast_fp16")]; + tensor var_566_cast_fp16 = sigmoid(x = input_83_cast_fp16)[name = tensor("op_566_cast_fp16")]; + tensor x_43_pad_0 = const()[name = tensor("x_43_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_43_mode_0 = const()[name = tensor("x_43_mode_0"), val = tensor("reflect")]; + tensor const_15_to_fp16 = const()[name = tensor("const_15_to_fp16"), val = tensor(0x0p+0)]; + tensor x_43_cast_fp16 = pad(constant_val = const_15_to_fp16, mode = x_43_mode_0, pad = x_43_pad_0, x = input_85_cast_fp16)[name = tensor("x_43_cast_fp16")]; + tensor x_45_axes_0 = const()[name = tensor("x_45_axes_0"), val = tensor([1])]; + tensor x_45_cast_fp16 = expand_dims(axes = x_45_axes_0, x = x_43_cast_fp16)[name = tensor("x_45_cast_fp16")]; + tensor stft_out_7_pad_type_0 = const()[name = tensor("stft_out_7_pad_type_0"), val = tensor("valid")]; + tensor stft_out_7_strides_0 = const()[name = tensor("stft_out_7_strides_0"), val = tensor([128])]; + tensor stft_out_7_pad_0 = const()[name = tensor("stft_out_7_pad_0"), val = tensor([0, 0])]; + tensor stft_out_7_dilations_0 = const()[name = tensor("stft_out_7_dilations_0"), val = tensor([1])]; + tensor stft_out_7_groups_0 = const()[name = tensor("stft_out_7_groups_0"), val = tensor(1)]; + tensor stft_out_7_cast_fp16 = conv(dilations = stft_out_7_dilations_0, groups = stft_out_7_groups_0, pad = stft_out_7_pad_0, pad_type = stft_out_7_pad_type_0, strides = stft_out_7_strides_0, weight = stft_forward_basis_to_fp16, x = x_45_cast_fp16)[name = tensor("stft_out_7_cast_fp16")]; + tensor var_594_begin_0 = const()[name = tensor("op_594_begin_0"), val = tensor([0, 0, 0])]; + tensor var_594_end_0 = const()[name = tensor("op_594_end_0"), val = tensor([1, 129, 4])]; + tensor var_594_end_mask_0 = const()[name = tensor("op_594_end_mask_0"), val = tensor([true, false, true])]; + tensor var_594_cast_fp16 = slice_by_index(begin = var_594_begin_0, end = var_594_end_0, end_mask = var_594_end_mask_0, x = stft_out_7_cast_fp16)[name = tensor("op_594_cast_fp16")]; + tensor var_597_begin_0 = const()[name = tensor("op_597_begin_0"), val = tensor([0, 129, 0])]; + tensor var_597_end_0 = const()[name = tensor("op_597_end_0"), val = tensor([1, 258, 4])]; + tensor var_597_end_mask_0 = const()[name = tensor("op_597_end_mask_0"), val = tensor([true, true, true])]; + tensor var_597_cast_fp16 = slice_by_index(begin = var_597_begin_0, end = var_597_end_0, end_mask = var_597_end_mask_0, x = stft_out_7_cast_fp16)[name = tensor("op_597_cast_fp16")]; + tensor var_573_promoted_to_fp16 = const()[name = tensor("op_573_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_599_cast_fp16 = pow(x = var_594_cast_fp16, y = var_573_promoted_to_fp16)[name = tensor("op_599_cast_fp16")]; + tensor var_573_promoted_1_to_fp16 = const()[name = tensor("op_573_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_600_cast_fp16 = pow(x = var_597_cast_fp16, y = var_573_promoted_1_to_fp16)[name = tensor("op_600_cast_fp16")]; + tensor var_601_cast_fp16 = add(x = var_599_cast_fp16, y = var_600_cast_fp16)[name = tensor("op_601_cast_fp16")]; + tensor var_602_to_fp16 = const()[name = tensor("op_602_to_fp16"), val = tensor(0x1p-24)]; + tensor var_603_cast_fp16 = add(x = var_601_cast_fp16, y = var_602_to_fp16)[name = tensor("op_603_cast_fp16")]; + tensor input_87_cast_fp16 = sqrt(x = var_603_cast_fp16)[name = tensor("input_87_cast_fp16")]; + tensor input_89_pad_type_0 = const()[name = tensor("input_89_pad_type_0"), val = tensor("custom")]; + tensor input_89_pad_0 = const()[name = tensor("input_89_pad_0"), val = tensor([1, 1])]; + tensor input_89_strides_0 = const()[name = tensor("input_89_strides_0"), val = tensor([1])]; + tensor input_89_dilations_0 = const()[name = tensor("input_89_dilations_0"), val = tensor([1])]; + tensor input_89_groups_0 = const()[name = tensor("input_89_groups_0"), val = tensor(1)]; + tensor input_89_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_89_dilations_0, groups = input_89_groups_0, pad = input_89_pad_0, pad_type = input_89_pad_type_0, strides = input_89_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_87_cast_fp16)[name = tensor("input_89_cast_fp16")]; + tensor x_47_cast_fp16 = relu(x = input_89_cast_fp16)[name = tensor("x_47_cast_fp16")]; + tensor const_16_to_fp16 = const()[name = tensor("const_16_to_fp16"), val = tensor(-inf)]; + tensor var_606_to_fp16 = const()[name = tensor("op_606_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_12_cast_fp16 = clip(alpha = const_16_to_fp16, beta = var_606_to_fp16, x = x_47_cast_fp16)[name = tensor("clip_12_cast_fp16")]; + tensor input_93_pad_type_0 = const()[name = tensor("input_93_pad_type_0"), val = tensor("custom")]; + tensor input_93_pad_0 = const()[name = tensor("input_93_pad_0"), val = tensor([1, 1])]; + tensor input_93_strides_0 = const()[name = tensor("input_93_strides_0"), val = tensor([2])]; + tensor input_93_dilations_0 = const()[name = tensor("input_93_dilations_0"), val = tensor([1])]; + tensor input_93_groups_0 = const()[name = tensor("input_93_groups_0"), val = tensor(1)]; + tensor input_93_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_93_dilations_0, groups = input_93_groups_0, pad = input_93_pad_0, pad_type = input_93_pad_type_0, strides = input_93_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_12_cast_fp16)[name = tensor("input_93_cast_fp16")]; + tensor x_49_cast_fp16 = relu(x = input_93_cast_fp16)[name = tensor("x_49_cast_fp16")]; + tensor const_17_to_fp16 = const()[name = tensor("const_17_to_fp16"), val = tensor(-inf)]; + tensor clip_13_cast_fp16 = clip(alpha = const_17_to_fp16, beta = var_606_to_fp16, x = x_49_cast_fp16)[name = tensor("clip_13_cast_fp16")]; + tensor input_97_pad_type_0 = const()[name = tensor("input_97_pad_type_0"), val = tensor("custom")]; + tensor input_97_pad_0 = const()[name = tensor("input_97_pad_0"), val = tensor([1, 1])]; + tensor input_97_strides_0 = const()[name = tensor("input_97_strides_0"), val = tensor([2])]; + tensor input_97_dilations_0 = const()[name = tensor("input_97_dilations_0"), val = tensor([1])]; + tensor input_97_groups_0 = const()[name = tensor("input_97_groups_0"), val = tensor(1)]; + tensor input_97_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_97_dilations_0, groups = input_97_groups_0, pad = input_97_pad_0, pad_type = input_97_pad_type_0, strides = input_97_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_13_cast_fp16)[name = tensor("input_97_cast_fp16")]; + tensor x_51_cast_fp16 = relu(x = input_97_cast_fp16)[name = tensor("x_51_cast_fp16")]; + tensor const_18_to_fp16 = const()[name = tensor("const_18_to_fp16"), val = tensor(-inf)]; + tensor clip_14_cast_fp16 = clip(alpha = const_18_to_fp16, beta = var_606_to_fp16, x = x_51_cast_fp16)[name = tensor("clip_14_cast_fp16")]; + tensor input_101_pad_type_0 = const()[name = tensor("input_101_pad_type_0"), val = tensor("custom")]; + tensor input_101_pad_0 = const()[name = tensor("input_101_pad_0"), val = tensor([1, 1])]; + tensor input_101_strides_0 = const()[name = tensor("input_101_strides_0"), val = tensor([1])]; + tensor input_101_dilations_0 = const()[name = tensor("input_101_dilations_0"), val = tensor([1])]; + tensor input_101_groups_0 = const()[name = tensor("input_101_groups_0"), val = tensor(1)]; + tensor input_101_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_101_dilations_0, groups = input_101_groups_0, pad = input_101_pad_0, pad_type = input_101_pad_type_0, strides = input_101_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_14_cast_fp16)[name = tensor("input_101_cast_fp16")]; + tensor x_53_cast_fp16 = relu(x = input_101_cast_fp16)[name = tensor("x_53_cast_fp16")]; + tensor const_19_to_fp16 = const()[name = tensor("const_19_to_fp16"), val = tensor(-inf)]; + tensor clip_15_cast_fp16 = clip(alpha = const_19_to_fp16, beta = var_606_to_fp16, x = x_53_cast_fp16)[name = tensor("clip_15_cast_fp16")]; + tensor transpose_6_perm_0 = const()[name = tensor("transpose_6_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_6_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_6_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_7_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_7_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_7_batch_first_lstm_h0_squeeze_axes_0, x = hn_5_cast_fp16)[name = tensor("lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_7_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_7_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_7_batch_first_lstm_c0_squeeze_axes_0, x = cn_5_cast_fp16)[name = tensor("lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_7_batch_first_direction_0 = const()[name = tensor("lstm_out_7_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_7_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_7_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_7_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_7_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_7_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_7_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_7_batch_first_activation_0 = const()[name = tensor("lstm_out_7_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_30")]; + tensor lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_31")]; + tensor transpose_6_cast_fp16 = transpose(perm = transpose_6_perm_0, x = clip_15_cast_fp16)[name = tensor("transpose_25")]; + tensor transpose_6_cast_fp16_to_fp32 = cast(dtype = transpose_6_cast_fp16_to_fp32_dtype_0, x = transpose_6_cast_fp16)[name = tensor("cast_32")]; + tensor lstm_out_7_batch_first_0, tensor lstm_out_7_batch_first_1, tensor lstm_out_7_batch_first_2 = lstm(activation = lstm_out_7_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_7_batch_first_cell_activation_0, direction = lstm_out_7_batch_first_direction_0, initial_c = lstm_out_7_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_7_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_7_batch_first_output_sequence_0, recurrent_activation = lstm_out_7_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_6_cast_fp16_to_fp32)[name = tensor("lstm_out_7_batch_first")]; + tensor transpose_7_perm_0 = const()[name = tensor("transpose_7_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_7_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_7_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_7_axes_0 = const()[name = tensor("hn_7_axes_0"), val = tensor([0])]; + tensor lstm_out_7_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_7_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_7_batch_first_1_to_fp16 = cast(dtype = lstm_out_7_batch_first_1_to_fp16_dtype_0, x = lstm_out_7_batch_first_1)[name = tensor("cast_28")]; + tensor hn_7_cast_fp16 = expand_dims(axes = hn_7_axes_0, x = lstm_out_7_batch_first_1_to_fp16)[name = tensor("hn_7_cast_fp16")]; + tensor cn_7_axes_0 = const()[name = tensor("cn_7_axes_0"), val = tensor([0])]; + tensor lstm_out_7_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_7_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_7_batch_first_2_to_fp16 = cast(dtype = lstm_out_7_batch_first_2_to_fp16_dtype_0, x = lstm_out_7_batch_first_2)[name = tensor("cast_27")]; + tensor cn_7_cast_fp16 = expand_dims(axes = cn_7_axes_0, x = lstm_out_7_batch_first_2_to_fp16)[name = tensor("cn_7_cast_fp16")]; + tensor lstm_out_7_batch_first_0_to_fp16 = cast(dtype = lstm_out_7_batch_first_0_to_fp16_dtype_0, x = lstm_out_7_batch_first_0)[name = tensor("cast_29")]; + tensor transpose_7_cast_fp16 = transpose(perm = transpose_7_perm_0, x = lstm_out_7_batch_first_0_to_fp16)[name = tensor("transpose_24")]; + tensor input_109_cast_fp16 = relu(x = transpose_7_cast_fp16)[name = tensor("input_109_cast_fp16")]; + tensor input_111_pad_type_0 = const()[name = tensor("input_111_pad_type_0"), val = tensor("valid")]; + tensor input_111_strides_0 = const()[name = tensor("input_111_strides_0"), val = tensor([1])]; + tensor input_111_pad_0 = const()[name = tensor("input_111_pad_0"), val = tensor([0, 0])]; + tensor input_111_dilations_0 = const()[name = tensor("input_111_dilations_0"), val = tensor([1])]; + tensor input_111_groups_0 = const()[name = tensor("input_111_groups_0"), val = tensor(1)]; + tensor input_111_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_111_dilations_0, groups = input_111_groups_0, pad = input_111_pad_0, pad_type = input_111_pad_type_0, strides = input_111_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_109_cast_fp16)[name = tensor("input_111_cast_fp16")]; + tensor var_690_cast_fp16 = sigmoid(x = input_111_cast_fp16)[name = tensor("op_690_cast_fp16")]; + tensor x_57_pad_0 = const()[name = tensor("x_57_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_57_mode_0 = const()[name = tensor("x_57_mode_0"), val = tensor("reflect")]; + tensor const_20_to_fp16 = const()[name = tensor("const_20_to_fp16"), val = tensor(0x0p+0)]; + tensor x_57_cast_fp16 = pad(constant_val = const_20_to_fp16, mode = x_57_mode_0, pad = x_57_pad_0, x = input_113_cast_fp16)[name = tensor("x_57_cast_fp16")]; + tensor x_59_axes_0 = const()[name = tensor("x_59_axes_0"), val = tensor([1])]; + tensor x_59_cast_fp16 = expand_dims(axes = x_59_axes_0, x = x_57_cast_fp16)[name = tensor("x_59_cast_fp16")]; + tensor stft_out_9_pad_type_0 = const()[name = tensor("stft_out_9_pad_type_0"), val = tensor("valid")]; + tensor stft_out_9_strides_0 = const()[name = tensor("stft_out_9_strides_0"), val = tensor([128])]; + tensor stft_out_9_pad_0 = const()[name = tensor("stft_out_9_pad_0"), val = tensor([0, 0])]; + tensor stft_out_9_dilations_0 = const()[name = tensor("stft_out_9_dilations_0"), val = tensor([1])]; + tensor stft_out_9_groups_0 = const()[name = tensor("stft_out_9_groups_0"), val = tensor(1)]; + tensor stft_out_9_cast_fp16 = conv(dilations = stft_out_9_dilations_0, groups = stft_out_9_groups_0, pad = stft_out_9_pad_0, pad_type = stft_out_9_pad_type_0, strides = stft_out_9_strides_0, weight = stft_forward_basis_to_fp16, x = x_59_cast_fp16)[name = tensor("stft_out_9_cast_fp16")]; + tensor var_718_begin_0 = const()[name = tensor("op_718_begin_0"), val = tensor([0, 0, 0])]; + tensor var_718_end_0 = const()[name = tensor("op_718_end_0"), val = tensor([1, 129, 4])]; + tensor var_718_end_mask_0 = const()[name = tensor("op_718_end_mask_0"), val = tensor([true, false, true])]; + tensor var_718_cast_fp16 = slice_by_index(begin = var_718_begin_0, end = var_718_end_0, end_mask = var_718_end_mask_0, x = stft_out_9_cast_fp16)[name = tensor("op_718_cast_fp16")]; + tensor var_721_begin_0 = const()[name = tensor("op_721_begin_0"), val = tensor([0, 129, 0])]; + tensor var_721_end_0 = const()[name = tensor("op_721_end_0"), val = tensor([1, 258, 4])]; + tensor var_721_end_mask_0 = const()[name = tensor("op_721_end_mask_0"), val = tensor([true, true, true])]; + tensor var_721_cast_fp16 = slice_by_index(begin = var_721_begin_0, end = var_721_end_0, end_mask = var_721_end_mask_0, x = stft_out_9_cast_fp16)[name = tensor("op_721_cast_fp16")]; + tensor var_697_promoted_to_fp16 = const()[name = tensor("op_697_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_723_cast_fp16 = pow(x = var_718_cast_fp16, y = var_697_promoted_to_fp16)[name = tensor("op_723_cast_fp16")]; + tensor var_697_promoted_1_to_fp16 = const()[name = tensor("op_697_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_724_cast_fp16 = pow(x = var_721_cast_fp16, y = var_697_promoted_1_to_fp16)[name = tensor("op_724_cast_fp16")]; + tensor var_725_cast_fp16 = add(x = var_723_cast_fp16, y = var_724_cast_fp16)[name = tensor("op_725_cast_fp16")]; + tensor var_726_to_fp16 = const()[name = tensor("op_726_to_fp16"), val = tensor(0x1p-24)]; + tensor var_727_cast_fp16 = add(x = var_725_cast_fp16, y = var_726_to_fp16)[name = tensor("op_727_cast_fp16")]; + tensor input_115_cast_fp16 = sqrt(x = var_727_cast_fp16)[name = tensor("input_115_cast_fp16")]; + tensor input_117_pad_type_0 = const()[name = tensor("input_117_pad_type_0"), val = tensor("custom")]; + tensor input_117_pad_0 = const()[name = tensor("input_117_pad_0"), val = tensor([1, 1])]; + tensor input_117_strides_0 = const()[name = tensor("input_117_strides_0"), val = tensor([1])]; + tensor input_117_dilations_0 = const()[name = tensor("input_117_dilations_0"), val = tensor([1])]; + tensor input_117_groups_0 = const()[name = tensor("input_117_groups_0"), val = tensor(1)]; + tensor input_117_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_117_dilations_0, groups = input_117_groups_0, pad = input_117_pad_0, pad_type = input_117_pad_type_0, strides = input_117_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_115_cast_fp16)[name = tensor("input_117_cast_fp16")]; + tensor x_61_cast_fp16 = relu(x = input_117_cast_fp16)[name = tensor("x_61_cast_fp16")]; + tensor const_21_to_fp16 = const()[name = tensor("const_21_to_fp16"), val = tensor(-inf)]; + tensor var_730_to_fp16 = const()[name = tensor("op_730_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_16_cast_fp16 = clip(alpha = const_21_to_fp16, beta = var_730_to_fp16, x = x_61_cast_fp16)[name = tensor("clip_16_cast_fp16")]; + tensor input_121_pad_type_0 = const()[name = tensor("input_121_pad_type_0"), val = tensor("custom")]; + tensor input_121_pad_0 = const()[name = tensor("input_121_pad_0"), val = tensor([1, 1])]; + tensor input_121_strides_0 = const()[name = tensor("input_121_strides_0"), val = tensor([2])]; + tensor input_121_dilations_0 = const()[name = tensor("input_121_dilations_0"), val = tensor([1])]; + tensor input_121_groups_0 = const()[name = tensor("input_121_groups_0"), val = tensor(1)]; + tensor input_121_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_121_dilations_0, groups = input_121_groups_0, pad = input_121_pad_0, pad_type = input_121_pad_type_0, strides = input_121_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_16_cast_fp16)[name = tensor("input_121_cast_fp16")]; + tensor x_63_cast_fp16 = relu(x = input_121_cast_fp16)[name = tensor("x_63_cast_fp16")]; + tensor const_22_to_fp16 = const()[name = tensor("const_22_to_fp16"), val = tensor(-inf)]; + tensor clip_17_cast_fp16 = clip(alpha = const_22_to_fp16, beta = var_730_to_fp16, x = x_63_cast_fp16)[name = tensor("clip_17_cast_fp16")]; + tensor input_125_pad_type_0 = const()[name = tensor("input_125_pad_type_0"), val = tensor("custom")]; + tensor input_125_pad_0 = const()[name = tensor("input_125_pad_0"), val = tensor([1, 1])]; + tensor input_125_strides_0 = const()[name = tensor("input_125_strides_0"), val = tensor([2])]; + tensor input_125_dilations_0 = const()[name = tensor("input_125_dilations_0"), val = tensor([1])]; + tensor input_125_groups_0 = const()[name = tensor("input_125_groups_0"), val = tensor(1)]; + tensor input_125_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_125_dilations_0, groups = input_125_groups_0, pad = input_125_pad_0, pad_type = input_125_pad_type_0, strides = input_125_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_17_cast_fp16)[name = tensor("input_125_cast_fp16")]; + tensor x_65_cast_fp16 = relu(x = input_125_cast_fp16)[name = tensor("x_65_cast_fp16")]; + tensor const_23_to_fp16 = const()[name = tensor("const_23_to_fp16"), val = tensor(-inf)]; + tensor clip_18_cast_fp16 = clip(alpha = const_23_to_fp16, beta = var_730_to_fp16, x = x_65_cast_fp16)[name = tensor("clip_18_cast_fp16")]; + tensor input_129_pad_type_0 = const()[name = tensor("input_129_pad_type_0"), val = tensor("custom")]; + tensor input_129_pad_0 = const()[name = tensor("input_129_pad_0"), val = tensor([1, 1])]; + tensor input_129_strides_0 = const()[name = tensor("input_129_strides_0"), val = tensor([1])]; + tensor input_129_dilations_0 = const()[name = tensor("input_129_dilations_0"), val = tensor([1])]; + tensor input_129_groups_0 = const()[name = tensor("input_129_groups_0"), val = tensor(1)]; + tensor input_129_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_129_dilations_0, groups = input_129_groups_0, pad = input_129_pad_0, pad_type = input_129_pad_type_0, strides = input_129_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_18_cast_fp16)[name = tensor("input_129_cast_fp16")]; + tensor x_67_cast_fp16 = relu(x = input_129_cast_fp16)[name = tensor("x_67_cast_fp16")]; + tensor const_24_to_fp16 = const()[name = tensor("const_24_to_fp16"), val = tensor(-inf)]; + tensor clip_19_cast_fp16 = clip(alpha = const_24_to_fp16, beta = var_730_to_fp16, x = x_67_cast_fp16)[name = tensor("clip_19_cast_fp16")]; + tensor transpose_8_perm_0 = const()[name = tensor("transpose_8_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_8_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_8_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_9_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_9_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_9_batch_first_lstm_h0_squeeze_axes_0, x = hn_7_cast_fp16)[name = tensor("lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_9_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_9_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_9_batch_first_lstm_c0_squeeze_axes_0, x = cn_7_cast_fp16)[name = tensor("lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_9_batch_first_direction_0 = const()[name = tensor("lstm_out_9_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_9_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_9_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_9_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_9_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_9_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_9_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_9_batch_first_activation_0 = const()[name = tensor("lstm_out_9_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_24")]; + tensor lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_25")]; + tensor transpose_8_cast_fp16 = transpose(perm = transpose_8_perm_0, x = clip_19_cast_fp16)[name = tensor("transpose_23")]; + tensor transpose_8_cast_fp16_to_fp32 = cast(dtype = transpose_8_cast_fp16_to_fp32_dtype_0, x = transpose_8_cast_fp16)[name = tensor("cast_26")]; + tensor lstm_out_9_batch_first_0, tensor lstm_out_9_batch_first_1, tensor lstm_out_9_batch_first_2 = lstm(activation = lstm_out_9_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_9_batch_first_cell_activation_0, direction = lstm_out_9_batch_first_direction_0, initial_c = lstm_out_9_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_9_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_9_batch_first_output_sequence_0, recurrent_activation = lstm_out_9_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_8_cast_fp16_to_fp32)[name = tensor("lstm_out_9_batch_first")]; + tensor transpose_9_perm_0 = const()[name = tensor("transpose_9_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_9_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_9_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_9_axes_0 = const()[name = tensor("hn_9_axes_0"), val = tensor([0])]; + tensor lstm_out_9_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_9_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_9_batch_first_1_to_fp16 = cast(dtype = lstm_out_9_batch_first_1_to_fp16_dtype_0, x = lstm_out_9_batch_first_1)[name = tensor("cast_22")]; + tensor hn_9_cast_fp16 = expand_dims(axes = hn_9_axes_0, x = lstm_out_9_batch_first_1_to_fp16)[name = tensor("hn_9_cast_fp16")]; + tensor cn_9_axes_0 = const()[name = tensor("cn_9_axes_0"), val = tensor([0])]; + tensor lstm_out_9_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_9_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_9_batch_first_2_to_fp16 = cast(dtype = lstm_out_9_batch_first_2_to_fp16_dtype_0, x = lstm_out_9_batch_first_2)[name = tensor("cast_21")]; + tensor cn_9_cast_fp16 = expand_dims(axes = cn_9_axes_0, x = lstm_out_9_batch_first_2_to_fp16)[name = tensor("cn_9_cast_fp16")]; + tensor lstm_out_9_batch_first_0_to_fp16 = cast(dtype = lstm_out_9_batch_first_0_to_fp16_dtype_0, x = lstm_out_9_batch_first_0)[name = tensor("cast_23")]; + tensor transpose_9_cast_fp16 = transpose(perm = transpose_9_perm_0, x = lstm_out_9_batch_first_0_to_fp16)[name = tensor("transpose_22")]; + tensor input_137_cast_fp16 = relu(x = transpose_9_cast_fp16)[name = tensor("input_137_cast_fp16")]; + tensor input_139_pad_type_0 = const()[name = tensor("input_139_pad_type_0"), val = tensor("valid")]; + tensor input_139_strides_0 = const()[name = tensor("input_139_strides_0"), val = tensor([1])]; + tensor input_139_pad_0 = const()[name = tensor("input_139_pad_0"), val = tensor([0, 0])]; + tensor input_139_dilations_0 = const()[name = tensor("input_139_dilations_0"), val = tensor([1])]; + tensor input_139_groups_0 = const()[name = tensor("input_139_groups_0"), val = tensor(1)]; + tensor input_139_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_139_dilations_0, groups = input_139_groups_0, pad = input_139_pad_0, pad_type = input_139_pad_type_0, strides = input_139_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_137_cast_fp16)[name = tensor("input_139_cast_fp16")]; + tensor var_814_cast_fp16 = sigmoid(x = input_139_cast_fp16)[name = tensor("op_814_cast_fp16")]; + tensor x_71_pad_0 = const()[name = tensor("x_71_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_71_mode_0 = const()[name = tensor("x_71_mode_0"), val = tensor("reflect")]; + tensor const_25_to_fp16 = const()[name = tensor("const_25_to_fp16"), val = tensor(0x0p+0)]; + tensor x_71_cast_fp16 = pad(constant_val = const_25_to_fp16, mode = x_71_mode_0, pad = x_71_pad_0, x = input_141_cast_fp16)[name = tensor("x_71_cast_fp16")]; + tensor x_73_axes_0 = const()[name = tensor("x_73_axes_0"), val = tensor([1])]; + tensor x_73_cast_fp16 = expand_dims(axes = x_73_axes_0, x = x_71_cast_fp16)[name = tensor("x_73_cast_fp16")]; + tensor stft_out_11_pad_type_0 = const()[name = tensor("stft_out_11_pad_type_0"), val = tensor("valid")]; + tensor stft_out_11_strides_0 = const()[name = tensor("stft_out_11_strides_0"), val = tensor([128])]; + tensor stft_out_11_pad_0 = const()[name = tensor("stft_out_11_pad_0"), val = tensor([0, 0])]; + tensor stft_out_11_dilations_0 = const()[name = tensor("stft_out_11_dilations_0"), val = tensor([1])]; + tensor stft_out_11_groups_0 = const()[name = tensor("stft_out_11_groups_0"), val = tensor(1)]; + tensor stft_out_11_cast_fp16 = conv(dilations = stft_out_11_dilations_0, groups = stft_out_11_groups_0, pad = stft_out_11_pad_0, pad_type = stft_out_11_pad_type_0, strides = stft_out_11_strides_0, weight = stft_forward_basis_to_fp16, x = x_73_cast_fp16)[name = tensor("stft_out_11_cast_fp16")]; + tensor var_842_begin_0 = const()[name = tensor("op_842_begin_0"), val = tensor([0, 0, 0])]; + tensor var_842_end_0 = const()[name = tensor("op_842_end_0"), val = tensor([1, 129, 4])]; + tensor var_842_end_mask_0 = const()[name = tensor("op_842_end_mask_0"), val = tensor([true, false, true])]; + tensor var_842_cast_fp16 = slice_by_index(begin = var_842_begin_0, end = var_842_end_0, end_mask = var_842_end_mask_0, x = stft_out_11_cast_fp16)[name = tensor("op_842_cast_fp16")]; + tensor var_845_begin_0 = const()[name = tensor("op_845_begin_0"), val = tensor([0, 129, 0])]; + tensor var_845_end_0 = const()[name = tensor("op_845_end_0"), val = tensor([1, 258, 4])]; + tensor var_845_end_mask_0 = const()[name = tensor("op_845_end_mask_0"), val = tensor([true, true, true])]; + tensor var_845_cast_fp16 = slice_by_index(begin = var_845_begin_0, end = var_845_end_0, end_mask = var_845_end_mask_0, x = stft_out_11_cast_fp16)[name = tensor("op_845_cast_fp16")]; + tensor var_821_promoted_to_fp16 = const()[name = tensor("op_821_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_847_cast_fp16 = pow(x = var_842_cast_fp16, y = var_821_promoted_to_fp16)[name = tensor("op_847_cast_fp16")]; + tensor var_821_promoted_1_to_fp16 = const()[name = tensor("op_821_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_848_cast_fp16 = pow(x = var_845_cast_fp16, y = var_821_promoted_1_to_fp16)[name = tensor("op_848_cast_fp16")]; + tensor var_849_cast_fp16 = add(x = var_847_cast_fp16, y = var_848_cast_fp16)[name = tensor("op_849_cast_fp16")]; + tensor var_850_to_fp16 = const()[name = tensor("op_850_to_fp16"), val = tensor(0x1p-24)]; + tensor var_851_cast_fp16 = add(x = var_849_cast_fp16, y = var_850_to_fp16)[name = tensor("op_851_cast_fp16")]; + tensor input_143_cast_fp16 = sqrt(x = var_851_cast_fp16)[name = tensor("input_143_cast_fp16")]; + tensor input_145_pad_type_0 = const()[name = tensor("input_145_pad_type_0"), val = tensor("custom")]; + tensor input_145_pad_0 = const()[name = tensor("input_145_pad_0"), val = tensor([1, 1])]; + tensor input_145_strides_0 = const()[name = tensor("input_145_strides_0"), val = tensor([1])]; + tensor input_145_dilations_0 = const()[name = tensor("input_145_dilations_0"), val = tensor([1])]; + tensor input_145_groups_0 = const()[name = tensor("input_145_groups_0"), val = tensor(1)]; + tensor input_145_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_145_dilations_0, groups = input_145_groups_0, pad = input_145_pad_0, pad_type = input_145_pad_type_0, strides = input_145_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_143_cast_fp16)[name = tensor("input_145_cast_fp16")]; + tensor x_75_cast_fp16 = relu(x = input_145_cast_fp16)[name = tensor("x_75_cast_fp16")]; + tensor const_26_to_fp16 = const()[name = tensor("const_26_to_fp16"), val = tensor(-inf)]; + tensor var_854_to_fp16 = const()[name = tensor("op_854_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_20_cast_fp16 = clip(alpha = const_26_to_fp16, beta = var_854_to_fp16, x = x_75_cast_fp16)[name = tensor("clip_20_cast_fp16")]; + tensor input_149_pad_type_0 = const()[name = tensor("input_149_pad_type_0"), val = tensor("custom")]; + tensor input_149_pad_0 = const()[name = tensor("input_149_pad_0"), val = tensor([1, 1])]; + tensor input_149_strides_0 = const()[name = tensor("input_149_strides_0"), val = tensor([2])]; + tensor input_149_dilations_0 = const()[name = tensor("input_149_dilations_0"), val = tensor([1])]; + tensor input_149_groups_0 = const()[name = tensor("input_149_groups_0"), val = tensor(1)]; + tensor input_149_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_149_dilations_0, groups = input_149_groups_0, pad = input_149_pad_0, pad_type = input_149_pad_type_0, strides = input_149_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_20_cast_fp16)[name = tensor("input_149_cast_fp16")]; + tensor x_77_cast_fp16 = relu(x = input_149_cast_fp16)[name = tensor("x_77_cast_fp16")]; + tensor const_27_to_fp16 = const()[name = tensor("const_27_to_fp16"), val = tensor(-inf)]; + tensor clip_21_cast_fp16 = clip(alpha = const_27_to_fp16, beta = var_854_to_fp16, x = x_77_cast_fp16)[name = tensor("clip_21_cast_fp16")]; + tensor input_153_pad_type_0 = const()[name = tensor("input_153_pad_type_0"), val = tensor("custom")]; + tensor input_153_pad_0 = const()[name = tensor("input_153_pad_0"), val = tensor([1, 1])]; + tensor input_153_strides_0 = const()[name = tensor("input_153_strides_0"), val = tensor([2])]; + tensor input_153_dilations_0 = const()[name = tensor("input_153_dilations_0"), val = tensor([1])]; + tensor input_153_groups_0 = const()[name = tensor("input_153_groups_0"), val = tensor(1)]; + tensor input_153_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_153_dilations_0, groups = input_153_groups_0, pad = input_153_pad_0, pad_type = input_153_pad_type_0, strides = input_153_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_21_cast_fp16)[name = tensor("input_153_cast_fp16")]; + tensor x_79_cast_fp16 = relu(x = input_153_cast_fp16)[name = tensor("x_79_cast_fp16")]; + tensor const_28_to_fp16 = const()[name = tensor("const_28_to_fp16"), val = tensor(-inf)]; + tensor clip_22_cast_fp16 = clip(alpha = const_28_to_fp16, beta = var_854_to_fp16, x = x_79_cast_fp16)[name = tensor("clip_22_cast_fp16")]; + tensor input_157_pad_type_0 = const()[name = tensor("input_157_pad_type_0"), val = tensor("custom")]; + tensor input_157_pad_0 = const()[name = tensor("input_157_pad_0"), val = tensor([1, 1])]; + tensor input_157_strides_0 = const()[name = tensor("input_157_strides_0"), val = tensor([1])]; + tensor input_157_dilations_0 = const()[name = tensor("input_157_dilations_0"), val = tensor([1])]; + tensor input_157_groups_0 = const()[name = tensor("input_157_groups_0"), val = tensor(1)]; + tensor input_157_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_157_dilations_0, groups = input_157_groups_0, pad = input_157_pad_0, pad_type = input_157_pad_type_0, strides = input_157_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_22_cast_fp16)[name = tensor("input_157_cast_fp16")]; + tensor x_81_cast_fp16 = relu(x = input_157_cast_fp16)[name = tensor("x_81_cast_fp16")]; + tensor const_29_to_fp16 = const()[name = tensor("const_29_to_fp16"), val = tensor(-inf)]; + tensor clip_23_cast_fp16 = clip(alpha = const_29_to_fp16, beta = var_854_to_fp16, x = x_81_cast_fp16)[name = tensor("clip_23_cast_fp16")]; + tensor transpose_10_perm_0 = const()[name = tensor("transpose_10_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_10_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_10_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_11_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_11_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_11_batch_first_lstm_h0_squeeze_axes_0, x = hn_9_cast_fp16)[name = tensor("lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_11_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_11_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_11_batch_first_lstm_c0_squeeze_axes_0, x = cn_9_cast_fp16)[name = tensor("lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_11_batch_first_direction_0 = const()[name = tensor("lstm_out_11_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_11_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_11_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_11_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_11_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_11_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_11_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_11_batch_first_activation_0 = const()[name = tensor("lstm_out_11_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_18")]; + tensor lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_19")]; + tensor transpose_10_cast_fp16 = transpose(perm = transpose_10_perm_0, x = clip_23_cast_fp16)[name = tensor("transpose_21")]; + tensor transpose_10_cast_fp16_to_fp32 = cast(dtype = transpose_10_cast_fp16_to_fp32_dtype_0, x = transpose_10_cast_fp16)[name = tensor("cast_20")]; + tensor lstm_out_11_batch_first_0, tensor lstm_out_11_batch_first_1, tensor lstm_out_11_batch_first_2 = lstm(activation = lstm_out_11_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_11_batch_first_cell_activation_0, direction = lstm_out_11_batch_first_direction_0, initial_c = lstm_out_11_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_11_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_11_batch_first_output_sequence_0, recurrent_activation = lstm_out_11_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_10_cast_fp16_to_fp32)[name = tensor("lstm_out_11_batch_first")]; + tensor transpose_11_perm_0 = const()[name = tensor("transpose_11_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_11_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_11_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_11_axes_0 = const()[name = tensor("hn_11_axes_0"), val = tensor([0])]; + tensor lstm_out_11_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_11_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_11_batch_first_1_to_fp16 = cast(dtype = lstm_out_11_batch_first_1_to_fp16_dtype_0, x = lstm_out_11_batch_first_1)[name = tensor("cast_16")]; + tensor hn_11_cast_fp16 = expand_dims(axes = hn_11_axes_0, x = lstm_out_11_batch_first_1_to_fp16)[name = tensor("hn_11_cast_fp16")]; + tensor cn_11_axes_0 = const()[name = tensor("cn_11_axes_0"), val = tensor([0])]; + tensor lstm_out_11_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_11_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_11_batch_first_2_to_fp16 = cast(dtype = lstm_out_11_batch_first_2_to_fp16_dtype_0, x = lstm_out_11_batch_first_2)[name = tensor("cast_15")]; + tensor cn_11_cast_fp16 = expand_dims(axes = cn_11_axes_0, x = lstm_out_11_batch_first_2_to_fp16)[name = tensor("cn_11_cast_fp16")]; + tensor lstm_out_11_batch_first_0_to_fp16 = cast(dtype = lstm_out_11_batch_first_0_to_fp16_dtype_0, x = lstm_out_11_batch_first_0)[name = tensor("cast_17")]; + tensor transpose_11_cast_fp16 = transpose(perm = transpose_11_perm_0, x = lstm_out_11_batch_first_0_to_fp16)[name = tensor("transpose_20")]; + tensor input_165_cast_fp16 = relu(x = transpose_11_cast_fp16)[name = tensor("input_165_cast_fp16")]; + tensor input_167_pad_type_0 = const()[name = tensor("input_167_pad_type_0"), val = tensor("valid")]; + tensor input_167_strides_0 = const()[name = tensor("input_167_strides_0"), val = tensor([1])]; + tensor input_167_pad_0 = const()[name = tensor("input_167_pad_0"), val = tensor([0, 0])]; + tensor input_167_dilations_0 = const()[name = tensor("input_167_dilations_0"), val = tensor([1])]; + tensor input_167_groups_0 = const()[name = tensor("input_167_groups_0"), val = tensor(1)]; + tensor input_167_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_167_dilations_0, groups = input_167_groups_0, pad = input_167_pad_0, pad_type = input_167_pad_type_0, strides = input_167_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_165_cast_fp16)[name = tensor("input_167_cast_fp16")]; + tensor var_938_cast_fp16 = sigmoid(x = input_167_cast_fp16)[name = tensor("op_938_cast_fp16")]; + tensor x_85_pad_0 = const()[name = tensor("x_85_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_85_mode_0 = const()[name = tensor("x_85_mode_0"), val = tensor("reflect")]; + tensor const_30_to_fp16 = const()[name = tensor("const_30_to_fp16"), val = tensor(0x0p+0)]; + tensor x_85_cast_fp16 = pad(constant_val = const_30_to_fp16, mode = x_85_mode_0, pad = x_85_pad_0, x = input_169_cast_fp16)[name = tensor("x_85_cast_fp16")]; + tensor x_87_axes_0 = const()[name = tensor("x_87_axes_0"), val = tensor([1])]; + tensor x_87_cast_fp16 = expand_dims(axes = x_87_axes_0, x = x_85_cast_fp16)[name = tensor("x_87_cast_fp16")]; + tensor stft_out_13_pad_type_0 = const()[name = tensor("stft_out_13_pad_type_0"), val = tensor("valid")]; + tensor stft_out_13_strides_0 = const()[name = tensor("stft_out_13_strides_0"), val = tensor([128])]; + tensor stft_out_13_pad_0 = const()[name = tensor("stft_out_13_pad_0"), val = tensor([0, 0])]; + tensor stft_out_13_dilations_0 = const()[name = tensor("stft_out_13_dilations_0"), val = tensor([1])]; + tensor stft_out_13_groups_0 = const()[name = tensor("stft_out_13_groups_0"), val = tensor(1)]; + tensor stft_out_13_cast_fp16 = conv(dilations = stft_out_13_dilations_0, groups = stft_out_13_groups_0, pad = stft_out_13_pad_0, pad_type = stft_out_13_pad_type_0, strides = stft_out_13_strides_0, weight = stft_forward_basis_to_fp16, x = x_87_cast_fp16)[name = tensor("stft_out_13_cast_fp16")]; + tensor var_966_begin_0 = const()[name = tensor("op_966_begin_0"), val = tensor([0, 0, 0])]; + tensor var_966_end_0 = const()[name = tensor("op_966_end_0"), val = tensor([1, 129, 4])]; + tensor var_966_end_mask_0 = const()[name = tensor("op_966_end_mask_0"), val = tensor([true, false, true])]; + tensor var_966_cast_fp16 = slice_by_index(begin = var_966_begin_0, end = var_966_end_0, end_mask = var_966_end_mask_0, x = stft_out_13_cast_fp16)[name = tensor("op_966_cast_fp16")]; + tensor var_969_begin_0 = const()[name = tensor("op_969_begin_0"), val = tensor([0, 129, 0])]; + tensor var_969_end_0 = const()[name = tensor("op_969_end_0"), val = tensor([1, 258, 4])]; + tensor var_969_end_mask_0 = const()[name = tensor("op_969_end_mask_0"), val = tensor([true, true, true])]; + tensor var_969_cast_fp16 = slice_by_index(begin = var_969_begin_0, end = var_969_end_0, end_mask = var_969_end_mask_0, x = stft_out_13_cast_fp16)[name = tensor("op_969_cast_fp16")]; + tensor var_945_promoted_to_fp16 = const()[name = tensor("op_945_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_971_cast_fp16 = pow(x = var_966_cast_fp16, y = var_945_promoted_to_fp16)[name = tensor("op_971_cast_fp16")]; + tensor var_945_promoted_1_to_fp16 = const()[name = tensor("op_945_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_972_cast_fp16 = pow(x = var_969_cast_fp16, y = var_945_promoted_1_to_fp16)[name = tensor("op_972_cast_fp16")]; + tensor var_973_cast_fp16 = add(x = var_971_cast_fp16, y = var_972_cast_fp16)[name = tensor("op_973_cast_fp16")]; + tensor var_974_to_fp16 = const()[name = tensor("op_974_to_fp16"), val = tensor(0x1p-24)]; + tensor var_975_cast_fp16 = add(x = var_973_cast_fp16, y = var_974_to_fp16)[name = tensor("op_975_cast_fp16")]; + tensor input_171_cast_fp16 = sqrt(x = var_975_cast_fp16)[name = tensor("input_171_cast_fp16")]; + tensor input_173_pad_type_0 = const()[name = tensor("input_173_pad_type_0"), val = tensor("custom")]; + tensor input_173_pad_0 = const()[name = tensor("input_173_pad_0"), val = tensor([1, 1])]; + tensor input_173_strides_0 = const()[name = tensor("input_173_strides_0"), val = tensor([1])]; + tensor input_173_dilations_0 = const()[name = tensor("input_173_dilations_0"), val = tensor([1])]; + tensor input_173_groups_0 = const()[name = tensor("input_173_groups_0"), val = tensor(1)]; + tensor input_173_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_173_dilations_0, groups = input_173_groups_0, pad = input_173_pad_0, pad_type = input_173_pad_type_0, strides = input_173_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_171_cast_fp16)[name = tensor("input_173_cast_fp16")]; + tensor x_89_cast_fp16 = relu(x = input_173_cast_fp16)[name = tensor("x_89_cast_fp16")]; + tensor const_31_to_fp16 = const()[name = tensor("const_31_to_fp16"), val = tensor(-inf)]; + tensor var_978_to_fp16 = const()[name = tensor("op_978_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_24_cast_fp16 = clip(alpha = const_31_to_fp16, beta = var_978_to_fp16, x = x_89_cast_fp16)[name = tensor("clip_24_cast_fp16")]; + tensor input_177_pad_type_0 = const()[name = tensor("input_177_pad_type_0"), val = tensor("custom")]; + tensor input_177_pad_0 = const()[name = tensor("input_177_pad_0"), val = tensor([1, 1])]; + tensor input_177_strides_0 = const()[name = tensor("input_177_strides_0"), val = tensor([2])]; + tensor input_177_dilations_0 = const()[name = tensor("input_177_dilations_0"), val = tensor([1])]; + tensor input_177_groups_0 = const()[name = tensor("input_177_groups_0"), val = tensor(1)]; + tensor input_177_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_177_dilations_0, groups = input_177_groups_0, pad = input_177_pad_0, pad_type = input_177_pad_type_0, strides = input_177_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_24_cast_fp16)[name = tensor("input_177_cast_fp16")]; + tensor x_91_cast_fp16 = relu(x = input_177_cast_fp16)[name = tensor("x_91_cast_fp16")]; + tensor const_32_to_fp16 = const()[name = tensor("const_32_to_fp16"), val = tensor(-inf)]; + tensor clip_25_cast_fp16 = clip(alpha = const_32_to_fp16, beta = var_978_to_fp16, x = x_91_cast_fp16)[name = tensor("clip_25_cast_fp16")]; + tensor input_181_pad_type_0 = const()[name = tensor("input_181_pad_type_0"), val = tensor("custom")]; + tensor input_181_pad_0 = const()[name = tensor("input_181_pad_0"), val = tensor([1, 1])]; + tensor input_181_strides_0 = const()[name = tensor("input_181_strides_0"), val = tensor([2])]; + tensor input_181_dilations_0 = const()[name = tensor("input_181_dilations_0"), val = tensor([1])]; + tensor input_181_groups_0 = const()[name = tensor("input_181_groups_0"), val = tensor(1)]; + tensor input_181_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_181_dilations_0, groups = input_181_groups_0, pad = input_181_pad_0, pad_type = input_181_pad_type_0, strides = input_181_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_25_cast_fp16)[name = tensor("input_181_cast_fp16")]; + tensor x_93_cast_fp16 = relu(x = input_181_cast_fp16)[name = tensor("x_93_cast_fp16")]; + tensor const_33_to_fp16 = const()[name = tensor("const_33_to_fp16"), val = tensor(-inf)]; + tensor clip_26_cast_fp16 = clip(alpha = const_33_to_fp16, beta = var_978_to_fp16, x = x_93_cast_fp16)[name = tensor("clip_26_cast_fp16")]; + tensor input_185_pad_type_0 = const()[name = tensor("input_185_pad_type_0"), val = tensor("custom")]; + tensor input_185_pad_0 = const()[name = tensor("input_185_pad_0"), val = tensor([1, 1])]; + tensor input_185_strides_0 = const()[name = tensor("input_185_strides_0"), val = tensor([1])]; + tensor input_185_dilations_0 = const()[name = tensor("input_185_dilations_0"), val = tensor([1])]; + tensor input_185_groups_0 = const()[name = tensor("input_185_groups_0"), val = tensor(1)]; + tensor input_185_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_185_dilations_0, groups = input_185_groups_0, pad = input_185_pad_0, pad_type = input_185_pad_type_0, strides = input_185_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_26_cast_fp16)[name = tensor("input_185_cast_fp16")]; + tensor x_95_cast_fp16 = relu(x = input_185_cast_fp16)[name = tensor("x_95_cast_fp16")]; + tensor const_34_to_fp16 = const()[name = tensor("const_34_to_fp16"), val = tensor(-inf)]; + tensor clip_27_cast_fp16 = clip(alpha = const_34_to_fp16, beta = var_978_to_fp16, x = x_95_cast_fp16)[name = tensor("clip_27_cast_fp16")]; + tensor transpose_12_perm_0 = const()[name = tensor("transpose_12_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_12_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_12_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_13_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_13_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_13_batch_first_lstm_h0_squeeze_axes_0, x = hn_11_cast_fp16)[name = tensor("lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_13_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_13_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_13_batch_first_lstm_c0_squeeze_axes_0, x = cn_11_cast_fp16)[name = tensor("lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_13_batch_first_direction_0 = const()[name = tensor("lstm_out_13_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_13_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_13_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_13_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_13_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_13_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_13_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_13_batch_first_activation_0 = const()[name = tensor("lstm_out_13_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_12")]; + tensor lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_13")]; + tensor transpose_12_cast_fp16 = transpose(perm = transpose_12_perm_0, x = clip_27_cast_fp16)[name = tensor("transpose_19")]; + tensor transpose_12_cast_fp16_to_fp32 = cast(dtype = transpose_12_cast_fp16_to_fp32_dtype_0, x = transpose_12_cast_fp16)[name = tensor("cast_14")]; + tensor lstm_out_13_batch_first_0, tensor lstm_out_13_batch_first_1, tensor lstm_out_13_batch_first_2 = lstm(activation = lstm_out_13_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_13_batch_first_cell_activation_0, direction = lstm_out_13_batch_first_direction_0, initial_c = lstm_out_13_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_13_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_13_batch_first_output_sequence_0, recurrent_activation = lstm_out_13_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_12_cast_fp16_to_fp32)[name = tensor("lstm_out_13_batch_first")]; + tensor transpose_13_perm_0 = const()[name = tensor("transpose_13_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_13_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_13_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_13_axes_0 = const()[name = tensor("hn_13_axes_0"), val = tensor([0])]; + tensor lstm_out_13_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_13_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_13_batch_first_1_to_fp16 = cast(dtype = lstm_out_13_batch_first_1_to_fp16_dtype_0, x = lstm_out_13_batch_first_1)[name = tensor("cast_10")]; + tensor hn_13_cast_fp16 = expand_dims(axes = hn_13_axes_0, x = lstm_out_13_batch_first_1_to_fp16)[name = tensor("hn_13_cast_fp16")]; + tensor cn_13_axes_0 = const()[name = tensor("cn_13_axes_0"), val = tensor([0])]; + tensor lstm_out_13_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_13_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_13_batch_first_2_to_fp16 = cast(dtype = lstm_out_13_batch_first_2_to_fp16_dtype_0, x = lstm_out_13_batch_first_2)[name = tensor("cast_9")]; + tensor cn_13_cast_fp16 = expand_dims(axes = cn_13_axes_0, x = lstm_out_13_batch_first_2_to_fp16)[name = tensor("cn_13_cast_fp16")]; + tensor lstm_out_13_batch_first_0_to_fp16 = cast(dtype = lstm_out_13_batch_first_0_to_fp16_dtype_0, x = lstm_out_13_batch_first_0)[name = tensor("cast_11")]; + tensor transpose_13_cast_fp16 = transpose(perm = transpose_13_perm_0, x = lstm_out_13_batch_first_0_to_fp16)[name = tensor("transpose_18")]; + tensor input_193_cast_fp16 = relu(x = transpose_13_cast_fp16)[name = tensor("input_193_cast_fp16")]; + tensor input_195_pad_type_0 = const()[name = tensor("input_195_pad_type_0"), val = tensor("valid")]; + tensor input_195_strides_0 = const()[name = tensor("input_195_strides_0"), val = tensor([1])]; + tensor input_195_pad_0 = const()[name = tensor("input_195_pad_0"), val = tensor([0, 0])]; + tensor input_195_dilations_0 = const()[name = tensor("input_195_dilations_0"), val = tensor([1])]; + tensor input_195_groups_0 = const()[name = tensor("input_195_groups_0"), val = tensor(1)]; + tensor input_195_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_195_dilations_0, groups = input_195_groups_0, pad = input_195_pad_0, pad_type = input_195_pad_type_0, strides = input_195_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_193_cast_fp16)[name = tensor("input_195_cast_fp16")]; + tensor var_1062_cast_fp16 = sigmoid(x = input_195_cast_fp16)[name = tensor("op_1062_cast_fp16")]; + tensor x_99_pad_0 = const()[name = tensor("x_99_pad_0"), val = tensor([0, 0, 0, 64])]; + tensor x_99_mode_0 = const()[name = tensor("x_99_mode_0"), val = tensor("reflect")]; + tensor const_35_to_fp16 = const()[name = tensor("const_35_to_fp16"), val = tensor(0x0p+0)]; + tensor x_99_cast_fp16 = pad(constant_val = const_35_to_fp16, mode = x_99_mode_0, pad = x_99_pad_0, x = input_197_cast_fp16)[name = tensor("x_99_cast_fp16")]; + tensor x_101_axes_0 = const()[name = tensor("x_101_axes_0"), val = tensor([1])]; + tensor x_101_cast_fp16 = expand_dims(axes = x_101_axes_0, x = x_99_cast_fp16)[name = tensor("x_101_cast_fp16")]; + tensor stft_out_pad_type_0 = const()[name = tensor("stft_out_pad_type_0"), val = tensor("valid")]; + tensor stft_out_strides_0 = const()[name = tensor("stft_out_strides_0"), val = tensor([128])]; + tensor stft_out_pad_0 = const()[name = tensor("stft_out_pad_0"), val = tensor([0, 0])]; + tensor stft_out_dilations_0 = const()[name = tensor("stft_out_dilations_0"), val = tensor([1])]; + tensor stft_out_groups_0 = const()[name = tensor("stft_out_groups_0"), val = tensor(1)]; + tensor stft_out_cast_fp16 = conv(dilations = stft_out_dilations_0, groups = stft_out_groups_0, pad = stft_out_pad_0, pad_type = stft_out_pad_type_0, strides = stft_out_strides_0, weight = stft_forward_basis_to_fp16, x = x_101_cast_fp16)[name = tensor("stft_out_cast_fp16")]; + tensor var_1090_begin_0 = const()[name = tensor("op_1090_begin_0"), val = tensor([0, 0, 0])]; + tensor var_1090_end_0 = const()[name = tensor("op_1090_end_0"), val = tensor([1, 129, 4])]; + tensor var_1090_end_mask_0 = const()[name = tensor("op_1090_end_mask_0"), val = tensor([true, false, true])]; + tensor var_1090_cast_fp16 = slice_by_index(begin = var_1090_begin_0, end = var_1090_end_0, end_mask = var_1090_end_mask_0, x = stft_out_cast_fp16)[name = tensor("op_1090_cast_fp16")]; + tensor var_1093_begin_0 = const()[name = tensor("op_1093_begin_0"), val = tensor([0, 129, 0])]; + tensor var_1093_end_0 = const()[name = tensor("op_1093_end_0"), val = tensor([1, 258, 4])]; + tensor var_1093_end_mask_0 = const()[name = tensor("op_1093_end_mask_0"), val = tensor([true, true, true])]; + tensor var_1093_cast_fp16 = slice_by_index(begin = var_1093_begin_0, end = var_1093_end_0, end_mask = var_1093_end_mask_0, x = stft_out_cast_fp16)[name = tensor("op_1093_cast_fp16")]; + tensor var_1069_promoted_to_fp16 = const()[name = tensor("op_1069_promoted_to_fp16"), val = tensor(0x1p+1)]; + tensor var_1095_cast_fp16 = pow(x = var_1090_cast_fp16, y = var_1069_promoted_to_fp16)[name = tensor("op_1095_cast_fp16")]; + tensor var_1069_promoted_1_to_fp16 = const()[name = tensor("op_1069_promoted_1_to_fp16"), val = tensor(0x1p+1)]; + tensor var_1096_cast_fp16 = pow(x = var_1093_cast_fp16, y = var_1069_promoted_1_to_fp16)[name = tensor("op_1096_cast_fp16")]; + tensor var_1097_cast_fp16 = add(x = var_1095_cast_fp16, y = var_1096_cast_fp16)[name = tensor("op_1097_cast_fp16")]; + tensor var_1098_to_fp16 = const()[name = tensor("op_1098_to_fp16"), val = tensor(0x1p-24)]; + tensor var_1099_cast_fp16 = add(x = var_1097_cast_fp16, y = var_1098_to_fp16)[name = tensor("op_1099_cast_fp16")]; + tensor input_199_cast_fp16 = sqrt(x = var_1099_cast_fp16)[name = tensor("input_199_cast_fp16")]; + tensor input_201_pad_type_0 = const()[name = tensor("input_201_pad_type_0"), val = tensor("custom")]; + tensor input_201_pad_0 = const()[name = tensor("input_201_pad_0"), val = tensor([1, 1])]; + tensor input_201_strides_0 = const()[name = tensor("input_201_strides_0"), val = tensor([1])]; + tensor input_201_dilations_0 = const()[name = tensor("input_201_dilations_0"), val = tensor([1])]; + tensor input_201_groups_0 = const()[name = tensor("input_201_groups_0"), val = tensor(1)]; + tensor input_201_cast_fp16 = conv(bias = encoder_layers_0_bias_to_fp16, dilations = input_201_dilations_0, groups = input_201_groups_0, pad = input_201_pad_0, pad_type = input_201_pad_type_0, strides = input_201_strides_0, weight = encoder_layers_0_weight_to_fp16, x = input_199_cast_fp16)[name = tensor("input_201_cast_fp16")]; + tensor x_103_cast_fp16 = relu(x = input_201_cast_fp16)[name = tensor("x_103_cast_fp16")]; + tensor const_36_to_fp16 = const()[name = tensor("const_36_to_fp16"), val = tensor(-inf)]; + tensor var_1102_to_fp16 = const()[name = tensor("op_1102_to_fp16"), val = tensor(0x1.388p+13)]; + tensor clip_28_cast_fp16 = clip(alpha = const_36_to_fp16, beta = var_1102_to_fp16, x = x_103_cast_fp16)[name = tensor("clip_28_cast_fp16")]; + tensor input_205_pad_type_0 = const()[name = tensor("input_205_pad_type_0"), val = tensor("custom")]; + tensor input_205_pad_0 = const()[name = tensor("input_205_pad_0"), val = tensor([1, 1])]; + tensor input_205_strides_0 = const()[name = tensor("input_205_strides_0"), val = tensor([2])]; + tensor input_205_dilations_0 = const()[name = tensor("input_205_dilations_0"), val = tensor([1])]; + tensor input_205_groups_0 = const()[name = tensor("input_205_groups_0"), val = tensor(1)]; + tensor input_205_cast_fp16 = conv(bias = encoder_layers_2_bias_to_fp16, dilations = input_205_dilations_0, groups = input_205_groups_0, pad = input_205_pad_0, pad_type = input_205_pad_type_0, strides = input_205_strides_0, weight = encoder_layers_2_weight_to_fp16, x = clip_28_cast_fp16)[name = tensor("input_205_cast_fp16")]; + tensor x_105_cast_fp16 = relu(x = input_205_cast_fp16)[name = tensor("x_105_cast_fp16")]; + tensor const_37_to_fp16 = const()[name = tensor("const_37_to_fp16"), val = tensor(-inf)]; + tensor clip_29_cast_fp16 = clip(alpha = const_37_to_fp16, beta = var_1102_to_fp16, x = x_105_cast_fp16)[name = tensor("clip_29_cast_fp16")]; + tensor input_209_pad_type_0 = const()[name = tensor("input_209_pad_type_0"), val = tensor("custom")]; + tensor input_209_pad_0 = const()[name = tensor("input_209_pad_0"), val = tensor([1, 1])]; + tensor input_209_strides_0 = const()[name = tensor("input_209_strides_0"), val = tensor([2])]; + tensor input_209_dilations_0 = const()[name = tensor("input_209_dilations_0"), val = tensor([1])]; + tensor input_209_groups_0 = const()[name = tensor("input_209_groups_0"), val = tensor(1)]; + tensor input_209_cast_fp16 = conv(bias = encoder_layers_4_bias_to_fp16, dilations = input_209_dilations_0, groups = input_209_groups_0, pad = input_209_pad_0, pad_type = input_209_pad_type_0, strides = input_209_strides_0, weight = encoder_layers_4_weight_to_fp16, x = clip_29_cast_fp16)[name = tensor("input_209_cast_fp16")]; + tensor x_107_cast_fp16 = relu(x = input_209_cast_fp16)[name = tensor("x_107_cast_fp16")]; + tensor const_38_to_fp16 = const()[name = tensor("const_38_to_fp16"), val = tensor(-inf)]; + tensor clip_30_cast_fp16 = clip(alpha = const_38_to_fp16, beta = var_1102_to_fp16, x = x_107_cast_fp16)[name = tensor("clip_30_cast_fp16")]; + tensor input_213_pad_type_0 = const()[name = tensor("input_213_pad_type_0"), val = tensor("custom")]; + tensor input_213_pad_0 = const()[name = tensor("input_213_pad_0"), val = tensor([1, 1])]; + tensor input_213_strides_0 = const()[name = tensor("input_213_strides_0"), val = tensor([1])]; + tensor input_213_dilations_0 = const()[name = tensor("input_213_dilations_0"), val = tensor([1])]; + tensor input_213_groups_0 = const()[name = tensor("input_213_groups_0"), val = tensor(1)]; + tensor input_213_cast_fp16 = conv(bias = encoder_layers_6_bias_to_fp16, dilations = input_213_dilations_0, groups = input_213_groups_0, pad = input_213_pad_0, pad_type = input_213_pad_type_0, strides = input_213_strides_0, weight = encoder_layers_6_weight_to_fp16, x = clip_30_cast_fp16)[name = tensor("input_213_cast_fp16")]; + tensor x_109_cast_fp16 = relu(x = input_213_cast_fp16)[name = tensor("x_109_cast_fp16")]; + tensor const_39_to_fp16 = const()[name = tensor("const_39_to_fp16"), val = tensor(-inf)]; + tensor clip_31_cast_fp16 = clip(alpha = const_39_to_fp16, beta = var_1102_to_fp16, x = x_109_cast_fp16)[name = tensor("clip_31_cast_fp16")]; + tensor transpose_14_perm_0 = const()[name = tensor("transpose_14_perm_0"), val = tensor([2, 0, 1])]; + tensor transpose_14_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("transpose_14_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_batch_first_lstm_h0_squeeze_axes_0 = const()[name = tensor("lstm_out_batch_first_lstm_h0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_batch_first_lstm_h0_squeeze_cast_fp16 = squeeze(axes = lstm_out_batch_first_lstm_h0_squeeze_axes_0, x = hn_13_cast_fp16)[name = tensor("lstm_out_batch_first_lstm_h0_squeeze_cast_fp16")]; + tensor lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_batch_first_lstm_c0_squeeze_axes_0 = const()[name = tensor("lstm_out_batch_first_lstm_c0_squeeze_axes_0"), val = tensor([0])]; + tensor lstm_out_batch_first_lstm_c0_squeeze_cast_fp16 = squeeze(axes = lstm_out_batch_first_lstm_c0_squeeze_axes_0, x = cn_13_cast_fp16)[name = tensor("lstm_out_batch_first_lstm_c0_squeeze_cast_fp16")]; + tensor lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor lstm_out_batch_first_direction_0 = const()[name = tensor("lstm_out_batch_first_direction_0"), val = tensor("forward")]; + tensor lstm_out_batch_first_output_sequence_0 = const()[name = tensor("lstm_out_batch_first_output_sequence_0"), val = tensor(true)]; + tensor lstm_out_batch_first_recurrent_activation_0 = const()[name = tensor("lstm_out_batch_first_recurrent_activation_0"), val = tensor("sigmoid")]; + tensor lstm_out_batch_first_cell_activation_0 = const()[name = tensor("lstm_out_batch_first_cell_activation_0"), val = tensor("tanh")]; + tensor lstm_out_batch_first_activation_0 = const()[name = tensor("lstm_out_batch_first_activation_0"), val = tensor("tanh")]; + tensor lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16)[name = tensor("cast_6")]; + tensor lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32 = cast(dtype = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32_dtype_0, x = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16)[name = tensor("cast_7")]; + tensor transpose_14_cast_fp16 = transpose(perm = transpose_14_perm_0, x = clip_31_cast_fp16)[name = tensor("transpose_17")]; + tensor transpose_14_cast_fp16_to_fp32 = cast(dtype = transpose_14_cast_fp16_to_fp32_dtype_0, x = transpose_14_cast_fp16)[name = tensor("cast_8")]; + tensor lstm_out_batch_first_0, tensor lstm_out_batch_first_1, tensor lstm_out_batch_first_2 = lstm(activation = lstm_out_batch_first_activation_0, bias = concat_0, cell_activation = lstm_out_batch_first_cell_activation_0, direction = lstm_out_batch_first_direction_0, initial_c = lstm_out_batch_first_lstm_c0_squeeze_cast_fp16_to_fp32, initial_h = lstm_out_batch_first_lstm_h0_squeeze_cast_fp16_to_fp32, output_sequence = lstm_out_batch_first_output_sequence_0, recurrent_activation = lstm_out_batch_first_recurrent_activation_0, weight_hh = concat_2, weight_ih = concat_1, x = transpose_14_cast_fp16_to_fp32)[name = tensor("lstm_out_batch_first")]; + tensor transpose_15_perm_0 = const()[name = tensor("transpose_15_perm_0"), val = tensor([1, 2, 0])]; + tensor lstm_out_batch_first_0_to_fp16_dtype_0 = const()[name = tensor("lstm_out_batch_first_0_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor hn_axes_0 = const()[name = tensor("hn_axes_0"), val = tensor([0])]; + tensor lstm_out_batch_first_1_to_fp16_dtype_0 = const()[name = tensor("lstm_out_batch_first_1_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_batch_first_1_to_fp16 = cast(dtype = lstm_out_batch_first_1_to_fp16_dtype_0, x = lstm_out_batch_first_1)[name = tensor("cast_4")]; + tensor hn_cast_fp16 = expand_dims(axes = hn_axes_0, x = lstm_out_batch_first_1_to_fp16)[name = tensor("hn_cast_fp16")]; + tensor cn_axes_0 = const()[name = tensor("cn_axes_0"), val = tensor([0])]; + tensor lstm_out_batch_first_2_to_fp16_dtype_0 = const()[name = tensor("lstm_out_batch_first_2_to_fp16_dtype_0"), val = tensor("fp16")]; + tensor lstm_out_batch_first_2_to_fp16 = cast(dtype = lstm_out_batch_first_2_to_fp16_dtype_0, x = lstm_out_batch_first_2)[name = tensor("cast_3")]; + tensor cn_cast_fp16 = expand_dims(axes = cn_axes_0, x = lstm_out_batch_first_2_to_fp16)[name = tensor("cn_cast_fp16")]; + tensor lstm_out_batch_first_0_to_fp16 = cast(dtype = lstm_out_batch_first_0_to_fp16_dtype_0, x = lstm_out_batch_first_0)[name = tensor("cast_5")]; + tensor transpose_15_cast_fp16 = transpose(perm = transpose_15_perm_0, x = lstm_out_batch_first_0_to_fp16)[name = tensor("transpose_16")]; + tensor input_221_cast_fp16 = relu(x = transpose_15_cast_fp16)[name = tensor("input_221_cast_fp16")]; + tensor input_pad_type_0 = const()[name = tensor("input_pad_type_0"), val = tensor("valid")]; + tensor input_strides_0 = const()[name = tensor("input_strides_0"), val = tensor([1])]; + tensor input_pad_0 = const()[name = tensor("input_pad_0"), val = tensor([0, 0])]; + tensor input_dilations_0 = const()[name = tensor("input_dilations_0"), val = tensor([1])]; + tensor input_groups_0 = const()[name = tensor("input_groups_0"), val = tensor(1)]; + tensor input_cast_fp16 = conv(bias = decoder_final_conv_bias_to_fp16, dilations = input_dilations_0, groups = input_groups_0, pad = input_pad_0, pad_type = input_pad_type_0, strides = input_strides_0, weight = decoder_final_conv_weight_to_fp16, x = input_221_cast_fp16)[name = tensor("input_cast_fp16")]; + tensor vad_out_cast_fp16 = sigmoid(x = input_cast_fp16)[name = tensor("vad_out_cast_fp16")]; + tensor var_1187_axes_0 = const()[name = tensor("op_1187_axes_0"), val = tensor([0])]; + tensor var_1187_cast_fp16 = squeeze(axes = var_1187_axes_0, x = hn_cast_fp16)[name = tensor("op_1187_cast_fp16")]; + tensor var_1187_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_1187_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor var_1188_axes_0 = const()[name = tensor("op_1188_axes_0"), val = tensor([0])]; + tensor var_1188_cast_fp16 = squeeze(axes = var_1188_axes_0, x = cn_cast_fp16)[name = tensor("op_1188_cast_fp16")]; + tensor var_1188_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_1188_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor var_1194 = const()[name = tensor("op_1194"), val = tensor(2)]; + tensor var_1195_interleave_0 = const()[name = tensor("op_1195_interleave_0"), val = tensor(false)]; + tensor var_1195_cast_fp16 = concat(axis = var_1194, interleave = var_1195_interleave_0, values = (var_318_cast_fp16, var_442_cast_fp16, var_566_cast_fp16, var_690_cast_fp16, var_814_cast_fp16, var_938_cast_fp16, var_1062_cast_fp16, vad_out_cast_fp16))[name = tensor("op_1195_cast_fp16")]; + tensor var_1196_to_fp16 = const()[name = tensor("op_1196_to_fp16"), val = tensor(0x1p+0)]; + tensor one_minus_probs_cast_fp16 = sub(x = var_1196_to_fp16, y = var_1195_cast_fp16)[name = tensor("one_minus_probs_cast_fp16")]; + tensor product_1_begin_0 = const()[name = tensor("product_1_begin_0"), val = tensor([0, 0, 0])]; + tensor product_1_end_0 = const()[name = tensor("product_1_end_0"), val = tensor([1, 1, 1])]; + tensor product_1_end_mask_0 = const()[name = tensor("product_1_end_mask_0"), val = tensor([true, true, false])]; + tensor product_1_cast_fp16 = slice_by_index(begin = product_1_begin_0, end = product_1_end_0, end_mask = product_1_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("product_1_cast_fp16")]; + tensor var_1228_begin_0 = const()[name = tensor("op_1228_begin_0"), val = tensor([0, 0, 1])]; + tensor var_1228_end_0 = const()[name = tensor("op_1228_end_0"), val = tensor([1, 1, 2])]; + tensor var_1228_end_mask_0 = const()[name = tensor("op_1228_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1228_cast_fp16 = slice_by_index(begin = var_1228_begin_0, end = var_1228_end_0, end_mask = var_1228_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1228_cast_fp16")]; + tensor product_3_cast_fp16 = mul(x = product_1_cast_fp16, y = var_1228_cast_fp16)[name = tensor("product_3_cast_fp16")]; + tensor var_1244_begin_0 = const()[name = tensor("op_1244_begin_0"), val = tensor([0, 0, 2])]; + tensor var_1244_end_0 = const()[name = tensor("op_1244_end_0"), val = tensor([1, 1, 3])]; + tensor var_1244_end_mask_0 = const()[name = tensor("op_1244_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1244_cast_fp16 = slice_by_index(begin = var_1244_begin_0, end = var_1244_end_0, end_mask = var_1244_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1244_cast_fp16")]; + tensor product_5_cast_fp16 = mul(x = product_3_cast_fp16, y = var_1244_cast_fp16)[name = tensor("product_5_cast_fp16")]; + tensor var_1260_begin_0 = const()[name = tensor("op_1260_begin_0"), val = tensor([0, 0, 3])]; + tensor var_1260_end_0 = const()[name = tensor("op_1260_end_0"), val = tensor([1, 1, 4])]; + tensor var_1260_end_mask_0 = const()[name = tensor("op_1260_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1260_cast_fp16 = slice_by_index(begin = var_1260_begin_0, end = var_1260_end_0, end_mask = var_1260_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1260_cast_fp16")]; + tensor product_7_cast_fp16 = mul(x = product_5_cast_fp16, y = var_1260_cast_fp16)[name = tensor("product_7_cast_fp16")]; + tensor var_1276_begin_0 = const()[name = tensor("op_1276_begin_0"), val = tensor([0, 0, 4])]; + tensor var_1276_end_0 = const()[name = tensor("op_1276_end_0"), val = tensor([1, 1, 5])]; + tensor var_1276_end_mask_0 = const()[name = tensor("op_1276_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1276_cast_fp16 = slice_by_index(begin = var_1276_begin_0, end = var_1276_end_0, end_mask = var_1276_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1276_cast_fp16")]; + tensor product_9_cast_fp16 = mul(x = product_7_cast_fp16, y = var_1276_cast_fp16)[name = tensor("product_9_cast_fp16")]; + tensor var_1292_begin_0 = const()[name = tensor("op_1292_begin_0"), val = tensor([0, 0, 5])]; + tensor var_1292_end_0 = const()[name = tensor("op_1292_end_0"), val = tensor([1, 1, 6])]; + tensor var_1292_end_mask_0 = const()[name = tensor("op_1292_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1292_cast_fp16 = slice_by_index(begin = var_1292_begin_0, end = var_1292_end_0, end_mask = var_1292_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1292_cast_fp16")]; + tensor product_11_cast_fp16 = mul(x = product_9_cast_fp16, y = var_1292_cast_fp16)[name = tensor("product_11_cast_fp16")]; + tensor var_1308_begin_0 = const()[name = tensor("op_1308_begin_0"), val = tensor([0, 0, 6])]; + tensor var_1308_end_0 = const()[name = tensor("op_1308_end_0"), val = tensor([1, 1, 7])]; + tensor var_1308_end_mask_0 = const()[name = tensor("op_1308_end_mask_0"), val = tensor([true, true, false])]; + tensor var_1308_cast_fp16 = slice_by_index(begin = var_1308_begin_0, end = var_1308_end_0, end_mask = var_1308_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1308_cast_fp16")]; + tensor product_cast_fp16 = mul(x = product_11_cast_fp16, y = var_1308_cast_fp16)[name = tensor("product_cast_fp16")]; + tensor var_1324_begin_0 = const()[name = tensor("op_1324_begin_0"), val = tensor([0, 0, 7])]; + tensor var_1324_end_0 = const()[name = tensor("op_1324_end_0"), val = tensor([1, 1, 1])]; + tensor var_1324_end_mask_0 = const()[name = tensor("op_1324_end_mask_0"), val = tensor([true, true, true])]; + tensor var_1324_cast_fp16 = slice_by_index(begin = var_1324_begin_0, end = var_1324_end_0, end_mask = var_1324_end_mask_0, x = one_minus_probs_cast_fp16)[name = tensor("op_1324_cast_fp16")]; + tensor var_1325_cast_fp16 = mul(x = product_cast_fp16, y = var_1324_cast_fp16)[name = tensor("op_1325_cast_fp16")]; + tensor var_1326_to_fp16 = const()[name = tensor("op_1326_to_fp16"), val = tensor(0x1p+0)]; + tensor var_1328_cast_fp16 = sub(x = var_1326_to_fp16, y = var_1325_cast_fp16)[name = tensor("op_1328_cast_fp16")]; + tensor var_1328_cast_fp16_to_fp32_dtype_0 = const()[name = tensor("op_1328_cast_fp16_to_fp32_dtype_0"), val = tensor("fp32")]; + tensor vad_output = cast(dtype = var_1328_cast_fp16_to_fp32_dtype_0, x = var_1328_cast_fp16)[name = tensor("cast_0")]; + tensor new_cell_state = cast(dtype = var_1188_cast_fp16_to_fp32_dtype_0, x = var_1188_cast_fp16)[name = tensor("cast_1")]; + tensor new_hidden_state = cast(dtype = var_1187_cast_fp16_to_fp32_dtype_0, x = var_1187_cast_fp16)[name = tensor("cast_2")]; + } -> (vad_output, new_hidden_state, new_cell_state); +} \ No newline at end of file diff --git a/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/weights/weight.bin b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/weights/weight.bin new file mode 100644 index 0000000..a049880 Binary files /dev/null and b/VoiceInk/Resources/models/silero-vad-unified-256ms-v6.0.0.mlmodelc/weights/weight.bin differ diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index da1feaf..fbb670c 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -1,4 +1,5 @@ import Foundation +import CoreML import AVFoundation import FluidAudio import os.log @@ -7,10 +8,9 @@ import os.log class ParakeetTranscriptionService: TranscriptionService { private var asrManager: AsrManager? + private var vadManager: VadManager? private let customModelsDirectory: URL? @Published var isModelLoaded = false - - // Logger for Parakeet transcription service private let logger = Logger(subsystem: "com.voiceink.app", category: "ParakeetTranscriptionService") init(customModelsDirectory: URL? = nil) { @@ -21,8 +21,6 @@ class ParakeetTranscriptionService: TranscriptionService { if isModelLoaded { return } - - do { @@ -30,10 +28,11 @@ class ParakeetTranscriptionService: TranscriptionService { let models: AsrModels if let customDirectory = customModelsDirectory { logger.notice("🦜 Loading Parakeet models from: \(customDirectory.path)") - models = try await AsrModels.downloadAndLoad(to: customDirectory) + models = try await AsrModels.load(from: customDirectory) } else { logger.notice("🦜 Loading Parakeet models from default directory") - models = try await AsrModels.downloadAndLoad() + let defaultDir = AsrModels.defaultCacheDirectory() + models = try await AsrModels.load(from: defaultDir) } try await asrManager?.initialize(models: models) @@ -60,13 +59,43 @@ class ParakeetTranscriptionService: TranscriptionService { let audioSamples = try readAudioSamples(from: audioURL) - // Use full audio for transcription - let speechAudio: [Float] = audioSamples + let sampleRate = 16000.0 + let durationSeconds = Double(audioSamples.count) / sampleRate + + let speechAudio: [Float] + if durationSeconds < 20.0 { + speechAudio = audioSamples + } else { + let vadConfig = VadConfig(threshold: 0.7) + if vadManager == nil { + if let bundledVadURL = Bundle.main.url(forResource: ModelNames.VAD.sileroVad, withExtension: "mlmodelc") { + do { + let bundledModel = try MLModel(contentsOf: bundledVadURL) + vadManager = VadManager(config: vadConfig, vadModel: bundledModel) + } catch { + } + } else { + } + } + + do { + if let vadManager { + let segments = try await vadManager.segmentSpeechAudio(audioSamples) + if segments.isEmpty { + speechAudio = audioSamples + } else { + speechAudio = segments.flatMap { $0 } + } + } else { + speechAudio = audioSamples + } + } catch { + speechAudio = audioSamples + } + } let result = try await asrManager.transcribe(speechAudio) - - // Reset decoder state and cleanup after transcription to avoid blocking the transcription start Task { asrManager.cleanup() isModelLoaded = false @@ -81,8 +110,6 @@ class ParakeetTranscriptionService: TranscriptionService { private func readAudioSamples(from url: URL) throws -> [Float] { do { let data = try Data(contentsOf: url) - - // Check minimum file size for valid WAV header guard data.count > 44 else { throw ASRError.invalidAudioData }