Better Parakeet error handling
This commit is contained in:
parent
29722d0a31
commit
3eebbc4e3b
@ -7,7 +7,7 @@
|
|||||||
objects = {
|
objects = {
|
||||||
|
|
||||||
/* Begin PBXBuildFile section */
|
/* Begin PBXBuildFile section */
|
||||||
E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E12E7E962E3F109C006276F2 /* FluidAudio */; };
|
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E10F06082E3F390600F7FBDC /* FluidAudio */; };
|
||||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
|
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
|
||||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
|
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
|
||||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
|
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
|
||||||
@ -82,7 +82,7 @@
|
|||||||
files = (
|
files = (
|
||||||
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
|
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
|
||||||
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
|
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
|
||||||
E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */,
|
E10F06092E3F390600F7FBDC /* FluidAudio in Frameworks */,
|
||||||
E1B2DCAB2E3DE70A008DFD68 /* whisper.xcframework in Frameworks */,
|
E1B2DCAB2E3DE70A008DFD68 /* whisper.xcframework in Frameworks */,
|
||||||
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
|
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
|
||||||
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
|
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
|
||||||
@ -162,7 +162,7 @@
|
|||||||
E1ADD45E2CC544F100303ECB /* Sparkle */,
|
E1ADD45E2CC544F100303ECB /* Sparkle */,
|
||||||
E1F5FA792DA6CBF900B1FD8A /* Zip */,
|
E1F5FA792DA6CBF900B1FD8A /* Zip */,
|
||||||
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
|
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
|
||||||
E12E7E962E3F109C006276F2 /* FluidAudio */,
|
E10F06082E3F390600F7FBDC /* FluidAudio */,
|
||||||
);
|
);
|
||||||
productName = VoiceInk;
|
productName = VoiceInk;
|
||||||
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
|
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
|
||||||
@ -252,7 +252,7 @@
|
|||||||
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
|
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
|
||||||
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */,
|
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */,
|
||||||
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
|
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
|
||||||
E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */,
|
E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */,
|
||||||
);
|
);
|
||||||
preferredProjectObjectVersion = 77;
|
preferredProjectObjectVersion = 77;
|
||||||
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
|
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
|
||||||
@ -621,14 +621,15 @@
|
|||||||
};
|
};
|
||||||
/* End XCConfigurationList section */
|
/* End XCConfigurationList section */
|
||||||
|
|
||||||
/* Begin XCLocalSwiftPackageReference section */
|
|
||||||
E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */ = {
|
|
||||||
isa = XCLocalSwiftPackageReference;
|
|
||||||
relativePath = ../FluidAudio;
|
|
||||||
};
|
|
||||||
/* End XCLocalSwiftPackageReference section */
|
|
||||||
|
|
||||||
/* Begin XCRemoteSwiftPackageReference section */
|
/* Begin XCRemoteSwiftPackageReference section */
|
||||||
|
E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */ = {
|
||||||
|
isa = XCRemoteSwiftPackageReference;
|
||||||
|
repositoryURL = "https://github.com/FluidInference/FluidAudio";
|
||||||
|
requirement = {
|
||||||
|
branch = main;
|
||||||
|
kind = branch;
|
||||||
|
};
|
||||||
|
};
|
||||||
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
|
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
|
||||||
isa = XCRemoteSwiftPackageReference;
|
isa = XCRemoteSwiftPackageReference;
|
||||||
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
|
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
|
||||||
@ -663,7 +664,7 @@
|
|||||||
};
|
};
|
||||||
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */ = {
|
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */ = {
|
||||||
isa = XCRemoteSwiftPackageReference;
|
isa = XCRemoteSwiftPackageReference;
|
||||||
repositoryURL = "https://github.com/marmelroy/Zip?tab=readme-ov-file";
|
repositoryURL = "https://github.com/marmelroy/Zip";
|
||||||
requirement = {
|
requirement = {
|
||||||
kind = upToNextMajorVersion;
|
kind = upToNextMajorVersion;
|
||||||
minimumVersion = 2.1.2;
|
minimumVersion = 2.1.2;
|
||||||
@ -672,8 +673,9 @@
|
|||||||
/* End XCRemoteSwiftPackageReference section */
|
/* End XCRemoteSwiftPackageReference section */
|
||||||
|
|
||||||
/* Begin XCSwiftPackageProductDependency section */
|
/* Begin XCSwiftPackageProductDependency section */
|
||||||
E12E7E962E3F109C006276F2 /* FluidAudio */ = {
|
E10F06082E3F390600F7FBDC /* FluidAudio */ = {
|
||||||
isa = XCSwiftPackageProductDependency;
|
isa = XCSwiftPackageProductDependency;
|
||||||
|
package = E10FFA112E3F37D100F7FBDC /* XCRemoteSwiftPackageReference "FluidAudio" */;
|
||||||
productName = FluidAudio;
|
productName = FluidAudio;
|
||||||
};
|
};
|
||||||
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {
|
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {
|
||||||
|
|||||||
@ -1,6 +1,15 @@
|
|||||||
{
|
{
|
||||||
"originHash" : "ae3f634e8c4b39a1a80bcd04f018e2208c0491e42ee824cd94a92d7b88893420",
|
"originHash" : "0b9379abd19d2f53581c233273d09235e935a8d2b1180cf253dd69baa2784b39",
|
||||||
"pins" : [
|
"pins" : [
|
||||||
|
{
|
||||||
|
"identity" : "fluidaudio",
|
||||||
|
"kind" : "remoteSourceControl",
|
||||||
|
"location" : "https://github.com/FluidInference/FluidAudio",
|
||||||
|
"state" : {
|
||||||
|
"branch" : "main",
|
||||||
|
"revision" : "2a3d6a948cb332b3fd8ae479a9942e33ade2cc9e"
|
||||||
|
}
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"identity" : "keyboardshortcuts",
|
"identity" : "keyboardshortcuts",
|
||||||
"kind" : "remoteSourceControl",
|
"kind" : "remoteSourceControl",
|
||||||
@ -33,14 +42,14 @@
|
|||||||
"kind" : "remoteSourceControl",
|
"kind" : "remoteSourceControl",
|
||||||
"location" : "https://github.com/sparkle-project/Sparkle",
|
"location" : "https://github.com/sparkle-project/Sparkle",
|
||||||
"state" : {
|
"state" : {
|
||||||
"revision" : "0ca3004e98712ea2b39dd881d28448630cce1c99",
|
"revision" : "df074165274afaa39539c05d57b0832620775b11",
|
||||||
"version" : "2.7.0"
|
"version" : "2.7.1"
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
{
|
{
|
||||||
"identity" : "zip?tab=readme-ov-file",
|
"identity" : "zip",
|
||||||
"kind" : "remoteSourceControl",
|
"kind" : "remoteSourceControl",
|
||||||
"location" : "https://github.com/marmelroy/Zip?tab=readme-ov-file",
|
"location" : "https://github.com/marmelroy/Zip",
|
||||||
"state" : {
|
"state" : {
|
||||||
"revision" : "67fa55813b9e7b3b9acee9c0ae501def28746d76",
|
"revision" : "67fa55813b9e7b3b9acee9c0ae501def28746d76",
|
||||||
"version" : "2.1.2"
|
"version" : "2.1.2"
|
||||||
|
|||||||
@ -46,27 +46,22 @@ class ParakeetTranscriptionService: TranscriptionService {
|
|||||||
models = try await AsrModels.downloadAndLoad()
|
models = try await AsrModels.downloadAndLoad()
|
||||||
}
|
}
|
||||||
|
|
||||||
// Check vocabulary file before initialization
|
|
||||||
let vocabPath = getVocabularyPath()
|
|
||||||
let vocabExists = FileManager.default.fileExists(atPath: vocabPath.path)
|
|
||||||
logger.notice("🦜 Vocabulary file exists at \(vocabPath.lastPathComponent): \(vocabExists)")
|
|
||||||
|
|
||||||
if vocabExists {
|
|
||||||
do {
|
|
||||||
let vocabData = try Data(contentsOf: vocabPath)
|
|
||||||
let vocabDict = try JSONSerialization.jsonObject(with: vocabData) as? [String: String] ?? [:]
|
|
||||||
logger.notice("🦜 Vocabulary loaded with \(vocabDict.count) entries")
|
|
||||||
} catch {
|
|
||||||
logger.notice("🦜 Failed to parse vocabulary file: \(error.localizedDescription)")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
try await asrManager?.initialize(models: models)
|
try await asrManager?.initialize(models: models)
|
||||||
isModelLoaded = true
|
isModelLoaded = true
|
||||||
logger.notice("🦜 Parakeet model loaded successfully")
|
logger.notice("🦜 Parakeet model loaded successfully")
|
||||||
|
|
||||||
|
} catch let error as ASRError {
|
||||||
|
logger.notice("🦜 Parakeet-specific error loading model: \(error.localizedDescription)")
|
||||||
|
isModelLoaded = false
|
||||||
|
asrManager = nil
|
||||||
|
throw error
|
||||||
|
} catch let error as AsrModelsError {
|
||||||
|
logger.notice("🦜 Parakeet model management error loading model: \(error.localizedDescription)")
|
||||||
|
isModelLoaded = false
|
||||||
|
asrManager = nil
|
||||||
|
throw error
|
||||||
} catch {
|
} catch {
|
||||||
logger.notice("🦜 Failed to load Parakeet model: \(error.localizedDescription)")
|
logger.notice("🦜 Unexpected error loading Parakeet model: \(error.localizedDescription)")
|
||||||
isModelLoaded = false
|
isModelLoaded = false
|
||||||
asrManager = nil
|
asrManager = nil
|
||||||
throw error
|
throw error
|
||||||
@ -74,87 +69,64 @@ class ParakeetTranscriptionService: TranscriptionService {
|
|||||||
}
|
}
|
||||||
|
|
||||||
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
||||||
do {
|
if asrManager == nil || !isModelLoaded {
|
||||||
|
try await loadModel()
|
||||||
if !isModelLoaded {
|
|
||||||
try await loadModel()
|
|
||||||
}
|
|
||||||
|
|
||||||
guard let asrManager = asrManager else {
|
|
||||||
logger.notice("🦜 ASR manager is nil after model loading")
|
|
||||||
throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."])
|
|
||||||
}
|
|
||||||
|
|
||||||
logger.notice("🦜 Starting Parakeet transcription")
|
|
||||||
let audioSamples = try readAudioSamples(from: audioURL)
|
|
||||||
logger.notice("🦜 Audio samples loaded: \(audioSamples.count) samples")
|
|
||||||
|
|
||||||
let result = try await asrManager.transcribe(audioSamples)
|
|
||||||
logger.notice("🦜 Parakeet transcription completed")
|
|
||||||
|
|
||||||
// Check for empty results (vocabulary issue indicator)
|
|
||||||
if result.text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
|
||||||
logger.notice("🦜 Warning: Empty transcription result for \(audioSamples.count) samples - possible vocabulary issue")
|
|
||||||
}
|
|
||||||
|
|
||||||
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
|
|
||||||
return WhisperTextFormatter.format(result.text)
|
|
||||||
}
|
|
||||||
return result.text
|
|
||||||
} catch {
|
|
||||||
logger.notice("🦜 Parakeet transcription failed: \(error.localizedDescription)")
|
|
||||||
let errorMessage = error.localizedDescription
|
|
||||||
await MainActor.run {
|
|
||||||
NotificationManager.shared.showNotification(
|
|
||||||
title: "Transcription Failed: \(errorMessage)",
|
|
||||||
type: .error
|
|
||||||
)
|
|
||||||
}
|
|
||||||
return ""
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
guard let asrManager = asrManager else {
|
||||||
|
logger.notice("🦜 Parakeet manager is still nil after attempting to load the model.")
|
||||||
|
throw ASRError.notInitialized
|
||||||
|
}
|
||||||
|
|
||||||
|
let audioSamples = try readAudioSamples(from: audioURL)
|
||||||
|
|
||||||
|
// Validate audio data before transcription
|
||||||
|
guard audioSamples.count >= 16000 else {
|
||||||
|
logger.notice("🦜 Audio too short for transcription: \(audioSamples.count) samples")
|
||||||
|
throw ASRError.invalidAudioData
|
||||||
|
}
|
||||||
|
|
||||||
|
let result = try await asrManager.transcribe(audioSamples)
|
||||||
|
|
||||||
|
Task {
|
||||||
|
asrManager.cleanup()
|
||||||
|
isModelLoaded = false
|
||||||
|
logger.notice("🦜 Parakeet ASR models cleaned up from memory")
|
||||||
|
}
|
||||||
|
|
||||||
|
// Check for empty results (vocabulary issue indicator)
|
||||||
|
if result.text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
|
||||||
|
logger.notice("🦜 Warning: Empty transcription result for \(audioSamples.count) samples - possible vocabulary issue")
|
||||||
|
}
|
||||||
|
|
||||||
|
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
|
||||||
|
return WhisperTextFormatter.format(result.text)
|
||||||
|
}
|
||||||
|
return result.text
|
||||||
}
|
}
|
||||||
|
|
||||||
private func readAudioSamples(from url: URL) throws -> [Float] {
|
private func readAudioSamples(from url: URL) throws -> [Float] {
|
||||||
logger.notice("🦜 Reading audio file: \(url.lastPathComponent)")
|
do {
|
||||||
let data = try Data(contentsOf: url)
|
let data = try Data(contentsOf: url)
|
||||||
logger.notice("🦜 Audio file size: \(data.count) bytes")
|
|
||||||
|
|
||||||
// A basic check, assuming a more robust check happens elsewhere.
|
// Check minimum file size for valid WAV header
|
||||||
guard data.count > 44 else {
|
guard data.count > 44 else {
|
||||||
logger.notice("🦜 Warning: Audio file too small (\(data.count) bytes), expected > 44 bytes")
|
logger.notice("🦜 Audio file too small (\(data.count) bytes), expected > 44 bytes")
|
||||||
return []
|
throw ASRError.invalidAudioData
|
||||||
}
|
|
||||||
|
|
||||||
let floats = stride(from: 44, to: data.count, by: 2).map {
|
|
||||||
return data[$0..<$0 + 2].withUnsafeBytes {
|
|
||||||
let short = Int16(littleEndian: $0.load(as: Int16.self))
|
|
||||||
return max(-1.0, min(Float(short) / 32767.0, 1.0))
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
logger.notice("🦜 Processed audio: \(floats.count) samples from \(data.count) bytes")
|
let floats = stride(from: 44, to: data.count, by: 2).map {
|
||||||
|
return data[$0..<$0 + 2].withUnsafeBytes {
|
||||||
|
let short = Int16(littleEndian: $0.load(as: Int16.self))
|
||||||
|
return max(-1.0, min(Float(short) / 32767.0, 1.0))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Check if we have enough samples for transcription (minimum 16,000 samples = 1 second at 16kHz)
|
return floats
|
||||||
if floats.count < 16000 {
|
} catch {
|
||||||
logger.notice("🦜 Warning: Audio too short (\(floats.count) samples), minimum 16,000 required")
|
logger.notice("🦜 Failed to read audio file: \(error.localizedDescription)")
|
||||||
}
|
throw ASRError.invalidAudioData
|
||||||
|
|
||||||
return floats
|
|
||||||
}
|
|
||||||
|
|
||||||
// Helper function to get vocabulary path based on model directory
|
|
||||||
private func getVocabularyPath() -> URL {
|
|
||||||
if let customDirectory = customModelsDirectory {
|
|
||||||
return customDirectory.appendingPathComponent("parakeet_vocab.json")
|
|
||||||
} else {
|
|
||||||
let applicationSupportURL = FileManager.default.urls(
|
|
||||||
for: .applicationSupportDirectory, in: .userDomainMask
|
|
||||||
).first!
|
|
||||||
return applicationSupportURL
|
|
||||||
.appendingPathComponent("FluidAudio", isDirectory: true)
|
|
||||||
.appendingPathComponent("Models", isDirectory: true)
|
|
||||||
.appendingPathComponent("parakeet-tdt-0.6b-v2-coreml", isDirectory: true)
|
|
||||||
.appendingPathComponent("parakeet_vocab.json")
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user