More logging in ParakeetTranscriptionService

This commit is contained in:
Beingpax 2025-08-03 09:35:49 +05:45
parent ed298f604b
commit 29722d0a31
3 changed files with 72 additions and 33 deletions

View File

@ -7,7 +7,7 @@
objects = { objects = {
/* Begin PBXBuildFile section */ /* Begin PBXBuildFile section */
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1304F732E3B9E8A0001F9E2 /* FluidAudio */; }; E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E12E7E962E3F109C006276F2 /* FluidAudio */; };
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; }; E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; };
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; }; E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; };
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; }; E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; };
@ -82,7 +82,7 @@
files = ( files = (
E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */, E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */,
E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */, E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */,
E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */, E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */,
E1B2DCAB2E3DE70A008DFD68 /* whisper.xcframework in Frameworks */, E1B2DCAB2E3DE70A008DFD68 /* whisper.xcframework in Frameworks */,
E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */, E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */,
E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */, E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */,
@ -162,7 +162,7 @@
E1ADD45E2CC544F100303ECB /* Sparkle */, E1ADD45E2CC544F100303ECB /* Sparkle */,
E1F5FA792DA6CBF900B1FD8A /* Zip */, E1F5FA792DA6CBF900B1FD8A /* Zip */,
E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */, E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */,
E1304F732E3B9E8A0001F9E2 /* FluidAudio */, E12E7E962E3F109C006276F2 /* FluidAudio */,
); );
productName = VoiceInk; productName = VoiceInk;
productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */; productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */;
@ -252,7 +252,7 @@
E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */, E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */,
E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */, E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */,
E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */, E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */,
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */, E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */,
); );
preferredProjectObjectVersion = 77; preferredProjectObjectVersion = 77;
productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */; productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */;
@ -621,15 +621,14 @@
}; };
/* End XCConfigurationList section */ /* End XCConfigurationList section */
/* Begin XCRemoteSwiftPackageReference section */ /* Begin XCLocalSwiftPackageReference section */
E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */ = { E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */ = {
isa = XCRemoteSwiftPackageReference; isa = XCLocalSwiftPackageReference;
repositoryURL = "https://github.com/FluidInference/FluidAudio"; relativePath = ../FluidAudio;
requirement = {
branch = main;
kind = branch;
};
}; };
/* End XCLocalSwiftPackageReference section */
/* Begin XCRemoteSwiftPackageReference section */
E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = { E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = {
isa = XCRemoteSwiftPackageReference; isa = XCRemoteSwiftPackageReference;
repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts"; repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts";
@ -673,9 +672,8 @@
/* End XCRemoteSwiftPackageReference section */ /* End XCRemoteSwiftPackageReference section */
/* Begin XCSwiftPackageProductDependency section */ /* Begin XCSwiftPackageProductDependency section */
E1304F732E3B9E8A0001F9E2 /* FluidAudio */ = { E12E7E962E3F109C006276F2 /* FluidAudio */ = {
isa = XCSwiftPackageProductDependency; isa = XCSwiftPackageProductDependency;
package = E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */;
productName = FluidAudio; productName = FluidAudio;
}; };
E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = { E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = {

View File

@ -1,15 +1,6 @@
{ {
"originHash" : "62ab4f67009fbd19bb1832b7d8f33b824ec7e4c91e83e35d9508a7fc51707a31", "originHash" : "ae3f634e8c4b39a1a80bcd04f018e2208c0491e42ee824cd94a92d7b88893420",
"pins" : [ "pins" : [
{
"identity" : "fluidaudio",
"kind" : "remoteSourceControl",
"location" : "https://github.com/FluidInference/FluidAudio",
"state" : {
"branch" : "main",
"revision" : "826d9e415008d83f6f01d308456d4ed4a13722a7"
}
},
{ {
"identity" : "keyboardshortcuts", "identity" : "keyboardshortcuts",
"kind" : "remoteSourceControl", "kind" : "remoteSourceControl",

View File

@ -15,6 +15,7 @@ class ParakeetTranscriptionService: TranscriptionService {
init(customModelsDirectory: URL? = nil) { init(customModelsDirectory: URL? = nil) {
self.customModelsDirectory = customModelsDirectory self.customModelsDirectory = customModelsDirectory
logger.notice("🦜 ParakeetTranscriptionService initialized with directory: \(customModelsDirectory?.path ?? "default")")
} }
func loadModel() async throws { func loadModel() async throws {
@ -38,17 +39,34 @@ class ParakeetTranscriptionService: TranscriptionService {
let models: AsrModels let models: AsrModels
if let customDirectory = customModelsDirectory { if let customDirectory = customModelsDirectory {
logger.notice("🦜 Loading models from custom directory: \(customDirectory.path)")
models = try await AsrModels.downloadAndLoad(to: customDirectory) models = try await AsrModels.downloadAndLoad(to: customDirectory)
} else { } else {
logger.notice("🦜 Loading models from default directory")
models = try await AsrModels.downloadAndLoad() models = try await AsrModels.downloadAndLoad()
} }
// Check vocabulary file before initialization
let vocabPath = getVocabularyPath()
let vocabExists = FileManager.default.fileExists(atPath: vocabPath.path)
logger.notice("🦜 Vocabulary file exists at \(vocabPath.lastPathComponent): \(vocabExists)")
if vocabExists {
do {
let vocabData = try Data(contentsOf: vocabPath)
let vocabDict = try JSONSerialization.jsonObject(with: vocabData) as? [String: String] ?? [:]
logger.notice("🦜 Vocabulary loaded with \(vocabDict.count) entries")
} catch {
logger.notice("🦜 Failed to parse vocabulary file: \(error.localizedDescription)")
}
}
try await asrManager?.initialize(models: models) try await asrManager?.initialize(models: models)
isModelLoaded = true isModelLoaded = true
logger.notice("🦜 Parakeet model loaded successfully") logger.notice("🦜 Parakeet model loaded successfully")
} catch { } catch {
logger.error("🦜 Failed to load Parakeet model: \(error.localizedDescription)") logger.notice("🦜 Failed to load Parakeet model: \(error.localizedDescription)")
isModelLoaded = false isModelLoaded = false
asrManager = nil asrManager = nil
throw error throw error
@ -57,32 +75,34 @@ class ParakeetTranscriptionService: TranscriptionService {
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
do { do {
defer {
asrManager?.cleanup()
self.asrManager = nil
self.isModelLoaded = false
}
if !isModelLoaded { if !isModelLoaded {
try await loadModel() try await loadModel()
} }
guard let asrManager = asrManager else { guard let asrManager = asrManager else {
logger.error("🦜 ASR manager is nil after model loading") logger.notice("🦜 ASR manager is nil after model loading")
throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."]) throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."])
} }
logger.notice("🦜 Starting Parakeet transcription") logger.notice("🦜 Starting Parakeet transcription")
let audioSamples = try readAudioSamples(from: audioURL) let audioSamples = try readAudioSamples(from: audioURL)
logger.notice("🦜 Audio samples loaded: \(audioSamples.count) samples")
let result = try await asrManager.transcribe(audioSamples) let result = try await asrManager.transcribe(audioSamples)
logger.notice("🦜 Parakeet transcription completed") logger.notice("🦜 Parakeet transcription completed")
// Check for empty results (vocabulary issue indicator)
if result.text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty {
logger.notice("🦜 Warning: Empty transcription result for \(audioSamples.count) samples - possible vocabulary issue")
}
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true { if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
return WhisperTextFormatter.format(result.text) return WhisperTextFormatter.format(result.text)
} }
return result.text return result.text
} catch { } catch {
logger.error("🦜 Parakeet transcription failed: \(error.localizedDescription)") logger.notice("🦜 Parakeet transcription failed: \(error.localizedDescription)")
let errorMessage = error.localizedDescription let errorMessage = error.localizedDescription
await MainActor.run { await MainActor.run {
NotificationManager.shared.showNotification( NotificationManager.shared.showNotification(
@ -95,9 +115,15 @@ class ParakeetTranscriptionService: TranscriptionService {
} }
private func readAudioSamples(from url: URL) throws -> [Float] { private func readAudioSamples(from url: URL) throws -> [Float] {
logger.notice("🦜 Reading audio file: \(url.lastPathComponent)")
let data = try Data(contentsOf: url) let data = try Data(contentsOf: url)
logger.notice("🦜 Audio file size: \(data.count) bytes")
// A basic check, assuming a more robust check happens elsewhere. // A basic check, assuming a more robust check happens elsewhere.
guard data.count > 44 else { return [] } guard data.count > 44 else {
logger.notice("🦜 Warning: Audio file too small (\(data.count) bytes), expected > 44 bytes")
return []
}
let floats = stride(from: 44, to: data.count, by: 2).map { let floats = stride(from: 44, to: data.count, by: 2).map {
return data[$0..<$0 + 2].withUnsafeBytes { return data[$0..<$0 + 2].withUnsafeBytes {
@ -105,6 +131,30 @@ class ParakeetTranscriptionService: TranscriptionService {
return max(-1.0, min(Float(short) / 32767.0, 1.0)) return max(-1.0, min(Float(short) / 32767.0, 1.0))
} }
} }
logger.notice("🦜 Processed audio: \(floats.count) samples from \(data.count) bytes")
// Check if we have enough samples for transcription (minimum 16,000 samples = 1 second at 16kHz)
if floats.count < 16000 {
logger.notice("🦜 Warning: Audio too short (\(floats.count) samples), minimum 16,000 required")
}
return floats return floats
} }
/// Builds the URL at which the Parakeet vocabulary file is looked up.
///
/// When `customModelsDirectory` is set, the file is expected directly inside it.
/// Otherwise the path is built under the user's Application Support directory,
/// in `FluidAudio/Models/parakeet-tdt-0.6b-v2-coreml`.
///
/// - Returns: The URL of `parakeet_vocab.json`. This only constructs the path;
///   it does not check that the file exists.
private func getVocabularyPath() -> URL {
    let vocabularyFileName = "parakeet_vocab.json"

    if let customDirectory = customModelsDirectory {
        return customDirectory.appendingPathComponent(vocabularyFileName)
    }

    // `urls(for:in:)` returns an array, so an empty result is representable.
    // Fall back to the conventional per-user location instead of force-unwrapping,
    // so that building this path can never trap.
    let applicationSupportURL: URL
    if let locatedURL = FileManager.default.urls(
        for: .applicationSupportDirectory, in: .userDomainMask
    ).first {
        applicationSupportURL = locatedURL
    } else {
        applicationSupportURL = URL(fileURLWithPath: NSHomeDirectory(), isDirectory: true)
            .appendingPathComponent("Library", isDirectory: true)
            .appendingPathComponent("Application Support", isDirectory: true)
    }

    return applicationSupportURL
        .appendingPathComponent("FluidAudio", isDirectory: true)
        .appendingPathComponent("Models", isDirectory: true)
        .appendingPathComponent("parakeet-tdt-0.6b-v2-coreml", isDirectory: true)
        .appendingPathComponent(vocabularyFileName)
}
} }