From 29722d0a31208b2e2ac2a770f00d74dbf9685e4d Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sun, 3 Aug 2025 09:35:49 +0545 Subject: [PATCH] more logging in parakeettranscription service --- VoiceInk.xcodeproj/project.pbxproj | 26 ++++--- .../xcshareddata/swiftpm/Package.resolved | 11 +-- .../ParakeetTranscriptionService.swift | 68 ++++++++++++++++--- 3 files changed, 72 insertions(+), 33 deletions(-) diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index afc43c5..d6fe44b 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -7,7 +7,7 @@ objects = { /* Begin PBXBuildFile section */ - E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E1304F732E3B9E8A0001F9E2 /* FluidAudio */; }; + E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */ = {isa = PBXBuildFile; productRef = E12E7E962E3F109C006276F2 /* FluidAudio */; }; E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */ = {isa = PBXBuildFile; productRef = E1A261112CC143AC00B233D1 /* KeyboardShortcuts */; }; E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD4592CC5352A00303ECB /* LaunchAtLogin */; }; E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */ = {isa = PBXBuildFile; productRef = E1ADD45E2CC544F100303ECB /* Sparkle */; }; @@ -82,7 +82,7 @@ files = ( E1ADD45A2CC5352A00303ECB /* LaunchAtLogin in Frameworks */, E1D7EF992E35E16C00640029 /* MediaRemoteAdapter in Frameworks */, - E1304F742E3B9E8A0001F9E2 /* FluidAudio in Frameworks */, + E12E7E972E3F109C006276F2 /* FluidAudio in Frameworks */, E1B2DCAB2E3DE70A008DFD68 /* whisper.xcframework in Frameworks */, E1ADD45F2CC544F100303ECB /* Sparkle in Frameworks */, E1A261122CC143AC00B233D1 /* KeyboardShortcuts in Frameworks */, @@ -162,7 +162,7 @@ E1ADD45E2CC544F100303ECB /* Sparkle */, E1F5FA792DA6CBF900B1FD8A /* Zip */, E1D7EF982E35E16C00640029 /* MediaRemoteAdapter */, - E1304F732E3B9E8A0001F9E2 /* FluidAudio */, + E12E7E962E3F109C006276F2 /* FluidAudio */, ); productName = VoiceInk; productReference = E11473B02CBE0F0A00318EE4 /* VoiceInk.app */; @@ -252,7 +252,7 @@ E1ADD45D2CC544F100303ECB /* XCRemoteSwiftPackageReference "Sparkle" */, E1F5FA782DA6CBF900B1FD8A /* XCRemoteSwiftPackageReference "Zip" */, E1D7EF972E35E16C00640029 /* XCRemoteSwiftPackageReference "mediaremote-adapter" */, - E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */, + E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */, ); preferredProjectObjectVersion = 77; productRefGroup = E11473B12CBE0F0A00318EE4 /* Products */; @@ -621,15 +621,14 @@ }; /* End XCConfigurationList section */ -/* Begin XCRemoteSwiftPackageReference section */ - E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */ = { - isa = XCRemoteSwiftPackageReference; - repositoryURL = "https://github.com/FluidInference/FluidAudio"; - requirement = { - branch = main; - kind = branch; - }; +/* Begin XCLocalSwiftPackageReference section */ + E10A90B12E3F0E0400E5D6AF /* XCLocalSwiftPackageReference "../FluidAudio" */ = { + isa = XCLocalSwiftPackageReference; + relativePath = ../FluidAudio; }; +/* End XCLocalSwiftPackageReference section */ + +/* Begin XCRemoteSwiftPackageReference section */ E1A261102CC143AC00B233D1 /* XCRemoteSwiftPackageReference "KeyboardShortcuts" */ = { isa = XCRemoteSwiftPackageReference; repositoryURL = "https://github.com/sindresorhus/KeyboardShortcuts"; @@ -673,9 +672,8 @@ /* End XCRemoteSwiftPackageReference section */ /* Begin XCSwiftPackageProductDependency section */ - E1304F732E3B9E8A0001F9E2 /* FluidAudio */ = { + E12E7E962E3F109C006276F2 /* FluidAudio */ = { isa = XCSwiftPackageProductDependency; - package = E1304F722E3B9E8A0001F9E2 /* XCRemoteSwiftPackageReference "FluidAudio" */; productName = FluidAudio; }; E1A261112CC143AC00B233D1 /* KeyboardShortcuts */ = { diff --git a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 733ba50..9435b1e 100644 --- a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -1,15 +1,6 @@ { - "originHash" : "62ab4f67009fbd19bb1832b7d8f33b824ec7e4c91e83e35d9508a7fc51707a31", + "originHash" : "ae3f634e8c4b39a1a80bcd04f018e2208c0491e42ee824cd94a92d7b88893420", "pins" : [ - { - "identity" : "fluidaudio", - "kind" : "remoteSourceControl", - "location" : "https://github.com/FluidInference/FluidAudio", - "state" : { - "branch" : "main", - "revision" : "826d9e415008d83f6f01d308456d4ed4a13722a7" - } - }, { "identity" : "keyboardshortcuts", "kind" : "remoteSourceControl", diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index 6428782..2aaf2d5 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -15,6 +15,7 @@ class ParakeetTranscriptionService: TranscriptionService { init(customModelsDirectory: URL? = nil) { self.customModelsDirectory = customModelsDirectory + logger.notice("🦜 ParakeetTranscriptionService initialized with directory: \(customModelsDirectory?.path ?? "default")") } func loadModel() async throws { @@ -38,17 +39,34 @@ class ParakeetTranscriptionService: TranscriptionService { let models: AsrModels if let customDirectory = customModelsDirectory { + logger.notice("🦜 Loading models from custom directory: \(customDirectory.path)") models = try await AsrModels.downloadAndLoad(to: customDirectory) } else { + logger.notice("🦜 Loading models from default directory") models = try await AsrModels.downloadAndLoad() } + // Check vocabulary file before initialization + let vocabPath = getVocabularyPath() + let vocabExists = FileManager.default.fileExists(atPath: vocabPath.path) + logger.notice("🦜 Vocabulary file exists at \(vocabPath.lastPathComponent): \(vocabExists)") + + if vocabExists { + do { + let vocabData = try Data(contentsOf: vocabPath) + let vocabDict = try JSONSerialization.jsonObject(with: vocabData) as? [String: String] ?? [:] + logger.notice("🦜 Vocabulary loaded with \(vocabDict.count) entries") + } catch { + logger.notice("🦜 Failed to parse vocabulary file: \(error.localizedDescription)") + } + } + try await asrManager?.initialize(models: models) isModelLoaded = true logger.notice("🦜 Parakeet model loaded successfully") } catch { - logger.error("🦜 Failed to load Parakeet model: \(error.localizedDescription)") + logger.notice("🦜 Failed to load Parakeet model: \(error.localizedDescription)") isModelLoaded = false asrManager = nil throw error @@ -57,32 +75,34 @@ class ParakeetTranscriptionService: TranscriptionService { func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { do { - defer { - asrManager?.cleanup() - self.asrManager = nil - self.isModelLoaded = false - } if !isModelLoaded { try await loadModel() } guard let asrManager = asrManager else { - logger.error("🦜 ASR manager is nil after model loading") + logger.notice("🦜 ASR manager is nil after model loading") throw NSError(domain: "ParakeetTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Failed to initialize ASR manager."]) } logger.notice("🦜 Starting Parakeet transcription") let audioSamples = try readAudioSamples(from: audioURL) + logger.notice("🦜 Audio samples loaded: \(audioSamples.count) samples") + let result = try await asrManager.transcribe(audioSamples) logger.notice("🦜 Parakeet transcription completed") + // Check for empty results (vocabulary issue indicator) + if result.text.trimmingCharacters(in: .whitespacesAndNewlines).isEmpty { + logger.notice("🦜 Warning: Empty transcription result for \(audioSamples.count) samples - possible vocabulary issue") + } + if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true { return WhisperTextFormatter.format(result.text) } return result.text } catch { - logger.error("🦜 Parakeet transcription failed: \(error.localizedDescription)") + logger.notice("🦜 Parakeet transcription failed: \(error.localizedDescription)") let errorMessage = error.localizedDescription await MainActor.run { NotificationManager.shared.showNotification( @@ -95,9 +115,15 @@ class ParakeetTranscriptionService: TranscriptionService { } private func readAudioSamples(from url: URL) throws -> [Float] { + logger.notice("🦜 Reading audio file: \(url.lastPathComponent)") let data = try Data(contentsOf: url) + logger.notice("🦜 Audio file size: \(data.count) bytes") + // A basic check, assuming a more robust check happens elsewhere. - guard data.count > 44 else { return [] } + guard data.count > 44 else { + logger.notice("🦜 Warning: Audio file too small (\(data.count) bytes), expected > 44 bytes") + return [] + } let floats = stride(from: 44, to: data.count, by: 2).map { return data[$0..<$0 + 2].withUnsafeBytes { @@ -105,6 +131,30 @@ class ParakeetTranscriptionService: TranscriptionService { return max(-1.0, min(Float(short) / 32767.0, 1.0)) } } + + logger.notice("🦜 Processed audio: \(floats.count) samples from \(data.count) bytes") + + // Check if we have enough samples for transcription (minimum 16,000 samples = 1 second at 16kHz) + if floats.count < 16000 { + logger.notice("🦜 Warning: Audio too short (\(floats.count) samples), minimum 16,000 required") + } + return floats } + + // Helper function to get vocabulary path based on model directory + private func getVocabularyPath() -> URL { + if let customDirectory = customModelsDirectory { + return customDirectory.appendingPathComponent("parakeet_vocab.json") + } else { + let applicationSupportURL = FileManager.default.urls( + for: .applicationSupportDirectory, in: .userDomainMask + ).first! + return applicationSupportURL + .appendingPathComponent("FluidAudio", isDirectory: true) + .appendingPathComponent("Models", isDirectory: true) + .appendingPathComponent("parakeet-tdt-0.6b-v2-coreml", isDirectory: true) + .appendingPathComponent("parakeet_vocab.json") + } + } } \ No newline at end of file