From abe00c718a0720a2b919d2f3f0a16159a31c0122 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Thu, 24 Apr 2025 10:06:02 +0545 Subject: [PATCH] Fixed audio duration inconsistencies --- .../Services/AudioTranscriptionManager.swift | 9 +-------- .../Services/AudioTranscriptionService.swift | 9 +-------- VoiceInk/Whisper/WhisperState.swift | 19 +++++++++++-------- 3 files changed, 13 insertions(+), 24 deletions(-) diff --git a/VoiceInk/Services/AudioTranscriptionManager.swift b/VoiceInk/Services/AudioTranscriptionManager.swift index c9a308a..6c6aafb 100644 --- a/VoiceInk/Services/AudioTranscriptionManager.swift +++ b/VoiceInk/Services/AudioTranscriptionManager.swift @@ -71,14 +71,7 @@ class AudioTranscriptionManager: ObservableObject { // Get audio duration let audioAsset = AVURLAsset(url: url) - var duration: TimeInterval = 0 - - if #available(macOS 13.0, *) { - let durationValue = try await audioAsset.load(.duration) - duration = CMTimeGetSeconds(durationValue) - } else { - duration = CMTimeGetSeconds(audioAsset.duration) - } + let duration = CMTimeGetSeconds(audioAsset.duration) // Create permanent copy of the audio file let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] diff --git a/VoiceInk/Services/AudioTranscriptionService.swift b/VoiceInk/Services/AudioTranscriptionService.swift index b92d9dc..93e0069 100644 --- a/VoiceInk/Services/AudioTranscriptionService.swift +++ b/VoiceInk/Services/AudioTranscriptionService.swift @@ -59,14 +59,7 @@ class AudioTranscriptionService: ObservableObject { // Get audio duration let audioAsset = AVURLAsset(url: url) - var duration: TimeInterval = 0 - - if #available(macOS 13.0, *) { - let durationValue = try await audioAsset.load(.duration) - duration = CMTimeGetSeconds(durationValue) - } else { - duration = CMTimeGetSeconds(audioAsset.duration) - } + let duration = CMTimeGetSeconds(audioAsset.duration) // Create a permanent copy of the audio file let recordingsDirectory = 
FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index fa1230d..9a50e20 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -74,7 +74,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { let enhancementService: AIEnhancementService? var licenseViewModel: LicenseViewModel let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") - private var transcriptionStartTime: Date? var notchWindowManager: NotchWindowManager? var miniWindowManager: MiniWindowManager? @@ -122,9 +121,8 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { } await recorder.stopRecording() if let recordedFile { - let duration = Date().timeIntervalSince(transcriptionStartTime ?? Date()) if !shouldCancelRecording { - await transcribeAudio(recordedFile, duration: duration) + await transcribeAudio(recordedFile) } } else { logger.error("❌ No recorded file found after stopping recording") @@ -153,7 +151,6 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { create: true) .appending(path: "output.wav") self.recordedFile = file - self.transcriptionStartTime = Date() await MainActor.run { self.isRecording = true self.isVisualizerActive = true @@ -227,7 +224,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { } } - private func transcribeAudio(_ url: URL, duration: TimeInterval) async { + private func transcribeAudio(_ url: URL) async { if shouldCancelRecording { return } await MainActor.run { isProcessing = true @@ -268,6 +265,12 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { if shouldCancelRecording { return } let data = try readAudioSamples(url) if shouldCancelRecording { return } + + // Get the actual audio duration from the file + let audioAsset = AVURLAsset(url: url) + let 
actualDuration = CMTimeGetSeconds(audioAsset.duration) + logger.notice("📊 Audio file duration: \(actualDuration) seconds") + await whisperContext.setPrompt(whisperPrompt.transcriptionPrompt) if shouldCancelRecording { return } await whisperContext.fullTranscribe(samples: data) @@ -287,7 +290,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { let enhancedText = try await enhancementService.enhance(text) let newTranscription = Transcription( text: text, - duration: duration, + duration: actualDuration, enhancedText: enhancedText, audioFileURL: permanentURLString ) @@ -297,7 +300,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { } catch { let newTranscription = Transcription( text: text, - duration: duration, + duration: actualDuration, audioFileURL: permanentURLString ) modelContext.insert(newTranscription) @@ -306,7 +309,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { } else { let newTranscription = Transcription( text: text, - duration: duration, + duration: actualDuration, audioFileURL: permanentURLString ) modelContext.insert(newTranscription)