import Foundation
import SwiftUI
import AVFoundation
import SwiftData
import os

/// Re-transcribes an existing audio recording with the currently selected
/// Whisper model, optionally enhances the result via `AIEnhancementService`,
/// and persists a new `Transcription` in SwiftData.
///
/// UI-observable state (`isTranscribing`, `messageLog`, `currentError`) is
/// published on the main actor; the whole class is `@MainActor`-isolated.
@MainActor
class AudioTranscriptionService: ObservableObject {
    @Published var isTranscribing = false
    @Published var messageLog = ""
    @Published var currentError: TranscriptionError?

    // Lazily created on first use and cached for subsequent retranscriptions.
    private var whisperContext: WhisperContext?
    private let modelContext: ModelContext
    private let enhancementService: AIEnhancementService?
    private let whisperState: WhisperState
    private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionService")

    enum TranscriptionError: Error {
        case noAudioFile
        case transcriptionFailed
        case modelNotLoaded
        case invalidAudioFormat
    }

    /// - Parameters:
    ///   - modelContext: SwiftData context used to persist new transcriptions.
    ///   - whisperState: Source of the transcription prompt and the shared
    ///     enhancement service (mirrors the live-recording pipeline).
    init(modelContext: ModelContext, whisperState: WhisperState) {
        self.modelContext = modelContext
        self.whisperState = whisperState
        self.enhancementService = whisperState.enhancementService
    }

    /// Transcribes the WAV file at `url` and returns the saved `Transcription`.
    ///
    /// - Throws: `TranscriptionError.noAudioFile` if the file is missing,
    ///   `TranscriptionError.modelNotLoaded` if the model cannot be created,
    ///   or the underlying copy/decode/transcribe error. `isTranscribing` is
    ///   reset to `false` on every exit path.
    func retranscribeAudio(from url: URL, using whisperModel: WhisperModel) async throws -> Transcription {
        guard FileManager.default.fileExists(atPath: url.path) else {
            throw TranscriptionError.noAudioFile
        }

        // Class is @MainActor, so published state can be mutated directly —
        // no MainActor.run hop needed.
        isTranscribing = true
        currentError = nil
        messageLog = "Loading model...\n"

        if whisperContext == nil {
            do {
                whisperContext = try await WhisperContext.createContext(path: whisperModel.url.path)
                messageLog += "Model loaded successfully.\n"
            } catch {
                logger.error("❌ Failed to load model: \(error.localizedDescription)")
                messageLog += "Failed to load model: \(error.localizedDescription)\n"
                currentError = .modelNotLoaded
                isTranscribing = false
                throw TranscriptionError.modelNotLoaded
            }
        }

        guard let whisperContext else {
            currentError = .modelNotLoaded
            isTranscribing = false
            throw TranscriptionError.modelNotLoaded
        }

        // Duration is informational only: a failed load must not abort the
        // retranscription (and previously left `isTranscribing` stuck true).
        let duration = await audioDuration(of: url)

        let permanentURL = try makePermanentCopy(of: url)
        let permanentURLString = permanentURL.absoluteString

        messageLog += "Transcribing audio...\n"

        do {
            let samples = try readAudioSamples(permanentURL)

            // Use the same prompt as the live-recording pipeline (WhisperState).
            messageLog += "Setting prompt: \(whisperState.whisperPrompt.transcriptionPrompt)\n"
            await whisperContext.setPrompt(whisperState.whisperPrompt.transcriptionPrompt)

            try await whisperContext.fullTranscribe(samples: samples)
            var text = await whisperContext.getTranscription()
            text = text.trimmingCharacters(in: .whitespacesAndNewlines)
            logger.notice("✅ Retranscription completed successfully, length: \(text.count) characters")

            // Enhancement is best-effort: on failure we fall back to the raw text.
            var enhancedText: String? = nil
            if let enhancementService,
               enhancementService.isEnhancementEnabled,
               enhancementService.isConfigured {
                do {
                    messageLog += "Enhancing transcription with AI...\n"
                    enhancedText = try await enhancementService.enhance(text)
                    messageLog += "Enhancement completed.\n"
                } catch {
                    messageLog += "Enhancement failed: \(error.localizedDescription). Using original transcription.\n"
                }
            }

            let transcription = saveTranscription(
                text: text,
                enhancedText: enhancedText,
                duration: duration,
                audioFileURL: permanentURLString
            )

            isTranscribing = false
            messageLog += "Done: \(enhancedText ?? text)\n"
            return transcription
        } catch {
            logger.error("❌ Transcription failed: \(error.localizedDescription)")
            messageLog += "Transcription failed: \(error.localizedDescription)\n"
            currentError = .transcriptionFailed
            isTranscribing = false
            throw error
        }
    }

    /// Builds, inserts and saves a `Transcription`; a save failure is logged
    /// but non-fatal (the in-memory object is still returned to the caller).
    private func saveTranscription(
        text: String,
        enhancedText: String?,
        duration: TimeInterval,
        audioFileURL: String
    ) -> Transcription {
        let transcription: Transcription
        if let enhancedText {
            transcription = Transcription(
                text: text,
                duration: duration,
                enhancedText: enhancedText,
                audioFileURL: audioFileURL
            )
        } else {
            transcription = Transcription(
                text: text,
                duration: duration,
                audioFileURL: audioFileURL
            )
        }
        modelContext.insert(transcription)
        do {
            try modelContext.save()
        } catch {
            logger.error("❌ Failed to save transcription: \(error.localizedDescription)")
            messageLog += "Failed to save transcription: \(error.localizedDescription)\n"
        }
        return transcription
    }

    /// Best-effort duration probe; returns 0 when the asset cannot be loaded.
    private func audioDuration(of url: URL) async -> TimeInterval {
        let asset = AVURLAsset(url: url)
        if #available(macOS 13.0, *) {
            let duration = (try? await asset.load(.duration)) ?? .zero
            return CMTimeGetSeconds(duration)
        } else {
            return CMTimeGetSeconds(asset.duration)
        }
    }

    /// Copies the source audio into the app's Recordings directory so the
    /// transcription keeps a stable file reference.
    ///
    /// Creates the directory if needed (the original code assumed it existed
    /// and failed on a fresh install). Resets `isTranscribing` before throwing.
    private func makePermanentCopy(of url: URL) throws -> URL {
        let recordingsDirectory = FileManager.default
            .urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
            .appendingPathComponent("com.prakashjoshipax.VoiceInk")
            .appendingPathComponent("Recordings")
        do {
            try FileManager.default.createDirectory(
                at: recordingsDirectory,
                withIntermediateDirectories: true
            )
            let fileName = "retranscribed_\(UUID().uuidString).wav"
            let destination = recordingsDirectory.appendingPathComponent(fileName)
            try FileManager.default.copyItem(at: url, to: destination)
            return destination
        } catch {
            logger.error("❌ Failed to create permanent copy of audio: \(error.localizedDescription)")
            messageLog += "Failed to create permanent copy of audio: \(error.localizedDescription)\n"
            isTranscribing = false
            throw error
        }
    }

    private func readAudioSamples(_ url: URL) throws -> [Float] {
        try decodeWaveFile(url)
    }

    /// Decodes 16-bit little-endian PCM samples from a WAV file, skipping the
    /// canonical 44-byte header, and normalizes to [-1, 1].
    ///
    /// NOTE(review): assumes the canonical header layout (no extra chunks) and
    /// 16-bit mono/interleaved PCM — matches what the recorder writes, but is
    /// not a general WAV parser.
    private func decodeWaveFile(_ url: URL) throws -> [Float] {
        let data = try Data(contentsOf: url)
        let headerSize = 44
        guard data.count > headerSize else { return [] }
        // Stride bound is `count - 1` so a trailing odd byte can never push
        // the two-byte read past the end (the original `to: data.count`
        // crashed on odd-length payloads). Bytes are assembled manually to
        // stay alignment-safe regardless of buffer layout.
        return stride(from: headerSize, to: data.count - 1, by: 2).map { offset in
            let raw = UInt16(data[offset]) | (UInt16(data[offset + 1]) << 8)
            let sample = Int16(bitPattern: raw)
            return max(-1.0, min(Float(sample) / 32767.0, 1.0))
        }
    }
}
.font(.system(size: 18, weight: .semibold)) + .foregroundStyle(Color.green) + } + } + ) + } + .buttonStyle(.plain) + .disabled(isRetranscribing) + .help("Retranscribe this audio") + // Time Text(formatTime(playerManager.currentTime)) .font(.system(size: 14, weight: .medium)) @@ -311,6 +355,55 @@ struct AudioPlayerView: View { .onAppear { playerManager.loadAudio(from: url) } + .overlay( + // Success notification + VStack { + if showRetranscribeSuccess { + HStack(spacing: 8) { + Image(systemName: "checkmark.circle.fill") + .foregroundColor(.green) + Text("Retranscription successful") + .font(.system(size: 14, weight: .medium)) + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.green.opacity(0.1)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.green.opacity(0.2), lineWidth: 1) + ) + ) + .transition(.move(edge: .top).combined(with: .opacity)) + } + + if showRetranscribeError { + HStack(spacing: 8) { + Image(systemName: "exclamationmark.circle.fill") + .foregroundColor(.red) + Text(errorMessage.isEmpty ? 
"Retranscription failed" : errorMessage) + .font(.system(size: 14, weight: .medium)) + } + .padding(.horizontal, 16) + .padding(.vertical, 10) + .background( + RoundedRectangle(cornerRadius: 8) + .fill(Color.red.opacity(0.1)) + .overlay( + RoundedRectangle(cornerRadius: 8) + .stroke(Color.red.opacity(0.2), lineWidth: 1) + ) + ) + .transition(.move(edge: .top).combined(with: .opacity)) + } + + Spacer() + } + .padding(.top, 16) + .animation(.spring(response: 0.3, dampingFraction: 0.7), value: showRetranscribeSuccess) + .animation(.spring(response: 0.3, dampingFraction: 0.7), value: showRetranscribeError) + ) } private func formatTime(_ time: TimeInterval) -> String { @@ -318,4 +411,57 @@ struct AudioPlayerView: View { let seconds = Int(time) % 60 return String(format: "%d:%02d", minutes, seconds) } + + private func retranscribeAudio() { + guard let currentModel = whisperState.currentModel else { + errorMessage = "No transcription model selected" + showRetranscribeError = true + + // Hide error after 3 seconds + DispatchQueue.main.asyncAfter(deadline: .now() + 3) { + withAnimation { + showRetranscribeError = false + } + } + return + } + + isRetranscribing = true + + Task { + do { + // Use the AudioTranscriptionService to retranscribe the audio + let _ = try await transcriptionService.retranscribeAudio( + from: url, + using: currentModel + ) + + // Show success notification + await MainActor.run { + isRetranscribing = false + showRetranscribeSuccess = true + + // Hide success after 3 seconds + DispatchQueue.main.asyncAfter(deadline: .now() + 3) { + withAnimation { + showRetranscribeSuccess = false + } + } + } + } catch { + await MainActor.run { + isRetranscribing = false + errorMessage = error.localizedDescription + showRetranscribeError = true + + // Hide error after 3 seconds + DispatchQueue.main.asyncAfter(deadline: .now() + 3) { + withAnimation { + showRetranscribeError = false + } + } + } + } + } + } } \ No newline at end of file diff --git 
a/VoiceInk/Views/TranscriptionHistoryView.swift b/VoiceInk/Views/TranscriptionHistoryView.swift index 07b69c0..45f1349 100644 --- a/VoiceInk/Views/TranscriptionHistoryView.swift +++ b/VoiceInk/Views/TranscriptionHistoryView.swift @@ -17,8 +17,8 @@ struct TranscriptionHistoryView: View { @State private var lastTimestamp: Date? private let pageSize = 20 - // Query for latest transcriptions (used only for monitoring new additions) - @Query(sort: \Transcription.timestamp, order: .reverse) + // Query for latest transcriptions (used for real-time updates) + @Query(sort: \Transcription.timestamp, order: .reverse, animation: .default) private var latestTranscriptions: [Transcription] // Cursor-based query descriptor @@ -130,9 +130,27 @@ struct TranscriptionHistoryView: View { await loadInitialContent() } } - // Monitor for new transcriptions - .onChange(of: latestTranscriptions) { _, newTranscriptions in - handleNewTranscriptions(newTranscriptions) + // Improved change detection for new transcriptions + .onChange(of: latestTranscriptions) { oldValue, newValue in + // Check if a new transcription was added + if !newValue.isEmpty && (oldValue.isEmpty || newValue[0].id != oldValue[0].id) { + // Only refresh if we're on the first page (no pagination cursor set) + if lastTimestamp == nil { + Task { + await loadInitialContent() + } + } else { + // If we're on a paginated view, show a notification or indicator that new content is available + // This could be a banner or button to "Show new transcriptions" + withAnimation { + // Reset pagination to show the latest content + Task { + await resetPagination() + await loadInitialContent() + } + } + } + } } } @@ -305,17 +323,6 @@ struct TranscriptionHistoryView: View { } } - // Simplified function to handle new transcriptions - private func handleNewTranscriptions(_ newTranscriptions: [Transcription]) { - // Only update if we're on the first page and not searching - // Only check the first item since we only care about the newest one - 
if lastTimestamp == nil && searchText.isEmpty && !newTranscriptions.isEmpty { - Task { - await loadInitialContent() - } - } - } - // Modified function to select all transcriptions in the database private func selectAllTranscriptions() async { do { diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index 7820a9b..0ec6c85 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -60,7 +60,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate { let modelsDirectory: URL let recordingsDirectory: URL - private let enhancementService: AIEnhancementService? + let enhancementService: AIEnhancementService? private let licenseViewModel: LicenseViewModel private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState") private var transcriptionStartTime: Date?