Retranscription Ability
This commit is contained in:
parent
6aafc992e2
commit
8b1e27e1cd
201
VoiceInk/Services/AudioTranscriptionService.swift
Normal file
201
VoiceInk/Services/AudioTranscriptionService.swift
Normal file
@ -0,0 +1,201 @@
|
||||
import Foundation
|
||||
import SwiftUI
|
||||
import AVFoundation
|
||||
import SwiftData
|
||||
import os
|
||||
|
||||
@MainActor
class AudioTranscriptionService: ObservableObject {
    /// True while a retranscription is running (drives UI progress state).
    @Published var isTranscribing = false
    /// Human-readable progress log, appended to as the transcription proceeds.
    @Published var messageLog = ""
    /// Most recent failure, if any, for the UI to surface.
    @Published var currentError: TranscriptionError?

    // Whisper context is created lazily on first use and reused afterwards.
    private var whisperContext: WhisperContext?
    private let modelContext: ModelContext
    private let enhancementService: AIEnhancementService?
    private let whisperState: WhisperState
    private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionService")

    enum TranscriptionError: Error {
        case noAudioFile
        case transcriptionFailed
        case modelNotLoaded
        case invalidAudioFormat
    }

    /// - Parameters:
    ///   - modelContext: SwiftData context used to persist `Transcription` records.
    ///   - whisperState: Shared app state; supplies the prompt and enhancement service.
    init(modelContext: ModelContext, whisperState: WhisperState) {
        self.modelContext = modelContext
        self.whisperState = whisperState
        self.enhancementService = whisperState.enhancementService
    }

    /// Retranscribes the audio file at `url` with `whisperModel`, optionally
    /// enhances the text with AI, persists a new `Transcription`, and returns it.
    /// - Throws: `TranscriptionError.noAudioFile` if the file is missing,
    ///   `.modelNotLoaded` if the whisper context cannot be created,
    ///   or the underlying error if copying/transcribing fails.
    func retranscribeAudio(from url: URL, using whisperModel: WhisperModel) async throws -> Transcription {
        guard FileManager.default.fileExists(atPath: url.path) else {
            throw TranscriptionError.noAudioFile
        }

        isTranscribing = true
        messageLog = "Loading model...\n"
        // Whatever happens below (including thrown errors), never leave the UI
        // stuck in the "transcribing" state.
        defer { isTranscribing = false }

        // Load the whisper model once; reuse the cached context on later calls.
        if whisperContext == nil {
            do {
                whisperContext = try await WhisperContext.createContext(path: whisperModel.url.path)
                messageLog += "Model loaded successfully.\n"
            } catch {
                logger.error("❌ Failed to load model: \(error.localizedDescription)")
                messageLog += "Failed to load model: \(error.localizedDescription)\n"
                currentError = .modelNotLoaded
                throw TranscriptionError.modelNotLoaded
            }
        }

        guard let whisperContext = whisperContext else {
            currentError = .modelNotLoaded
            throw TranscriptionError.modelNotLoaded
        }

        // Get audio duration (seconds) for the persisted record.
        let audioAsset = AVURLAsset(url: url)
        var duration: TimeInterval = 0

        if #available(macOS 13.0, *) {
            let durationValue = try await audioAsset.load(.duration)
            duration = CMTimeGetSeconds(durationValue)
        } else {
            duration = CMTimeGetSeconds(audioAsset.duration)
        }

        // Create a permanent copy of the audio file so the transcription record
        // keeps a stable file reference even if the source is temporary.
        let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
            .appendingPathComponent("com.prakashjoshipax.VoiceInk")
            .appendingPathComponent("Recordings")

        let fileName = "retranscribed_\(UUID().uuidString).wav"
        let permanentURL = recordingsDirectory.appendingPathComponent(fileName)

        do {
            // copyItem does not create intermediate directories; ensure the
            // destination folder exists first (first run, cleared caches, etc.).
            try FileManager.default.createDirectory(at: recordingsDirectory, withIntermediateDirectories: true)
            try FileManager.default.copyItem(at: url, to: permanentURL)
        } catch {
            logger.error("❌ Failed to create permanent copy of audio: \(error.localizedDescription)")
            messageLog += "Failed to create permanent copy of audio: \(error.localizedDescription)\n"
            throw error
        }

        let permanentURLString = permanentURL.absoluteString

        messageLog += "Transcribing audio...\n"

        do {
            // Read audio samples from the permanent copy.
            let samples = try readAudioSamples(permanentURL)

            // Use the same prompt WhisperState uses for live transcription.
            messageLog += "Setting prompt: \(whisperState.whisperPrompt.transcriptionPrompt)\n"
            await whisperContext.setPrompt(whisperState.whisperPrompt.transcriptionPrompt)

            try await whisperContext.fullTranscribe(samples: samples)
            var text = await whisperContext.getTranscription()
            text = text.trimmingCharacters(in: .whitespacesAndNewlines)
            logger.notice("✅ Retranscription completed successfully, length: \(text.count) characters")

            // Optionally enhance with AI; any enhancement failure falls back
            // to the raw transcription rather than failing the whole operation.
            var enhancedText: String? = nil
            if let enhancementService = enhancementService,
               enhancementService.isEnhancementEnabled,
               enhancementService.isConfigured {
                do {
                    messageLog += "Enhancing transcription with AI...\n"
                    enhancedText = try await enhancementService.enhance(text)
                    messageLog += "Enhancement completed.\n"
                } catch {
                    messageLog += "Enhancement failed: \(error.localizedDescription). Using original transcription.\n"
                }
            }

            return persistTranscription(text: text,
                                        duration: duration,
                                        enhancedText: enhancedText,
                                        audioFileURL: permanentURLString)
        } catch {
            logger.error("❌ Transcription failed: \(error.localizedDescription)")
            messageLog += "Transcription failed: \(error.localizedDescription)\n"
            currentError = .transcriptionFailed
            throw error
        }
    }

    /// Creates a `Transcription`, inserts it into the model context, attempts a
    /// save (save failures are logged, not thrown), logs completion, and returns it.
    private func persistTranscription(text: String,
                                      duration: TimeInterval,
                                      enhancedText: String?,
                                      audioFileURL: String) -> Transcription {
        let newTranscription: Transcription
        if let enhancedText {
            newTranscription = Transcription(
                text: text,
                duration: duration,
                enhancedText: enhancedText,
                audioFileURL: audioFileURL
            )
        } else {
            newTranscription = Transcription(
                text: text,
                duration: duration,
                audioFileURL: audioFileURL
            )
        }
        modelContext.insert(newTranscription)
        do {
            try modelContext.save()
        } catch {
            logger.error("❌ Failed to save transcription: \(error.localizedDescription)")
            messageLog += "Failed to save transcription: \(error.localizedDescription)\n"
        }
        messageLog += "Done: \(enhancedText ?? text)\n"
        return newTranscription
    }

    private func readAudioSamples(_ url: URL) throws -> [Float] {
        return try decodeWaveFile(url)
    }

    /// Decodes 16-bit little-endian PCM samples from a WAV file, assuming the
    /// canonical 44-byte header. No chunk/format validation is performed —
    /// NOTE(review): presumably all recordings are written in this exact format;
    /// confirm against the recorder before feeding arbitrary WAV files in.
    private func decodeWaveFile(_ url: URL) throws -> [Float] {
        let data = try Data(contentsOf: url)
        let headerSize = 44
        // Require at least one complete 16-bit sample after the header.
        guard data.count >= headerSize + 2 else {
            throw TranscriptionError.invalidAudioFormat
        }
        var floats = [Float]()
        floats.reserveCapacity((data.count - headerSize) / 2)
        // Stop one byte early so every index has a full 2-byte sample available;
        // striding `to: data.count` would read past the end on odd-sized files.
        for offset in stride(from: headerSize, to: data.count - 1, by: 2) {
            let short = data[offset..<offset + 2].withUnsafeBytes {
                Int16(littleEndian: $0.load(as: Int16.self))
            }
            // Normalize to [-1, 1]; clamp to guard against Int16.min overshoot.
            floats.append(max(-1.0, min(Float(short) / 32767.0, 1.0)))
        }
        return floats
    }
}
|
||||
@ -237,6 +237,22 @@ struct AudioPlayerView: View {
|
||||
@StateObject private var playerManager = AudioPlayerManager()
|
||||
@State private var isHovering = false
|
||||
@State private var showingTooltip = false
|
||||
@State private var isRetranscribing = false
|
||||
@State private var showRetranscribeSuccess = false
|
||||
@State private var showRetranscribeError = false
|
||||
@State private var errorMessage = ""
|
||||
|
||||
// Add environment objects for retranscription
|
||||
@EnvironmentObject private var whisperState: WhisperState
|
||||
@Environment(\.modelContext) private var modelContext
|
||||
|
||||
// Create the audio transcription service lazily
|
||||
private var transcriptionService: AudioTranscriptionService {
|
||||
AudioTranscriptionService(
|
||||
modelContext: modelContext,
|
||||
whisperState: whisperState
|
||||
)
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack(spacing: 16) {
|
||||
@ -298,6 +314,34 @@ struct AudioPlayerView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// Add Retranscribe button
|
||||
Button(action: {
|
||||
retranscribeAudio()
|
||||
}) {
|
||||
Circle()
|
||||
.fill(Color.green.opacity(0.1))
|
||||
.frame(width: 44, height: 44)
|
||||
.overlay(
|
||||
Group {
|
||||
if isRetranscribing {
|
||||
ProgressView()
|
||||
.controlSize(.small)
|
||||
} else if showRetranscribeSuccess {
|
||||
Image(systemName: "checkmark")
|
||||
.font(.system(size: 18, weight: .semibold))
|
||||
.foregroundStyle(Color.green)
|
||||
} else {
|
||||
Image(systemName: "arrow.clockwise")
|
||||
.font(.system(size: 18, weight: .semibold))
|
||||
.foregroundStyle(Color.green)
|
||||
}
|
||||
}
|
||||
)
|
||||
}
|
||||
.buttonStyle(.plain)
|
||||
.disabled(isRetranscribing)
|
||||
.help("Retranscribe this audio")
|
||||
|
||||
// Time
|
||||
Text(formatTime(playerManager.currentTime))
|
||||
.font(.system(size: 14, weight: .medium))
|
||||
@ -311,6 +355,55 @@ struct AudioPlayerView: View {
|
||||
.onAppear {
|
||||
playerManager.loadAudio(from: url)
|
||||
}
|
||||
.overlay(
|
||||
// Success notification
|
||||
VStack {
|
||||
if showRetranscribeSuccess {
|
||||
HStack(spacing: 8) {
|
||||
Image(systemName: "checkmark.circle.fill")
|
||||
.foregroundColor(.green)
|
||||
Text("Retranscription successful")
|
||||
.font(.system(size: 14, weight: .medium))
|
||||
}
|
||||
.padding(.horizontal, 16)
|
||||
.padding(.vertical, 10)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.green.opacity(0.1))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.stroke(Color.green.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
}
|
||||
|
||||
if showRetranscribeError {
|
||||
HStack(spacing: 8) {
|
||||
Image(systemName: "exclamationmark.circle.fill")
|
||||
.foregroundColor(.red)
|
||||
Text(errorMessage.isEmpty ? "Retranscription failed" : errorMessage)
|
||||
.font(.system(size: 14, weight: .medium))
|
||||
}
|
||||
.padding(.horizontal, 16)
|
||||
.padding(.vertical, 10)
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.red.opacity(0.1))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.stroke(Color.red.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
}
|
||||
|
||||
Spacer()
|
||||
}
|
||||
.padding(.top, 16)
|
||||
.animation(.spring(response: 0.3, dampingFraction: 0.7), value: showRetranscribeSuccess)
|
||||
.animation(.spring(response: 0.3, dampingFraction: 0.7), value: showRetranscribeError)
|
||||
)
|
||||
}
|
||||
|
||||
private func formatTime(_ time: TimeInterval) -> String {
|
||||
@ -318,4 +411,57 @@ struct AudioPlayerView: View {
|
||||
let seconds = Int(time) % 60
|
||||
return String(format: "%d:%02d", minutes, seconds)
|
||||
}
|
||||
|
||||
private func retranscribeAudio() {
    // Auto-dismisses the error banner after 3 seconds. Declared once here to
    // replace the three verbatim copies of this closure in the original.
    func scheduleErrorDismissal() {
        DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
            withAnimation {
                showRetranscribeError = false
            }
        }
    }

    guard let currentModel = whisperState.currentModel else {
        errorMessage = "No transcription model selected"
        showRetranscribeError = true
        scheduleErrorDismissal()
        return
    }

    isRetranscribing = true

    Task {
        do {
            // Use the AudioTranscriptionService to retranscribe the audio.
            // NOTE(review): `transcriptionService` is a computed property, so a
            // fresh service (and model load) is created per call — consider
            // caching it in a @StateObject; verify against the view's declaration.
            let _ = try await transcriptionService.retranscribeAudio(
                from: url,
                using: currentModel
            )

            // Show success notification, auto-dismissed after 3 seconds.
            await MainActor.run {
                isRetranscribing = false
                showRetranscribeSuccess = true

                DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
                    withAnimation {
                        showRetranscribeSuccess = false
                    }
                }
            }
        } catch {
            // Surface the failure, auto-dismissed after 3 seconds.
            await MainActor.run {
                isRetranscribing = false
                errorMessage = error.localizedDescription
                showRetranscribeError = true
                scheduleErrorDismissal()
            }
        }
    }
}
|
||||
}
|
||||
@ -17,8 +17,8 @@ struct TranscriptionHistoryView: View {
|
||||
@State private var lastTimestamp: Date?
|
||||
private let pageSize = 20
|
||||
|
||||
// Query for latest transcriptions (used only for monitoring new additions)
|
||||
@Query(sort: \Transcription.timestamp, order: .reverse)
|
||||
// Query for latest transcriptions (used for real-time updates)
|
||||
@Query(sort: \Transcription.timestamp, order: .reverse, animation: .default)
|
||||
private var latestTranscriptions: [Transcription]
|
||||
|
||||
// Cursor-based query descriptor
|
||||
@ -130,9 +130,27 @@ struct TranscriptionHistoryView: View {
|
||||
await loadInitialContent()
|
||||
}
|
||||
}
|
||||
// Monitor for new transcriptions
|
||||
.onChange(of: latestTranscriptions) { _, newTranscriptions in
|
||||
handleNewTranscriptions(newTranscriptions)
|
||||
// Improved change detection for new transcriptions
|
||||
.onChange(of: latestTranscriptions) { oldValue, newValue in
|
||||
// Check if a new transcription was added
|
||||
if !newValue.isEmpty && (oldValue.isEmpty || newValue[0].id != oldValue[0].id) {
|
||||
// Only refresh if we're on the first page (no pagination cursor set)
|
||||
if lastTimestamp == nil {
|
||||
Task {
|
||||
await loadInitialContent()
|
||||
}
|
||||
} else {
|
||||
// If we're on a paginated view, show a notification or indicator that new content is available
|
||||
// This could be a banner or button to "Show new transcriptions"
|
||||
withAnimation {
|
||||
// Reset pagination to show the latest content
|
||||
Task {
|
||||
await resetPagination()
|
||||
await loadInitialContent()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@ -305,17 +323,6 @@ struct TranscriptionHistoryView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// Simplified function to handle new transcriptions
|
||||
private func handleNewTranscriptions(_ newTranscriptions: [Transcription]) {
    // Refresh only when viewing the unfiltered first page (no pagination
    // cursor, no active search) and there is actually something new to show.
    guard lastTimestamp == nil, searchText.isEmpty, !newTranscriptions.isEmpty else {
        return
    }
    Task {
        await loadInitialContent()
    }
}
|
||||
|
||||
// Modified function to select all transcriptions in the database
|
||||
private func selectAllTranscriptions() async {
|
||||
do {
|
||||
|
||||
@ -60,7 +60,7 @@ class WhisperState: NSObject, ObservableObject, AVAudioRecorderDelegate {
|
||||
|
||||
let modelsDirectory: URL
|
||||
let recordingsDirectory: URL
|
||||
private let enhancementService: AIEnhancementService?
|
||||
let enhancementService: AIEnhancementService?
|
||||
private let licenseViewModel: LicenseViewModel
|
||||
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperState")
|
||||
private var transcriptionStartTime: Date?
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user