vOOice/VoiceInk/Services/AudioFileTranscriptionService.swift
2025-11-07 10:47:25 +05:45

209 lines
9.4 KiB
Swift

import Foundation
import SwiftUI
import AVFoundation
import SwiftData
import os
@MainActor
class AudioTranscriptionService: ObservableObject {
@Published var isTranscribing = false
@Published var currentError: TranscriptionError?
private let modelContext: ModelContext
private let enhancementService: AIEnhancementService?
private let whisperState: WhisperState
private let promptDetectionService = PromptDetectionService()
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionService")
// Transcription services
private let localTranscriptionService: LocalTranscriptionService
private lazy var cloudTranscriptionService = CloudTranscriptionService()
private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService()
private lazy var parakeetTranscriptionService = ParakeetTranscriptionService()
enum TranscriptionError: Error {
case noAudioFile
case transcriptionFailed
case modelNotLoaded
case invalidAudioFormat
}
init(modelContext: ModelContext, whisperState: WhisperState) {
self.modelContext = modelContext
self.whisperState = whisperState
self.enhancementService = whisperState.enhancementService
self.localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState)
}
func retranscribeAudio(from url: URL, using model: any TranscriptionModel) async throws -> Transcription {
guard FileManager.default.fileExists(atPath: url.path) else {
throw TranscriptionError.noAudioFile
}
await MainActor.run {
isTranscribing = true
}
do {
// Delegate transcription to appropriate service
let transcriptionStart = Date()
var text: String
switch model.provider {
case .local:
text = try await localTranscriptionService.transcribe(audioURL: url, model: model)
case .parakeet:
text = try await parakeetTranscriptionService.transcribe(audioURL: url, model: model)
case .nativeApple:
text = try await nativeAppleTranscriptionService.transcribe(audioURL: url, model: model)
default: // Cloud models
text = try await cloudTranscriptionService.transcribe(audioURL: url, model: model)
}
let transcriptionDuration = Date().timeIntervalSince(transcriptionStart)
text = TranscriptionOutputFilter.filter(text)
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
let powerModeManager = PowerModeManager.shared
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
text = WhisperTextFormatter.format(text)
}
text = WordReplacementService.shared.applyReplacements(to: text)
logger.notice("✅ Word replacements applied")
// Get audio duration
let audioAsset = AVURLAsset(url: url)
let duration = CMTimeGetSeconds(try await audioAsset.load(.duration))
// Create a permanent copy of the audio file
let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
.appendingPathComponent("Recordings")
let fileName = "retranscribed_\(UUID().uuidString).wav"
let permanentURL = recordingsDirectory.appendingPathComponent(fileName)
do {
try FileManager.default.copyItem(at: url, to: permanentURL)
} catch {
logger.error("❌ Failed to create permanent copy of audio: \(error.localizedDescription)")
isTranscribing = false
throw error
}
let permanentURLString = permanentURL.absoluteString
// Apply prompt detection for trigger words
let originalText = text
var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil
if let enhancementService = enhancementService, enhancementService.isConfigured {
let detectionResult = await promptDetectionService.analyzeText(text, with: enhancementService)
promptDetectionResult = detectionResult
await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService)
}
// Apply AI enhancement if enabled
if let enhancementService = enhancementService,
enhancementService.isEnhancementEnabled,
enhancementService.isConfigured {
do {
let textForAI = promptDetectionResult?.processedText ?? text
let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
let newTranscription = Transcription(
text: originalText,
duration: duration,
enhancedText: enhancedText,
audioFileURL: permanentURLString,
transcriptionModelName: model.displayName,
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
promptName: promptName,
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {
try modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
} catch {
logger.error("❌ Failed to save transcription: \(error.localizedDescription)")
}
// Restore original prompt settings if AI was temporarily enabled
if let result = promptDetectionResult,
result.shouldEnableAI {
await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
}
await MainActor.run {
isTranscribing = false
}
return newTranscription
} catch {
let newTranscription = Transcription(
text: originalText,
duration: duration,
audioFileURL: permanentURLString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {
try modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
} catch {
logger.error("❌ Failed to save transcription: \(error.localizedDescription)")
}
await MainActor.run {
isTranscribing = false
}
return newTranscription
}
} else {
let newTranscription = Transcription(
text: originalText,
duration: duration,
audioFileURL: permanentURLString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {
try modelContext.save()
} catch {
logger.error("❌ Failed to save transcription: \(error.localizedDescription)")
}
await MainActor.run {
isTranscribing = false
}
return newTranscription
}
} catch {
logger.error("❌ Transcription failed: \(error.localizedDescription)")
currentError = .transcriptionFailed
isTranscribing = false
throw error
}
}
}