Improced the recording/transcription flow

This commit is contained in:
Beingpax 2025-09-28 20:54:05 +05:45
parent 63db449355
commit a37b95620c
2 changed files with 100 additions and 118 deletions

View File

@ -1,6 +1,12 @@
import Foundation
import SwiftData
enum TranscriptionStatus: String, Codable {
case pending
case completed
case failed
}
@Model
final class Transcription {
var id: UUID
@ -18,6 +24,7 @@ final class Transcription {
var aiRequestUserMessage: String?
var powerModeName: String?
var powerModeEmoji: String?
var transcriptionStatus: String?
init(text: String,
duration: TimeInterval,
@ -31,7 +38,8 @@ final class Transcription {
aiRequestSystemMessage: String? = nil,
aiRequestUserMessage: String? = nil,
powerModeName: String? = nil,
powerModeEmoji: String? = nil) {
powerModeEmoji: String? = nil,
transcriptionStatus: TranscriptionStatus = .pending) {
self.id = UUID()
self.text = text
self.enhancedText = enhancedText
@ -47,5 +55,6 @@ final class Transcription {
self.aiRequestUserMessage = aiRequestUserMessage
self.powerModeName = powerModeName
self.powerModeEmoji = powerModeEmoji
self.transcriptionStatus = transcriptionStatus.rawValue
}
}

View File

@ -136,7 +136,20 @@ class WhisperState: NSObject, ObservableObject {
await recorder.stopRecording()
if let recordedFile {
if !shouldCancelRecording {
await transcribeAudio(recordedFile)
let audioAsset = AVURLAsset(url: recordedFile)
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
let transcription = Transcription(
text: "",
duration: duration,
audioFileURL: recordedFile.absoluteString,
transcriptionStatus: .pending
)
modelContext.insert(transcription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
await transcribeAudio(on: transcription)
} else {
await MainActor.run {
recordingState = .idle
@ -215,7 +228,18 @@ class WhisperState: NSObject, ObservableObject {
response(true)
}
private func transcribeAudio(_ url: URL) async {
private func transcribeAudio(on transcription: Transcription) async {
guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else {
logger.error("❌ Invalid audio file URL in transcription object.")
await MainActor.run {
recordingState = .idle
}
transcription.text = "Transcription Failed: Invalid audio file URL"
transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
try? modelContext.save()
return
}
if shouldCancelRecording {
await MainActor.run {
recordingState = .idle
@ -223,11 +247,11 @@ class WhisperState: NSObject, ObservableObject {
await cleanupModelResources()
return
}
await MainActor.run {
recordingState = .transcribing
}
// Play stop sound when transcription starts with a small delay
Task {
let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled")
@ -238,7 +262,7 @@ class WhisperState: NSObject, ObservableObject {
SoundManager.shared.playStopSound()
}
}
defer {
if shouldCancelRecording {
Task {
@ -246,9 +270,12 @@ class WhisperState: NSObject, ObservableObject {
}
}
}
logger.notice("🔄 Starting transcription...")
var finalPastedText: String?
var promptDetectionResult: PromptDetectionService.PromptDetectionResult?
do {
guard let model = currentTranscriptionModel else {
throw WhisperStateError.transcriptionFailed
@ -275,9 +302,9 @@ class WhisperState: NSObject, ObservableObject {
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
if await checkCancellationAndCleanup() { return }
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
@ -287,62 +314,43 @@ class WhisperState: NSObject, ObservableObject {
if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") {
text = WordReplacementService.shared.applyReplacements(to: text)
}
let audioAsset = AVURLAsset(url: url)
let actualDuration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil
let originalText = text
transcription.text = text
transcription.duration = actualDuration
transcription.transcriptionModelName = model.displayName
transcription.transcriptionDuration = transcriptionDuration
transcription.powerModeName = powerModeName
transcription.powerModeEmoji = powerModeEmoji
finalPastedText = text
if let enhancementService = enhancementService, enhancementService.isConfigured {
let detectionResult = promptDetectionService.analyzeText(text, with: enhancementService)
promptDetectionResult = detectionResult
await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService)
}
if let enhancementService = enhancementService,
enhancementService.isEnhancementEnabled,
enhancementService.isConfigured {
do {
if await checkCancellationAndCleanup() { return }
if await checkCancellationAndCleanup() { return }
await MainActor.run { self.recordingState = .enhancing }
let textForAI = promptDetectionResult?.processedText ?? text
await MainActor.run { self.recordingState = .enhancing }
let textForAI = promptDetectionResult?.processedText ?? text
do {
let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
enhancedText: enhancedText,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
promptName: promptName,
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
text = enhancedText
transcription.enhancedText = enhancedText
transcription.aiEnhancementModelName = enhancementService.getAIService()?.currentModel
transcription.promptName = promptName
transcription.enhancementDuration = enhancementDuration
transcription.aiRequestSystemMessage = enhancementService.lastSystemMessageSent
transcription.aiRequestUserMessage = enhancementService.lastUserMessageSent
finalPastedText = enhancedText
} catch {
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
enhancedText: "Enhancement failed: \(error)",
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
transcription.enhancedText = "Enhancement failed: \(error)"
await MainActor.run {
NotificationManager.shared.showNotification(
title: "AI enhancement failed",
@ -350,38 +358,40 @@ class WhisperState: NSObject, ObservableObject {
)
}
}
} else {
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
}
transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue
} catch {
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
transcription.text = "Transcription Failed: \(fullErrorText)"
transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
}
// --- Finalize and save ---
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
if var textToPaste = finalPastedText, transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue {
if case .trialExpired = licenseViewModel.licenseState {
text = """
textToPaste = """
Your trial has expired. Upgrade to VoiceInk Pro at tryvoiceink.com/buy
\n\(text)
\n\(textToPaste)
"""
}
let shouldAddSpace = UserDefaults.standard.object(forKey: "AppendTrailingSpace") as? Bool ?? true
if shouldAddSpace {
text += " "
textToPaste += " "
}
if await checkCancellationAndCleanup() { return }
DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
CursorPaster.pasteAtCursor(text)
CursorPaster.pasteAtCursor(textToPaste)
let powerMode = PowerModeManager.shared
if let activeConfig = powerMode.currentActiveConfiguration, activeConfig.isAutoSendEnabled {
@ -391,52 +401,15 @@ class WhisperState: NSObject, ObservableObject {
}
}
}
if let result = promptDetectionResult,
let enhancementService = enhancementService,
result.shouldEnableAI {
await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
}
await self.dismissMiniRecorder()
} catch {
do {
let audioAsset = AVURLAsset(url: url)
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
await MainActor.run {
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
let failedTranscription = Transcription(
text: "Transcription Failed: \(fullErrorText)",
duration: duration,
enhancedText: nil,
audioFileURL: url.absoluteString,
promptName: nil,
powerModeName: nil,
powerModeEmoji: nil
)
modelContext.insert(failedTranscription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: failedTranscription)
}
} catch {
logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)")
}
await MainActor.run {
NotificationManager.shared.showNotification(
title: "Transcription Failed",
type: .error
)
}
await self.dismissMiniRecorder()
}
if let result = promptDetectionResult,
let enhancementService = enhancementService,
result.shouldEnableAI {
await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
}
await self.dismissMiniRecorder()
}
func getEnhancementService() -> AIEnhancementService? {