Improced the recording/transcription flow
This commit is contained in:
parent
63db449355
commit
a37b95620c
@ -1,6 +1,12 @@
|
||||
import Foundation
|
||||
import SwiftData
|
||||
|
||||
enum TranscriptionStatus: String, Codable {
|
||||
case pending
|
||||
case completed
|
||||
case failed
|
||||
}
|
||||
|
||||
@Model
|
||||
final class Transcription {
|
||||
var id: UUID
|
||||
@ -18,6 +24,7 @@ final class Transcription {
|
||||
var aiRequestUserMessage: String?
|
||||
var powerModeName: String?
|
||||
var powerModeEmoji: String?
|
||||
var transcriptionStatus: String?
|
||||
|
||||
init(text: String,
|
||||
duration: TimeInterval,
|
||||
@ -31,7 +38,8 @@ final class Transcription {
|
||||
aiRequestSystemMessage: String? = nil,
|
||||
aiRequestUserMessage: String? = nil,
|
||||
powerModeName: String? = nil,
|
||||
powerModeEmoji: String? = nil) {
|
||||
powerModeEmoji: String? = nil,
|
||||
transcriptionStatus: TranscriptionStatus = .pending) {
|
||||
self.id = UUID()
|
||||
self.text = text
|
||||
self.enhancedText = enhancedText
|
||||
@ -47,5 +55,6 @@ final class Transcription {
|
||||
self.aiRequestUserMessage = aiRequestUserMessage
|
||||
self.powerModeName = powerModeName
|
||||
self.powerModeEmoji = powerModeEmoji
|
||||
self.transcriptionStatus = transcriptionStatus.rawValue
|
||||
}
|
||||
}
|
||||
|
||||
@ -136,7 +136,20 @@ class WhisperState: NSObject, ObservableObject {
|
||||
await recorder.stopRecording()
|
||||
if let recordedFile {
|
||||
if !shouldCancelRecording {
|
||||
await transcribeAudio(recordedFile)
|
||||
let audioAsset = AVURLAsset(url: recordedFile)
|
||||
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
|
||||
|
||||
let transcription = Transcription(
|
||||
text: "",
|
||||
duration: duration,
|
||||
audioFileURL: recordedFile.absoluteString,
|
||||
transcriptionStatus: .pending
|
||||
)
|
||||
modelContext.insert(transcription)
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
|
||||
|
||||
await transcribeAudio(on: transcription)
|
||||
} else {
|
||||
await MainActor.run {
|
||||
recordingState = .idle
|
||||
@ -215,7 +228,18 @@ class WhisperState: NSObject, ObservableObject {
|
||||
response(true)
|
||||
}
|
||||
|
||||
private func transcribeAudio(_ url: URL) async {
|
||||
private func transcribeAudio(on transcription: Transcription) async {
|
||||
guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else {
|
||||
logger.error("❌ Invalid audio file URL in transcription object.")
|
||||
await MainActor.run {
|
||||
recordingState = .idle
|
||||
}
|
||||
transcription.text = "Transcription Failed: Invalid audio file URL"
|
||||
transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
|
||||
try? modelContext.save()
|
||||
return
|
||||
}
|
||||
|
||||
if shouldCancelRecording {
|
||||
await MainActor.run {
|
||||
recordingState = .idle
|
||||
@ -223,11 +247,11 @@ class WhisperState: NSObject, ObservableObject {
|
||||
await cleanupModelResources()
|
||||
return
|
||||
}
|
||||
|
||||
|
||||
await MainActor.run {
|
||||
recordingState = .transcribing
|
||||
}
|
||||
|
||||
|
||||
// Play stop sound when transcription starts with a small delay
|
||||
Task {
|
||||
let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled")
|
||||
@ -238,7 +262,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
SoundManager.shared.playStopSound()
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
defer {
|
||||
if shouldCancelRecording {
|
||||
Task {
|
||||
@ -246,9 +270,12 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
logger.notice("🔄 Starting transcription...")
|
||||
|
||||
var finalPastedText: String?
|
||||
var promptDetectionResult: PromptDetectionService.PromptDetectionResult?
|
||||
|
||||
do {
|
||||
guard let model = currentTranscriptionModel else {
|
||||
throw WhisperStateError.transcriptionFailed
|
||||
@ -275,9 +302,9 @@ class WhisperState: NSObject, ObservableObject {
|
||||
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
|
||||
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
|
||||
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
|
||||
|
||||
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
|
||||
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
|
||||
@ -287,62 +314,43 @@ class WhisperState: NSObject, ObservableObject {
|
||||
if UserDefaults.standard.bool(forKey: "IsWordReplacementEnabled") {
|
||||
text = WordReplacementService.shared.applyReplacements(to: text)
|
||||
}
|
||||
|
||||
|
||||
let audioAsset = AVURLAsset(url: url)
|
||||
let actualDuration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
|
||||
var promptDetectionResult: PromptDetectionService.PromptDetectionResult? = nil
|
||||
let originalText = text
|
||||
|
||||
transcription.text = text
|
||||
transcription.duration = actualDuration
|
||||
transcription.transcriptionModelName = model.displayName
|
||||
transcription.transcriptionDuration = transcriptionDuration
|
||||
transcription.powerModeName = powerModeName
|
||||
transcription.powerModeEmoji = powerModeEmoji
|
||||
finalPastedText = text
|
||||
|
||||
if let enhancementService = enhancementService, enhancementService.isConfigured {
|
||||
let detectionResult = promptDetectionService.analyzeText(text, with: enhancementService)
|
||||
promptDetectionResult = detectionResult
|
||||
await promptDetectionService.applyDetectionResult(detectionResult, to: enhancementService)
|
||||
}
|
||||
|
||||
|
||||
if let enhancementService = enhancementService,
|
||||
enhancementService.isEnhancementEnabled,
|
||||
enhancementService.isConfigured {
|
||||
do {
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
await MainActor.run { self.recordingState = .enhancing }
|
||||
let textForAI = promptDetectionResult?.processedText ?? text
|
||||
await MainActor.run { self.recordingState = .enhancing }
|
||||
let textForAI = promptDetectionResult?.processedText ?? text
|
||||
|
||||
do {
|
||||
let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
|
||||
let newTranscription = Transcription(
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
enhancedText: enhancedText,
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
|
||||
promptName: promptName,
|
||||
transcriptionDuration: transcriptionDuration,
|
||||
enhancementDuration: enhancementDuration,
|
||||
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
|
||||
aiRequestUserMessage: enhancementService.lastUserMessageSent,
|
||||
powerModeName: powerModeName,
|
||||
powerModeEmoji: powerModeEmoji
|
||||
)
|
||||
modelContext.insert(newTranscription)
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
|
||||
text = enhancedText
|
||||
transcription.enhancedText = enhancedText
|
||||
transcription.aiEnhancementModelName = enhancementService.getAIService()?.currentModel
|
||||
transcription.promptName = promptName
|
||||
transcription.enhancementDuration = enhancementDuration
|
||||
transcription.aiRequestSystemMessage = enhancementService.lastSystemMessageSent
|
||||
transcription.aiRequestUserMessage = enhancementService.lastUserMessageSent
|
||||
finalPastedText = enhancedText
|
||||
} catch {
|
||||
let newTranscription = Transcription(
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
enhancedText: "Enhancement failed: \(error)",
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
promptName: nil,
|
||||
transcriptionDuration: transcriptionDuration,
|
||||
powerModeName: powerModeName,
|
||||
powerModeEmoji: powerModeEmoji
|
||||
)
|
||||
modelContext.insert(newTranscription)
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
|
||||
|
||||
transcription.enhancedText = "Enhancement failed: \(error)"
|
||||
await MainActor.run {
|
||||
NotificationManager.shared.showNotification(
|
||||
title: "AI enhancement failed",
|
||||
@ -350,38 +358,40 @@ class WhisperState: NSObject, ObservableObject {
|
||||
)
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let newTranscription = Transcription(
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
promptName: nil,
|
||||
transcriptionDuration: transcriptionDuration,
|
||||
powerModeName: powerModeName,
|
||||
powerModeEmoji: powerModeEmoji
|
||||
)
|
||||
modelContext.insert(newTranscription)
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
|
||||
}
|
||||
|
||||
|
||||
transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue
|
||||
|
||||
} catch {
|
||||
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
|
||||
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
|
||||
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
|
||||
|
||||
transcription.text = "Transcription Failed: \(fullErrorText)"
|
||||
transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
|
||||
}
|
||||
|
||||
// --- Finalize and save ---
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
|
||||
|
||||
if var textToPaste = finalPastedText, transcription.transcriptionStatus == TranscriptionStatus.completed.rawValue {
|
||||
if case .trialExpired = licenseViewModel.licenseState {
|
||||
text = """
|
||||
textToPaste = """
|
||||
Your trial has expired. Upgrade to VoiceInk Pro at tryvoiceink.com/buy
|
||||
\n\(text)
|
||||
\n\(textToPaste)
|
||||
"""
|
||||
}
|
||||
|
||||
|
||||
let shouldAddSpace = UserDefaults.standard.object(forKey: "AppendTrailingSpace") as? Bool ?? true
|
||||
if shouldAddSpace {
|
||||
text += " "
|
||||
textToPaste += " "
|
||||
}
|
||||
|
||||
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
|
||||
CursorPaster.pasteAtCursor(text)
|
||||
CursorPaster.pasteAtCursor(textToPaste)
|
||||
|
||||
let powerMode = PowerModeManager.shared
|
||||
if let activeConfig = powerMode.currentActiveConfiguration, activeConfig.isAutoSendEnabled {
|
||||
@ -391,52 +401,15 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let result = promptDetectionResult,
|
||||
let enhancementService = enhancementService,
|
||||
result.shouldEnableAI {
|
||||
await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
|
||||
}
|
||||
|
||||
await self.dismissMiniRecorder()
|
||||
|
||||
} catch {
|
||||
do {
|
||||
let audioAsset = AVURLAsset(url: url)
|
||||
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
|
||||
|
||||
await MainActor.run {
|
||||
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
|
||||
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
|
||||
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
|
||||
|
||||
let failedTranscription = Transcription(
|
||||
text: "Transcription Failed: \(fullErrorText)",
|
||||
duration: duration,
|
||||
enhancedText: nil,
|
||||
audioFileURL: url.absoluteString,
|
||||
promptName: nil,
|
||||
powerModeName: nil,
|
||||
powerModeEmoji: nil
|
||||
)
|
||||
|
||||
modelContext.insert(failedTranscription)
|
||||
try? modelContext.save()
|
||||
NotificationCenter.default.post(name: .transcriptionCreated, object: failedTranscription)
|
||||
}
|
||||
} catch {
|
||||
logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)")
|
||||
}
|
||||
|
||||
await MainActor.run {
|
||||
NotificationManager.shared.showNotification(
|
||||
title: "Transcription Failed",
|
||||
type: .error
|
||||
)
|
||||
}
|
||||
|
||||
await self.dismissMiniRecorder()
|
||||
}
|
||||
|
||||
if let result = promptDetectionResult,
|
||||
let enhancementService = enhancementService,
|
||||
result.shouldEnableAI {
|
||||
await promptDetectionService.restoreOriginalSettings(result, to: enhancementService)
|
||||
}
|
||||
|
||||
await self.dismissMiniRecorder()
|
||||
}
|
||||
|
||||
func getEnhancementService() -> AIEnhancementService? {
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user