feat: Add Power Mode display to transcription history

This commit is contained in:
wobondar 2025-09-25 21:22:52 +01:00
parent 89201fb7d3
commit 2105ae4844
No known key found for this signature in database
GPG Key ID: CEE21E23962E941A
6 changed files with 128 additions and 36 deletions

View File

@ -16,8 +16,22 @@ final class Transcription {
var enhancementDuration: TimeInterval?
var aiRequestSystemMessage: String?
var aiRequestUserMessage: String?
init(text: String, duration: TimeInterval, enhancedText: String? = nil, audioFileURL: String? = nil, transcriptionModelName: String? = nil, aiEnhancementModelName: String? = nil, promptName: String? = nil, transcriptionDuration: TimeInterval? = nil, enhancementDuration: TimeInterval? = nil, aiRequestSystemMessage: String? = nil, aiRequestUserMessage: String? = nil) {
var powerModeName: String?
var powerModeEmoji: String?
init(text: String,
duration: TimeInterval,
enhancedText: String? = nil,
audioFileURL: String? = nil,
transcriptionModelName: String? = nil,
aiEnhancementModelName: String? = nil,
promptName: String? = nil,
transcriptionDuration: TimeInterval? = nil,
enhancementDuration: TimeInterval? = nil,
aiRequestSystemMessage: String? = nil,
aiRequestUserMessage: String? = nil,
powerModeName: String? = nil,
powerModeEmoji: String? = nil) {
self.id = UUID()
self.text = text
self.enhancedText = enhancedText
@ -31,5 +45,7 @@ final class Transcription {
self.enhancementDuration = enhancementDuration
self.aiRequestSystemMessage = aiRequestSystemMessage
self.aiRequestUserMessage = aiRequestUserMessage
self.powerModeName = powerModeName
self.powerModeEmoji = powerModeEmoji
}
}

View File

@ -114,6 +114,11 @@ class AudioTranscriptionManager: ObservableObject {
text = WhisperHallucinationFilter.filter(text)
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
let powerModeManager = PowerModeManager.shared
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
text = WhisperTextFormatter.format(text)
}
@ -142,7 +147,9 @@ class AudioTranscriptionManager: ObservableObject {
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent
aiRequestUserMessage: enhancementService.lastUserMessageSent,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(transcription)
try modelContext.save()
@ -156,7 +163,9 @@ class AudioTranscriptionManager: ObservableObject {
audioFileURL: permanentURL.absoluteString,
transcriptionModelName: currentModel.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(transcription)
try modelContext.save()
@ -170,7 +179,9 @@ class AudioTranscriptionManager: ObservableObject {
audioFileURL: permanentURL.absoluteString,
transcriptionModelName: currentModel.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(transcription)
try modelContext.save()

View File

@ -64,6 +64,11 @@ class AudioTranscriptionService: ObservableObject {
text = WhisperHallucinationFilter.filter(text)
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
let powerModeManager = PowerModeManager.shared
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
if UserDefaults.standard.object(forKey: "IsTextFormattingEnabled") as? Bool ?? true {
text = WhisperTextFormatter.format(text)
}
@ -124,7 +129,9 @@ class AudioTranscriptionService: ObservableObject {
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent
aiRequestUserMessage: enhancementService.lastUserMessageSent,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {
@ -152,7 +159,9 @@ class AudioTranscriptionService: ObservableObject {
audioFileURL: permanentURLString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {
@ -175,7 +184,9 @@ class AudioTranscriptionService: ObservableObject {
audioFileURL: permanentURLString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
do {

View File

@ -24,26 +24,27 @@ class VoiceInkCSVExportService {
}
/// Serializes the given transcriptions into a CSV document.
///
/// Columns: original/enhanced text, enhancement model, prompt name,
/// transcription model, power mode, timing metadata, timestamp and duration.
/// Text fields are escaped via `escapeCSVString`; missing optionals become
/// empty cells, missing durations become 0.
/// - Parameter transcriptions: Rows to export, emitted in the order given.
/// - Returns: A CSV string with a header row followed by one row per transcription.
private func generateCSV(for transcriptions: [Transcription]) -> String {
    // Header includes the "Power Mode" column introduced with the power-mode metadata.
    var csvString = "Original Transcript,Enhanced Transcript,Enhancement Model,Prompt Name,Transcription Model,Power Mode,Enhancement Time,Transcription Time,Timestamp,Duration\n"
    for transcription in transcriptions {
        let originalText = escapeCSVString(transcription.text)
        let enhancedText = escapeCSVString(transcription.enhancedText ?? "")
        let enhancementModel = escapeCSVString(transcription.aiEnhancementModelName ?? "")
        let promptName = escapeCSVString(transcription.promptName ?? "")
        let transcriptionModel = escapeCSVString(transcription.transcriptionModelName ?? "")
        // Combined "emoji name" display; empty string when no power mode was active.
        let powerMode = escapeCSVString(powerModeDisplay(name: transcription.powerModeName, emoji: transcription.powerModeEmoji))
        let enhancementTime = transcription.enhancementDuration ?? 0
        let transcriptionTime = transcription.transcriptionDuration ?? 0
        let timestamp = transcription.timestamp.ISO8601Format()
        let duration = transcription.duration
        let row = "\(originalText),\(enhancedText),\(enhancementModel),\(promptName),\(transcriptionModel),\(powerMode),\(enhancementTime),\(transcriptionTime),\(timestamp),\(duration)\n"
        csvString.append(row)
    }
    return csvString
}
private func escapeCSVString(_ string: String) -> String {
let escapedString = string.replacingOccurrences(of: "\"", with: "\"\"")
if escapedString.contains(",") || escapedString.contains("\n") {
@ -51,4 +52,17 @@ class VoiceInkCSVExportService {
}
return escapedString
}
}
/// Builds the CSV cell text for a power mode from its optional name and emoji.
///
/// Both inputs are trimmed of surrounding whitespace; non-empty parts are
/// joined as "emoji name". A lone emoji or lone name is returned on its own,
/// and an empty string is returned when neither carries content.
private func powerModeDisplay(name: String?, emoji: String?) -> String {
    let parts = [emoji, name]
        .compactMap { $0?.trimmingCharacters(in: .whitespacesAndNewlines) }
        .filter { !$0.isEmpty }
    return parts.joined(separator: " ")
}
}

View File

@ -148,8 +148,18 @@ struct TranscriptionCard: View {
if isExpanded && hasMetadata {
Divider()
.padding(.vertical, 8)
VStack(alignment: .leading, spacing: 10) {
if let powerModeValue = powerModeDisplay(
name: transcription.powerModeName,
emoji: transcription.powerModeEmoji
) {
metadataRow(
icon: "bolt.fill",
label: "Power Mode",
value: powerModeValue
)
}
metadataRow(icon: "hourglass", label: "Audio Duration", value: formatTiming(transcription.duration))
if let modelName = transcription.transcriptionModelName {
metadataRow(icon: "cpu.fill", label: "Transcription Model", value: modelName)
@ -196,8 +206,10 @@ struct TranscriptionCard: View {
}
}
}
private var hasMetadata: Bool {
transcription.powerModeName != nil ||
transcription.powerModeEmoji != nil ||
transcription.transcriptionModelName != nil ||
transcription.aiEnhancementModelName != nil ||
transcription.promptName != nil ||
@ -233,4 +245,19 @@ struct TranscriptionCard: View {
.foregroundColor(.secondary)
}
}
/// Builds the metadata-row text for a power mode from its optional name and emoji.
///
/// Both inputs are trimmed of surrounding whitespace; when both have content
/// the result is "emoji name", otherwise whichever part is non-empty is
/// returned alone. Returns nil when neither carries content, so the caller's
/// `if let` skips the row entirely.
///
/// Note: the original `guard name != nil || emoji != nil` precheck was
/// redundant — the all-nil case already falls through to `return nil` below.
private func powerModeDisplay(name: String?, emoji: String?) -> String? {
    let trimmedEmoji = emoji?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    let trimmedName = name?.trimmingCharacters(in: .whitespacesAndNewlines) ?? ""
    switch (trimmedEmoji.isEmpty, trimmedName.isEmpty) {
    case (false, false):
        return "\(trimmedEmoji) \(trimmedName)"
    case (false, true):
        return trimmedEmoji
    case (true, false):
        return trimmedName
    case (true, true):
        return nil
    }
}
}

View File

@ -253,13 +253,13 @@ class WhisperState: NSObject, ObservableObject {
guard let model = currentTranscriptionModel else {
throw WhisperStateError.transcriptionFailed
}
let transcriptionService: TranscriptionService
switch model.provider {
case .local:
transcriptionService = localTranscriptionService
case .parakeet:
transcriptionService = parakeetTranscriptionService
case .parakeet:
transcriptionService = parakeetTranscriptionService
case .nativeApple:
transcriptionService = nativeAppleTranscriptionService
default:
@ -270,6 +270,11 @@ class WhisperState: NSObject, ObservableObject {
var text = try await transcriptionService.transcribe(audioURL: url, model: model)
text = WhisperHallucinationFilter.filter(text)
let transcriptionDuration = Date().timeIntervalSince(transcriptionStart)
let powerModeManager = PowerModeManager.shared
let activePowerModeConfig = powerModeManager.currentActiveConfiguration
let powerModeName = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.name : nil
let powerModeEmoji = (activePowerModeConfig?.isEnabled == true) ? activePowerModeConfig?.emoji : nil
if await checkCancellationAndCleanup() { return }
@ -303,19 +308,21 @@ class WhisperState: NSObject, ObservableObject {
await MainActor.run { self.recordingState = .enhancing }
let textForAI = promptDetectionResult?.processedText ?? text
let (enhancedText, enhancementDuration, promptName) = try await enhancementService.enhance(textForAI)
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
enhancedText: enhancedText,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
promptName: promptName,
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent
)
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
enhancedText: enhancedText,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
promptName: promptName,
transcriptionDuration: transcriptionDuration,
enhancementDuration: enhancementDuration,
aiRequestSystemMessage: enhancementService.lastSystemMessageSent,
aiRequestUserMessage: enhancementService.lastUserMessageSent,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: newTranscription)
@ -328,7 +335,9 @@ class WhisperState: NSObject, ObservableObject {
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
@ -348,7 +357,9 @@ class WhisperState: NSObject, ObservableObject {
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
promptName: nil,
transcriptionDuration: transcriptionDuration
transcriptionDuration: transcriptionDuration,
powerModeName: powerModeName,
powerModeEmoji: powerModeEmoji
)
modelContext.insert(newTranscription)
try? modelContext.save()
@ -404,7 +415,9 @@ class WhisperState: NSObject, ObservableObject {
duration: duration,
enhancedText: nil,
audioFileURL: url.absoluteString,
promptName: nil
promptName: nil,
powerModeName: nil,
powerModeEmoji: nil
)
modelContext.insert(failedTranscription)