Transcribe Audio Files
This commit is contained in:
parent
8b1e27e1cd
commit
4ceccb0990
@ -468,7 +468,7 @@
|
||||
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
CURRENT_PROJECT_VERSION = 0.99;
|
||||
CURRENT_PROJECT_VERSION = 112;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = V6J6A3VWY2;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
@ -483,7 +483,7 @@
|
||||
"@executable_path/../Frameworks",
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 14.0;
|
||||
MARKETING_VERSION = 0.99;
|
||||
MARKETING_VERSION = 1.12;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
@ -501,7 +501,7 @@
|
||||
"CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development";
|
||||
CODE_SIGN_STYLE = Automatic;
|
||||
COMBINE_HIDPI_IMAGES = YES;
|
||||
CURRENT_PROJECT_VERSION = 0.99;
|
||||
CURRENT_PROJECT_VERSION = 112;
|
||||
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
|
||||
DEVELOPMENT_TEAM = V6J6A3VWY2;
|
||||
ENABLE_HARDENED_RUNTIME = YES;
|
||||
@ -516,7 +516,7 @@
|
||||
"@executable_path/../Frameworks",
|
||||
);
|
||||
MACOSX_DEPLOYMENT_TARGET = 14.0;
|
||||
MARKETING_VERSION = 0.99;
|
||||
MARKETING_VERSION = 1.12;
|
||||
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
|
||||
PRODUCT_NAME = "$(TARGET_NAME)";
|
||||
SWIFT_EMIT_LOC_STRINGS = YES;
|
||||
|
||||
176
VoiceInk/Services/AudioProcessor.swift
Normal file
176
VoiceInk/Services/AudioProcessor.swift
Normal file
@ -0,0 +1,176 @@
|
||||
import Foundation
|
||||
import AVFoundation
|
||||
import os
|
||||
|
||||
class AudioProcessor {
|
||||
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioProcessor")
|
||||
|
||||
struct AudioFormat {
|
||||
static let targetSampleRate: Double = 16000.0
|
||||
static let targetChannels: UInt32 = 1
|
||||
static let targetBitDepth: UInt32 = 16
|
||||
}
|
||||
|
||||
enum AudioProcessingError: LocalizedError {
|
||||
case invalidAudioFile
|
||||
case conversionFailed
|
||||
case exportFailed
|
||||
case unsupportedFormat
|
||||
case sampleExtractionFailed
|
||||
|
||||
var errorDescription: String? {
|
||||
switch self {
|
||||
case .invalidAudioFile:
|
||||
return "The audio file is invalid or corrupted"
|
||||
case .conversionFailed:
|
||||
return "Failed to convert the audio format"
|
||||
case .exportFailed:
|
||||
return "Failed to export the processed audio"
|
||||
case .unsupportedFormat:
|
||||
return "The audio format is not supported"
|
||||
case .sampleExtractionFailed:
|
||||
return "Failed to extract audio samples"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Process audio file and return samples ready for Whisper
|
||||
/// - Parameter url: URL of the input audio file
|
||||
/// - Returns: Array of normalized float samples
|
||||
func processAudioToSamples(_ url: URL) async throws -> [Float] {
|
||||
logger.notice("🎵 Processing audio file to samples: \(url.lastPathComponent)")
|
||||
|
||||
// Create AVAudioFile from input
|
||||
guard let audioFile = try? AVAudioFile(forReading: url) else {
|
||||
logger.error("❌ Failed to create AVAudioFile from input")
|
||||
throw AudioProcessingError.invalidAudioFile
|
||||
}
|
||||
|
||||
// Get format information
|
||||
let format = audioFile.processingFormat
|
||||
let sampleRate = format.sampleRate
|
||||
let channels = format.channelCount
|
||||
|
||||
logger.notice("📊 Input format - Sample Rate: \(sampleRate), Channels: \(channels)")
|
||||
|
||||
// Create output format (always 16kHz mono float)
|
||||
let outputFormat = AVAudioFormat(
|
||||
commonFormat: .pcmFormatFloat32,
|
||||
sampleRate: AudioFormat.targetSampleRate,
|
||||
channels: AudioFormat.targetChannels,
|
||||
interleaved: false
|
||||
)
|
||||
|
||||
guard let outputFormat = outputFormat else {
|
||||
logger.error("❌ Failed to create output format")
|
||||
throw AudioProcessingError.unsupportedFormat
|
||||
}
|
||||
|
||||
// Read input file into buffer
|
||||
let inputBuffer = AVAudioPCMBuffer(
|
||||
pcmFormat: format,
|
||||
frameCapacity: AVAudioFrameCount(audioFile.length)
|
||||
)
|
||||
|
||||
guard let inputBuffer = inputBuffer else {
|
||||
logger.error("❌ Failed to create input buffer")
|
||||
throw AudioProcessingError.conversionFailed
|
||||
}
|
||||
|
||||
try audioFile.read(into: inputBuffer)
|
||||
|
||||
// If format matches our target, just convert to samples
|
||||
if sampleRate == AudioFormat.targetSampleRate && channels == AudioFormat.targetChannels {
|
||||
logger.notice("✅ Audio format already matches requirements")
|
||||
return convertToWhisperFormat(inputBuffer)
|
||||
}
|
||||
|
||||
// Create converter for format conversion
|
||||
guard let converter = AVAudioConverter(from: format, to: outputFormat) else {
|
||||
logger.error("❌ Failed to create audio converter")
|
||||
throw AudioProcessingError.conversionFailed
|
||||
}
|
||||
|
||||
// Create output buffer
|
||||
let ratio = AudioFormat.targetSampleRate / sampleRate
|
||||
let outputBuffer = AVAudioPCMBuffer(
|
||||
pcmFormat: outputFormat,
|
||||
frameCapacity: AVAudioFrameCount(Double(inputBuffer.frameLength) * ratio)
|
||||
)
|
||||
|
||||
guard let outputBuffer = outputBuffer else {
|
||||
logger.error("❌ Failed to create output buffer")
|
||||
throw AudioProcessingError.conversionFailed
|
||||
}
|
||||
|
||||
// Perform conversion
|
||||
var error: NSError?
|
||||
let status = converter.convert(
|
||||
to: outputBuffer,
|
||||
error: &error,
|
||||
withInputFrom: { inNumPackets, outStatus in
|
||||
outStatus.pointee = .haveData
|
||||
return inputBuffer
|
||||
}
|
||||
)
|
||||
|
||||
if let error = error {
|
||||
logger.error("❌ Conversion failed: \(error.localizedDescription)")
|
||||
throw AudioProcessingError.conversionFailed
|
||||
}
|
||||
|
||||
if status == .error {
|
||||
logger.error("❌ Conversion failed with status: error")
|
||||
throw AudioProcessingError.conversionFailed
|
||||
}
|
||||
|
||||
logger.notice("✅ Successfully converted audio format")
|
||||
return convertToWhisperFormat(outputBuffer)
|
||||
}
|
||||
|
||||
/// Convert audio buffer to Whisper-compatible samples
|
||||
private func convertToWhisperFormat(_ buffer: AVAudioPCMBuffer) -> [Float] {
|
||||
guard let channelData = buffer.floatChannelData else {
|
||||
logger.error("❌ No channel data available in buffer")
|
||||
return []
|
||||
}
|
||||
|
||||
let channelCount = Int(buffer.format.channelCount)
|
||||
let frameLength = Int(buffer.frameLength)
|
||||
var samples = Array(repeating: Float(0), count: frameLength)
|
||||
|
||||
logger.notice("📊 Converting buffer - Channels: \(channelCount), Frames: \(frameLength)")
|
||||
|
||||
// If mono, just copy the samples
|
||||
if channelCount == 1 {
|
||||
samples = Array(UnsafeBufferPointer(start: channelData[0], count: frameLength))
|
||||
logger.notice("✅ Copied mono samples directly")
|
||||
}
|
||||
// If stereo or more, average all channels
|
||||
else {
|
||||
logger.notice("🔄 Converting \(channelCount) channels to mono")
|
||||
for frame in 0..<frameLength {
|
||||
var sum: Float = 0
|
||||
for channel in 0..<channelCount {
|
||||
sum += channelData[channel][frame]
|
||||
}
|
||||
samples[frame] = sum / Float(channelCount)
|
||||
}
|
||||
}
|
||||
|
||||
// Normalize samples to [-1, 1]
|
||||
let maxSample = samples.map(abs).max() ?? 1
|
||||
if maxSample > 0 {
|
||||
logger.notice("📈 Normalizing samples with max amplitude: \(maxSample)")
|
||||
samples = samples.map { $0 / maxSample }
|
||||
}
|
||||
|
||||
// Log sample statistics
|
||||
if let min = samples.min(), let max = samples.max() {
|
||||
logger.notice("📊 Final sample range: [\(min), \(max)]")
|
||||
}
|
||||
|
||||
logger.notice("✅ Successfully converted \(samples.count) samples")
|
||||
return samples
|
||||
}
|
||||
}
|
||||
189
VoiceInk/Services/AudioTranscriptionManager.swift
Normal file
189
VoiceInk/Services/AudioTranscriptionManager.swift
Normal file
@ -0,0 +1,189 @@
|
||||
import Foundation
|
||||
import SwiftUI
|
||||
import AVFoundation
|
||||
import SwiftData
|
||||
import os
|
||||
|
||||
@MainActor
|
||||
class AudioTranscriptionManager: ObservableObject {
|
||||
static let shared = AudioTranscriptionManager()
|
||||
|
||||
@Published var isProcessing = false
|
||||
@Published var processingPhase: ProcessingPhase = .idle
|
||||
@Published var currentTranscription: Transcription?
|
||||
@Published var messageLog: String = ""
|
||||
@Published var errorMessage: String?
|
||||
|
||||
private var currentTask: Task<Void, Error>?
|
||||
private var whisperContext: WhisperContext?
|
||||
private let audioProcessor = AudioProcessor()
|
||||
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionManager")
|
||||
|
||||
enum ProcessingPhase {
|
||||
case idle
|
||||
case loading
|
||||
case processingAudio
|
||||
case transcribing
|
||||
case enhancing
|
||||
case completed
|
||||
|
||||
var message: String {
|
||||
switch self {
|
||||
case .idle:
|
||||
return ""
|
||||
case .loading:
|
||||
return "Loading transcription model..."
|
||||
case .processingAudio:
|
||||
return "Processing audio file for transcription..."
|
||||
case .transcribing:
|
||||
return "Transcribing audio..."
|
||||
case .enhancing:
|
||||
return "Enhancing transcription with AI..."
|
||||
case .completed:
|
||||
return "Transcription completed!"
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private init() {}
|
||||
|
||||
func startProcessing(url: URL, modelContext: ModelContext, whisperState: WhisperState) {
|
||||
// Cancel any existing processing
|
||||
cancelProcessing()
|
||||
|
||||
isProcessing = true
|
||||
processingPhase = .loading
|
||||
messageLog = ""
|
||||
errorMessage = nil
|
||||
|
||||
currentTask = Task {
|
||||
do {
|
||||
guard let currentModel = whisperState.currentModel else {
|
||||
throw TranscriptionError.noModelSelected
|
||||
}
|
||||
|
||||
// Load Whisper model
|
||||
whisperContext = try await WhisperContext.createContext(path: currentModel.url.path)
|
||||
|
||||
// Process audio file
|
||||
processingPhase = .processingAudio
|
||||
let samples = try await audioProcessor.processAudioToSamples(url)
|
||||
|
||||
// Get audio duration
|
||||
let audioAsset = AVURLAsset(url: url)
|
||||
var duration: TimeInterval = 0
|
||||
|
||||
if #available(macOS 13.0, *) {
|
||||
let durationValue = try await audioAsset.load(.duration)
|
||||
duration = CMTimeGetSeconds(durationValue)
|
||||
} else {
|
||||
duration = CMTimeGetSeconds(audioAsset.duration)
|
||||
}
|
||||
|
||||
// Create permanent copy of the audio file
|
||||
let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
|
||||
.appendingPathComponent("com.prakashjoshipax.VoiceInk")
|
||||
.appendingPathComponent("Recordings")
|
||||
|
||||
let fileName = "transcribed_\(UUID().uuidString).wav"
|
||||
let permanentURL = recordingsDirectory.appendingPathComponent(fileName)
|
||||
|
||||
try FileManager.default.createDirectory(at: recordingsDirectory, withIntermediateDirectories: true)
|
||||
try FileManager.default.copyItem(at: url, to: permanentURL)
|
||||
|
||||
// Transcribe
|
||||
processingPhase = .transcribing
|
||||
await whisperContext?.setPrompt(whisperState.whisperPrompt.transcriptionPrompt)
|
||||
try await whisperContext?.fullTranscribe(samples: samples)
|
||||
var text = await whisperContext?.getTranscription() ?? ""
|
||||
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
|
||||
// Handle enhancement if enabled
|
||||
if let enhancementService = whisperState.enhancementService,
|
||||
enhancementService.isEnhancementEnabled,
|
||||
enhancementService.isConfigured {
|
||||
processingPhase = .enhancing
|
||||
do {
|
||||
let enhancedText = try await enhancementService.enhance(text)
|
||||
let transcription = Transcription(
|
||||
text: text,
|
||||
duration: duration,
|
||||
enhancedText: enhancedText,
|
||||
audioFileURL: permanentURL.absoluteString
|
||||
)
|
||||
modelContext.insert(transcription)
|
||||
try modelContext.save()
|
||||
currentTranscription = transcription
|
||||
} catch {
|
||||
logger.error("Enhancement failed: \(error.localizedDescription)")
|
||||
messageLog += "Enhancement failed: \(error.localizedDescription). Using original transcription.\n"
|
||||
let transcription = Transcription(
|
||||
text: text,
|
||||
duration: duration,
|
||||
audioFileURL: permanentURL.absoluteString
|
||||
)
|
||||
modelContext.insert(transcription)
|
||||
try modelContext.save()
|
||||
currentTranscription = transcription
|
||||
}
|
||||
} else {
|
||||
let transcription = Transcription(
|
||||
text: text,
|
||||
duration: duration,
|
||||
audioFileURL: permanentURL.absoluteString
|
||||
)
|
||||
modelContext.insert(transcription)
|
||||
try modelContext.save()
|
||||
currentTranscription = transcription
|
||||
}
|
||||
|
||||
processingPhase = .completed
|
||||
try? await Task.sleep(nanoseconds: 1_500_000_000)
|
||||
await finishProcessing()
|
||||
|
||||
} catch {
|
||||
await handleError(error)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
func cancelProcessing() {
|
||||
currentTask?.cancel()
|
||||
cleanupResources()
|
||||
}
|
||||
|
||||
private func finishProcessing() {
|
||||
isProcessing = false
|
||||
processingPhase = .idle
|
||||
currentTask = nil
|
||||
cleanupResources()
|
||||
}
|
||||
|
||||
private func handleError(_ error: Error) {
|
||||
logger.error("Transcription error: \(error.localizedDescription)")
|
||||
errorMessage = error.localizedDescription
|
||||
messageLog += "Error: \(error.localizedDescription)\n"
|
||||
isProcessing = false
|
||||
processingPhase = .idle
|
||||
currentTask = nil
|
||||
cleanupResources()
|
||||
}
|
||||
|
||||
private func cleanupResources() {
|
||||
whisperContext = nil
|
||||
}
|
||||
}
|
||||
|
||||
enum TranscriptionError: Error, LocalizedError {
|
||||
case noModelSelected
|
||||
case transcriptionCancelled
|
||||
|
||||
var errorDescription: String? {
|
||||
switch self {
|
||||
case .noModelSelected:
|
||||
return "No transcription model selected"
|
||||
case .transcriptionCancelled:
|
||||
return "Transcription was cancelled"
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -2,44 +2,36 @@ import SwiftUI
|
||||
import AVFoundation
|
||||
|
||||
class WaveformGenerator {
|
||||
static func generateWaveformSamples(from url: URL, sampleCount: Int = 200) -> [Float] {
|
||||
static func generateWaveformSamples(from url: URL, sampleCount: Int = 200) async -> [Float] {
|
||||
guard let audioFile = try? AVAudioFile(forReading: url) else { return [] }
|
||||
let format = audioFile.processingFormat
|
||||
|
||||
// Calculate frame count and read size
|
||||
let frameCount = UInt32(audioFile.length)
|
||||
let samplesPerFrame = frameCount / UInt32(sampleCount)
|
||||
var samples = [Float](repeating: 0.0, count: sampleCount)
|
||||
let stride = max(1, Int(frameCount) / sampleCount)
|
||||
let bufferSize = min(UInt32(4096), frameCount)
|
||||
|
||||
guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else { return [] }
|
||||
guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: bufferSize) else { return [] }
|
||||
|
||||
do {
|
||||
try audioFile.read(into: buffer)
|
||||
var maxValues = [Float](repeating: 0.0, count: sampleCount)
|
||||
var sampleIndex = 0
|
||||
var framePosition: AVAudioFramePosition = 0
|
||||
|
||||
// Get the raw audio data
|
||||
guard let channelData = buffer.floatChannelData?[0] else { return [] }
|
||||
|
||||
// Process the samples
|
||||
for i in 0..<sampleCount {
|
||||
let startFrame = UInt32(i) * samplesPerFrame
|
||||
let endFrame = min(startFrame + samplesPerFrame, frameCount)
|
||||
var maxAmplitude: Float = 0.0
|
||||
while sampleIndex < sampleCount && framePosition < AVAudioFramePosition(frameCount) {
|
||||
audioFile.framePosition = framePosition
|
||||
try audioFile.read(into: buffer)
|
||||
|
||||
// Find the highest amplitude in this segment
|
||||
for frame in startFrame..<endFrame {
|
||||
let amplitude = abs(channelData[Int(frame)])
|
||||
maxAmplitude = max(maxAmplitude, amplitude)
|
||||
if let channelData = buffer.floatChannelData?[0], buffer.frameLength > 0 {
|
||||
maxValues[sampleIndex] = abs(channelData[0])
|
||||
sampleIndex += 1
|
||||
}
|
||||
|
||||
samples[i] = maxAmplitude
|
||||
framePosition += AVAudioFramePosition(stride)
|
||||
}
|
||||
|
||||
// Normalize the samples
|
||||
if let maxSample = samples.max(), maxSample > 0 {
|
||||
samples = samples.map { $0 / maxSample }
|
||||
if let maxSample = maxValues.max(), maxSample > 0 {
|
||||
return maxValues.map { $0 / maxSample }
|
||||
}
|
||||
|
||||
return samples
|
||||
return maxValues
|
||||
} catch {
|
||||
print("Error reading audio file: \(error)")
|
||||
return []
|
||||
@ -49,19 +41,27 @@ class WaveformGenerator {
|
||||
|
||||
class AudioPlayerManager: ObservableObject {
|
||||
private var audioPlayer: AVAudioPlayer?
|
||||
private var timer: Timer?
|
||||
@Published var isPlaying = false
|
||||
@Published var currentTime: TimeInterval = 0
|
||||
@Published var duration: TimeInterval = 0
|
||||
@Published var waveformSamples: [Float] = []
|
||||
private var timer: Timer?
|
||||
@Published var isLoadingWaveform = false
|
||||
|
||||
func loadAudio(from url: URL) {
|
||||
do {
|
||||
audioPlayer = try AVAudioPlayer(contentsOf: url)
|
||||
audioPlayer?.prepareToPlay()
|
||||
duration = audioPlayer?.duration ?? 0
|
||||
// Generate waveform data
|
||||
waveformSamples = WaveformGenerator.generateWaveformSamples(from: url)
|
||||
isLoadingWaveform = true
|
||||
|
||||
Task {
|
||||
let samples = await WaveformGenerator.generateWaveformSamples(from: url)
|
||||
await MainActor.run {
|
||||
self.waveformSamples = samples
|
||||
self.isLoadingWaveform = false
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
print("Error loading audio: \(error.localizedDescription)")
|
||||
}
|
||||
@ -109,6 +109,7 @@ struct WaveformView: View {
|
||||
let samples: [Float]
|
||||
let currentTime: TimeInterval
|
||||
let duration: TimeInterval
|
||||
let isLoading: Bool
|
||||
var onSeek: (Double) -> Void
|
||||
@State private var isHovering = false
|
||||
@State private var hoverLocation: CGFloat = 0
|
||||
@ -116,70 +117,72 @@ struct WaveformView: View {
|
||||
var body: some View {
|
||||
GeometryReader { geometry in
|
||||
ZStack(alignment: .leading) {
|
||||
// Removed the glass-morphic background and its overlays
|
||||
|
||||
// Waveform container
|
||||
HStack(spacing: 1) {
|
||||
ForEach(0..<samples.count, id: \.self) { index in
|
||||
WaveformBar(
|
||||
sample: samples[index],
|
||||
isPlayed: CGFloat(index) / CGFloat(samples.count) <= CGFloat(currentTime / duration),
|
||||
totalBars: samples.count,
|
||||
geometryWidth: geometry.size.width,
|
||||
isHovering: isHovering,
|
||||
hoverProgress: hoverLocation / geometry.size.width
|
||||
)
|
||||
if isLoading {
|
||||
VStack {
|
||||
ProgressView()
|
||||
.controlSize(.small)
|
||||
Text("Generating waveform...")
|
||||
.font(.system(size: 12))
|
||||
.foregroundColor(.secondary)
|
||||
}
|
||||
}
|
||||
.frame(maxHeight: .infinity)
|
||||
.padding(.horizontal, 2)
|
||||
|
||||
// Hover time indicator
|
||||
if isHovering {
|
||||
// Time bubble
|
||||
Text(formatTime(duration * Double(hoverLocation / geometry.size.width)))
|
||||
.font(.system(size: 12, weight: .medium))
|
||||
.monospacedDigit()
|
||||
.foregroundColor(.white)
|
||||
.padding(.horizontal, 8)
|
||||
.padding(.vertical, 4)
|
||||
.background(
|
||||
Capsule()
|
||||
.fill(Color.accentColor)
|
||||
.shadow(color: Color.black.opacity(0.1), radius: 3, x: 0, y: 2)
|
||||
)
|
||||
.offset(x: max(0, min(hoverLocation - 30, geometry.size.width - 60)))
|
||||
.offset(y: -30)
|
||||
.frame(maxWidth: .infinity, maxHeight: .infinity)
|
||||
} else {
|
||||
HStack(spacing: 1) {
|
||||
ForEach(0..<samples.count, id: \.self) { index in
|
||||
WaveformBar(
|
||||
sample: samples[index],
|
||||
isPlayed: CGFloat(index) / CGFloat(samples.count) <= CGFloat(currentTime / duration),
|
||||
totalBars: samples.count,
|
||||
geometryWidth: geometry.size.width,
|
||||
isHovering: isHovering,
|
||||
hoverProgress: hoverLocation / geometry.size.width
|
||||
)
|
||||
}
|
||||
}
|
||||
.frame(maxHeight: .infinity)
|
||||
.padding(.horizontal, 2)
|
||||
|
||||
// Progress line
|
||||
Rectangle()
|
||||
.fill(Color.accentColor)
|
||||
.frame(width: 2)
|
||||
.frame(maxHeight: .infinity)
|
||||
.offset(x: hoverLocation)
|
||||
.transition(.opacity)
|
||||
if isHovering {
|
||||
Text(formatTime(duration * Double(hoverLocation / geometry.size.width)))
|
||||
.font(.system(size: 12, weight: .medium))
|
||||
.monospacedDigit()
|
||||
.foregroundColor(.white)
|
||||
.padding(.horizontal, 8)
|
||||
.padding(.vertical, 4)
|
||||
.background(Capsule().fill(Color.accentColor))
|
||||
.offset(x: max(0, min(hoverLocation - 30, geometry.size.width - 60)))
|
||||
.offset(y: -30)
|
||||
|
||||
Rectangle()
|
||||
.fill(Color.accentColor)
|
||||
.frame(width: 2)
|
||||
.frame(maxHeight: .infinity)
|
||||
.offset(x: hoverLocation)
|
||||
}
|
||||
}
|
||||
}
|
||||
.contentShape(Rectangle())
|
||||
.gesture(
|
||||
DragGesture(minimumDistance: 0)
|
||||
.onChanged { value in
|
||||
hoverLocation = value.location.x
|
||||
let progress = max(0, min(value.location.x / geometry.size.width, 1))
|
||||
onSeek(Double(progress) * duration)
|
||||
if !isLoading {
|
||||
hoverLocation = value.location.x
|
||||
onSeek(Double(value.location.x / geometry.size.width) * duration)
|
||||
}
|
||||
}
|
||||
)
|
||||
.onHover { hovering in
|
||||
withAnimation(.easeInOut(duration: 0.2)) {
|
||||
isHovering = hovering
|
||||
if !isLoading {
|
||||
withAnimation(.easeInOut(duration: 0.2)) {
|
||||
isHovering = hovering
|
||||
}
|
||||
}
|
||||
}
|
||||
.onContinuousHover { phase in
|
||||
switch phase {
|
||||
case .active(let location):
|
||||
hoverLocation = location.x
|
||||
case .ended:
|
||||
break
|
||||
if !isLoading {
|
||||
if case .active(let location) = phase {
|
||||
hoverLocation = location.x
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -201,12 +204,8 @@ struct WaveformBar: View {
|
||||
let isHovering: Bool
|
||||
let hoverProgress: CGFloat
|
||||
|
||||
private var barProgress: CGFloat {
|
||||
CGFloat(sample)
|
||||
}
|
||||
|
||||
private var isNearHover: Bool {
|
||||
let barPosition = CGFloat(geometryWidth) / CGFloat(totalBars)
|
||||
let barPosition = geometryWidth / CGFloat(totalBars)
|
||||
let hoverPosition = hoverProgress * geometryWidth
|
||||
return abs(barPosition - hoverPosition) < 20
|
||||
}
|
||||
@ -215,17 +214,17 @@ struct WaveformBar: View {
|
||||
Capsule()
|
||||
.fill(
|
||||
LinearGradient(
|
||||
gradient: Gradient(colors: [
|
||||
colors: [
|
||||
isPlayed ? Color.accentColor : Color.accentColor.opacity(0.3),
|
||||
isPlayed ? Color.accentColor.opacity(0.8) : Color.accentColor.opacity(0.2)
|
||||
]),
|
||||
],
|
||||
startPoint: .bottom,
|
||||
endPoint: .top
|
||||
)
|
||||
)
|
||||
.frame(
|
||||
width: max((geometryWidth / CGFloat(totalBars)) - 1, 1),
|
||||
height: max(barProgress * 40, 3)
|
||||
height: max(CGFloat(sample) * 40, 3)
|
||||
)
|
||||
.scaleEffect(y: isHovering && isNearHover ? 1.2 : 1.0)
|
||||
.animation(.interpolatingSpring(stiffness: 300, damping: 15), value: isHovering && isNearHover)
|
||||
@ -236,27 +235,19 @@ struct AudioPlayerView: View {
|
||||
let url: URL
|
||||
@StateObject private var playerManager = AudioPlayerManager()
|
||||
@State private var isHovering = false
|
||||
@State private var showingTooltip = false
|
||||
@State private var isRetranscribing = false
|
||||
@State private var showRetranscribeSuccess = false
|
||||
@State private var showRetranscribeError = false
|
||||
@State private var errorMessage = ""
|
||||
|
||||
// Add environment objects for retranscription
|
||||
@EnvironmentObject private var whisperState: WhisperState
|
||||
@Environment(\.modelContext) private var modelContext
|
||||
|
||||
// Create the audio transcription service lazily
|
||||
private var transcriptionService: AudioTranscriptionService {
|
||||
AudioTranscriptionService(
|
||||
modelContext: modelContext,
|
||||
whisperState: whisperState
|
||||
)
|
||||
AudioTranscriptionService(modelContext: modelContext, whisperState: whisperState)
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
VStack(spacing: 16) {
|
||||
// Title and duration
|
||||
HStack {
|
||||
HStack(spacing: 6) {
|
||||
Image(systemName: "waveform")
|
||||
@ -274,21 +265,16 @@ struct AudioPlayerView: View {
|
||||
.foregroundColor(.secondary)
|
||||
}
|
||||
|
||||
// Waveform and controls container
|
||||
VStack(spacing: 16) {
|
||||
// Waveform
|
||||
WaveformView(
|
||||
samples: playerManager.waveformSamples,
|
||||
currentTime: playerManager.currentTime,
|
||||
duration: playerManager.duration,
|
||||
onSeek: { time in
|
||||
playerManager.seek(to: time)
|
||||
}
|
||||
isLoading: playerManager.isLoadingWaveform,
|
||||
onSeek: { playerManager.seek(to: $0) }
|
||||
)
|
||||
|
||||
// Controls
|
||||
HStack(spacing: 20) {
|
||||
// Play/Pause button
|
||||
Button(action: {
|
||||
if playerManager.isPlaying {
|
||||
playerManager.pause()
|
||||
@ -314,10 +300,7 @@ struct AudioPlayerView: View {
|
||||
}
|
||||
}
|
||||
|
||||
// Add Retranscribe button
|
||||
Button(action: {
|
||||
retranscribeAudio()
|
||||
}) {
|
||||
Button(action: retranscribeAudio) {
|
||||
Circle()
|
||||
.fill(Color.green.opacity(0.1))
|
||||
.frame(width: 44, height: 44)
|
||||
@ -342,7 +325,6 @@ struct AudioPlayerView: View {
|
||||
.disabled(isRetranscribing)
|
||||
.help("Retranscribe this audio")
|
||||
|
||||
// Time
|
||||
Text(formatTime(playerManager.currentTime))
|
||||
.font(.system(size: 14, weight: .medium))
|
||||
.monospacedDigit()
|
||||
@ -356,7 +338,6 @@ struct AudioPlayerView: View {
|
||||
playerManager.loadAudio(from: url)
|
||||
}
|
||||
.overlay(
|
||||
// Success notification
|
||||
VStack {
|
||||
if showRetranscribeSuccess {
|
||||
HStack(spacing: 8) {
|
||||
@ -370,10 +351,7 @@ struct AudioPlayerView: View {
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.green.opacity(0.1))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.stroke(Color.green.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
.stroke(Color.green.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
}
|
||||
@ -390,10 +368,7 @@ struct AudioPlayerView: View {
|
||||
.background(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.fill(Color.red.opacity(0.1))
|
||||
.overlay(
|
||||
RoundedRectangle(cornerRadius: 8)
|
||||
.stroke(Color.red.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
.stroke(Color.red.opacity(0.2), lineWidth: 1)
|
||||
)
|
||||
.transition(.move(edge: .top).combined(with: .opacity))
|
||||
}
|
||||
@ -416,12 +391,8 @@ struct AudioPlayerView: View {
|
||||
guard let currentModel = whisperState.currentModel else {
|
||||
errorMessage = "No transcription model selected"
|
||||
showRetranscribeError = true
|
||||
|
||||
// Hide error after 3 seconds
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
|
||||
withAnimation {
|
||||
showRetranscribeError = false
|
||||
}
|
||||
withAnimation { showRetranscribeError = false }
|
||||
}
|
||||
return
|
||||
}
|
||||
@ -430,22 +401,12 @@ struct AudioPlayerView: View {
|
||||
|
||||
Task {
|
||||
do {
|
||||
// Use the AudioTranscriptionService to retranscribe the audio
|
||||
let _ = try await transcriptionService.retranscribeAudio(
|
||||
from: url,
|
||||
using: currentModel
|
||||
)
|
||||
|
||||
// Show success notification
|
||||
let _ = try await transcriptionService.retranscribeAudio(from: url, using: currentModel)
|
||||
await MainActor.run {
|
||||
isRetranscribing = false
|
||||
showRetranscribeSuccess = true
|
||||
|
||||
// Hide success after 3 seconds
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
|
||||
withAnimation {
|
||||
showRetranscribeSuccess = false
|
||||
}
|
||||
withAnimation { showRetranscribeSuccess = false }
|
||||
}
|
||||
}
|
||||
} catch {
|
||||
@ -453,12 +414,8 @@ struct AudioPlayerView: View {
|
||||
isRetranscribing = false
|
||||
errorMessage = error.localizedDescription
|
||||
showRetranscribeError = true
|
||||
|
||||
// Hide error after 3 seconds
|
||||
DispatchQueue.main.asyncAfter(deadline: .now() + 3) {
|
||||
withAnimation {
|
||||
showRetranscribeError = false
|
||||
}
|
||||
withAnimation { showRetranscribeError = false }
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
283
VoiceInk/Views/AudioTranscribeView.swift
Normal file
283
VoiceInk/Views/AudioTranscribeView.swift
Normal file
@ -0,0 +1,283 @@
|
||||
import SwiftUI
|
||||
import SwiftData
|
||||
import UniformTypeIdentifiers
|
||||
import AVFoundation
|
||||
|
||||
struct AudioTranscribeView: View {
|
||||
@Environment(\.modelContext) private var modelContext
|
||||
@EnvironmentObject private var whisperState: WhisperState
|
||||
@StateObject private var transcriptionManager = AudioTranscriptionManager.shared
|
||||
@State private var isDropTargeted = false
|
||||
@State private var selectedAudioURL: URL?
|
||||
@State private var isAudioFileSelected = false
|
||||
@State private var isEnhancementEnabled = false
|
||||
@State private var selectedPromptId: UUID?
|
||||
|
||||
var body: some View {
|
||||
VStack(spacing: 0) {
|
||||
if transcriptionManager.isProcessing {
|
||||
processingView
|
||||
} else {
|
||||
dropZoneView
|
||||
}
|
||||
|
||||
Divider()
|
||||
.padding(.vertical)
|
||||
|
||||
// Show current transcription result
|
||||
if let transcription = transcriptionManager.currentTranscription {
|
||||
ScrollView {
|
||||
VStack(alignment: .leading, spacing: 16) {
|
||||
Text("Transcription Result")
|
||||
.font(.headline)
|
||||
|
||||
if let enhancedText = transcription.enhancedText {
|
||||
VStack(alignment: .leading, spacing: 8) {
|
||||
Text("Enhanced")
|
||||
.font(.subheadline)
|
||||
.foregroundColor(.secondary)
|
||||
Text(enhancedText)
|
||||
.textSelection(.enabled)
|
||||
}
|
||||
|
||||
Divider()
|
||||
|
||||
VStack(alignment: .leading, spacing: 8) {
|
||||
Text("Original")
|
||||
.font(.subheadline)
|
||||
.foregroundColor(.secondary)
|
||||
Text(transcription.text)
|
||||
.textSelection(.enabled)
|
||||
}
|
||||
} else {
|
||||
Text(transcription.text)
|
||||
.textSelection(.enabled)
|
||||
}
|
||||
|
||||
HStack {
|
||||
Text("Duration: \(formatDuration(transcription.duration))")
|
||||
.font(.caption)
|
||||
.foregroundColor(.secondary)
|
||||
Spacer()
|
||||
}
|
||||
}
|
||||
.padding()
|
||||
}
|
||||
}
|
||||
}
|
||||
.alert("Error", isPresented: .constant(transcriptionManager.errorMessage != nil)) {
|
||||
Button("OK", role: .cancel) {
|
||||
transcriptionManager.errorMessage = nil
|
||||
}
|
||||
} message: {
|
||||
if let errorMessage = transcriptionManager.errorMessage {
|
||||
Text(errorMessage)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// File-selection area for the transcription screen.
///
/// With no file chosen it renders a dashed drag-and-drop target with a
/// "Choose File" fallback button; once a file is selected it shows the
/// file name, optional AI-enhancement controls (toggle + prompt menu,
/// when an enhancement service is available), and Start / Choose-different
/// actions. Dropped items are resolved asynchronously in
/// `handleDroppedFile(_:)`.
private var dropZoneView: some View {
    VStack(spacing: 16) {
        if isAudioFileSelected {
            VStack(spacing: 16) {
                Text("Audio file selected: \(selectedAudioURL?.lastPathComponent ?? "")")
                    .font(.headline)

                // AI Enhancement Settings — shown only when the service exists.
                if let enhancementService = whisperState.getEnhancementService() {
                    VStack(spacing: 16) {
                        // AI Enhancement and Prompt in the same row
                        HStack(spacing: 16) {
                            // Local toggle state is pushed into the service on change;
                            // the reverse sync happens in .onAppear below.
                            Toggle("AI Enhancement", isOn: $isEnhancementEnabled)
                                .toggleStyle(.switch)
                                .onChange(of: isEnhancementEnabled) { newValue in
                                    enhancementService.isEnhancementEnabled = newValue
                                }

                            if isEnhancementEnabled {
                                Divider()
                                    .frame(height: 20)

                                // Prompt Selection
                                HStack(spacing: 8) {
                                    Text("Prompt:")
                                        .font(.subheadline)

                                    Menu {
                                        ForEach(enhancementService.allPrompts) { prompt in
                                            Button {
                                                // Activate in the service and mirror locally
                                                // so the menu label updates immediately.
                                                enhancementService.setActivePrompt(prompt)
                                                selectedPromptId = prompt.id
                                            } label: {
                                                HStack {
                                                    Image(systemName: prompt.icon.rawValue)
                                                        .foregroundColor(.accentColor)
                                                    Text(prompt.title)
                                                    // Checkmark marks the currently selected prompt.
                                                    if selectedPromptId == prompt.id {
                                                        Spacer()
                                                        Image(systemName: "checkmark")
                                                    }
                                                }
                                            }
                                        }
                                    } label: {
                                        HStack {
                                            Text(enhancementService.allPrompts.first(where: { $0.id == selectedPromptId })?.title ?? "Select Prompt")
                                                .foregroundColor(.primary)
                                            Image(systemName: "chevron.down")
                                                .font(.caption)
                                        }
                                        .padding(.horizontal, 8)
                                        .padding(.vertical, 4)
                                        .background(
                                            RoundedRectangle(cornerRadius: 6)
                                                .fill(Color(.controlBackgroundColor))
                                        )
                                    }
                                    .fixedSize()
                                    .disabled(!isEnhancementEnabled)
                                }
                            }
                        }
                        .padding(.horizontal, 12)
                        .padding(.vertical, 8)
                        .background(
                            RoundedRectangle(cornerRadius: 8)
                                .fill(Color(.windowBackgroundColor).opacity(0.4))
                        )
                    }
                    .frame(maxWidth: .infinity, alignment: .center)
                    .onAppear {
                        // Initialize local state from enhancement service
                        isEnhancementEnabled = enhancementService.isEnhancementEnabled
                        selectedPromptId = enhancementService.selectedPromptId
                    }
                }

                // Action Buttons in a row
                HStack(spacing: 12) {
                    Button("Start Transcription") {
                        if let url = selectedAudioURL {
                            transcriptionManager.startProcessing(
                                url: url,
                                modelContext: modelContext,
                                whisperState: whisperState
                            )
                        }
                    }
                    .buttonStyle(.borderedProminent)

                    Button("Choose Different File") {
                        // Clear the selection and return to the drop-zone state.
                        selectedAudioURL = nil
                        isAudioFileSelected = false
                    }
                    .buttonStyle(.bordered)
                }
            }
            .padding()
        } else {
            // Empty state: dashed drop target that highlights while a drag hovers.
            ZStack {
                RoundedRectangle(cornerRadius: 12)
                    .fill(Color(.windowBackgroundColor).opacity(0.4))
                    .overlay(
                        RoundedRectangle(cornerRadius: 12)
                            .strokeBorder(
                                style: StrokeStyle(
                                    lineWidth: 2,
                                    dash: [8]
                                )
                            )
                            .foregroundColor(isDropTargeted ? .blue : .gray.opacity(0.5))
                    )

                VStack(spacing: 16) {
                    Image(systemName: "arrow.down.doc")
                        .font(.system(size: 32))
                        .foregroundColor(isDropTargeted ? .blue : .gray)

                    Text("Drop audio file here")
                        .font(.headline)

                    Text("or")
                        .foregroundColor(.secondary)

                    Button("Choose File") {
                        selectFile()
                    }
                    .buttonStyle(.bordered)
                }
                .padding(32)
            }
            .frame(height: 200)
            .padding(.horizontal)
        }

        Text("Supported formats: WAV, MP3, M4A, AIFF")
            .font(.caption)
            .foregroundColor(.secondary)
    }
    .padding()
    .onDrop(of: [.audio, .fileURL], isTargeted: $isDropTargeted) { providers in
        // Accept the drop immediately; resolving the provider into a file
        // URL happens asynchronously.
        Task {
            await handleDroppedFile(providers)
        }
        return true
    }
}
|
||||
|
||||
/// Progress UI shown while the transcription manager is working:
/// a spinner, the current processing phase's message, and the
/// manager's running message log.
private var processingView: some View {
    VStack(spacing: 16) {
        ProgressView()
            .scaleEffect(0.8)
        // Human-readable description of the current phase; the phase values
        // themselves are defined on the manager (not visible here).
        Text(transcriptionManager.processingPhase.message)
            .font(.headline)
        // Accumulated log text emitted during processing.
        Text(transcriptionManager.messageLog)
            .font(.caption)
            .foregroundColor(.secondary)
            .multilineTextAlignment(.center)
    }
    .padding()
}
|
||||
|
||||
/// Presents a modal `NSOpenPanel` restricted to the supported audio
/// formats and, if the user confirms a choice, stores the chosen URL
/// as the pending selection and flips the selected-file flag.
private func selectFile() {
    let panel = NSOpenPanel()
    panel.canChooseFiles = true
    panel.canChooseDirectories = false
    panel.allowsMultipleSelection = false
    // Limit the picker to the formats the transcription pipeline accepts
    // (matches the "Supported formats" hint shown in the drop zone).
    panel.allowedContentTypes = [.audio, .wav, .mp3, .mpeg4Audio, .aiff]

    guard panel.runModal() == .OK, let chosenURL = panel.url else { return }
    selectedAudioURL = chosenURL
    isAudioFileSelected = true
}
|
||||
|
||||
/// Resolves the first dropped item into a local file URL and records it
/// as the selected audio file.
///
/// - Parameter providers: Item providers delivered by the drop-zone's
///   `onDrop` handler (registered for `.audio` and `.fileURL`).
///
/// Fix vs. original: the old code wrote
/// `try? await provider.loadItem(forTypeIdentifier:) { item, error in … }`,
/// mixing `await` with the completion-handler overload — that trailing
/// closure does not match the async overload's signature, so the call
/// never resolved to the async API. It also only handled `.audio`,
/// although the drop zone registers `.fileURL` as well.
private func handleDroppedFile(_ providers: [NSItemProvider]) async {
    guard let provider = providers.first else { return }

    // Prefer the concrete audio type, fall back to a generic file URL.
    let candidateTypes = [UTType.audio.identifier, UTType.fileURL.identifier]
    guard let typeIdentifier = candidateTypes.first(where: {
        provider.hasItemConformingToTypeIdentifier($0)
    }) else { return }

    // Use the async overload directly; best-effort — a failed load simply
    // leaves the selection unchanged, matching the original's `try?` intent.
    guard let item = try? await provider.loadItem(forTypeIdentifier: typeIdentifier,
                                                  options: nil) else { return }

    // Providers may deliver the payload as a URL object or as URL data.
    let resolvedURL: URL?
    if let url = item as? URL {
        resolvedURL = url
    } else if let data = item as? Data {
        resolvedURL = URL(dataRepresentation: data, relativeTo: nil)
    } else {
        resolvedURL = nil
    }

    guard let url = resolvedURL else { return }

    // State backing the UI must be mutated on the main actor.
    await MainActor.run {
        selectedAudioURL = url
        isAudioFileSelected = true
    }
}
|
||||
|
||||
/// Formats a duration in seconds as "m:ss", truncating fractional
/// seconds (e.g. 75.9 → "1:15").
private func formatDuration(_ duration: TimeInterval) -> String {
    let wholeSeconds = Int(duration)
    let (minutes, seconds) = wholeSeconds.quotientAndRemainder(dividingBy: 60)
    return String(format: "%d:%02d", minutes, seconds)
}
|
||||
}
|
||||
@ -6,6 +6,7 @@ import KeyboardShortcuts
|
||||
enum ViewType: String, CaseIterable {
|
||||
case metrics = "Dashboard"
|
||||
case record = "Record Audio"
|
||||
case transcribeAudio = "Transcribe Audio"
|
||||
case history = "History"
|
||||
case models = "AI Models"
|
||||
case enhancement = "Enhancement"
|
||||
@ -21,6 +22,7 @@ enum ViewType: String, CaseIterable {
|
||||
switch self {
|
||||
case .metrics: return "gauge.medium"
|
||||
case .record: return "mic.circle.fill"
|
||||
case .transcribeAudio: return "waveform.circle.fill"
|
||||
case .history: return "doc.text.fill"
|
||||
case .models: return "brain.head.profile"
|
||||
case .enhancement: return "wand.and.stars"
|
||||
@ -243,6 +245,8 @@ struct ContentView: View {
|
||||
EnhancementSettingsView()
|
||||
case .record:
|
||||
RecordView()
|
||||
case .transcribeAudio:
|
||||
AudioTranscribeView()
|
||||
case .history:
|
||||
TranscriptionHistoryView()
|
||||
case .audioInput:
|
||||
|
||||
189
VoiceInk/Views/TranscriptionCard.swift
Normal file
189
VoiceInk/Views/TranscriptionCard.swift
Normal file
@ -0,0 +1,189 @@
|
||||
import SwiftUI
|
||||
import SwiftData
|
||||
|
||||
/// Card-style row presenting a single `Transcription` record.
///
/// Collapsed, it shows the date, a duration badge, and a two-line preview
/// of the original text. Expanded, it adds copy buttons, the AI-enhanced
/// text (when present), an inline audio player (when the recording still
/// exists on disk), and the time of day. A leading circular checkbox
/// drives multi-selection in the containing list.
struct TranscriptionCard: View {
    let transcription: Transcription
    /// Whether the card shows its full, expanded content.
    let isExpanded: Bool
    /// Whether the card belongs to the parent view's current selection.
    let isSelected: Bool
    /// Invoked when the user chooses Delete from the context menu.
    let onDelete: () -> Void
    /// Invoked when the selection checkbox is toggled.
    let onToggleSelection: () -> Void
    // Transient "Copied" feedback flags; auto-cleared after 1.5 s
    // by the .onChange handlers at the bottom of `body`.
    @State private var showOriginalCopiedAlert = false
    @State private var showEnhancedCopiedAlert = false

    var body: some View {
        HStack(spacing: 12) {
            // Selection checkbox in macOS style; the binding forwards taps
            // to the parent instead of holding state locally.
            Toggle("", isOn: Binding(
                get: { isSelected },
                set: { _ in onToggleSelection() }
            ))
            .toggleStyle(CircularCheckboxStyle())
            .labelsHidden()

            VStack(alignment: .leading, spacing: 8) {
                // Header with date and duration
                HStack {
                    Text(transcription.timestamp, style: .date)
                        .font(.system(size: 14, weight: .medium, design: .default))
                        .foregroundColor(.secondary)
                    Spacer()

                    Text(formatDuration(transcription.duration))
                        .font(.system(size: 14, weight: .medium, design: .default))
                        .padding(.horizontal, 8)
                        .padding(.vertical, 4)
                        .background(Color.blue.opacity(0.1))
                        .foregroundColor(.blue)
                        .cornerRadius(6)
                }

                // Original text section
                VStack(alignment: .leading, spacing: 8) {
                    if isExpanded {
                        HStack {
                            Text("Original")
                                .font(.system(size: 14, weight: .medium))
                                .foregroundColor(.secondary)
                            Spacer()
                            Button {
                                copyToClipboard(transcription.text)
                                showOriginalCopiedAlert = true
                            } label: {
                                // Button morphs into a green "Copied" confirmation.
                                HStack(spacing: 4) {
                                    Image(systemName: showOriginalCopiedAlert ? "checkmark" : "doc.on.doc")
                                    Text(showOriginalCopiedAlert ? "Copied" : "Copy")
                                }
                                .foregroundColor(showOriginalCopiedAlert ? .green : .blue)
                                .padding(.horizontal, 8)
                                .padding(.vertical, 4)
                                .background(Color.blue.opacity(0.1))
                                .cornerRadius(6)
                            }
                            .buttonStyle(.plain)
                        }
                    }

                    Text(transcription.text)
                        .font(.system(size: 15, weight: .regular, design: .default))
                        .lineLimit(isExpanded ? nil : 2) // two-line preview when collapsed
                        .lineSpacing(2)
                }

                // Enhanced text section (only when expanded)
                if isExpanded, let enhancedText = transcription.enhancedText {
                    Divider()
                        .padding(.vertical, 8)

                    VStack(alignment: .leading, spacing: 8) {
                        HStack {
                            HStack(spacing: 4) {
                                Image(systemName: "sparkles")
                                    .foregroundColor(.blue)
                                Text("Enhanced")
                                    .font(.system(size: 14, weight: .medium))
                                    .foregroundColor(.blue)
                            }
                            Spacer()
                            Button {
                                copyToClipboard(enhancedText)
                                showEnhancedCopiedAlert = true
                            } label: {
                                HStack(spacing: 4) {
                                    Image(systemName: showEnhancedCopiedAlert ? "checkmark" : "doc.on.doc")
                                    Text(showEnhancedCopiedAlert ? "Copied" : "Copy")
                                }
                                .foregroundColor(showEnhancedCopiedAlert ? .green : .blue)
                                .padding(.horizontal, 8)
                                .padding(.vertical, 4)
                                .background(Color.blue.opacity(0.1))
                                .cornerRadius(6)
                            }
                            .buttonStyle(.plain)
                        }

                        Text(enhancedText)
                            .font(.system(size: 15, weight: .regular, design: .default))
                            .lineSpacing(2)
                    }
                }

                // Audio player (if available) — only when the stored URL
                // still points at an existing file on disk.
                if isExpanded, let urlString = transcription.audioFileURL,
                   let url = URL(string: urlString),
                   FileManager.default.fileExists(atPath: url.path) {
                    Divider()
                        .padding(.vertical, 8)
                    AudioPlayerView(url: url)
                }

                // Timestamp (only when expanded)
                if isExpanded {
                    HStack {
                        Text(transcription.timestamp, style: .time)
                            .font(.system(size: 14, weight: .regular, design: .default))
                            .foregroundColor(.secondary)
                        Spacer()
                    }
                    .padding(.top, 4)
                }
            }
        }
        .padding(16)
        .background(
            RoundedRectangle(cornerRadius: 12)
                .fill(Color(.windowBackgroundColor).opacity(0.4))
        )
        .cornerRadius(12)
        .shadow(color: Color.black.opacity(0.05), radius: 3, x: 0, y: 2)
        .contextMenu {
            if let enhancedText = transcription.enhancedText {
                Button {
                    copyToClipboard(enhancedText)
                    showEnhancedCopiedAlert = true
                } label: {
                    Label("Copy Enhanced", systemImage: "doc.on.doc")
                }
            }

            Button {
                copyToClipboard(transcription.text)
                showOriginalCopiedAlert = true
            } label: {
                Label("Copy Original", systemImage: "doc.on.doc")
            }

            Button(role: .destructive) {
                onDelete()
            } label: {
                Label("Delete", systemImage: "trash")
            }
        }
        // Auto-reset the "Copied" indicators shortly after they appear.
        .onChange(of: showOriginalCopiedAlert) { _, isShowing in
            if isShowing {
                DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
                    showOriginalCopiedAlert = false
                }
            }
        }
        .onChange(of: showEnhancedCopiedAlert) { _, isShowing in
            if isShowing {
                DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
                    showEnhancedCopiedAlert = false
                }
            }
        }
    }

    /// Copies `text` to the clipboard via `ClipboardManager`; failure is
    /// logged but otherwise ignored (best-effort copy).
    private func copyToClipboard(_ text: String) {
        let success = ClipboardManager.copyToClipboard(text)
        if !success {
            print("Failed to copy text to clipboard")
        }
    }

    /// Formats a duration in seconds as "m:ss", truncating fractions.
    private func formatDuration(_ duration: TimeInterval) -> String {
        let minutes = Int(duration) / 60
        let seconds = Int(duration) % 60
        return String(format: "%d:%02d", minutes, seconds)
    }
}
|
||||
@ -18,7 +18,7 @@ struct TranscriptionHistoryView: View {
|
||||
private let pageSize = 20
|
||||
|
||||
// Query for latest transcriptions (used for real-time updates)
|
||||
@Query(sort: \Transcription.timestamp, order: .reverse, animation: .default)
|
||||
@Query(sort: \Transcription.timestamp, order: .reverse)
|
||||
private var latestTranscriptions: [Transcription]
|
||||
|
||||
// Cursor-based query descriptor
|
||||
@ -69,13 +69,7 @@ struct TranscriptionHistoryView: View {
|
||||
onToggleSelection: { toggleSelection(transcription) }
|
||||
)
|
||||
.onTapGesture {
|
||||
withAnimation {
|
||||
if expandedTranscription == transcription {
|
||||
expandedTranscription = nil
|
||||
} else {
|
||||
expandedTranscription = transcription
|
||||
}
|
||||
}
|
||||
expandedTranscription = expandedTranscription == transcription ? nil : transcription
|
||||
}
|
||||
}
|
||||
|
||||
@ -140,14 +134,10 @@ struct TranscriptionHistoryView: View {
|
||||
await loadInitialContent()
|
||||
}
|
||||
} else {
|
||||
// If we're on a paginated view, show a notification or indicator that new content is available
|
||||
// This could be a banner or button to "Show new transcriptions"
|
||||
withAnimation {
|
||||
// Reset pagination to show the latest content
|
||||
Task {
|
||||
await resetPagination()
|
||||
await loadInitialContent()
|
||||
}
|
||||
// Reset pagination to show the latest content
|
||||
Task {
|
||||
await resetPagination()
|
||||
await loadInitialContent()
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -186,14 +176,22 @@ struct TranscriptionHistoryView: View {
|
||||
}
|
||||
|
||||
private var selectionToolbar: some View {
|
||||
HStack {
|
||||
HStack(spacing: 12) {
|
||||
Text("\(selectedTranscriptions.count) selected")
|
||||
.foregroundColor(.secondary)
|
||||
.font(.system(size: 14))
|
||||
|
||||
Spacer()
|
||||
|
||||
Button(action: {
|
||||
showDeleteConfirmation = true
|
||||
}) {
|
||||
Image(systemName: "trash")
|
||||
HStack(spacing: 4) {
|
||||
Image(systemName: "trash")
|
||||
Text("Delete")
|
||||
}
|
||||
}
|
||||
.buttonStyle(.bordered)
|
||||
.buttonStyle(.borderless)
|
||||
|
||||
if selectedTranscriptions.count < displayedTranscriptions.count {
|
||||
Button("Select All") {
|
||||
@ -201,16 +199,16 @@ struct TranscriptionHistoryView: View {
|
||||
await selectAllTranscriptions()
|
||||
}
|
||||
}
|
||||
.buttonStyle(.bordered)
|
||||
.buttonStyle(.borderless)
|
||||
} else {
|
||||
Button("Deselect All") {
|
||||
selectedTranscriptions.removeAll()
|
||||
}
|
||||
.buttonStyle(.bordered)
|
||||
.buttonStyle(.borderless)
|
||||
}
|
||||
}
|
||||
.padding(24)
|
||||
.background(Color(.windowBackgroundColor).opacity(0.4))
|
||||
.padding(16)
|
||||
.background(Color(.windowBackgroundColor))
|
||||
}
|
||||
|
||||
private func loadInitialContent() async {
|
||||
@ -377,196 +375,3 @@ struct CircularCheckboxStyle: ToggleStyle {
|
||||
.buttonStyle(.plain)
|
||||
}
|
||||
}
|
||||
|
||||
/// Card-style row presenting a single `Transcription` record.
///
/// Collapsed, it shows the date, a duration badge, and a two-line preview
/// of the original text. Expanded, it adds animated copy buttons, the
/// AI-enhanced text (when present), an inline audio player (when the
/// recording still exists on disk), and the time of day. A leading
/// circular checkbox drives multi-selection in the containing list.
struct TranscriptionCard: View {
    let transcription: Transcription
    /// Whether the card shows its full, expanded content.
    let isExpanded: Bool
    /// Whether the card belongs to the parent view's current selection.
    let isSelected: Bool
    /// Invoked when the user chooses Delete from the context menu.
    let onDelete: () -> Void
    /// Invoked when the selection checkbox is toggled.
    let onToggleSelection: () -> Void
    // Transient "Copied!" feedback flags; auto-cleared after 1.5 s
    // by the .onChange handlers at the bottom of `body`.
    @State private var showOriginalCopiedAlert = false
    @State private var showEnhancedCopiedAlert = false

    var body: some View {
        HStack(spacing: 12) {
            // Selection checkbox in macOS style; the binding forwards taps
            // to the parent instead of holding state locally.
            Toggle("", isOn: Binding(
                get: { isSelected },
                set: { _ in onToggleSelection() }
            ))
            .toggleStyle(CircularCheckboxStyle())
            .labelsHidden()

            VStack(alignment: .leading, spacing: 8) {
                // Header with date and duration
                HStack {
                    Text(transcription.timestamp, style: .date)
                        .font(.system(size: 14, weight: .medium, design: .default))
                        .foregroundColor(.secondary)
                    Spacer()

                    Text(formatDuration(transcription.duration))
                        .font(.system(size: 14, weight: .medium, design: .default))
                        .padding(.horizontal, 8)
                        .padding(.vertical, 4)
                        .background(Color.blue.opacity(0.1))
                        .foregroundColor(.blue)
                        .cornerRadius(6)
                }

                // Original text section
                VStack(alignment: .leading, spacing: 8) {
                    if isExpanded {
                        HStack {
                            Text("Original")
                                .font(.system(size: 14, weight: .medium))
                                .foregroundColor(.secondary)
                            Spacer()
                            Button {
                                copyToClipboard(transcription.text)
                                showOriginalCopiedAlert = true
                            } label: {
                                // Button morphs into a green "Copied!" confirmation.
                                HStack(spacing: 4) {
                                    Image(systemName: showOriginalCopiedAlert ? "checkmark" : "doc.on.doc")
                                    Text(showOriginalCopiedAlert ? "Copied!" : "Copy")
                                }
                                .foregroundColor(showOriginalCopiedAlert ? .green : .blue)
                                .padding(.horizontal, 8)
                                .padding(.vertical, 4)
                                .background(
                                    RoundedRectangle(cornerRadius: 6)
                                        .fill(showOriginalCopiedAlert ? Color.green.opacity(0.1) : Color.blue.opacity(0.1))
                                )
                            }
                            .buttonStyle(.plain)
                            // Animate the copy/copied state transition.
                            .animation(.easeInOut(duration: 0.2), value: showOriginalCopiedAlert)
                        }
                    }

                    Text(transcription.text)
                        .font(.system(size: 15, weight: .regular, design: .default))
                        .lineLimit(isExpanded ? nil : 2) // two-line preview when collapsed
                        .lineSpacing(2)
                }

                // Enhanced text section (only when expanded)
                if isExpanded, let enhancedText = transcription.enhancedText {
                    Divider()
                        .padding(.vertical, 8)

                    VStack(alignment: .leading, spacing: 8) {
                        HStack {
                            HStack(spacing: 4) {
                                Image(systemName: "sparkles")
                                    .foregroundColor(.blue)
                                Text("Enhanced")
                                    .font(.system(size: 14, weight: .medium))
                                    .foregroundColor(.blue)
                            }
                            Spacer()
                            Button {
                                copyToClipboard(enhancedText)
                                showEnhancedCopiedAlert = true
                            } label: {
                                HStack(spacing: 4) {
                                    Image(systemName: showEnhancedCopiedAlert ? "checkmark" : "doc.on.doc")
                                    Text(showEnhancedCopiedAlert ? "Copied!" : "Copy")
                                }
                                .foregroundColor(showEnhancedCopiedAlert ? .green : .blue)
                                .padding(.horizontal, 8)
                                .padding(.vertical, 4)
                                .background(
                                    RoundedRectangle(cornerRadius: 6)
                                        .fill(showEnhancedCopiedAlert ? Color.green.opacity(0.1) : Color.blue.opacity(0.1))
                                )
                            }
                            .buttonStyle(.plain)
                            // Animate the copy/copied state transition.
                            .animation(.easeInOut(duration: 0.2), value: showEnhancedCopiedAlert)
                        }

                        Text(enhancedText)
                            .font(.system(size: 15, weight: .regular, design: .default))
                            .lineSpacing(2)
                    }
                }

                // Audio player (if available) — only when the stored URL
                // still points at an existing file on disk.
                if isExpanded, let urlString = transcription.audioFileURL,
                   let url = URL(string: urlString),
                   FileManager.default.fileExists(atPath: url.path) {
                    Divider()
                        .padding(.vertical, 8)
                    AudioPlayerView(url: url)
                }

                // Timestamp (only when expanded)
                if isExpanded {
                    HStack {
                        Text(transcription.timestamp, style: .time)
                            .font(.system(size: 14, weight: .regular, design: .default))
                            .foregroundColor(.secondary)
                        Spacer()
                    }
                    .padding(.top, 4)
                }
            }
        }
        .padding(16)
        .background(
            RoundedRectangle(cornerRadius: 12)
                .fill(Color(.windowBackgroundColor).opacity(0.4))
        )
        .cornerRadius(12)
        .shadow(color: Color.black.opacity(0.05), radius: 3, x: 0, y: 2)
        .contextMenu {
            if let enhancedText = transcription.enhancedText {
                Button {
                    copyToClipboard(enhancedText)
                    showEnhancedCopiedAlert = true
                } label: {
                    Label("Copy Enhanced", systemImage: "doc.on.doc")
                }
            }

            Button {
                copyToClipboard(transcription.text)
                showOriginalCopiedAlert = true
            } label: {
                Label("Copy Original", systemImage: "doc.on.doc")
            }

            Button(role: .destructive) {
                onDelete()
            } label: {
                Label("Delete", systemImage: "trash")
            }
        }
        // Auto-reset the "Copied!" indicators shortly after they appear.
        .onChange(of: showOriginalCopiedAlert) { _, isShowing in
            if isShowing {
                DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
                    showOriginalCopiedAlert = false
                }
            }
        }
        .onChange(of: showEnhancedCopiedAlert) { _, isShowing in
            if isShowing {
                DispatchQueue.main.asyncAfter(deadline: .now() + 1.5) {
                    showEnhancedCopiedAlert = false
                }
            }
        }
    }

    /// Copies `text` to the clipboard via `ClipboardManager`; failure is
    /// logged but otherwise ignored (best-effort copy).
    private func copyToClipboard(_ text: String) {
        let success = ClipboardManager.copyToClipboard(text)
        if !success {
            print("Failed to copy text to clipboard")
        }
    }

    /// Formats a duration in seconds as "m:ss", truncating fractions.
    private func formatDuration(_ duration: TimeInterval) -> String {
        let minutes = Int(duration) / 60
        let seconds = Int(duration) % 60
        return String(format: "%d:%02d", minutes, seconds)
    }
}
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user