Refactor to a state machine for recording states & the WhisperState file

This commit is contained in:
Beingpax 2025-07-09 16:27:23 +05:45
parent 28dbe5a2f6
commit 516309cb35
7 changed files with 364 additions and 424 deletions

View File

@ -26,6 +26,11 @@ class HotkeyManager: ObservableObject {
private var whisperState: WhisperState
private var miniRecorderShortcutManager: MiniRecorderShortcutManager
// MARK: - Helper Properties
private var canProcessHotkeyAction: Bool {
whisperState.recordingState != .transcribing && whisperState.recordingState != .enhancing
}
// NSEvent monitoring for modifier keys
private var globalEventMonitor: Any?
private var localEventMonitor: Any?
@ -246,7 +251,7 @@ class HotkeyManager: ObservableObject {
if isHandsFreeMode {
isHandsFreeMode = false
Task { @MainActor in
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
}
return
@ -254,7 +259,7 @@ class HotkeyManager: ObservableObject {
if !whisperState.isMiniRecorderVisible {
Task { @MainActor in
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
}
}
@ -268,7 +273,7 @@ class HotkeyManager: ObservableObject {
isHandsFreeMode = true
} else {
Task { @MainActor in
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
}
}
@ -291,13 +296,13 @@ class HotkeyManager: ObservableObject {
if isShortcutHandsFreeMode {
isShortcutHandsFreeMode = false
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
return
}
if !whisperState.isMiniRecorderVisible {
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
}
}
@ -314,7 +319,7 @@ class HotkeyManager: ObservableObject {
if pressDuration < briefPressThreshold {
isShortcutHandsFreeMode = true
} else {
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
guard canProcessHotkeyAction else { return }
await whisperState.handleToggleMiniRecorder()
}
}

View File

@ -10,102 +10,119 @@ struct MiniRecorderView: View {
@EnvironmentObject private var enhancementService: AIEnhancementService
private var backgroundView: some View {
ZStack {
Color.black.opacity(0.9)
LinearGradient(
colors: [
Color.black.opacity(0.95),
Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9)
],
startPoint: .top,
endPoint: .bottom
)
VisualEffectView(material: .hudWindow, blendingMode: .withinWindow)
.opacity(0.05)
}
.clipShape(Capsule())
}
private var statusView: some View {
Group {
let currentState = whisperState.recordingState
if currentState == .enhancing {
Text("Enhancing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if currentState == .transcribing {
Text("Transcribing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if currentState == .recording {
AudioVisualizer(
audioMeter: recorder.audioMeter,
color: .white,
isActive: currentState == .recording
)
} else {
StaticVisualizer(color: .white)
}
}
}
private var rightButton: some View {
Group {
if powerModeManager.isPowerModeEnabled {
NotchToggleButton(
isEnabled: powerModeManager.isPowerModeEnabled,
icon: powerModeManager.currentActiveConfiguration.emoji,
color: .orange,
disabled: false
) {
showPowerModePopover.toggle()
}
.frame(width: 24)
.padding(.trailing, 8)
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
PowerModePopover()
}
} else {
NotchToggleButton(
isEnabled: enhancementService.isEnhancementEnabled,
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
color: .blue,
disabled: false
) {
if enhancementService.isEnhancementEnabled {
showEnhancementPromptPopover.toggle()
} else {
enhancementService.isEnhancementEnabled = true
}
}
.frame(width: 24)
.padding(.trailing, 8)
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
EnhancementPromptPopover()
.environmentObject(enhancementService)
}
}
}
}
var body: some View {
Group {
if windowManager.isVisible {
Capsule()
.fill(.clear)
.background(
ZStack {
Color.black.opacity(0.9)
LinearGradient(
colors: [
Color.black.opacity(0.95),
Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9)
],
startPoint: .top,
endPoint: .bottom
)
VisualEffectView(material: .hudWindow, blendingMode: .withinWindow)
.opacity(0.05)
}
.clipShape(Capsule())
)
.background(backgroundView)
.overlay {
Capsule()
.strokeBorder(Color.white.opacity(0.1), lineWidth: 0.5)
}
.overlay {
HStack(spacing: 0) {
let isRecording = whisperState.recordingState == .recording
let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing
NotchRecordButton(
isRecording: whisperState.isRecording,
isProcessing: whisperState.isProcessing
isRecording: isRecording,
isProcessing: isProcessing
) {
Task { await whisperState.toggleRecord() }
}
.frame(width: 24)
.padding(.leading, 8)
Group {
if whisperState.isEnhancing {
Text("Enhancing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if whisperState.isTranscribing {
Text("Transcribing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if whisperState.isRecording {
AudioVisualizer(
audioMeter: recorder.audioMeter,
color: .white,
isActive: whisperState.isRecording
)
} else {
StaticVisualizer(color: .white)
}
}
.frame(maxWidth: .infinity)
.padding(.horizontal, 8)
statusView
.frame(maxWidth: .infinity)
.padding(.horizontal, 8)
if powerModeManager.isPowerModeEnabled {
NotchToggleButton(
isEnabled: powerModeManager.isPowerModeEnabled,
icon: powerModeManager.currentActiveConfiguration.emoji,
color: .orange,
disabled: false
) {
showPowerModePopover.toggle()
}
.frame(width: 24)
.padding(.trailing, 8)
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
PowerModePopover()
}
} else {
NotchToggleButton(
isEnabled: enhancementService.isEnhancementEnabled,
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
color: .blue,
disabled: false
) {
if enhancementService.isEnhancementEnabled {
showEnhancementPromptPopover.toggle()
} else {
enhancementService.isEnhancementEnabled = true
}
}
.frame(width: 24)
.padding(.trailing, 8)
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
EnhancementPromptPopover()
.environmentObject(enhancementService)
}
}
rightButton
}
.padding(.vertical, 8)
}

View File

@ -31,95 +31,119 @@ struct NotchRecorderView: View {
return 200
}
private var leftSection: some View {
HStack(spacing: 8) {
let isRecording = whisperState.recordingState == .recording
let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing
NotchRecordButton(
isRecording: isRecording,
isProcessing: isProcessing
) {
Task { await whisperState.toggleRecord() }
}
.frame(width: 22)
rightToggleButton
Spacer()
}
.frame(width: 64)
.padding(.leading, 16)
}
private var rightToggleButton: some View {
Group {
if powerModeManager.isPowerModeEnabled {
NotchToggleButton(
isEnabled: powerModeManager.isPowerModeEnabled,
icon: powerModeManager.currentActiveConfiguration.emoji,
color: .orange,
disabled: false
) {
showPowerModePopover.toggle()
}
.frame(width: 22)
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
PowerModePopover()
}
} else {
NotchToggleButton(
isEnabled: enhancementService.isEnhancementEnabled,
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
color: .blue,
disabled: false
) {
if enhancementService.isEnhancementEnabled {
showEnhancementPromptPopover.toggle()
} else {
enhancementService.isEnhancementEnabled = true
}
}
.frame(width: 22)
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
EnhancementPromptPopover()
.environmentObject(enhancementService)
}
}
}
}
private var centerSection: some View {
Rectangle()
.fill(Color.clear)
.frame(width: exactNotchWidth)
.contentShape(Rectangle())
}
private var rightSection: some View {
HStack(spacing: 0) {
Spacer()
statusDisplay
}
.frame(width: 84)
.padding(.trailing, 16)
}
private var statusDisplay: some View {
Group {
let currentState = whisperState.recordingState
if currentState == .enhancing {
Text("Enhancing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if currentState == .transcribing {
Text("Transcribing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if currentState == .recording {
AudioVisualizer(
audioMeter: recorder.audioMeter,
color: .white,
isActive: currentState == .recording
)
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
} else {
StaticVisualizer(color: .white)
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
}
}
.frame(width: 70)
.padding(.trailing, 8)
}
var body: some View {
Group {
if windowManager.isVisible {
HStack(spacing: 0) {
HStack(spacing: 8) {
NotchRecordButton(
isRecording: whisperState.isRecording,
isProcessing: whisperState.isProcessing
) {
Task { await whisperState.toggleRecord() }
}
.frame(width: 22)
if powerModeManager.isPowerModeEnabled {
NotchToggleButton(
isEnabled: powerModeManager.isPowerModeEnabled,
icon: powerModeManager.currentActiveConfiguration.emoji,
color: .orange,
disabled: false
) {
showPowerModePopover.toggle()
}
.frame(width: 22)
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
PowerModePopover()
}
} else {
NotchToggleButton(
isEnabled: enhancementService.isEnhancementEnabled,
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
color: .blue,
disabled: false
) {
if enhancementService.isEnhancementEnabled {
showEnhancementPromptPopover.toggle()
} else {
enhancementService.isEnhancementEnabled = true
}
}
.frame(width: 22)
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
EnhancementPromptPopover()
.environmentObject(enhancementService)
}
}
Spacer()
}
.frame(width: 64)
.padding(.leading, 16)
Rectangle()
.fill(Color.clear)
.frame(width: exactNotchWidth)
.contentShape(Rectangle())
HStack(spacing: 0) {
Spacer()
Group {
if whisperState.isEnhancing {
Text("Enhancing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if whisperState.isTranscribing {
Text("Transcribing")
.foregroundColor(.white)
.font(.system(size: 10, weight: .medium, design: .default))
.lineLimit(1)
.minimumScaleFactor(0.5)
} else if whisperState.isRecording {
AudioVisualizer(
audioMeter: recorder.audioMeter,
color: .white,
isActive: whisperState.isRecording
)
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
} else {
StaticVisualizer(color: .white)
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
}
}
.frame(width: 70)
.padding(.trailing, 8)
}
.frame(width: 84)
.padding(.trailing, 16)
leftSection
centerSection
rightSection
}
.frame(height: menuBarHeight)
.frame(maxWidth: windowManager.isVisible ? .infinity : 0)

View File

@ -3,8 +3,51 @@ import os
import Zip
import SwiftUI
/// Metadata for a locally managed Whisper GGML model, including the optional
/// Core ML encoder that accelerates non-quantized models.
struct WhisperModel: Identifiable {
    /// Stable identity for SwiftUI lists.
    let id = UUID()
    /// Base model name, e.g. "ggml-base.en".
    let name: String
    /// Local file URL of the downloaded .bin weights.
    let url: URL
    /// Path to the unzipped .mlmodelc directory, once downloaded.
    var coreMLEncoderURL: URL?

    /// Whether the Core ML encoder has been downloaded and unpacked.
    var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil }

    /// Remote URL of the GGML weights on Hugging Face.
    var downloadURL: String {
        "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
    }

    /// On-disk file name for the GGML weights.
    var filename: String {
        name + ".bin"
    }

    // MARK: - Core ML related properties

    /// Remote URL of the zipped Core ML encoder, or nil for quantized models
    /// (only non-quantized models ship a Core ML version).
    var coreMLZipDownloadURL: String? {
        let isQuantized = name.contains("q5") || name.contains("q8")
        if isQuantized { return nil }
        return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip"
    }

    /// Directory name of the unzipped Core ML encoder, or nil when unavailable.
    var coreMLEncoderDirectoryName: String? {
        coreMLZipDownloadURL == nil ? nil : "\(name)-encoder.mlmodelc"
    }
}
// Bridges a URLSession task's completion callback into Swift concurrency by
// resuming a CheckedContinuation when the task finishes.
private class TaskDelegate: NSObject, URLSessionTaskDelegate {
    // Resumed in urlSession(_:task:didCompleteWithError:) when the task ends.
    private let continuation: CheckedContinuation<Void, Never>

    init(_ continuation: CheckedContinuation<Void, Never>) {
        self.continuation = continuation
    }

    // Called when the task completes (with or without an error). The error is
    // deliberately ignored — callers only await completion, not success.
    func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
        continuation.resume()
    }
}
// MARK: - Model Management Extension
extension WhisperState {
// MARK: - Model Directory Management
@ -255,7 +298,7 @@ extension WhisperState {
UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
loadedLocalModel = nil
canTranscribe = false
recordingState = .idle
UserDefaults.standard.removeObject(forKey: "CurrentModel")
}
} catch {

View File

@ -0,0 +1,46 @@
import Foundation
import SwiftUI
@MainActor
extension WhisperState {
    /// Restores the persisted default transcription model from UserDefaults,
    /// leaving `currentTranscriptionModel` untouched when nothing was saved or
    /// the saved name no longer matches an available model.
    func loadCurrentTranscriptionModel() {
        guard let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"),
              let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) else {
            return
        }
        currentTranscriptionModel = savedModel
    }

    /// Makes `model` the default transcription model and persists the choice.
    func setDefaultTranscriptionModel(_ model: any TranscriptionModel) {
        currentTranscriptionModel = model
        UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel")
        if model.provider != .local {
            // Cloud models need no local loading step: drop any stale local
            // model and mark transcription as ready immediately.
            loadedLocalModel = nil
            recordingState = .idle
            isModelLoaded = true
        }
        // Let observers react to the model change.
        NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name])
    }

    /// Reloads the predefined model list and re-applies the current default so
    /// that edits to the default model's definition take effect.
    func refreshAllAvailableModels() {
        let previousDefaultID = currentTranscriptionModel?.id
        allAvailableModels = PredefinedModels.models
        guard let previousDefaultID,
              let updatedModel = allAvailableModels.first(where: { $0.id == previousDefaultID }) else {
            return
        }
        setDefaultTranscriptionModel(updatedModel)
    }
}

View File

@ -36,7 +36,7 @@ extension WhisperState {
func toggleMiniRecorder() async {
if isMiniRecorderVisible {
if isRecording {
if recordingState == .recording {
await toggleRecord()
} else {
await cancelRecording()
@ -59,7 +59,7 @@ extension WhisperState {
NotificationManager.shared.dismissNotification()
}
if isRecording {
if recordingState == .recording {
await recorder.stopRecording()
}
@ -67,12 +67,7 @@ extension WhisperState {
hideRecorderPanel()
await MainActor.run {
isRecording = false
isVisualizerActive = false
isProcessing = false
isEnhancing = false
isTranscribing = false
canTranscribe = true
recordingState = .idle
isMiniRecorderVisible = false
}
}

View File

@ -6,11 +6,18 @@ import AppKit
import KeyboardShortcuts
import os
// MARK: - Recording State Machine
/// Single source of truth for the recorder's lifecycle, replacing the
/// previous collection of independent boolean flags.
enum RecordingState: Equatable {
    /// No recording or processing in progress.
    case idle
    /// Audio is actively being captured.
    case recording
    /// Captured audio is being converted to text.
    case transcribing
    /// Transcribed text is being post-processed by AI enhancement.
    case enhancing
}
@MainActor
class WhisperState: NSObject, ObservableObject {
@Published var recordingState: RecordingState = .idle
@Published var isModelLoaded = false
@Published var canTranscribe = false
@Published var isRecording = false
@Published var loadedLocalModel: WhisperModel?
@Published var currentTranscriptionModel: (any TranscriptionModel)?
@Published var isModelLoading = false
@ -18,10 +25,7 @@ class WhisperState: NSObject, ObservableObject {
@Published var allAvailableModels: [any TranscriptionModel] = PredefinedModels.models
@Published var clipboardMessage = ""
@Published var miniRecorderError: String?
@Published var isProcessing = false
@Published var shouldCancelRecording = false
@Published var isTranscribing = false
@Published var isEnhancing = false
@Published var isAutoCopyEnabled: Bool = UserDefaults.standard.object(forKey: "IsAutoCopyEnabled") as? Bool ?? true {
didSet {
UserDefaults.standard.set(isAutoCopyEnabled, forKey: "IsAutoCopyEnabled")
@ -33,10 +37,6 @@ class WhisperState: NSObject, ObservableObject {
}
}
@Published var isVisualizerActive = false
@Published var isMiniRecorderVisible = false {
didSet {
if isMiniRecorderVisible {
@ -125,27 +125,22 @@ class WhisperState: NSObject, ObservableObject {
}
func toggleRecord() async {
if isRecording {
logger.notice("🛑 Stopping recording")
await MainActor.run {
isRecording = false
isVisualizerActive = false
}
if recordingState == .recording {
await recorder.stopRecording()
if let recordedFile {
if !shouldCancelRecording {
await transcribeAudio(recordedFile)
} else {
logger.info("🛑 Transcription and paste aborted in toggleRecord due to shouldCancelRecording flag.")
await MainActor.run {
isProcessing = false
isTranscribing = false
canTranscribe = true
recordingState = .idle
}
await cleanupModelResources()
}
} else {
logger.error("❌ No recorded file found after stopping recording")
await MainActor.run {
recordingState = .idle
}
}
} else {
guard currentTranscriptionModel != nil else {
@ -158,29 +153,23 @@ class WhisperState: NSObject, ObservableObject {
return
}
shouldCancelRecording = false
logger.notice("🎙️ Starting recording sequence...")
requestRecordPermission { [self] granted in
if granted {
Task {
do {
// --- Prepare temporary file URL within Application Support base directory ---
let baseAppSupportDirectory = self.recordingsDirectory.deletingLastPathComponent()
let file = baseAppSupportDirectory.appendingPathComponent("output.wav")
// Ensure the base directory exists
try? FileManager.default.createDirectory(at: baseAppSupportDirectory, withIntermediateDirectories: true)
// Clean up any old temporary file first
self.recordedFile = file
try await self.recorder.startRecording(toOutputFile: file)
self.logger.notice("✅ Audio engine started successfully.")
// --- Prepare permanent file URL ---
let fileName = "\(UUID().uuidString).wav"
let permanentURL = self.recordingsDirectory.appendingPathComponent(fileName)
self.recordedFile = permanentURL
try await self.recorder.startRecording(toOutputFile: permanentURL)
await MainActor.run {
self.isRecording = true
self.isVisualizerActive = true
self.recordingState = .recording
}
await ActiveWindowService.shared.applyConfigurationForCurrentApp()
// Only load model if it's a local model and not already loaded
if let model = self.currentTranscriptionModel, model.provider == .local {
if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }),
@ -192,23 +181,19 @@ class WhisperState: NSObject, ObservableObject {
}
}
}
if let enhancementService = self.enhancementService,
enhancementService.useScreenCaptureContext {
await enhancementService.captureScreenContext()
}
} catch {
self.logger.error("❌ Failed to start recording: \(error.localizedDescription)")
await MainActor.run {
self.isRecording = false
self.isVisualizerActive = false
}
if let url = self.recordedFile {
try? FileManager.default.removeItem(at: url)
self.recordedFile = nil
self.logger.notice("🗑️ Cleaned up temporary recording file after failed start.")
self.recordingState = .idle
}
// Do not remove the file on a failed start, to preserve all recordings.
self.recordedFile = nil
}
}
} else {
@ -219,32 +204,20 @@ class WhisperState: NSObject, ObservableObject {
}
private func requestRecordPermission(response: @escaping (Bool) -> Void) {
#if os(macOS)
response(true)
#else
AVAudioSession.sharedInstance().requestRecordPermission { granted in
response(granted)
}
#endif
}
private func transcribeAudio(_ url: URL) async {
if shouldCancelRecording {
logger.info("🎤 Transcription and paste aborted at the beginning of transcribeAudio due to shouldCancelRecording flag.")
await MainActor.run {
isProcessing = false
isTranscribing = false
canTranscribe = true
recordingState = .idle
}
await cleanupModelResources()
return
}
await MainActor.run {
isProcessing = true
isTranscribing = true
canTranscribe = false
recordingState = .transcribing
}
defer {
@ -257,11 +230,7 @@ class WhisperState: NSObject, ObservableObject {
logger.notice("🔄 Starting transcription...")
var permanentURL: URL?
do {
permanentURL = try saveRecordingPermanently(url)
guard let model = currentTranscriptionModel else {
throw WhisperStateError.transcriptionFailed
}
@ -280,8 +249,6 @@ class WhisperState: NSObject, ObservableObject {
var text = try await transcriptionService.transcribe(audioURL: url, model: model)
let transcriptionDuration = Date().timeIntervalSince(transcriptionStart)
await MainActor.run { self.isTranscribing = false }
if await checkCancellationAndCleanup() { return }
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
@ -307,19 +274,14 @@ class WhisperState: NSObject, ObservableObject {
do {
if await checkCancellationAndCleanup() { return }
await MainActor.run { self.isEnhancing = true }
await MainActor.run { self.recordingState = .enhancing }
let textForAI = promptDetectionResult?.processedText ?? text
defer {
Task { @MainActor in
self.isEnhancing = false
}
}
let (enhancedText, enhancementDuration) = try await enhancementService.enhance(textForAI)
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
enhancedText: enhancedText,
audioFileURL: permanentURL?.absoluteString,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
transcriptionDuration: transcriptionDuration,
@ -333,7 +295,7 @@ class WhisperState: NSObject, ObservableObject {
text: originalText,
duration: actualDuration,
enhancedText: "Enhancement failed: \(error.localizedDescription)",
audioFileURL: permanentURL?.absoluteString,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
transcriptionDuration: transcriptionDuration
)
@ -351,7 +313,7 @@ class WhisperState: NSObject, ObservableObject {
let newTranscription = Transcription(
text: originalText,
duration: actualDuration,
audioFileURL: permanentURL?.absoluteString,
audioFileURL: url.absoluteString,
transcriptionModelName: model.displayName,
transcriptionDuration: transcriptionDuration
)
@ -379,7 +341,6 @@ class WhisperState: NSObject, ObservableObject {
ClipboardManager.copyToClipboard(text)
}
}
try? FileManager.default.removeItem(at: url)
if let result = promptDetectionResult,
let enhancementService = enhancementService,
@ -393,48 +354,34 @@ class WhisperState: NSObject, ObservableObject {
}
} catch {
if let permanentURL = permanentURL {
do {
let audioAsset = AVURLAsset(url: permanentURL)
let duration = CMTimeGetSeconds(try await audioAsset.load(.duration))
do {
let audioAsset = AVURLAsset(url: url)
let duration = CMTimeGetSeconds(try await audioAsset.load(.duration))
await MainActor.run {
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
await MainActor.run {
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
let failedTranscription = Transcription(
text: "Transcription Failed: \(fullErrorText)",
duration: duration,
enhancedText: nil,
audioFileURL: permanentURL.absoluteString
)
modelContext.insert(failedTranscription)
try? modelContext.save()
}
} catch {
// Silently continue if failed transcription record can't be saved
let failedTranscription = Transcription(
text: "Transcription Failed: \(fullErrorText)",
duration: duration,
enhancedText: nil,
audioFileURL: url.absoluteString
)
modelContext.insert(failedTranscription)
try? modelContext.save()
}
} catch {
logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)")
}
await MainActor.run {
if permanentURL != nil {
NotificationManager.shared.showNotification(
title: "Transcription Failed. Tap to retry.",
type: .error,
onTap: { [weak self] in
Task {
await self?.retryLastTranscription()
}
}
)
} else {
NotificationManager.shared.showNotification(
title: "Recording Failed",
type: .error
)
}
NotificationManager.shared.showNotification(
title: "Transcription Failed",
type: .error
)
}
await self.dismissMiniRecorder()
@ -444,107 +391,10 @@ class WhisperState: NSObject, ObservableObject {
}
}
/// Copies a temporary recording into the permanent recordings directory under
/// a fresh UUID-based file name and returns the new location.
/// - Throws: Any FileManager error raised while copying the file.
private func saveRecordingPermanently(_ tempURL: URL) throws -> URL {
    let destination = recordingsDirectory.appendingPathComponent(UUID().uuidString + ".wav")
    try FileManager.default.copyItem(at: tempURL, to: destination)
    return destination
}
/// Retries the most recent failed transcription with the current default model.
///
/// Fetches the newest `Transcription` record; if it is a failure record that
/// still has its audio file, re-transcribes the audio, shows a success
/// notification, and pastes (and optionally copies) the resulting text.
/// Any error along the way surfaces as a single "Retry Failed" notification.
func retryLastTranscription() async {
    do {
        // Newest transcription first.
        let descriptor = FetchDescriptor<Transcription>(
            sortBy: [SortDescriptor(\.timestamp, order: .reverse)]
        )
        let transcriptions = try modelContext.fetch(descriptor)
        // Only retry when the latest record is a failure and its audio URL survives.
        guard let lastTranscription = transcriptions.first,
              lastTranscription.text.hasPrefix("Transcription Failed"),
              let audioURLString = lastTranscription.audioFileURL,
              let audioURL = URL(string: audioURLString) else {
            return
        }
        guard let model = currentTranscriptionModel else {
            throw WhisperStateError.transcriptionFailed
        }
        let transcriptionService = AudioTranscriptionService(modelContext: modelContext, whisperState: self)
        let newTranscription = try await transcriptionService.retranscribeAudio(from: audioURL, using: model)
        await MainActor.run {
            NotificationManager.shared.showNotification(
                title: "Transcription Successful",
                type: .success
            )
            // Prefer the AI-enhanced text when available.
            let textToPaste = newTranscription.enhancedText ?? newTranscription.text
            // Short delay before pasting — presumably to let focus settle after
            // the notification; TODO confirm the 0.05s value is still needed.
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
                CursorPaster.pasteAtCursor(textToPaste + " ", shouldPreserveClipboard: !self.isAutoCopyEnabled)
                if self.isAutoCopyEnabled {
                    ClipboardManager.copyToClipboard(textToPaste)
                }
            }
        }
    } catch {
        await MainActor.run {
            NotificationManager.shared.showNotification(
                title: "Retry Failed",
                type: .error
            )
        }
    }
}
// Loads the default transcription model from UserDefaults.
// Leaves `currentTranscriptionModel` untouched when no name was persisted or
// the saved name no longer matches an available model.
private func loadCurrentTranscriptionModel() {
    if let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"),
       let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) {
        currentTranscriptionModel = savedModel
    }
}
// Sets any transcription model as the default, persisting the choice to
// UserDefaults and posting `.didChangeModel` so observers can react.
func setDefaultTranscriptionModel(_ model: any TranscriptionModel) {
    self.currentTranscriptionModel = model
    UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel")
    // For cloud models, clear the old loadedLocalModel
    if model.provider != .local {
        self.loadedLocalModel = nil
    }
    // Enable transcription for cloud models immediately since they don't need loading
    if model.provider != .local {
        self.canTranscribe = true
        self.isModelLoaded = true
    }
    logger.info("Default transcription model set to: \(model.name) (\(model.provider.rawValue))")
    // Post notification about the model change
    NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name])
}
/// Exposes the AI enhancement service backing this state object, if configured.
func getEnhancementService() -> AIEnhancementService? {
    enhancementService
}
// Replaces `allAvailableModels` with the current predefined list and, if a
// default model was selected, re-applies its refreshed definition.
func refreshAllAvailableModels() {
    let currentModelId = currentTranscriptionModel?.id
    allAvailableModels = PredefinedModels.models
    // If there was a current default model, find its new version in the refreshed list and update it.
    // This handles cases where the default model was edited.
    if let currentId = currentModelId,
       let updatedModel = allAvailableModels.first(where: { $0.id == currentId })
    {
        setDefaultTranscriptionModel(updatedModel)
    }
}
private func checkCancellationAndCleanup() async -> Bool {
if shouldCancelRecording {
await cleanupAndDismiss()
@ -561,46 +411,6 @@ class WhisperState: NSObject, ObservableObject {
}
}
/// Metadata describing a local Whisper GGML model and its optional Core ML encoder.
struct WhisperModel: Identifiable {
    // Stable identity for SwiftUI lists.
    let id = UUID()
    // Base model name, e.g. "ggml-base.en".
    let name: String
    // Local file URL of the downloaded model weights.
    let url: URL
    var coreMLEncoderURL: URL? // Path to the unzipped .mlmodelc directory
    // True once the Core ML encoder has been unpacked locally.
    var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil }
    // Remote location of the GGML weights on Hugging Face.
    var downloadURL: String {
        "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
    }
    // On-disk file name of the weights.
    var filename: String {
        "\(name).bin"
    }
    // Core ML related properties
    // Remote URL of the zipped Core ML encoder; nil for quantized models.
    var coreMLZipDownloadURL: String? {
        // Only non-quantized models have Core ML versions
        guard !name.contains("q5") && !name.contains("q8") else { return nil }
        return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip"
    }
    // Directory name of the unzipped Core ML encoder; nil when no Core ML version exists.
    var coreMLEncoderDirectoryName: String? {
        guard coreMLZipDownloadURL != nil else { return nil }
        return "\(name)-encoder.mlmodelc"
    }
}
// Bridges a URLSession task's completion callback into Swift concurrency by
// resuming a CheckedContinuation when the task finishes.
private class TaskDelegate: NSObject, URLSessionTaskDelegate {
    // Resumed in urlSession(_:task:didCompleteWithError:) when the task ends.
    private let continuation: CheckedContinuation<Void, Never>

    init(_ continuation: CheckedContinuation<Void, Never>) {
        self.continuation = continuation
    }

    // Called when the task completes (with or without an error). The error is
    // deliberately ignored — callers only await completion, not success.
    func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
        continuation.resume()
    }
}
extension Notification.Name {
static let toggleMiniRecorder = Notification.Name("toggleMiniRecorder")
static let didChangeModel = Notification.Name("didChangeModel")