Refactor to state machine for recording states & whisperstate file
This commit is contained in:
parent
28dbe5a2f6
commit
516309cb35
@ -26,6 +26,11 @@ class HotkeyManager: ObservableObject {
|
||||
private var whisperState: WhisperState
|
||||
private var miniRecorderShortcutManager: MiniRecorderShortcutManager
|
||||
|
||||
// MARK: - Helper Properties
|
||||
private var canProcessHotkeyAction: Bool {
|
||||
whisperState.recordingState != .transcribing && whisperState.recordingState != .enhancing
|
||||
}
|
||||
|
||||
// NSEvent monitoring for modifier keys
|
||||
private var globalEventMonitor: Any?
|
||||
private var localEventMonitor: Any?
|
||||
@ -246,7 +251,7 @@ class HotkeyManager: ObservableObject {
|
||||
if isHandsFreeMode {
|
||||
isHandsFreeMode = false
|
||||
Task { @MainActor in
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
}
|
||||
return
|
||||
@ -254,7 +259,7 @@ class HotkeyManager: ObservableObject {
|
||||
|
||||
if !whisperState.isMiniRecorderVisible {
|
||||
Task { @MainActor in
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
}
|
||||
}
|
||||
@ -268,7 +273,7 @@ class HotkeyManager: ObservableObject {
|
||||
isHandsFreeMode = true
|
||||
} else {
|
||||
Task { @MainActor in
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
}
|
||||
}
|
||||
@ -291,13 +296,13 @@ class HotkeyManager: ObservableObject {
|
||||
|
||||
if isShortcutHandsFreeMode {
|
||||
isShortcutHandsFreeMode = false
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
return
|
||||
}
|
||||
|
||||
if !whisperState.isMiniRecorderVisible {
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
}
|
||||
}
|
||||
@ -314,7 +319,7 @@ class HotkeyManager: ObservableObject {
|
||||
if pressDuration < briefPressThreshold {
|
||||
isShortcutHandsFreeMode = true
|
||||
} else {
|
||||
guard !whisperState.isTranscribing && !whisperState.isProcessing else { return }
|
||||
guard canProcessHotkeyAction else { return }
|
||||
await whisperState.handleToggleMiniRecorder()
|
||||
}
|
||||
}
|
||||
|
||||
@ -10,102 +10,119 @@ struct MiniRecorderView: View {
|
||||
|
||||
@EnvironmentObject private var enhancementService: AIEnhancementService
|
||||
|
||||
private var backgroundView: some View {
|
||||
ZStack {
|
||||
Color.black.opacity(0.9)
|
||||
LinearGradient(
|
||||
colors: [
|
||||
Color.black.opacity(0.95),
|
||||
Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9)
|
||||
],
|
||||
startPoint: .top,
|
||||
endPoint: .bottom
|
||||
)
|
||||
VisualEffectView(material: .hudWindow, blendingMode: .withinWindow)
|
||||
.opacity(0.05)
|
||||
}
|
||||
.clipShape(Capsule())
|
||||
}
|
||||
|
||||
private var statusView: some View {
|
||||
Group {
|
||||
let currentState = whisperState.recordingState
|
||||
|
||||
if currentState == .enhancing {
|
||||
Text("Enhancing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if currentState == .transcribing {
|
||||
Text("Transcribing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if currentState == .recording {
|
||||
AudioVisualizer(
|
||||
audioMeter: recorder.audioMeter,
|
||||
color: .white,
|
||||
isActive: currentState == .recording
|
||||
)
|
||||
} else {
|
||||
StaticVisualizer(color: .white)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private var rightButton: some View {
|
||||
Group {
|
||||
if powerModeManager.isPowerModeEnabled {
|
||||
NotchToggleButton(
|
||||
isEnabled: powerModeManager.isPowerModeEnabled,
|
||||
icon: powerModeManager.currentActiveConfiguration.emoji,
|
||||
color: .orange,
|
||||
disabled: false
|
||||
) {
|
||||
showPowerModePopover.toggle()
|
||||
}
|
||||
.frame(width: 24)
|
||||
.padding(.trailing, 8)
|
||||
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
|
||||
PowerModePopover()
|
||||
}
|
||||
} else {
|
||||
NotchToggleButton(
|
||||
isEnabled: enhancementService.isEnhancementEnabled,
|
||||
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
|
||||
color: .blue,
|
||||
disabled: false
|
||||
) {
|
||||
if enhancementService.isEnhancementEnabled {
|
||||
showEnhancementPromptPopover.toggle()
|
||||
} else {
|
||||
enhancementService.isEnhancementEnabled = true
|
||||
}
|
||||
}
|
||||
.frame(width: 24)
|
||||
.padding(.trailing, 8)
|
||||
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
|
||||
EnhancementPromptPopover()
|
||||
.environmentObject(enhancementService)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
Group {
|
||||
if windowManager.isVisible {
|
||||
Capsule()
|
||||
.fill(.clear)
|
||||
.background(
|
||||
ZStack {
|
||||
Color.black.opacity(0.9)
|
||||
LinearGradient(
|
||||
colors: [
|
||||
Color.black.opacity(0.95),
|
||||
Color(red: 0.15, green: 0.15, blue: 0.15).opacity(0.9)
|
||||
],
|
||||
startPoint: .top,
|
||||
endPoint: .bottom
|
||||
)
|
||||
VisualEffectView(material: .hudWindow, blendingMode: .withinWindow)
|
||||
.opacity(0.05)
|
||||
}
|
||||
.clipShape(Capsule())
|
||||
)
|
||||
.background(backgroundView)
|
||||
.overlay {
|
||||
Capsule()
|
||||
.strokeBorder(Color.white.opacity(0.1), lineWidth: 0.5)
|
||||
}
|
||||
.overlay {
|
||||
HStack(spacing: 0) {
|
||||
let isRecording = whisperState.recordingState == .recording
|
||||
let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing
|
||||
|
||||
NotchRecordButton(
|
||||
isRecording: whisperState.isRecording,
|
||||
isProcessing: whisperState.isProcessing
|
||||
isRecording: isRecording,
|
||||
isProcessing: isProcessing
|
||||
) {
|
||||
Task { await whisperState.toggleRecord() }
|
||||
}
|
||||
.frame(width: 24)
|
||||
.padding(.leading, 8)
|
||||
|
||||
Group {
|
||||
if whisperState.isEnhancing {
|
||||
Text("Enhancing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if whisperState.isTranscribing {
|
||||
Text("Transcribing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if whisperState.isRecording {
|
||||
AudioVisualizer(
|
||||
audioMeter: recorder.audioMeter,
|
||||
color: .white,
|
||||
isActive: whisperState.isRecording
|
||||
)
|
||||
} else {
|
||||
StaticVisualizer(color: .white)
|
||||
}
|
||||
}
|
||||
.frame(maxWidth: .infinity)
|
||||
.padding(.horizontal, 8)
|
||||
statusView
|
||||
.frame(maxWidth: .infinity)
|
||||
.padding(.horizontal, 8)
|
||||
|
||||
if powerModeManager.isPowerModeEnabled {
|
||||
NotchToggleButton(
|
||||
isEnabled: powerModeManager.isPowerModeEnabled,
|
||||
icon: powerModeManager.currentActiveConfiguration.emoji,
|
||||
color: .orange,
|
||||
disabled: false
|
||||
) {
|
||||
showPowerModePopover.toggle()
|
||||
}
|
||||
.frame(width: 24)
|
||||
.padding(.trailing, 8)
|
||||
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
|
||||
PowerModePopover()
|
||||
}
|
||||
} else {
|
||||
NotchToggleButton(
|
||||
isEnabled: enhancementService.isEnhancementEnabled,
|
||||
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
|
||||
color: .blue,
|
||||
disabled: false
|
||||
) {
|
||||
if enhancementService.isEnhancementEnabled {
|
||||
showEnhancementPromptPopover.toggle()
|
||||
} else {
|
||||
enhancementService.isEnhancementEnabled = true
|
||||
}
|
||||
}
|
||||
.frame(width: 24)
|
||||
.padding(.trailing, 8)
|
||||
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
|
||||
EnhancementPromptPopover()
|
||||
.environmentObject(enhancementService)
|
||||
}
|
||||
}
|
||||
rightButton
|
||||
}
|
||||
.padding(.vertical, 8)
|
||||
}
|
||||
|
||||
@ -31,95 +31,119 @@ struct NotchRecorderView: View {
|
||||
return 200
|
||||
}
|
||||
|
||||
private var leftSection: some View {
|
||||
HStack(spacing: 8) {
|
||||
let isRecording = whisperState.recordingState == .recording
|
||||
let isProcessing = whisperState.recordingState == .transcribing || whisperState.recordingState == .enhancing
|
||||
|
||||
NotchRecordButton(
|
||||
isRecording: isRecording,
|
||||
isProcessing: isProcessing
|
||||
) {
|
||||
Task { await whisperState.toggleRecord() }
|
||||
}
|
||||
.frame(width: 22)
|
||||
|
||||
rightToggleButton
|
||||
|
||||
Spacer()
|
||||
}
|
||||
.frame(width: 64)
|
||||
.padding(.leading, 16)
|
||||
}
|
||||
|
||||
private var rightToggleButton: some View {
|
||||
Group {
|
||||
if powerModeManager.isPowerModeEnabled {
|
||||
NotchToggleButton(
|
||||
isEnabled: powerModeManager.isPowerModeEnabled,
|
||||
icon: powerModeManager.currentActiveConfiguration.emoji,
|
||||
color: .orange,
|
||||
disabled: false
|
||||
) {
|
||||
showPowerModePopover.toggle()
|
||||
}
|
||||
.frame(width: 22)
|
||||
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
|
||||
PowerModePopover()
|
||||
}
|
||||
} else {
|
||||
NotchToggleButton(
|
||||
isEnabled: enhancementService.isEnhancementEnabled,
|
||||
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
|
||||
color: .blue,
|
||||
disabled: false
|
||||
) {
|
||||
if enhancementService.isEnhancementEnabled {
|
||||
showEnhancementPromptPopover.toggle()
|
||||
} else {
|
||||
enhancementService.isEnhancementEnabled = true
|
||||
}
|
||||
}
|
||||
.frame(width: 22)
|
||||
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
|
||||
EnhancementPromptPopover()
|
||||
.environmentObject(enhancementService)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
private var centerSection: some View {
|
||||
Rectangle()
|
||||
.fill(Color.clear)
|
||||
.frame(width: exactNotchWidth)
|
||||
.contentShape(Rectangle())
|
||||
}
|
||||
|
||||
private var rightSection: some View {
|
||||
HStack(spacing: 0) {
|
||||
Spacer()
|
||||
statusDisplay
|
||||
}
|
||||
.frame(width: 84)
|
||||
.padding(.trailing, 16)
|
||||
}
|
||||
|
||||
private var statusDisplay: some View {
|
||||
Group {
|
||||
let currentState = whisperState.recordingState
|
||||
|
||||
if currentState == .enhancing {
|
||||
Text("Enhancing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if currentState == .transcribing {
|
||||
Text("Transcribing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if currentState == .recording {
|
||||
AudioVisualizer(
|
||||
audioMeter: recorder.audioMeter,
|
||||
color: .white,
|
||||
isActive: currentState == .recording
|
||||
)
|
||||
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
|
||||
} else {
|
||||
StaticVisualizer(color: .white)
|
||||
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
|
||||
}
|
||||
}
|
||||
.frame(width: 70)
|
||||
.padding(.trailing, 8)
|
||||
}
|
||||
|
||||
var body: some View {
|
||||
Group {
|
||||
if windowManager.isVisible {
|
||||
HStack(spacing: 0) {
|
||||
HStack(spacing: 8) {
|
||||
NotchRecordButton(
|
||||
isRecording: whisperState.isRecording,
|
||||
isProcessing: whisperState.isProcessing
|
||||
) {
|
||||
Task { await whisperState.toggleRecord() }
|
||||
}
|
||||
.frame(width: 22)
|
||||
|
||||
if powerModeManager.isPowerModeEnabled {
|
||||
NotchToggleButton(
|
||||
isEnabled: powerModeManager.isPowerModeEnabled,
|
||||
icon: powerModeManager.currentActiveConfiguration.emoji,
|
||||
color: .orange,
|
||||
disabled: false
|
||||
) {
|
||||
showPowerModePopover.toggle()
|
||||
}
|
||||
.frame(width: 22)
|
||||
.popover(isPresented: $showPowerModePopover, arrowEdge: .bottom) {
|
||||
PowerModePopover()
|
||||
}
|
||||
} else {
|
||||
NotchToggleButton(
|
||||
isEnabled: enhancementService.isEnhancementEnabled,
|
||||
icon: enhancementService.activePrompt?.icon.rawValue ?? "brain",
|
||||
color: .blue,
|
||||
disabled: false
|
||||
) {
|
||||
if enhancementService.isEnhancementEnabled {
|
||||
showEnhancementPromptPopover.toggle()
|
||||
} else {
|
||||
enhancementService.isEnhancementEnabled = true
|
||||
}
|
||||
}
|
||||
.frame(width: 22)
|
||||
.popover(isPresented: $showEnhancementPromptPopover, arrowEdge: .bottom) {
|
||||
EnhancementPromptPopover()
|
||||
.environmentObject(enhancementService)
|
||||
}
|
||||
}
|
||||
|
||||
Spacer()
|
||||
}
|
||||
.frame(width: 64)
|
||||
.padding(.leading, 16)
|
||||
|
||||
Rectangle()
|
||||
.fill(Color.clear)
|
||||
.frame(width: exactNotchWidth)
|
||||
.contentShape(Rectangle())
|
||||
|
||||
HStack(spacing: 0) {
|
||||
Spacer()
|
||||
|
||||
Group {
|
||||
if whisperState.isEnhancing {
|
||||
Text("Enhancing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if whisperState.isTranscribing {
|
||||
Text("Transcribing")
|
||||
.foregroundColor(.white)
|
||||
.font(.system(size: 10, weight: .medium, design: .default))
|
||||
.lineLimit(1)
|
||||
.minimumScaleFactor(0.5)
|
||||
} else if whisperState.isRecording {
|
||||
AudioVisualizer(
|
||||
audioMeter: recorder.audioMeter,
|
||||
color: .white,
|
||||
isActive: whisperState.isRecording
|
||||
)
|
||||
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
|
||||
} else {
|
||||
StaticVisualizer(color: .white)
|
||||
.scaleEffect(y: min(1.0, (menuBarHeight - 8) / 25), anchor: .center)
|
||||
}
|
||||
}
|
||||
.frame(width: 70)
|
||||
.padding(.trailing, 8)
|
||||
}
|
||||
.frame(width: 84)
|
||||
.padding(.trailing, 16)
|
||||
leftSection
|
||||
centerSection
|
||||
rightSection
|
||||
}
|
||||
.frame(height: menuBarHeight)
|
||||
.frame(maxWidth: windowManager.isVisible ? .infinity : 0)
|
||||
|
||||
@ -3,8 +3,51 @@ import os
|
||||
import Zip
|
||||
import SwiftUI
|
||||
|
||||
|
||||
struct WhisperModel: Identifiable {
|
||||
let id = UUID()
|
||||
let name: String
|
||||
let url: URL
|
||||
var coreMLEncoderURL: URL? // Path to the unzipped .mlmodelc directory
|
||||
var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil }
|
||||
|
||||
var downloadURL: String {
|
||||
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
|
||||
}
|
||||
|
||||
var filename: String {
|
||||
"\(name).bin"
|
||||
}
|
||||
|
||||
// Core ML related properties
|
||||
var coreMLZipDownloadURL: String? {
|
||||
// Only non-quantized models have Core ML versions
|
||||
guard !name.contains("q5") && !name.contains("q8") else { return nil }
|
||||
return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip"
|
||||
}
|
||||
|
||||
var coreMLEncoderDirectoryName: String? {
|
||||
guard coreMLZipDownloadURL != nil else { return nil }
|
||||
return "\(name)-encoder.mlmodelc"
|
||||
}
|
||||
}
|
||||
|
||||
private class TaskDelegate: NSObject, URLSessionTaskDelegate {
|
||||
private let continuation: CheckedContinuation<Void, Never>
|
||||
|
||||
init(_ continuation: CheckedContinuation<Void, Never>) {
|
||||
self.continuation = continuation
|
||||
}
|
||||
|
||||
func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
|
||||
continuation.resume()
|
||||
}
|
||||
}
|
||||
|
||||
// MARK: - Model Management Extension
|
||||
extension WhisperState {
|
||||
|
||||
|
||||
|
||||
// MARK: - Model Directory Management
|
||||
|
||||
@ -255,7 +298,7 @@ extension WhisperState {
|
||||
UserDefaults.standard.removeObject(forKey: "CurrentTranscriptionModel")
|
||||
|
||||
loadedLocalModel = nil
|
||||
canTranscribe = false
|
||||
recordingState = .idle
|
||||
UserDefaults.standard.removeObject(forKey: "CurrentModel")
|
||||
}
|
||||
} catch {
|
||||
46
VoiceInk/Whisper/WhisperState+ModelManagement.swift
Normal file
46
VoiceInk/Whisper/WhisperState+ModelManagement.swift
Normal file
@ -0,0 +1,46 @@
|
||||
import Foundation
|
||||
import SwiftUI
|
||||
|
||||
@MainActor
|
||||
extension WhisperState {
|
||||
// Loads the default transcription model from UserDefaults
|
||||
func loadCurrentTranscriptionModel() {
|
||||
if let savedModelName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"),
|
||||
let savedModel = allAvailableModels.first(where: { $0.name == savedModelName }) {
|
||||
currentTranscriptionModel = savedModel
|
||||
}
|
||||
}
|
||||
|
||||
// Function to set any transcription model as default
|
||||
func setDefaultTranscriptionModel(_ model: any TranscriptionModel) {
|
||||
self.currentTranscriptionModel = model
|
||||
UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel")
|
||||
|
||||
// For cloud models, clear the old loadedLocalModel
|
||||
if model.provider != .local {
|
||||
self.loadedLocalModel = nil
|
||||
}
|
||||
|
||||
// Enable transcription for cloud models immediately since they don't need loading
|
||||
if model.provider != .local {
|
||||
self.recordingState = .idle
|
||||
self.isModelLoaded = true
|
||||
}
|
||||
|
||||
// Post notification about the model change
|
||||
NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name])
|
||||
}
|
||||
|
||||
func refreshAllAvailableModels() {
|
||||
let currentModelId = currentTranscriptionModel?.id
|
||||
allAvailableModels = PredefinedModels.models
|
||||
|
||||
// If there was a current default model, find its new version in the refreshed list and update it.
|
||||
// This handles cases where the default model was edited.
|
||||
if let currentId = currentModelId,
|
||||
let updatedModel = allAvailableModels.first(where: { $0.id == currentId })
|
||||
{
|
||||
setDefaultTranscriptionModel(updatedModel)
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -36,7 +36,7 @@ extension WhisperState {
|
||||
|
||||
func toggleMiniRecorder() async {
|
||||
if isMiniRecorderVisible {
|
||||
if isRecording {
|
||||
if recordingState == .recording {
|
||||
await toggleRecord()
|
||||
} else {
|
||||
await cancelRecording()
|
||||
@ -59,7 +59,7 @@ extension WhisperState {
|
||||
NotificationManager.shared.dismissNotification()
|
||||
}
|
||||
|
||||
if isRecording {
|
||||
if recordingState == .recording {
|
||||
await recorder.stopRecording()
|
||||
}
|
||||
|
||||
@ -67,12 +67,7 @@ extension WhisperState {
|
||||
hideRecorderPanel()
|
||||
|
||||
await MainActor.run {
|
||||
isRecording = false
|
||||
isVisualizerActive = false
|
||||
isProcessing = false
|
||||
isEnhancing = false
|
||||
isTranscribing = false
|
||||
canTranscribe = true
|
||||
recordingState = .idle
|
||||
isMiniRecorderVisible = false
|
||||
}
|
||||
}
|
||||
|
||||
@ -6,11 +6,18 @@ import AppKit
|
||||
import KeyboardShortcuts
|
||||
import os
|
||||
|
||||
// MARK: - Recording State Machine
|
||||
enum RecordingState: Equatable {
|
||||
case idle
|
||||
case recording
|
||||
case transcribing
|
||||
case enhancing
|
||||
}
|
||||
|
||||
@MainActor
|
||||
class WhisperState: NSObject, ObservableObject {
|
||||
@Published var recordingState: RecordingState = .idle
|
||||
@Published var isModelLoaded = false
|
||||
@Published var canTranscribe = false
|
||||
@Published var isRecording = false
|
||||
@Published var loadedLocalModel: WhisperModel?
|
||||
@Published var currentTranscriptionModel: (any TranscriptionModel)?
|
||||
@Published var isModelLoading = false
|
||||
@ -18,10 +25,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
@Published var allAvailableModels: [any TranscriptionModel] = PredefinedModels.models
|
||||
@Published var clipboardMessage = ""
|
||||
@Published var miniRecorderError: String?
|
||||
@Published var isProcessing = false
|
||||
@Published var shouldCancelRecording = false
|
||||
@Published var isTranscribing = false
|
||||
@Published var isEnhancing = false
|
||||
@Published var isAutoCopyEnabled: Bool = UserDefaults.standard.object(forKey: "IsAutoCopyEnabled") as? Bool ?? true {
|
||||
didSet {
|
||||
UserDefaults.standard.set(isAutoCopyEnabled, forKey: "IsAutoCopyEnabled")
|
||||
@ -33,10 +37,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
|
||||
@Published var isVisualizerActive = false
|
||||
|
||||
|
||||
|
||||
@Published var isMiniRecorderVisible = false {
|
||||
didSet {
|
||||
if isMiniRecorderVisible {
|
||||
@ -125,27 +125,22 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
|
||||
func toggleRecord() async {
|
||||
if isRecording {
|
||||
logger.notice("🛑 Stopping recording")
|
||||
await MainActor.run {
|
||||
isRecording = false
|
||||
isVisualizerActive = false
|
||||
}
|
||||
if recordingState == .recording {
|
||||
await recorder.stopRecording()
|
||||
if let recordedFile {
|
||||
if !shouldCancelRecording {
|
||||
await transcribeAudio(recordedFile)
|
||||
} else {
|
||||
logger.info("🛑 Transcription and paste aborted in toggleRecord due to shouldCancelRecording flag.")
|
||||
await MainActor.run {
|
||||
isProcessing = false
|
||||
isTranscribing = false
|
||||
canTranscribe = true
|
||||
recordingState = .idle
|
||||
}
|
||||
await cleanupModelResources()
|
||||
}
|
||||
} else {
|
||||
logger.error("❌ No recorded file found after stopping recording")
|
||||
await MainActor.run {
|
||||
recordingState = .idle
|
||||
}
|
||||
}
|
||||
} else {
|
||||
guard currentTranscriptionModel != nil else {
|
||||
@ -158,29 +153,23 @@ class WhisperState: NSObject, ObservableObject {
|
||||
return
|
||||
}
|
||||
shouldCancelRecording = false
|
||||
logger.notice("🎙️ Starting recording sequence...")
|
||||
requestRecordPermission { [self] granted in
|
||||
if granted {
|
||||
Task {
|
||||
do {
|
||||
// --- Prepare temporary file URL within Application Support base directory ---
|
||||
let baseAppSupportDirectory = self.recordingsDirectory.deletingLastPathComponent()
|
||||
let file = baseAppSupportDirectory.appendingPathComponent("output.wav")
|
||||
// Ensure the base directory exists
|
||||
try? FileManager.default.createDirectory(at: baseAppSupportDirectory, withIntermediateDirectories: true)
|
||||
// Clean up any old temporary file first
|
||||
self.recordedFile = file
|
||||
|
||||
try await self.recorder.startRecording(toOutputFile: file)
|
||||
self.logger.notice("✅ Audio engine started successfully.")
|
||||
|
||||
// --- Prepare permanent file URL ---
|
||||
let fileName = "\(UUID().uuidString).wav"
|
||||
let permanentURL = self.recordingsDirectory.appendingPathComponent(fileName)
|
||||
self.recordedFile = permanentURL
|
||||
|
||||
try await self.recorder.startRecording(toOutputFile: permanentURL)
|
||||
|
||||
await MainActor.run {
|
||||
self.isRecording = true
|
||||
self.isVisualizerActive = true
|
||||
self.recordingState = .recording
|
||||
}
|
||||
|
||||
await ActiveWindowService.shared.applyConfigurationForCurrentApp()
|
||||
|
||||
|
||||
// Only load model if it's a local model and not already loaded
|
||||
if let model = self.currentTranscriptionModel, model.provider == .local {
|
||||
if let localWhisperModel = self.availableModels.first(where: { $0.name == model.name }),
|
||||
@ -192,23 +181,19 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
if let enhancementService = self.enhancementService,
|
||||
enhancementService.useScreenCaptureContext {
|
||||
await enhancementService.captureScreenContext()
|
||||
}
|
||||
|
||||
|
||||
} catch {
|
||||
self.logger.error("❌ Failed to start recording: \(error.localizedDescription)")
|
||||
await MainActor.run {
|
||||
self.isRecording = false
|
||||
self.isVisualizerActive = false
|
||||
}
|
||||
if let url = self.recordedFile {
|
||||
try? FileManager.default.removeItem(at: url)
|
||||
self.recordedFile = nil
|
||||
self.logger.notice("🗑️ Cleaned up temporary recording file after failed start.")
|
||||
self.recordingState = .idle
|
||||
}
|
||||
// Do not remove the file on a failed start, to preserve all recordings.
|
||||
self.recordedFile = nil
|
||||
}
|
||||
}
|
||||
} else {
|
||||
@ -219,32 +204,20 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
|
||||
private func requestRecordPermission(response: @escaping (Bool) -> Void) {
|
||||
#if os(macOS)
|
||||
response(true)
|
||||
#else
|
||||
AVAudioSession.sharedInstance().requestRecordPermission { granted in
|
||||
response(granted)
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
private func transcribeAudio(_ url: URL) async {
|
||||
if shouldCancelRecording {
|
||||
logger.info("🎤 Transcription and paste aborted at the beginning of transcribeAudio due to shouldCancelRecording flag.")
|
||||
await MainActor.run {
|
||||
isProcessing = false
|
||||
isTranscribing = false
|
||||
canTranscribe = true
|
||||
recordingState = .idle
|
||||
}
|
||||
await cleanupModelResources()
|
||||
return
|
||||
}
|
||||
|
||||
await MainActor.run {
|
||||
isProcessing = true
|
||||
isTranscribing = true
|
||||
canTranscribe = false
|
||||
recordingState = .transcribing
|
||||
}
|
||||
|
||||
defer {
|
||||
@ -257,11 +230,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
|
||||
logger.notice("🔄 Starting transcription...")
|
||||
|
||||
var permanentURL: URL?
|
||||
|
||||
do {
|
||||
permanentURL = try saveRecordingPermanently(url)
|
||||
|
||||
guard let model = currentTranscriptionModel else {
|
||||
throw WhisperStateError.transcriptionFailed
|
||||
}
|
||||
@ -280,8 +249,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
var text = try await transcriptionService.transcribe(audioURL: url, model: model)
|
||||
let transcriptionDuration = Date().timeIntervalSince(transcriptionStart)
|
||||
|
||||
await MainActor.run { self.isTranscribing = false }
|
||||
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
text = text.trimmingCharacters(in: .whitespacesAndNewlines)
|
||||
@ -307,19 +274,14 @@ class WhisperState: NSObject, ObservableObject {
|
||||
do {
|
||||
if await checkCancellationAndCleanup() { return }
|
||||
|
||||
await MainActor.run { self.isEnhancing = true }
|
||||
await MainActor.run { self.recordingState = .enhancing }
|
||||
let textForAI = promptDetectionResult?.processedText ?? text
|
||||
defer {
|
||||
Task { @MainActor in
|
||||
self.isEnhancing = false
|
||||
}
|
||||
}
|
||||
let (enhancedText, enhancementDuration) = try await enhancementService.enhance(textForAI)
|
||||
let newTranscription = Transcription(
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
enhancedText: enhancedText,
|
||||
audioFileURL: permanentURL?.absoluteString,
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
aiEnhancementModelName: enhancementService.getAIService()?.currentModel,
|
||||
transcriptionDuration: transcriptionDuration,
|
||||
@ -333,7 +295,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
enhancedText: "Enhancement failed: \(error.localizedDescription)",
|
||||
audioFileURL: permanentURL?.absoluteString,
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
transcriptionDuration: transcriptionDuration
|
||||
)
|
||||
@ -351,7 +313,7 @@ class WhisperState: NSObject, ObservableObject {
|
||||
let newTranscription = Transcription(
|
||||
text: originalText,
|
||||
duration: actualDuration,
|
||||
audioFileURL: permanentURL?.absoluteString,
|
||||
audioFileURL: url.absoluteString,
|
||||
transcriptionModelName: model.displayName,
|
||||
transcriptionDuration: transcriptionDuration
|
||||
)
|
||||
@ -379,7 +341,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
ClipboardManager.copyToClipboard(text)
|
||||
}
|
||||
}
|
||||
try? FileManager.default.removeItem(at: url)
|
||||
|
||||
if let result = promptDetectionResult,
|
||||
let enhancementService = enhancementService,
|
||||
@ -393,48 +354,34 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
|
||||
} catch {
|
||||
if let permanentURL = permanentURL {
|
||||
do {
|
||||
let audioAsset = AVURLAsset(url: permanentURL)
|
||||
let duration = CMTimeGetSeconds(try await audioAsset.load(.duration))
|
||||
do {
|
||||
let audioAsset = AVURLAsset(url: url)
|
||||
let duration = CMTimeGetSeconds(try await audioAsset.load(.duration))
|
||||
|
||||
await MainActor.run {
|
||||
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
|
||||
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
|
||||
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
|
||||
|
||||
await MainActor.run {
|
||||
let errorDescription = (error as? LocalizedError)?.errorDescription ?? error.localizedDescription
|
||||
let recoverySuggestion = (error as? LocalizedError)?.recoverySuggestion ?? ""
|
||||
let fullErrorText = recoverySuggestion.isEmpty ? errorDescription : "\(errorDescription) \(recoverySuggestion)"
|
||||
|
||||
let failedTranscription = Transcription(
|
||||
text: "Transcription Failed: \(fullErrorText)",
|
||||
duration: duration,
|
||||
enhancedText: nil,
|
||||
audioFileURL: permanentURL.absoluteString
|
||||
)
|
||||
|
||||
modelContext.insert(failedTranscription)
|
||||
try? modelContext.save()
|
||||
}
|
||||
} catch {
|
||||
// Silently continue if failed transcription record can't be saved
|
||||
let failedTranscription = Transcription(
|
||||
text: "Transcription Failed: \(fullErrorText)",
|
||||
duration: duration,
|
||||
enhancedText: nil,
|
||||
audioFileURL: url.absoluteString
|
||||
)
|
||||
|
||||
modelContext.insert(failedTranscription)
|
||||
try? modelContext.save()
|
||||
}
|
||||
} catch {
|
||||
logger.error("❌ Could not create a record for the failed transcription: \(error.localizedDescription)")
|
||||
}
|
||||
|
||||
await MainActor.run {
|
||||
if permanentURL != nil {
|
||||
NotificationManager.shared.showNotification(
|
||||
title: "Transcription Failed. Tap to retry.",
|
||||
type: .error,
|
||||
onTap: { [weak self] in
|
||||
Task {
|
||||
await self?.retryLastTranscription()
|
||||
}
|
||||
}
|
||||
)
|
||||
} else {
|
||||
NotificationManager.shared.showNotification(
|
||||
title: "Recording Failed",
|
||||
type: .error
|
||||
)
|
||||
}
|
||||
NotificationManager.shared.showNotification(
|
||||
title: "Transcription Failed",
|
||||
type: .error
|
||||
)
|
||||
}
|
||||
|
||||
await self.dismissMiniRecorder()
|
||||
@ -444,107 +391,10 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
|
||||
private func saveRecordingPermanently(_ tempURL: URL) throws -> URL {
|
||||
let fileName = "\(UUID().uuidString).wav"
|
||||
let permanentURL = recordingsDirectory.appendingPathComponent(fileName)
|
||||
try FileManager.default.copyItem(at: tempURL, to: permanentURL)
|
||||
return permanentURL
|
||||
}
|
||||
|
||||
/// Re-runs transcription for the most recent failed attempt.
///
/// Fetches the newest `Transcription` record; if its text carries the
/// "Transcription Failed" prefix and it still references an audio file,
/// the audio is re-transcribed with the current default model. On success
/// the result is pasted at the cursor (and copied when auto-copy is on);
/// on any failure a "Retry Failed" notification is shown instead.
func retryLastTranscription() async {
    do {
        // Newest record first — only the most recent transcription is eligible for retry.
        let descriptor = FetchDescriptor<Transcription>(
            sortBy: [SortDescriptor(\.timestamp, order: .reverse)]
        )
        let transcriptions = try modelContext.fetch(descriptor)

        // Bail out silently unless the latest record is a failure that
        // still has a parseable audio file URL attached.
        guard let lastTranscription = transcriptions.first,
              lastTranscription.text.hasPrefix("Transcription Failed"),
              let audioURLString = lastTranscription.audioFileURL,
              let audioURL = URL(string: audioURLString) else {
            return
        }

        // A retry needs a model to transcribe with; surface it as a failure
        // so the user gets the "Retry Failed" notification below.
        guard let model = currentTranscriptionModel else {
            throw WhisperStateError.transcriptionFailed
        }

        let transcriptionService = AudioTranscriptionService(modelContext: modelContext, whisperState: self)
        let newTranscription = try await transcriptionService.retranscribeAudio(from: audioURL, using: model)

        await MainActor.run {
            NotificationManager.shared.showNotification(
                title: "Transcription Successful",
                type: .success
            )

            // Prefer the AI-enhanced text when the service produced one.
            let textToPaste = newTranscription.enhancedText ?? newTranscription.text
            // NOTE(review): 50 ms delay before pasting — presumably to let the
            // notification/focus settle before simulating the paste; confirm.
            DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
                CursorPaster.pasteAtCursor(textToPaste + " ", shouldPreserveClipboard: !self.isAutoCopyEnabled)

                if self.isAutoCopyEnabled {
                    ClipboardManager.copyToClipboard(textToPaste)
                }
            }
        }

    } catch {
        // Any failure (fetch, missing model, re-transcription) ends here.
        await MainActor.run {
            NotificationManager.shared.showNotification(
                title: "Retry Failed",
                type: .error
            )
        }
    }
}
|
||||
|
||||
/// Restores the persisted default transcription model from `UserDefaults`.
/// Leaves `currentTranscriptionModel` untouched when nothing was saved or
/// the saved name no longer matches any available model.
private func loadCurrentTranscriptionModel() {
    guard let storedName = UserDefaults.standard.string(forKey: "CurrentTranscriptionModel"),
          let matchingModel = allAvailableModels.first(where: { $0.name == storedName }) else {
        return
    }
    currentTranscriptionModel = matchingModel
}
|
||||
|
||||
/// Sets `model` as the app-wide default transcription model and persists
/// the choice to `UserDefaults` under "CurrentTranscriptionModel".
/// Posts `.didChangeModel` so observers (UI, services) can react.
/// - Parameter model: The model to make the new default (local or cloud).
func setDefaultTranscriptionModel(_ model: any TranscriptionModel) {
    self.currentTranscriptionModel = model
    UserDefaults.standard.set(model.name, forKey: "CurrentTranscriptionModel")

    // Cloud models need no load step: drop any previously loaded local
    // model and mark transcription as immediately available.
    // (Was two back-to-back identical `provider != .local` checks — merged.)
    if model.provider != .local {
        self.loadedLocalModel = nil
        self.canTranscribe = true
        self.isModelLoaded = true
    }

    logger.info("Default transcription model set to: \(model.name) (\(model.provider.rawValue))")

    // Post notification about the model change.
    NotificationCenter.default.post(name: .didChangeModel, object: nil, userInfo: ["modelName": model.name])
}
|
||||
|
||||
/// Exposes the underlying AI enhancement service, if one is configured.
/// - Returns: The `AIEnhancementService` instance, or `nil` when absent.
func getEnhancementService() -> AIEnhancementService? {
    enhancementService
}
|
||||
|
||||
/// Reloads `allAvailableModels` from `PredefinedModels` and, when the
/// current default model still exists in the refreshed list, re-applies it
/// as the default so any edits to that model's definition take effect.
func refreshAllAvailableModels() {
    let previousDefaultID = currentTranscriptionModel?.id
    allAvailableModels = PredefinedModels.models

    // Re-resolve the default against the fresh list; this handles the
    // case where the default model was edited.
    guard let currentId = previousDefaultID,
          let refreshedModel = allAvailableModels.first(where: { $0.id == currentId }) else {
        return
    }
    setDefaultTranscriptionModel(refreshedModel)
}
|
||||
|
||||
private func checkCancellationAndCleanup() async -> Bool {
|
||||
if shouldCancelRecording {
|
||||
await cleanupAndDismiss()
|
||||
@ -561,46 +411,6 @@ class WhisperState: NSObject, ObservableObject {
|
||||
}
|
||||
}
|
||||
|
||||
/// A locally managed whisper.cpp model, addressed by its ggml name.
struct WhisperModel: Identifiable {
    let id = UUID()
    /// Model name without extension, e.g. "ggml-base".
    let name: String
    /// On-disk location of the downloaded ".bin" weights.
    let url: URL
    /// Path to the unzipped .mlmodelc directory, once downloaded.
    var coreMLEncoderURL: URL?

    /// Whether a Core ML encoder has been downloaded for this model.
    var isCoreMLDownloaded: Bool { coreMLEncoderURL != nil }

    /// File name of the ggml weights as hosted on Hugging Face.
    var filename: String {
        "\(name).bin"
    }

    /// Download URL for the ggml weights.
    var downloadURL: String {
        "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
    }

    // MARK: - Core ML encoder

    /// Download URL of the zipped Core ML encoder, or `nil` for quantized
    /// (q5/q8) models, which have no Core ML version.
    var coreMLZipDownloadURL: String? {
        if name.contains("q5") || name.contains("q8") { return nil }
        return "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(name)-encoder.mlmodelc.zip"
    }

    /// Directory name of the unzipped Core ML encoder, when one exists.
    var coreMLEncoderDirectoryName: String? {
        coreMLZipDownloadURL == nil ? nil : "\(name)-encoder.mlmodelc"
    }
}
|
||||
|
||||
/// Bridges a `URLSession` task's completion callback into Swift concurrency:
/// resumes the stored continuation once the task finishes, with or without
/// an error. The error itself is intentionally ignored — callers only await
/// completion here, they do not consume a result.
private class TaskDelegate: NSObject, URLSessionTaskDelegate {
    private let continuation: CheckedContinuation<Void, Never>

    /// - Parameter continuation: Resumed exactly once, when the task completes.
    init(_ continuation: CheckedContinuation<Void, Never>) {
        self.continuation = continuation
    }

    // NOTE(review): a checked continuation must be resumed exactly once; this
    // relies on URLSession invoking didCompleteWithError once per task.
    func urlSession(_ session: URLSession, task: URLSessionTask, didCompleteWithError error: Error?) {
        continuation.resume()
    }
}
|
||||
|
||||
extension Notification.Name {
|
||||
static let toggleMiniRecorder = Notification.Name("toggleMiniRecorder")
|
||||
static let didChangeModel = Notification.Name("didChangeModel")
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user