From 2d6cf1795af653957fc990db6949b92659d67bf4 Mon Sep 17 00:00:00 2001 From: Alexey Haidamaka Date: Tue, 2 Sep 2025 13:52:16 +0200 Subject: [PATCH 01/21] add prompt reordering for enhancement prompts --- VoiceInk/Views/EnhancementSettingsView.swift | 184 +++++++++++++++++-- 1 file changed, 165 insertions(+), 19 deletions(-) diff --git a/VoiceInk/Views/EnhancementSettingsView.swift b/VoiceInk/Views/EnhancementSettingsView.swift index 714d677..4026b5e 100644 --- a/VoiceInk/Views/EnhancementSettingsView.swift +++ b/VoiceInk/Views/EnhancementSettingsView.swift @@ -1,4 +1,5 @@ import SwiftUI +import UniformTypeIdentifiers struct EnhancementSettingsView: View { @EnvironmentObject private var enhancementService: AIEnhancementService @@ -79,25 +80,22 @@ struct EnhancementSettingsView: View { Text("Enhancement Prompt") .font(.headline) - // Prompts Section - VStack(alignment: .leading, spacing: 12) { - PromptSelectionGrid( - prompts: enhancementService.allPrompts, - selectedPromptId: enhancementService.selectedPromptId, - onPromptSelected: { prompt in - enhancementService.setActivePrompt(prompt) - }, - onEditPrompt: { prompt in - selectedPromptForEdit = prompt - }, - onDeletePrompt: { prompt in - enhancementService.deletePrompt(prompt) - }, - onAddNewPrompt: { - isEditingPrompt = true - } - ) - } + // Reorderable prompts grid with drag-and-drop + ReorderablePromptGrid( + selectedPromptId: enhancementService.selectedPromptId, + onPromptSelected: { prompt in + enhancementService.setActivePrompt(prompt) + }, + onEditPrompt: { prompt in + selectedPromptForEdit = prompt + }, + onDeletePrompt: { prompt in + enhancementService.deletePrompt(prompt) + }, + onAddNewPrompt: { + isEditingPrompt = true + } + ) } .padding() .background(CardBackground(isSelected: false)) @@ -115,3 +113,151 @@ struct EnhancementSettingsView: View { } } } + +// MARK: - Drag & Drop Reorderable Grid +private struct ReorderablePromptGrid: View { + @EnvironmentObject private var enhancementService: 
AIEnhancementService + + let selectedPromptId: UUID? + let onPromptSelected: (CustomPrompt) -> Void + let onEditPrompt: ((CustomPrompt) -> Void)? + let onDeletePrompt: ((CustomPrompt) -> Void)? + let onAddNewPrompt: (() -> Void)? + + @State private var draggingItem: CustomPrompt? + + var body: some View { + VStack(alignment: .leading, spacing: 12) { + if enhancementService.customPrompts.isEmpty { + Text("No prompts available") + .foregroundColor(.secondary) + .font(.caption) + } else { + let columns = [ + GridItem(.adaptive(minimum: 80, maximum: 100), spacing: 36) + ] + + LazyVGrid(columns: columns, spacing: 16) { + ForEach(enhancementService.customPrompts) { prompt in + prompt.promptIcon( + isSelected: selectedPromptId == prompt.id, + onTap: { + withAnimation(.spring(response: 0.3, dampingFraction: 0.7)) { + onPromptSelected(prompt) + } + }, + onEdit: onEditPrompt, + onDelete: onDeletePrompt + ) + .opacity(draggingItem?.id == prompt.id ? 0.3 : 1.0) + .scaleEffect(draggingItem?.id == prompt.id ? 1.05 : 1.0) + .overlay( + RoundedRectangle(cornerRadius: 14) + .stroke( + draggingItem != nil && draggingItem?.id != prompt.id + ? 
Color.accentColor.opacity(0.25) + : Color.clear, + lineWidth: 1 + ) + ) + .animation(.easeInOut(duration: 0.15), value: draggingItem?.id == prompt.id) + .onDrag { + draggingItem = prompt + return NSItemProvider(object: prompt.id.uuidString as NSString) + } + .onDrop( + of: [UTType.text], + delegate: PromptDropDelegate( + item: prompt, + prompts: $enhancementService.customPrompts, + draggingItem: $draggingItem + ) + ) + } + + if let onAddNewPrompt = onAddNewPrompt { + CustomPrompt.addNewButton { + onAddNewPrompt() + } + .help("Add new prompt") + .onDrop( + of: [UTType.text], + delegate: PromptEndDropDelegate( + prompts: $enhancementService.customPrompts, + draggingItem: $draggingItem + ) + ) + } + } + .padding(.vertical, 12) + .padding(.horizontal, 16) + + HStack { + Image(systemName: "info.circle") + .font(.caption) + .foregroundColor(.secondary) + + Text("Double-click to edit โ€ข Right-click for more options") + .font(.caption) + .foregroundColor(.secondary) + } + .padding(.top, 8) + .padding(.horizontal, 16) + } + } + } +} + +// MARK: - Drop Delegates +private struct PromptDropDelegate: DropDelegate { + let item: CustomPrompt + @Binding var prompts: [CustomPrompt] + @Binding var draggingItem: CustomPrompt? + + func dropEntered(info: DropInfo) { + guard let draggingItem = draggingItem, draggingItem != item else { return } + guard let fromIndex = prompts.firstIndex(of: draggingItem), + let toIndex = prompts.firstIndex(of: item) else { return } + + // Move item as you hover for immediate visual update + if prompts[toIndex].id != draggingItem.id { + withAnimation(.easeInOut(duration: 0.12)) { + let from = fromIndex + let to = toIndex + prompts.move(fromOffsets: IndexSet(integer: from), toOffset: to > from ? to + 1 : to) + } + } + } + + func dropUpdated(info: DropInfo) -> DropProposal? 
{ + DropProposal(operation: .move) + } + + func performDrop(info: DropInfo) -> Bool { + draggingItem = nil + return true + } +} + +private struct PromptEndDropDelegate: DropDelegate { + @Binding var prompts: [CustomPrompt] + @Binding var draggingItem: CustomPrompt? + + func validateDrop(info: DropInfo) -> Bool { true } + func dropUpdated(info: DropInfo) -> DropProposal? { DropProposal(operation: .move) } + + func performDrop(info: DropInfo) -> Bool { + guard let draggingItem = draggingItem, + let currentIndex = prompts.firstIndex(of: draggingItem) else { + self.draggingItem = nil + return false + } + + // Move to end if dropped on the trailing "Add New" tile + withAnimation(.easeInOut(duration: 0.12)) { + prompts.move(fromOffsets: IndexSet(integer: currentIndex), toOffset: prompts.endIndex) + } + self.draggingItem = nil + return true + } +} From 33f26f82f594a51ad3e8780590d4851718471844 Mon Sep 17 00:00:00 2001 From: Alexey Haidamaka Date: Tue, 2 Sep 2025 14:15:51 +0200 Subject: [PATCH 02/21] add auto send display in powermode view --- .../PowerMode/PowerModeViewComponents.swift | 22 ++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/VoiceInk/PowerMode/PowerModeViewComponents.swift b/VoiceInk/PowerMode/PowerModeViewComponents.swift index 98222dc..3559734 100644 --- a/VoiceInk/PowerMode/PowerModeViewComponents.swift +++ b/VoiceInk/PowerMode/PowerModeViewComponents.swift @@ -203,7 +203,7 @@ struct ConfigurationRow: View { .padding(.vertical, 12) .padding(.horizontal, 14) - if selectedModel != nil || selectedLanguage != nil || config.isAIEnhancementEnabled { + if selectedModel != nil || selectedLanguage != nil || config.isAIEnhancementEnabled || config.isAutoSendEnabled { Divider() .padding(.horizontal, 16) @@ -259,6 +259,22 @@ struct ConfigurationRow: View { ) } + if config.isAutoSendEnabled { + HStack(spacing: 4) { + Image(systemName: "keyboard") + .font(.system(size: 10)) + Text("Auto Send") + .font(.caption) + } + 
.padding(.horizontal, 8) + .padding(.vertical, 4) + .background(Capsule() + .fill(Color(NSColor.controlBackgroundColor))) + .overlay( + Capsule() + .stroke(Color(NSColor.separatorColor), lineWidth: 0.5) + ) + } if config.isAIEnhancementEnabled { if config.useScreenCapture { HStack(spacing: 4) { @@ -289,7 +305,7 @@ struct ConfigurationRow: View { .fill(Color.accentColor.opacity(0.1))) .foregroundColor(.accentColor) } - + Spacer() } .padding(.vertical, 10) @@ -376,4 +392,4 @@ struct AppGridItem: View { } .buttonStyle(.plain) } -} +} From 0ca1d78041c5a42bb9f7ac5401e603f881cb6708 Mon Sep 17 00:00:00 2001 From: Alexey Haidamaka Date: Tue, 2 Sep 2025 14:45:51 +0200 Subject: [PATCH 03/21] implement default power mode quick change --- VoiceInk/PowerMode/PowerModeConfigView.swift | 22 +++++++++----------- 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/VoiceInk/PowerMode/PowerModeConfigView.swift b/VoiceInk/PowerMode/PowerModeConfigView.swift index a44566d..60fcb12 100644 --- a/VoiceInk/PowerMode/PowerModeConfigView.swift +++ b/VoiceInk/PowerMode/PowerModeConfigView.swift @@ -182,18 +182,16 @@ struct ConfigurationView: View { } // Default Power Mode Toggle - if !powerModeManager.hasDefaultConfiguration() || isCurrentConfigDefault { - HStack { - Toggle("Set as default power mode", isOn: $isDefault) - .font(.system(size: 14)) - - InfoTip( - title: "Default Power Mode", - message: "Default power mode is used when no specific app or website matches are found" - ) - - Spacer() - } + HStack { + Toggle("Set as default power mode", isOn: $isDefault) + .font(.system(size: 14)) + + InfoTip( + title: "Default Power Mode", + message: "Default power mode is used when no specific app or website matches are found" + ) + + Spacer() } } .padding(.horizontal, 20) From f74bbd7f5ae9d0338480e408c43f006a8447c798 Mon Sep 17 00:00:00 2001 From: Alexey Haidamaka Date: Wed, 3 Sep 2025 16:31:46 +0200 Subject: [PATCH 04/21] add open with handling --- VoiceInk/AppDelegate.swift | 27 
+++++++++ VoiceInk/Info.plist | 59 +++++++++++++++++++ VoiceInk/Notifications/AppNotifications.swift | 1 + VoiceInk/Views/AudioTranscribeView.swift | 8 ++- VoiceInk/Views/ContentView.swift | 36 +++++++++++ VoiceInk/VoiceInk.swift | 9 +++ 6 files changed, 139 insertions(+), 1 deletion(-) diff --git a/VoiceInk/AppDelegate.swift b/VoiceInk/AppDelegate.swift index bf27f48..4bc530b 100644 --- a/VoiceInk/AppDelegate.swift +++ b/VoiceInk/AppDelegate.swift @@ -49,4 +49,31 @@ class AppDelegate: NSObject, NSApplicationDelegate { defaults.removeObject(forKey: "defaultPowerModeConfigV2") defaults.removeObject(forKey: "isPowerModeEnabled") } + + // Keep in sync with AudioTranscribeView.supportedExtensions + private let supportedExtensions = ["wav", "mp3", "m4a", "aiff", "mp4", "mov", "aac", "flac", "caf"] + + // Stash URL when app cold-starts to avoid spawning a new window/tab + var pendingOpenFileURL: URL? + + func application(_ application: NSApplication, open urls: [URL]) { + guard let url = urls.first(where: { supportedExtensions.contains($0.pathExtension.lowercased()) }) else { + return + } + + NSApp.activate(ignoringOtherApps: true) + + if NSApp.windows.isEmpty { + // Cold start: do NOT create a window here to avoid extra window/tab. + // Defer to SwiftUIโ€™s WindowGroup-created ContentView and let it process this later. 
+ pendingOpenFileURL = url + } else { + // Running: focus current window and route in-place to Transcribe Audio + NSApp.windows.first?.makeKeyAndOrderFront(nil) + NotificationCenter.default.post(name: .navigateToDestination, object: nil, userInfo: ["destination": "Transcribe Audio"]) + DispatchQueue.main.async { + NotificationCenter.default.post(name: .openFileForTranscription, object: nil, userInfo: ["url": url]) + } + } + } } diff --git a/VoiceInk/Info.plist b/VoiceInk/Info.plist index c29b98f..7584b1c 100644 --- a/VoiceInk/Info.plist +++ b/VoiceInk/Info.plist @@ -18,5 +18,64 @@ VoiceInk needs to interact with your browser to detect the current website for applying website-specific configurations. NSScreenCaptureUsageDescription VoiceInk needs screen recording access to understand context from your screen for improved transcription accuracy. + CFBundleDocumentTypes + + + CFBundleTypeName + Audio/Video File + CFBundleTypeRole + Viewer + LSHandlerRank + Alternate + LSItemContentTypes + + public.audio + public.movie + + CFBundleTypeExtensions + + wav + mp3 + m4a + aiff + mp4 + mov + aac + flac + caf + + + + + + +CFBundleDocumentTypes + + + CFBundleTypeName + Audio/Video File + CFBundleTypeRole + Viewer + LSHandlerRank + Alternate + LSItemContentTypes + + public.audio + public.movie + + CFBundleTypeExtensions + + wav + mp3 + m4a + aiff + mp4 + mov + aac + flac + caf + + + diff --git a/VoiceInk/Notifications/AppNotifications.swift b/VoiceInk/Notifications/AppNotifications.swift index 0c1ae30..c23e055 100644 --- a/VoiceInk/Notifications/AppNotifications.swift +++ b/VoiceInk/Notifications/AppNotifications.swift @@ -14,4 +14,5 @@ extension Notification.Name { static let powerModeConfigurationApplied = Notification.Name("powerModeConfigurationApplied") static let transcriptionCreated = Notification.Name("transcriptionCreated") static let enhancementToggleChanged = Notification.Name("enhancementToggleChanged") + static let openFileForTranscription = 
Notification.Name("openFileForTranscription") } diff --git a/VoiceInk/Views/AudioTranscribeView.swift b/VoiceInk/Views/AudioTranscribeView.swift index 9c4ef82..d73c8b8 100644 --- a/VoiceInk/Views/AudioTranscribeView.swift +++ b/VoiceInk/Views/AudioTranscribeView.swift @@ -112,6 +112,12 @@ struct AudioTranscribeView: View { Text(errorMessage) } } + .onReceive(NotificationCenter.default.publisher(for: .openFileForTranscription)) { notification in + if let url = notification.userInfo?["url"] as? URL { + // Do not auto-start; only select file for manual transcription + validateAndSetAudioFile(url) + } + } } private var dropZoneView: some View { @@ -381,4 +387,4 @@ struct AudioTranscribeView: View { let seconds = Int(duration) % 60 return String(format: "%d:%02d", minutes, seconds) } -} +} diff --git a/VoiceInk/Views/ContentView.swift b/VoiceInk/Views/ContentView.swift index 3e91955..a610b49 100644 --- a/VoiceInk/Views/ContentView.swift +++ b/VoiceInk/Views/ContentView.swift @@ -164,6 +164,8 @@ struct ContentView: View { @State private var hasLoadedData = false let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String ?? "1.0.0" @StateObject private var licenseViewModel = LicenseViewModel() + // Capture the hosting window to update tab/window title dynamically + @State private var hostingWindow: NSWindow? private var isSetupComplete: Bool { hasLoadedData && @@ -189,9 +191,17 @@ struct ContentView: View { } .navigationSplitViewStyle(.balanced) .frame(minWidth: 940, minHeight: 730) + // Resolve hosting NSWindow and set initial title + .background( + WindowTitleAccessor { window in + self.hostingWindow = window + self.hostingWindow?.title = selectedView.rawValue + } + ) .onAppear { hasLoadedData = true } + // inside ContentView body: .onReceive(NotificationCenter.default.publisher(for: .navigateToDestination)) { notification in print("ContentView: Received navigation notification") if let destination = notification.userInfo?["destination"] as? 
String { @@ -215,6 +225,10 @@ struct ContentView: View { case "Enhancement": print("ContentView: Navigating to Enhancement") selectedView = .enhancement + case "Transcribe Audio": + // Ensure we switch to the Transcribe Audio view in-place + print("ContentView: Navigating to Transcribe Audio") + selectedView = .transcribeAudio default: print("ContentView: No matching destination found for: \(destination)") break @@ -223,6 +237,10 @@ struct ContentView: View { print("ContentView: No destination in notification") } } + // Update the tab/window title whenever the active view changes + .onChange(of: selectedView) { newValue in + hostingWindow?.title = newValue.rawValue + } } @ViewBuilder @@ -259,3 +277,21 @@ struct ContentView: View { } } } + +struct WindowTitleAccessor: NSViewRepresentable { + var onResolve: (NSWindow?) -> Void + + func makeNSView(context: Context) -> NSView { + let view = NSView() + DispatchQueue.main.async { [weak view] in + onResolve(view?.window) + } + return view + } + + func updateNSView(_ nsView: NSView, context: Context) { + DispatchQueue.main.async { [weak nsView] in + onResolve(nsView?.window) + } + } +} diff --git a/VoiceInk/VoiceInk.swift b/VoiceInk/VoiceInk.swift index 549a500..af22cfd 100644 --- a/VoiceInk/VoiceInk.swift +++ b/VoiceInk/VoiceInk.swift @@ -114,6 +114,15 @@ struct VoiceInkApp: App { if !UserDefaults.standard.bool(forKey: "IsTranscriptionCleanupEnabled") { audioCleanupManager.startAutomaticCleanup(modelContext: container.mainContext) } + + // Process any pending open-file request now that the main ContentView is ready. 
+ if let pendingURL = appDelegate.pendingOpenFileURL { + NotificationCenter.default.post(name: .navigateToDestination, object: nil, userInfo: ["destination": "Transcribe Audio"]) + DispatchQueue.main.asyncAfter(deadline: .now() + 0.3) { + NotificationCenter.default.post(name: .openFileForTranscription, object: nil, userInfo: ["url": pendingURL]) + } + appDelegate.pendingOpenFileURL = nil + } } .background(WindowAccessor { window in WindowManager.shared.configureWindow(window) From c06d01f61e63503d5844c36ce287afe0bb893fa0 Mon Sep 17 00:00:00 2001 From: Alexey Haidamaka Date: Thu, 4 Sep 2025 11:10:54 +0200 Subject: [PATCH 05/21] Handle retry for API rate limit errors --- VoiceInk/Services/AIEnhancementService.swift | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/VoiceInk/Services/AIEnhancementService.swift b/VoiceInk/Services/AIEnhancementService.swift index 7ae13d8..a9d4c05 100644 --- a/VoiceInk/Services/AIEnhancementService.swift +++ b/VoiceInk/Services/AIEnhancementService.swift @@ -261,6 +261,8 @@ class AIEnhancementService: ObservableObject { let filteredText = AIEnhancementOutputFilter.filter(enhancedText.trimmingCharacters(in: .whitespacesAndNewlines)) return filteredText + } else if httpResponse.statusCode == 429 { + throw EnhancementError.rateLimitExceeded } else if (500...599).contains(httpResponse.statusCode) { throw EnhancementError.serverError } else { @@ -316,6 +318,8 @@ class AIEnhancementService: ObservableObject { let filteredText = AIEnhancementOutputFilter.filter(enhancedText.trimmingCharacters(in: .whitespacesAndNewlines)) return filteredText + } else if httpResponse.statusCode == 429 { + throw EnhancementError.rateLimitExceeded } else if (500...599).contains(httpResponse.statusCode) { throw EnhancementError.serverError } else { @@ -342,7 +346,7 @@ class AIEnhancementService: ObservableObject { return try await makeRequest(text: text, mode: mode) } catch let error as EnhancementError { switch error { - case .networkError, 
.serverError: + case .networkError, .serverError, .rateLimitExceeded: retries += 1 if retries < maxRetries { logger.warning("Request failed, retrying in \(currentDelay)s... (Attempt \(retries)/\(maxRetries))") @@ -458,6 +462,7 @@ enum EnhancementError: Error { case enhancementFailed case networkError case serverError + case rateLimitExceeded case customError(String) } @@ -474,6 +479,8 @@ extension EnhancementError: LocalizedError { return "Network connection failed. Check your internet." case .serverError: return "The AI provider's server encountered an error. Please try again later." + case .rateLimitExceeded: + return "Rate limit exceeded. Please try again later." case .customError(let message): return message } From 12c850f77b0bedfca4361f53b8ec0f3dde3bf743 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 5 Sep 2025 11:22:14 +0545 Subject: [PATCH 06/21] Open With and title cleanup --- VoiceInk.xcodeproj/project.pbxproj | 8 ++--- .../xcshareddata/swiftpm/Package.resolved | 2 +- VoiceInk/AppDelegate.swift | 6 ++-- VoiceInk/Info.plist | 31 ------------------ VoiceInk/Services/SupportedMedia.swift | 28 ++++++++++++++++ VoiceInk/Views/AudioTranscribeView.swift | 25 ++------------- VoiceInk/Views/ContentView.swift | 32 ++----------------- 7 files changed, 39 insertions(+), 93 deletions(-) create mode 100644 VoiceInk/Services/SupportedMedia.swift diff --git a/VoiceInk.xcodeproj/project.pbxproj b/VoiceInk.xcodeproj/project.pbxproj index f18f690..63a704e 100644 --- a/VoiceInk.xcodeproj/project.pbxproj +++ b/VoiceInk.xcodeproj/project.pbxproj @@ -459,7 +459,7 @@ "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; - CURRENT_PROJECT_VERSION = 152; + CURRENT_PROJECT_VERSION = 153; DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\""; DEVELOPMENT_TEAM = V6J6A3VWY2; ENABLE_HARDENED_RUNTIME = YES; @@ -474,7 +474,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 14.0; - MARKETING_VERSION = 
1.52; + MARKETING_VERSION = 1.53; PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG ENABLE_NATIVE_SPEECH_ANALYZER $(inherited)"; @@ -493,7 +493,7 @@ "CODE_SIGN_IDENTITY[sdk=macosx*]" = "Apple Development"; CODE_SIGN_STYLE = Automatic; COMBINE_HIDPI_IMAGES = YES; - CURRENT_PROJECT_VERSION = 152; + CURRENT_PROJECT_VERSION = 153; DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\""; DEVELOPMENT_TEAM = V6J6A3VWY2; ENABLE_HARDENED_RUNTIME = YES; @@ -508,7 +508,7 @@ "@executable_path/../Frameworks", ); MACOSX_DEPLOYMENT_TARGET = 14.0; - MARKETING_VERSION = 1.52; + MARKETING_VERSION = 1.53; PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk; PRODUCT_NAME = "$(TARGET_NAME)"; SWIFT_ACTIVE_COMPILATION_CONDITIONS = "ENABLE_NATIVE_SPEECH_ANALYZER $(inherited)"; diff --git a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved index 886cfcd..2c29478 100644 --- a/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved +++ b/VoiceInk.xcodeproj/project.xcworkspace/xcshareddata/swiftpm/Package.resolved @@ -7,7 +7,7 @@ "location" : "https://github.com/FluidInference/FluidAudio", "state" : { "branch" : "main", - "revision" : "abf7d9ef3f53a693e3721069071971eff84c002f" + "revision" : "052cbb27cf073a9407251d74ef3459ea258e41b3" } }, { diff --git a/VoiceInk/AppDelegate.swift b/VoiceInk/AppDelegate.swift index 4bc530b..20cd81e 100644 --- a/VoiceInk/AppDelegate.swift +++ b/VoiceInk/AppDelegate.swift @@ -1,5 +1,6 @@ import Cocoa import SwiftUI +import UniformTypeIdentifiers class AppDelegate: NSObject, NSApplicationDelegate { func applicationDidFinishLaunching(_ notification: Notification) { @@ -50,14 +51,11 @@ class AppDelegate: NSObject, NSApplicationDelegate { defaults.removeObject(forKey: "isPowerModeEnabled") } - // Keep in sync with 
AudioTranscribeView.supportedExtensions - private let supportedExtensions = ["wav", "mp3", "m4a", "aiff", "mp4", "mov", "aac", "flac", "caf"] - // Stash URL when app cold-starts to avoid spawning a new window/tab var pendingOpenFileURL: URL? func application(_ application: NSApplication, open urls: [URL]) { - guard let url = urls.first(where: { supportedExtensions.contains($0.pathExtension.lowercased()) }) else { + guard let url = urls.first(where: { SupportedMedia.isSupported(url: $0) }) else { return } diff --git a/VoiceInk/Info.plist b/VoiceInk/Info.plist index 7584b1c..7833f1f 100644 --- a/VoiceInk/Info.plist +++ b/VoiceInk/Info.plist @@ -48,34 +48,3 @@ - -CFBundleDocumentTypes - - - CFBundleTypeName - Audio/Video File - CFBundleTypeRole - Viewer - LSHandlerRank - Alternate - LSItemContentTypes - - public.audio - public.movie - - CFBundleTypeExtensions - - wav - mp3 - m4a - aiff - mp4 - mov - aac - flac - caf - - - - - diff --git a/VoiceInk/Services/SupportedMedia.swift b/VoiceInk/Services/SupportedMedia.swift new file mode 100644 index 0000000..c66c452 --- /dev/null +++ b/VoiceInk/Services/SupportedMedia.swift @@ -0,0 +1,28 @@ +import Foundation +import UniformTypeIdentifiers + +struct SupportedMedia { + static let extensions: Set = [ + "wav", "mp3", "m4a", "aiff", "mp4", "mov", "aac", "flac", "caf" + ] + + static let contentTypes: [UTType] = [ + .audio, .movie + ] + + static func isSupported(url: URL) -> Bool { + let fileExtension = url.pathExtension.lowercased() + if !fileExtension.isEmpty, extensions.contains(fileExtension) { + return true + } + + if let resourceValues = try? 
url.resourceValues(forKeys: [.contentTypeKey]), + let contentType = resourceValues.contentType { + return contentTypes.contains(where: { contentType.conforms(to: $0) }) + } + + return false + } +} + + diff --git a/VoiceInk/Views/AudioTranscribeView.swift b/VoiceInk/Views/AudioTranscribeView.swift index d73c8b8..3cebfa0 100644 --- a/VoiceInk/Views/AudioTranscribeView.swift +++ b/VoiceInk/Views/AudioTranscribeView.swift @@ -353,29 +353,8 @@ struct AudioTranscribeView: View { } } - // Validate file type by extension - let supportedExtensions = ["wav", "mp3", "m4a", "aiff", "mp4", "mov", "aac", "flac", "caf"] - let fileExtension = url.pathExtension.lowercased() - - // Check file extension first - if !fileExtension.isEmpty && supportedExtensions.contains(fileExtension) { - print("File type validated by extension: \(fileExtension)") - } else { - print("Unsupported file extension: \(fileExtension)") - // Try to validate by UTType as well - if let resourceValues = try? url.resourceValues(forKeys: [.contentTypeKey]), - let contentType = resourceValues.contentType { - if contentType.conforms(to: .audio) || contentType.conforms(to: .movie) { - print("File type validated by UTType: \(contentType.identifier)") - } else { - print("File does not conform to audio or movie type: \(contentType.identifier)") - return - } - } else { - print("Could not validate file type") - return - } - } + // Validate file type + guard SupportedMedia.isSupported(url: url) else { return } print("File validated successfully: \(url.lastPathComponent)") selectedAudioURL = url diff --git a/VoiceInk/Views/ContentView.swift b/VoiceInk/Views/ContentView.swift index a610b49..4736d4b 100644 --- a/VoiceInk/Views/ContentView.swift +++ b/VoiceInk/Views/ContentView.swift @@ -164,8 +164,7 @@ struct ContentView: View { @State private var hasLoadedData = false let appVersion = Bundle.main.infoDictionary?["CFBundleShortVersionString"] as? String ?? 
"1.0.0" @StateObject private var licenseViewModel = LicenseViewModel() - // Capture the hosting window to update tab/window title dynamically - @State private var hostingWindow: NSWindow? + private var isSetupComplete: Bool { hasLoadedData && @@ -191,13 +190,6 @@ struct ContentView: View { } .navigationSplitViewStyle(.balanced) .frame(minWidth: 940, minHeight: 730) - // Resolve hosting NSWindow and set initial title - .background( - WindowTitleAccessor { window in - self.hostingWindow = window - self.hostingWindow?.title = selectedView.rawValue - } - ) .onAppear { hasLoadedData = true } @@ -237,10 +229,6 @@ struct ContentView: View { print("ContentView: No destination in notification") } } - // Update the tab/window title whenever the active view changes - .onChange(of: selectedView) { newValue in - hostingWindow?.title = newValue.rawValue - } } @ViewBuilder @@ -278,20 +266,4 @@ struct ContentView: View { } } -struct WindowTitleAccessor: NSViewRepresentable { - var onResolve: (NSWindow?) -> Void - - func makeNSView(context: Context) -> NSView { - let view = NSView() - DispatchQueue.main.async { [weak view] in - onResolve(view?.window) - } - return view - } - - func updateNSView(_ nsView: NSView, context: Context) { - DispatchQueue.main.async { [weak nsView] in - onResolve(nsView?.window) - } - } -} + From 106fd653eae538c061c77f427b6a592009a4f3c3 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 5 Sep 2025 18:37:16 +0545 Subject: [PATCH 07/21] feat: Integrate experimental VAD for Parakeet This change introduces a standalone Voice Activity Detection (VAD) service and integrates it into the ParakeetTranscriptionService. The VAD preprocesses the audio to remove silent segments, aiming to improve transcription accuracy. This is considered experimental due to a discovered anomaly in the Swift/C bridge where timestamps were being multiplied by 100. A workaround has been implemented to correct this. 
--- .../ParakeetTranscriptionService.swift | 29 +++- VoiceInk/Services/VoiceActivityDetector.swift | 152 ++++++++++++++++++ 2 files changed, 177 insertions(+), 4 deletions(-) create mode 100644 VoiceInk/Services/VoiceActivityDetector.swift diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index f3e4a09..cb39801 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -71,13 +71,34 @@ class ParakeetTranscriptionService: TranscriptionService { let audioSamples = try readAudioSamples(from: audioURL) - // Validate audio data before transcription - guard audioSamples.count >= 16000 else { - logger.notice("๐Ÿฆœ Audio too short for transcription: \(audioSamples.count) samples") + // Validate audio data before VAD + guard !audioSamples.isEmpty else { + logger.notice("๐Ÿฆœ Audio is empty, skipping transcription.") + throw ASRError.invalidAudioData + } + + // Use VAD to get speech segments + let speechAudio: [Float] + if let modelPath = await VADModelManager.shared.getModelPath() { + if let vad = VoiceActivityDetector(modelPath: modelPath) { + speechAudio = vad.process(audioSamples: audioSamples) + logger.notice("๐Ÿฆœ VAD processed audio, resulting in \(speechAudio.count) samples.") + } else { + logger.warning("๐Ÿฆœ VAD could not be initialized. Transcribing original audio.") + speechAudio = audioSamples + } + } else { + logger.warning("๐Ÿฆœ VAD model path not found. 
Transcribing original audio.") + speechAudio = audioSamples + } + + // Validate audio data after VAD + guard speechAudio.count >= 16000 else { + logger.notice("๐Ÿฆœ Audio too short for transcription after VAD: \(speechAudio.count) samples") throw ASRError.invalidAudioData } - let result = try await asrManager.transcribe(audioSamples) + let result = try await asrManager.transcribe(speechAudio) // Reset decoder state and cleanup after transcription to avoid blocking the transcription start Task { diff --git a/VoiceInk/Services/VoiceActivityDetector.swift b/VoiceInk/Services/VoiceActivityDetector.swift new file mode 100644 index 0000000..658deaf --- /dev/null +++ b/VoiceInk/Services/VoiceActivityDetector.swift @@ -0,0 +1,152 @@ +import Foundation +import AVFoundation +import os.log + +// MARK: - C API Bridge + +// Opaque pointers for the C contexts +fileprivate typealias WhisperVADContext = OpaquePointer +fileprivate typealias WhisperVADSegments = OpaquePointer + +// Define the C function signatures for Swift, scoped to this file + +@_silgen_name("whisper_vad_default_params") +fileprivate func whisper_vad_default_params() -> whisper_vad_params + +@_silgen_name("whisper_vad_default_context_params") +fileprivate func whisper_vad_default_context_params() -> whisper_vad_context_params + +@_silgen_name("whisper_vad_init_from_file_with_params") +fileprivate func whisper_vad_init_from_file_with_params(_ path_model: UnsafePointer, _ params: whisper_vad_context_params) -> WhisperVADContext? 
+ +@_silgen_name("whisper_vad_detect_speech") +fileprivate func whisper_vad_detect_speech(_ vctx: WhisperVADContext, _ samples: UnsafePointer, _ n_samples: Int32) -> Bool + +@_silgen_name("whisper_vad_n_probs") +fileprivate func whisper_vad_n_probs(_ vctx: WhisperVADContext) -> Int32 + +@_silgen_name("whisper_vad_probs") +fileprivate func whisper_vad_probs(_ vctx: WhisperVADContext) -> UnsafeMutablePointer + +@_silgen_name("whisper_vad_segments_from_probs") +fileprivate func whisper_vad_segments_from_probs(_ vctx: WhisperVADContext, _ params: whisper_vad_params) -> WhisperVADSegments? + +@_silgen_name("whisper_vad_segments_n_segments") +fileprivate func whisper_vad_segments_n_segments(_ segments: WhisperVADSegments) -> Int32 + +@_silgen_name("whisper_vad_segments_get_segment_t0") +fileprivate func whisper_vad_segments_get_segment_t0(_ segments: WhisperVADSegments, _ i_segment: Int32) -> Float + +@_silgen_name("whisper_vad_segments_get_segment_t1") +fileprivate func whisper_vad_segments_get_segment_t1(_ segments: WhisperVADSegments, _ i_segment: Int32) -> Float + +@_silgen_name("whisper_vad_free_segments") +fileprivate func whisper_vad_free_segments(_ segments: WhisperVADSegments) + +@_silgen_name("whisper_vad_free") +fileprivate func whisper_vad_free(_ ctx: WhisperVADContext) + +// Structs matching whisper.h, scoped to this file +fileprivate struct whisper_vad_params { + var threshold: Float + var min_speech_duration_ms: Int32 + var min_silence_duration_ms: Int32 + var max_speech_duration_s: Float + var speech_pad_ms: Int32 + var samples_overlap: Float +} + +fileprivate struct whisper_vad_context_params { + var n_threads: Int32 + var use_gpu: Bool + var gpu_device: Int32 +} + + +// MARK: - VoiceActivityDetector Class + +class VoiceActivityDetector { + private var vadContext: WhisperVADContext + private let logger = Logger(subsystem: "com.voiceink.app", category: "VoiceActivityDetector") + + init?(modelPath: String) { + var contextParams = 
whisper_vad_default_context_params() + contextParams.n_threads = max(1, min(8, Int32(ProcessInfo.processInfo.processorCount) - 2)) + + guard let context = whisper_vad_init_from_file_with_params(modelPath, contextParams) else { + logger.error("Failed to initialize VAD context.") + return nil + } + self.vadContext = context + logger.notice("VAD context initialized successfully.") + } + + deinit { + whisper_vad_free(vadContext) + logger.notice("VAD context freed.") + } + + /// Processes audio samples to detect speech segments and returns the stitched audio containing only speech. + func process(audioSamples: [Float]) -> [Float] { + // 1. Detect speech and get probabilities internally in the context + let success = audioSamples.withUnsafeBufferPointer { buffer in + whisper_vad_detect_speech(vadContext, buffer.baseAddress!, Int32(audioSamples.count)) + } + + guard success else { + logger.error("Failed to detect speech probabilities.") + return [] + } + + // 2. Get segments from probabilities + var vadParams = whisper_vad_default_params() + vadParams.threshold = 0.5 + vadParams.min_speech_duration_ms = 250 + vadParams.min_silence_duration_ms = 100 + vadParams.speech_pad_ms = 30 + + guard let segments = whisper_vad_segments_from_probs(vadContext, vadParams) else { + logger.error("Failed to get VAD segments from probabilities.") + return [] + } + defer { + // Ensure segments are freed + whisper_vad_free_segments(segments) + } + + let nSegments = whisper_vad_segments_n_segments(segments) + logger.notice("Detected \(nSegments) speech segments.") + + // 3. Stitch audio segments together + var stitchedAudio = [Float]() + let sampleRate = 16000 // Assuming 16kHz sample rate + + for i in 0.. audioSamples.count { + logger.debug("Capping endSample from \(endSample, privacy: .public) to \(audioSamples.count, privacy: .public)") + endSample = audioSamples.count + } + + if startSample < endSample { + stitchedAudio.append(contentsOf: audioSamples[startSample.. 
Date: Sat, 6 Sep 2025 07:13:06 +0545 Subject: [PATCH 08/21] Improved VAD for Parakeet model --- .../ParakeetTranscriptionService.swift | 24 ++++++++-- VoiceInk/Services/VoiceActivityDetector.swift | 45 ++++++------------- 2 files changed, 34 insertions(+), 35 deletions(-) diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index cb39801..1d60491 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -78,11 +78,29 @@ class ParakeetTranscriptionService: TranscriptionService { } // Use VAD to get speech segments - let speechAudio: [Float] + var speechAudio: [Float] = [] if let modelPath = await VADModelManager.shared.getModelPath() { if let vad = VoiceActivityDetector(modelPath: modelPath) { - speechAudio = vad.process(audioSamples: audioSamples) - logger.notice("๐Ÿฆœ VAD processed audio, resulting in \(speechAudio.count) samples.") + let speechSegments = vad.process(audioSamples: audioSamples) + logger.notice("๐Ÿฆœ VAD detected \(speechSegments.count) speech segments.") + + let sampleRate = 16000 // Assuming 16kHz sample rate + for segment in speechSegments { + let startSample = Int(segment.start * Double(sampleRate)) + var endSample = Int(segment.end * Double(sampleRate)) + + // Cap endSample to the audio buffer size + if endSample > audioSamples.count { + endSample = audioSamples.count + } + + if startSample < endSample { + speechAudio.append(contentsOf: audioSamples[startSample.. [Float] { + /// Processes audio samples to detect speech segments and returns an array of (start: TimeInterval, end: TimeInterval) tuples. + func process(audioSamples: [Float]) -> [(start: TimeInterval, end: TimeInterval)] { // 1. 
Detect speech and get probabilities internally in the context let success = audioSamples.withUnsafeBufferPointer { buffer in whisper_vad_detect_speech(vadContext, buffer.baseAddress!, Int32(audioSamples.count)) @@ -100,10 +100,12 @@ class VoiceActivityDetector { // 2. Get segments from probabilities var vadParams = whisper_vad_default_params() - vadParams.threshold = 0.5 - vadParams.min_speech_duration_ms = 250 - vadParams.min_silence_duration_ms = 100 - vadParams.speech_pad_ms = 30 + vadParams.threshold = 0.45 + vadParams.min_speech_duration_ms = 150 + vadParams.min_silence_duration_ms = 750 + vadParams.max_speech_duration_s = Float.greatestFiniteMagnitude // Use the largest representable Float value for no max duration + vadParams.speech_pad_ms = 100 + vadParams.samples_overlap = 0.1 // Add samples_overlap parameter guard let segments = whisper_vad_segments_from_probs(vadContext, vadParams) else { logger.error("Failed to get VAD segments from probabilities.") @@ -117,36 +119,15 @@ class VoiceActivityDetector { let nSegments = whisper_vad_segments_n_segments(segments) logger.notice("Detected \(nSegments) speech segments.") - // 3. Stitch audio segments together - var stitchedAudio = [Float]() - let sampleRate = 16000 // Assuming 16kHz sample rate - + var speechSegments: [(start: TimeInterval, end: TimeInterval)] = [] for i in 0.. audioSamples.count { - logger.debug("Capping endSample from \(endSample, privacy: .public) to \(audioSamples.count, privacy: .public)") - endSample = audioSamples.count - } - - if startSample < endSample { - stitchedAudio.append(contentsOf: audioSamples[startSample.. 
Date: Sat, 6 Sep 2025 08:57:32 +0545 Subject: [PATCH 09/21] Feat: Respect VAD user setting in ParakeetTranscriptionService --- .../ParakeetTranscriptionService.swift | 47 +++++++++++-------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index 1d60491..da2d092 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -79,34 +79,41 @@ class ParakeetTranscriptionService: TranscriptionService { // Use VAD to get speech segments var speechAudio: [Float] = [] - if let modelPath = await VADModelManager.shared.getModelPath() { - if let vad = VoiceActivityDetector(modelPath: modelPath) { - let speechSegments = vad.process(audioSamples: audioSamples) - logger.notice("๐Ÿฆœ VAD detected \(speechSegments.count) speech segments.") + let isVADEnabled = UserDefaults.standard.object(forKey: "IsVADEnabled") as? Bool ?? true - let sampleRate = 16000 // Assuming 16kHz sample rate - for segment in speechSegments { - let startSample = Int(segment.start * Double(sampleRate)) - var endSample = Int(segment.end * Double(sampleRate)) + if isVADEnabled { + if let modelPath = await VADModelManager.shared.getModelPath() { + if let vad = VoiceActivityDetector(modelPath: modelPath) { + let speechSegments = vad.process(audioSamples: audioSamples) + logger.notice("๐Ÿฆœ VAD detected \(speechSegments.count) speech segments.") - // Cap endSample to the audio buffer size - if endSample > audioSamples.count { - endSample = audioSamples.count - } - - if startSample < endSample { - speechAudio.append(contentsOf: audioSamples[startSample.. audioSamples.count { + endSample = audioSamples.count + } + + if startSample < endSample { + speechAudio.append(contentsOf: audioSamples[startSample.. 
Date: Sat, 6 Sep 2025 08:58:37 +0545 Subject: [PATCH 10/21] Fix: Update VAD info tip in ModelSettingsView --- VoiceInk/Views/ModelSettingsView.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VoiceInk/Views/ModelSettingsView.swift b/VoiceInk/Views/ModelSettingsView.swift index 0af2337..5a1a06d 100644 --- a/VoiceInk/Views/ModelSettingsView.swift +++ b/VoiceInk/Views/ModelSettingsView.swift @@ -99,7 +99,7 @@ struct ModelSettingsView: View { InfoTip( title: "Voice Activity Detection", - message: "Detects speech segments and filters out silence to reduce hallucinations in local Whisper models." + message: "Detect speech segments and filter out silence to improve accuracy of local models." ) } From 95e57bf3f7b801c6dcf0f9a7d3bed1b6bd7b5172 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sat, 6 Sep 2025 09:36:55 +0545 Subject: [PATCH 11/21] Added support for retry functionality --- VoiceInk/HotkeyManager.swift | 11 +++++++++-- VoiceInk/Services/ImportExportService.swift | 5 +++++ VoiceInk/Views/Settings/SettingsView.swift | 19 +++++++++++++++++++ 3 files changed, 33 insertions(+), 2 deletions(-) diff --git a/VoiceInk/HotkeyManager.swift b/VoiceInk/HotkeyManager.swift index 407986e..ae9f3a5 100644 --- a/VoiceInk/HotkeyManager.swift +++ b/VoiceInk/HotkeyManager.swift @@ -7,6 +7,7 @@ extension KeyboardShortcuts.Name { static let toggleMiniRecorder = Self("toggleMiniRecorder") static let toggleMiniRecorder2 = Self("toggleMiniRecorder2") static let pasteLastTranscription = Self("pasteLastTranscription") + static let retryLastTranscription = Self("retryLastTranscription") } @MainActor @@ -140,18 +141,24 @@ class HotkeyManager: ObservableObject { self.whisperState = whisperState self.miniRecorderShortcutManager = MiniRecorderShortcutManager(whisperState: whisperState) - if KeyboardShortcuts.getShortcut(for: .pasteLastTranscription) == nil { let defaultPasteShortcut = KeyboardShortcuts.Shortcut(.v, modifiers: [.command, .option]) 
KeyboardShortcuts.setShortcut(defaultPasteShortcut, for: .pasteLastTranscription) } - + KeyboardShortcuts.onKeyUp(for: .pasteLastTranscription) { [weak self] in guard let self = self else { return } Task { @MainActor in LastTranscriptionService.pasteLastTranscription(from: self.whisperState.modelContext) } } + + KeyboardShortcuts.onKeyUp(for: .retryLastTranscription) { [weak self] in + guard let self = self else { return } + Task { @MainActor in + LastTranscriptionService.retryLastTranscription(from: self.whisperState.modelContext, whisperState: self.whisperState) + } + } Task { @MainActor in try? await Task.sleep(nanoseconds: 100_000_000) diff --git a/VoiceInk/Services/ImportExportService.swift b/VoiceInk/Services/ImportExportService.swift index 2b08313..1578ffc 100644 --- a/VoiceInk/Services/ImportExportService.swift +++ b/VoiceInk/Services/ImportExportService.swift @@ -7,6 +7,7 @@ import LaunchAtLogin struct GeneralSettings: Codable { let toggleMiniRecorderShortcut: KeyboardShortcuts.Shortcut? let toggleMiniRecorderShortcut2: KeyboardShortcuts.Shortcut? + let retryLastTranscriptionShortcut: KeyboardShortcuts.Shortcut? let selectedHotkey1RawValue: String? let selectedHotkey2RawValue: String? let launchAtLoginEnabled: Bool? 
@@ -86,6 +87,7 @@ class ImportExportService { let generalSettingsToExport = GeneralSettings( toggleMiniRecorderShortcut: KeyboardShortcuts.getShortcut(for: .toggleMiniRecorder), toggleMiniRecorderShortcut2: KeyboardShortcuts.getShortcut(for: .toggleMiniRecorder2), + retryLastTranscriptionShortcut: KeyboardShortcuts.getShortcut(for: .retryLastTranscription), selectedHotkey1RawValue: hotkeyManager.selectedHotkey1.rawValue, selectedHotkey2RawValue: hotkeyManager.selectedHotkey2.rawValue, launchAtLoginEnabled: LaunchAtLogin.isEnabled, @@ -218,6 +220,9 @@ class ImportExportService { if let shortcut2 = general.toggleMiniRecorderShortcut2 { KeyboardShortcuts.setShortcut(shortcut2, for: .toggleMiniRecorder2) } + if let retryShortcut = general.retryLastTranscriptionShortcut { + KeyboardShortcuts.setShortcut(retryShortcut, for: .retryLastTranscription) + } if let hotkeyRaw = general.selectedHotkey1RawValue, let hotkey = HotkeyManager.HotkeyOption(rawValue: hotkeyRaw) { hotkeyManager.selectedHotkey1 = hotkey diff --git a/VoiceInk/Views/Settings/SettingsView.swift b/VoiceInk/Views/Settings/SettingsView.swift index e54ae0a..93b7f0e 100644 --- a/VoiceInk/Views/Settings/SettingsView.swift +++ b/VoiceInk/Views/Settings/SettingsView.swift @@ -130,6 +130,25 @@ struct SettingsView: View { Divider() + // Retry Last Transcription + HStack(spacing: 12) { + Text("Retry Last Transcription") + .font(.system(size: 13, weight: .medium)) + .foregroundColor(.secondary) + + KeyboardShortcuts.Recorder(for: .retryLastTranscription) + .controlSize(.small) + + InfoTip( + title: "Retry Last Transcription", + message: "Re-transcribe the last recorded audio using the current model and copy the result." 
+ ) + + Spacer() + } + + Divider() + // Middle-Click Toggle VStack(alignment: .leading, spacing: 12) { HStack(spacing: 8) { From 1ab2386af0f3110bafeae954b48a5931e1ac7166 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sat, 6 Sep 2025 09:43:03 +0545 Subject: [PATCH 12/21] Retry hotkey and settings reorder --- VoiceInk/Views/Settings/SettingsView.swift | 76 ++++++++++------------ 1 file changed, 36 insertions(+), 40 deletions(-) diff --git a/VoiceInk/Views/Settings/SettingsView.swift b/VoiceInk/Views/Settings/SettingsView.swift index 93b7f0e..cb9d354 100644 --- a/VoiceInk/Views/Settings/SettingsView.swift +++ b/VoiceInk/Views/Settings/SettingsView.swift @@ -74,6 +74,42 @@ struct SettingsView: View { subtitle: "Additional shortcuts for VoiceInk" ) { VStack(alignment: .leading, spacing: 18) { + // Paste Last Transcription + HStack(spacing: 12) { + Text("Paste Last Transcription") + .font(.system(size: 13, weight: .medium)) + .foregroundColor(.secondary) + + KeyboardShortcuts.Recorder(for: .pasteLastTranscription) + .controlSize(.small) + + InfoTip( + title: "Paste Last Transcription", + message: "Shortcut for pasting the most recent transcription at current cursor position." + ) + + Spacer() + } + + // Retry Last Transcription + HStack(spacing: 12) { + Text("Retry Last Transcription") + .font(.system(size: 13, weight: .medium)) + .foregroundColor(.secondary) + + KeyboardShortcuts.Recorder(for: .retryLastTranscription) + .controlSize(.small) + + InfoTip( + title: "Retry Last Transcription", + message: "Re-transcribe the last recorded audio using the current model and copy the result." 
+ ) + + Spacer() + } + + + // Custom Cancel Shortcut VStack(alignment: .leading, spacing: 12) { HStack(spacing: 8) { @@ -109,46 +145,6 @@ struct SettingsView: View { } } - Divider() - - // Paste Last Transcription - HStack(spacing: 12) { - Text("Paste Last Transcription") - .font(.system(size: 13, weight: .medium)) - .foregroundColor(.secondary) - - KeyboardShortcuts.Recorder(for: .pasteLastTranscription) - .controlSize(.small) - - InfoTip( - title: "Paste Last Transcription", - message: "Shortcut for pasting the most recent transcription at current cursor position." - ) - - Spacer() - } - - Divider() - - // Retry Last Transcription - HStack(spacing: 12) { - Text("Retry Last Transcription") - .font(.system(size: 13, weight: .medium)) - .foregroundColor(.secondary) - - KeyboardShortcuts.Recorder(for: .retryLastTranscription) - .controlSize(.small) - - InfoTip( - title: "Retry Last Transcription", - message: "Re-transcribe the last recorded audio using the current model and copy the result." 
- ) - - Spacer() - } - - Divider() - // Middle-Click Toggle VStack(alignment: .leading, spacing: 12) { HStack(spacing: 8) { From 8b54be301935728366fbc46aee248a85c50de366 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sat, 6 Sep 2025 14:52:14 +0545 Subject: [PATCH 13/21] Minor correction on settings name --- VoiceInk/Views/Settings/ExperimentalFeaturesSection.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VoiceInk/Views/Settings/ExperimentalFeaturesSection.swift b/VoiceInk/Views/Settings/ExperimentalFeaturesSection.swift index 900af55..b0479b9 100644 --- a/VoiceInk/Views/Settings/ExperimentalFeaturesSection.swift +++ b/VoiceInk/Views/Settings/ExperimentalFeaturesSection.swift @@ -37,7 +37,7 @@ struct ExperimentalFeaturesSection: View { if isExperimentalFeaturesEnabled { Toggle(isOn: $playbackController.isPauseMediaEnabled) { - Text("Pause Media on Playback") + Text("Pause Media during recording") } .toggleStyle(.switch) .help("Automatically pause active media playback during recordings and resume afterward.") From 53d1507a5317251881e3455347a2f62de2655de2 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Sat, 6 Sep 2025 16:56:28 +0545 Subject: [PATCH 14/21] improve hallucination filter and integrate with parakeet transcription service --- .../ParakeetTranscriptionService.swift | 10 +++- .../Whisper/WhisperHallucinationFilter.swift | 55 +++++++++---------- 2 files changed, 35 insertions(+), 30 deletions(-) diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index da2d092..71b6de7 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -137,10 +137,16 @@ class ParakeetTranscriptionService: TranscriptionService { logger.notice("๐Ÿฆœ Warning: Empty transcription result for \(audioSamples.count) samples - possible vocabulary issue") } + var text = result.text + if UserDefaults.standard.object(forKey: 
"IsTextFormattingEnabled") as? Bool ?? true { - return WhisperTextFormatter.format(result.text) + text = WhisperTextFormatter.format(text) } - return result.text + + // Apply hallucination and filler word filtering + text = WhisperHallucinationFilter.filter(text) + + return text } private func readAudioSamples(from url: URL) throws -> [Float] { diff --git a/VoiceInk/Whisper/WhisperHallucinationFilter.swift b/VoiceInk/Whisper/WhisperHallucinationFilter.swift index f95d59d..bea80b3 100644 --- a/VoiceInk/Whisper/WhisperHallucinationFilter.swift +++ b/VoiceInk/Whisper/WhisperHallucinationFilter.swift @@ -4,49 +4,48 @@ import os struct WhisperHallucinationFilter { private static let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "WhisperHallucinationFilter") - // Pattern-based approach for detecting hallucinations - focusing on format indicators private static let hallucinationPatterns = [ - // Text in various types of brackets - the most reliable hallucination indicators - #"\[.*?\]"#, // [Text in square brackets] - #"\(.*?\)"#, // (Text in parentheses) - #"\{.*?\}"#, // {Text in curly braces} - #"<.*?>"#, // - - // Text with special formatting - #"\*.*?\*"#, // *Text with asterisks* - #"_.*?_"#, // _Text with underscores_ - - // Time indicators often added by Whisper - #"(?i)\d{1,2}:\d{2}(:\d{2})?\s*-\s*\d{1,2}:\d{2}(:\d{2})?"# // 00:00 - 00:00 format + #"\[.*?\]"#, // Square brackets + #"\(.*?\)"#, // Parentheses + #"\{.*?\}"# // Curly braces + ] + + private static let fillerWords = [ + "uh", "um", "uhm", "umm", "uhh", "uhhh", "er", "ah", "eh", + "hmm", "hm", "h", "m", "mmm", "mm", "mh", "ha", "ehh" ] - - /// Removes hallucinations from transcription text using pattern matching - /// - Parameter text: Original transcription text from Whisper - /// - Returns: Filtered text with hallucinations removed static func filter(_ text: String) -> String { - logger.notice("๐Ÿงน Applying pattern-based hallucination filter to transcription") - + 
logger.notice("๐Ÿงน Filtering hallucinations and filler words") var filteredText = text - - // Remove pattern-based hallucinations + + // Remove bracketed hallucinations for pattern in hallucinationPatterns { if let regex = try? NSRegularExpression(pattern: pattern) { let range = NSRange(filteredText.startIndex..., in: filteredText) filteredText = regex.stringByReplacingMatches(in: filteredText, options: [], range: range, withTemplate: "") } } - - // Clean up extra whitespace and newlines that might be left after removing hallucinations + + // Remove filler words + for fillerWord in fillerWords { + let pattern = "\\b\(NSRegularExpression.escapedPattern(for: fillerWord))\\b[,.]?" + if let regex = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) { + let range = NSRange(filteredText.startIndex..., in: filteredText) + filteredText = regex.stringByReplacingMatches(in: filteredText, options: [], range: range, withTemplate: "") + } + } + + // Clean whitespace filteredText = filteredText.replacingOccurrences(of: #"\s{2,}"#, with: " ", options: .regularExpression) filteredText = filteredText.trimmingCharacters(in: .whitespacesAndNewlines) - - // Add logging to track effectiveness + + // Log results if filteredText != text { - logger.notice("โœ… Removed hallucinations using pattern matching") + logger.notice("โœ… Removed hallucinations and filler words") } else { - logger.notice("โœ… No hallucinations detected with pattern matching") + logger.notice("โœ… No hallucinations or filler words found") } - + return filteredText } } \ No newline at end of file From 574e381b0a768794411ef434ff6e9b729bbcf51d Mon Sep 17 00:00:00 2001 From: Anton Novoselov Date: Thu, 11 Sep 2025 09:42:10 +0100 Subject: [PATCH 15/21] Update Groq Kimi K2 model name --- VoiceInk/Services/AIService.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VoiceInk/Services/AIService.swift b/VoiceInk/Services/AIService.swift index b6b6799..da94353 100644 --- 
a/VoiceInk/Services/AIService.swift +++ b/VoiceInk/Services/AIService.swift @@ -82,7 +82,7 @@ enum AIProvider: String, CaseIterable { case .groq: return [ "llama-3.3-70b-versatile", - "moonshotai/kimi-k2-instruct", + "moonshotai/kimi-k2-instruct-0905", "qwen/qwen3-32b", "meta-llama/llama-4-maverick-17b-128e-instruct", "openai/gpt-oss-120b" From 2a4f48ac0d69dea8ef3fa42d9d75f1933c62be67 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 10:14:10 +0545 Subject: [PATCH 16/21] Optimize recorder startup by backgrounding media controls --- VoiceInk/Recorder.swift | 16 +++++++--------- VoiceInk/Services/DictionaryContextService.swift | 2 +- 2 files changed, 8 insertions(+), 10 deletions(-) diff --git a/VoiceInk/Recorder.swift b/VoiceInk/Recorder.swift index 6cc1d01..b683f7b 100644 --- a/VoiceInk/Recorder.swift +++ b/VoiceInk/Recorder.swift @@ -76,15 +76,7 @@ class Recorder: NSObject, ObservableObject, AVAudioRecorderDelegate { UserDefaults.standard.set(String(currentDeviceID), forKey: "lastUsedMicrophoneDeviceID") hasDetectedAudioInCurrentSession = false - - // Coordinate media control and system audio sequentially for better reliability - await playbackController.pauseMedia() - - // Small delay to allow media command to process before muting system audio - try? 
await Task.sleep(nanoseconds: 100_000_000) // 100ms - - _ = await mediaController.muteSystemAudio() - + let deviceID = deviceManager.getCurrentDevice() if deviceID != 0 { do { @@ -114,6 +106,12 @@ class Recorder: NSObject, ObservableObject, AVAudioRecorderDelegate { throw RecorderError.couldNotStartRecording } + Task { [weak self] in + guard let self = self else { return } + await self.playbackController.pauseMedia() + _ = await self.mediaController.muteSystemAudio() + } + audioLevelCheckTask?.cancel() audioMeterUpdateTask?.cancel() diff --git a/VoiceInk/Services/DictionaryContextService.swift b/VoiceInk/Services/DictionaryContextService.swift index 6e51b9a..9086112 100644 --- a/VoiceInk/Services/DictionaryContextService.swift +++ b/VoiceInk/Services/DictionaryContextService.swift @@ -6,7 +6,7 @@ class DictionaryContextService { private init() {} - private let predefinedWords = "VoiceInk, chatGPT, GPT-4o, GPT-5-mini, Kimi-K2, GLM V4.5, Claude, Claude 4 sonnet, Claude opus, ultrathink, Vibe-coding, groq, cerebras, gpt-oss-120B, Wispr flow, deepseek, gemini-2.5, Veo 3, elevenlabs, Kyutai" + private let predefinedWords = "VoiceInk, chatGPT, GPT-4o, GPT-5-mini, Kimi-K2, GLM V4.5, Claude, Claude 4 sonnet, Claude opus, ultrathink, Vibe-coding, groq, cerebras, gpt-oss-120B, deepseek, gemini-2.5, Veo 3, elevenlabs, Kyutai" func getDictionaryContext() -> String { var allWords: [String] = [] From 8334f75360c567d551262645cc424e43f6e16571 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 10:25:37 +0545 Subject: [PATCH 17/21] Reduce keyboard shortcut timing from 1.7s to 0.8s --- VoiceInk/HotkeyManager.swift | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/VoiceInk/HotkeyManager.swift b/VoiceInk/HotkeyManager.swift index ae9f3a5..1198db8 100644 --- a/VoiceInk/HotkeyManager.swift +++ b/VoiceInk/HotkeyManager.swift @@ -58,7 +58,7 @@ class HotkeyManager: ObservableObject { // Key state tracking private var currentKeyState = false private var 
keyPressStartTime: Date? - private let briefPressThreshold = 1.7 + private let briefPressThreshold = 0.8 private var isHandsFreeMode = false // Debounce for Fn key From f1fb2168c2f5521be90212cd7d3e3c263308ad83 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 11:21:00 +0545 Subject: [PATCH 18/21] Added support for comma-separated words for word replacement. --- .../Services/WordReplacementService.swift | 11 ++--- .../Dictionary/EditReplacementSheet.swift | 15 ++++--- .../Dictionary/WordReplacementView.swift | 40 ++++++++++++++----- 3 files changed, 46 insertions(+), 20 deletions(-) diff --git a/VoiceInk/Services/WordReplacementService.swift b/VoiceInk/Services/WordReplacementService.swift index 1b84858..79aa210 100644 --- a/VoiceInk/Services/WordReplacementService.swift +++ b/VoiceInk/Services/WordReplacementService.swift @@ -15,12 +15,10 @@ class WordReplacementService { // Apply replacements (case-insensitive) for (original, replacement) in replacements { - let isPhrase = original.contains(" ") || original.trimmingCharacters(in: .whitespacesAndNewlines) != original + let usesBoundaries = usesWordBoundaries(for: original) - if isPhrase || !usesWordBoundaries(for: original) { - modifiedText = modifiedText.replacingOccurrences(of: original, with: replacement, options: .caseInsensitive) - } else { - // Use word boundaries for spaced languages + if usesBoundaries { + // Word-boundary regex for full original string let pattern = "\\b\(NSRegularExpression.escapedPattern(for: original))\\b" if let regex = try? 
NSRegularExpression(pattern: pattern, options: .caseInsensitive) { let range = NSRange(modifiedText.startIndex..., in: modifiedText) @@ -31,6 +29,9 @@ class WordReplacementService { withTemplate: replacement ) } + } else { + // Fallback substring replace for non-spaced scripts + modifiedText = modifiedText.replacingOccurrences(of: original, with: replacement, options: .caseInsensitive) } } diff --git a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift index 988ffce..50113b2 100644 --- a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift +++ b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift @@ -1,8 +1,5 @@ import SwiftUI - -/// A reusable sheet for editing an existing word replacement entry. -/// Mirrors the UI of `AddReplacementSheet` for consistency while pre-populating -/// the fields with the existing values. +// Edit existing word replacement entry struct EditReplacementSheet: View { @ObservedObject var manager: WordReplacementManager let originalKey: String @@ -84,8 +81,9 @@ struct EditReplacementSheet: View { .font(.caption) .foregroundColor(.secondary) } - TextField("Enter word or phrase to replace", text: $originalWord) + TextField("Enter word or phrase to replace (use commas for multiple)", text: $originalWord) .textFieldStyle(.roundedBorder) + } .padding(.horizontal) @@ -117,7 +115,12 @@ struct EditReplacementSheet: View { private func saveChanges() { let newOriginal = originalWord.trimmingCharacters(in: .whitespacesAndNewlines) let newReplacement = replacementWord.trimmingCharacters(in: .whitespacesAndNewlines) - guard !newOriginal.isEmpty, !newReplacement.isEmpty else { return } + // Ensure at least one non-empty token + let tokens = newOriginal + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + guard !tokens.isEmpty, !newReplacement.isEmpty else { return } manager.updateReplacement(oldOriginal: originalKey, newOriginal: 
newOriginal, newReplacement: newReplacement) dismiss() diff --git a/VoiceInk/Views/Dictionary/WordReplacementView.swift b/VoiceInk/Views/Dictionary/WordReplacementView.swift index ee8b4d0..a68ea4f 100644 --- a/VoiceInk/Views/Dictionary/WordReplacementView.swift +++ b/VoiceInk/Views/Dictionary/WordReplacementView.swift @@ -23,7 +23,15 @@ class WordReplacementManager: ObservableObject { } func addReplacement(original: String, replacement: String) { - replacements[original] = replacement + // Support comma-separated originals mapping to the same replacement + let tokens = original + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + guard !tokens.isEmpty else { return } + for token in tokens { + replacements[token] = replacement + } } func removeReplacement(original: String) { @@ -31,12 +39,18 @@ class WordReplacementManager: ObservableObject { } func updateReplacement(oldOriginal: String, newOriginal: String, newReplacement: String) { - // Remove the old key if the original text has changed - if oldOriginal != newOriginal { - replacements.removeValue(forKey: oldOriginal) + // Always remove the old key being edited + replacements.removeValue(forKey: oldOriginal) + + // Add one or more new keys (comma-separated) pointing to the same replacement + let tokens = newOriginal + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + guard !tokens.isEmpty else { return } + for token in tokens { + replacements[token] = newReplacement } - // Update (or insert) the new key/value pair - replacements[newOriginal] = newReplacement } } @@ -142,7 +156,7 @@ struct EmptyStateView: View { Text("No Replacements") .font(.headline) - Text("Add word replacements to automatically replace text during AI enhancement.") + Text("Add word replacements to automatically replace text.") .font(.subheadline) .foregroundColor(.secondary) .multilineTextAlignment(.center) @@ -221,9 
+235,12 @@ struct AddReplacementSheet: View { .foregroundColor(.secondary) } - TextField("Enter word or phrase to replace", text: $originalWord) + TextField("Enter word or phrase to replace (use commas for multiple)", text: $originalWord) .textFieldStyle(.roundedBorder) .font(.body) + Text("Separate multiple originals with commas, e.g. Voicing, Voice ink, Voiceing") + .font(.caption) + .foregroundColor(.secondary) } .padding(.horizontal) @@ -297,7 +314,12 @@ struct AddReplacementSheet: View { let original = originalWord let replacement = replacementWord - guard !original.isEmpty && !replacement.isEmpty else { return } + // Validate that at least one non-empty token exists + let tokens = original + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } + guard !tokens.isEmpty && !replacement.isEmpty else { return } manager.addReplacement(original: original, replacement: replacement) dismiss() From 7161bc3f71850d22a44c07c331292875855ea2e1 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 11:58:26 +0545 Subject: [PATCH 19/21] Improved the comma seperated replacement values to be consolidated --- .../ParakeetTranscriptionService.swift | 1 + .../Services/WordReplacementService.swift | 40 +++++++----- .../Dictionary/EditReplacementSheet.swift | 2 +- .../Dictionary/WordReplacementView.swift | 61 ++++++++++++------- 4 files changed, 65 insertions(+), 39 deletions(-) diff --git a/VoiceInk/Services/ParakeetTranscriptionService.swift b/VoiceInk/Services/ParakeetTranscriptionService.swift index 71b6de7..cd0d315 100644 --- a/VoiceInk/Services/ParakeetTranscriptionService.swift +++ b/VoiceInk/Services/ParakeetTranscriptionService.swift @@ -124,6 +124,7 @@ class ParakeetTranscriptionService: TranscriptionService { } let result = try await asrManager.transcribe(speechAudio) + print(result.text) // Reset decoder state and cleanup after transcription to avoid blocking the transcription start Task { diff --git 
a/VoiceInk/Services/WordReplacementService.swift b/VoiceInk/Services/WordReplacementService.swift index 79aa210..09a72d4 100644 --- a/VoiceInk/Services/WordReplacementService.swift +++ b/VoiceInk/Services/WordReplacementService.swift @@ -14,24 +14,32 @@ class WordReplacementService { var modifiedText = text // Apply replacements (case-insensitive) - for (original, replacement) in replacements { - let usesBoundaries = usesWordBoundaries(for: original) + for (originalGroup, replacement) in replacements { + // Split comma-separated originals at apply time only + let variants = originalGroup + .split(separator: ",") + .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } + .filter { !$0.isEmpty } - if usesBoundaries { - // Word-boundary regex for full original string - let pattern = "\\b\(NSRegularExpression.escapedPattern(for: original))\\b" - if let regex = try? NSRegularExpression(pattern: pattern, options: .caseInsensitive) { - let range = NSRange(modifiedText.startIndex..., in: modifiedText) - modifiedText = regex.stringByReplacingMatches( - in: modifiedText, - options: [], - range: range, - withTemplate: replacement - ) + for original in variants { + let usesBoundaries = usesWordBoundaries(for: original) + + if usesBoundaries { + // Word-boundary regex for full original string + let pattern = "\\b\(NSRegularExpression.escapedPattern(for: original))\\b" + if let regex = try? 
NSRegularExpression(pattern: pattern, options: .caseInsensitive) { + let range = NSRange(modifiedText.startIndex..., in: modifiedText) + modifiedText = regex.stringByReplacingMatches( + in: modifiedText, + options: [], + range: range, + withTemplate: replacement + ) + } + } else { + // Fallback substring replace for non-spaced scripts + modifiedText = modifiedText.replacingOccurrences(of: original, with: replacement, options: .caseInsensitive) } - } else { - // Fallback substring replace for non-spaced scripts - modifiedText = modifiedText.replacingOccurrences(of: original, with: replacement, options: .caseInsensitive) } } diff --git a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift index 50113b2..0ac4de8 100644 --- a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift +++ b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift @@ -23,7 +23,7 @@ struct EditReplacementSheet: View { Divider() formContent } - .frame(width: 460, height: 480) + .frame(width: 460, height: 560) } // MARK: โ€“ Subviews diff --git a/VoiceInk/Views/Dictionary/WordReplacementView.swift b/VoiceInk/Views/Dictionary/WordReplacementView.swift index a68ea4f..a8a6aa7 100644 --- a/VoiceInk/Views/Dictionary/WordReplacementView.swift +++ b/VoiceInk/Views/Dictionary/WordReplacementView.swift @@ -23,15 +23,10 @@ class WordReplacementManager: ObservableObject { } func addReplacement(original: String, replacement: String) { - // Support comma-separated originals mapping to the same replacement - let tokens = original - .split(separator: ",") - .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - .filter { !$0.isEmpty } - guard !tokens.isEmpty else { return } - for token in tokens { - replacements[token] = replacement - } + // Preserve comma-separated originals as a single entry + let trimmed = original.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + replacements[trimmed] = replacement } func 
removeReplacement(original: String) { @@ -39,18 +34,11 @@ class WordReplacementManager: ObservableObject { } func updateReplacement(oldOriginal: String, newOriginal: String, newReplacement: String) { - // Always remove the old key being edited + // Replace old key with the new comma-preserved key replacements.removeValue(forKey: oldOriginal) - - // Add one or more new keys (comma-separated) pointing to the same replacement - let tokens = newOriginal - .split(separator: ",") - .map { $0.trimmingCharacters(in: .whitespacesAndNewlines) } - .filter { !$0.isEmpty } - guard !tokens.isEmpty else { return } - for token in tokens { - replacements[token] = newReplacement - } + let trimmed = newOriginal.trimmingCharacters(in: .whitespacesAndNewlines) + guard !trimmed.isEmpty else { return } + replacements[trimmed] = newReplacement } } @@ -272,10 +260,11 @@ struct AddReplacementSheet: View { // Example Section VStack(alignment: .leading, spacing: 8) { - Text("Example") + Text("Examples") .font(.subheadline) .foregroundColor(.secondary) + // Single original -> replacement HStack(spacing: 12) { VStack(alignment: .leading, spacing: 4) { Text("Original:") @@ -297,6 +286,34 @@ struct AddReplacementSheet: View { .font(.callout) } } + .frame(maxWidth: .infinity, alignment: .leading) + .padding(12) + .background(Color(.textBackgroundColor)) + .cornerRadius(8) + + // Comma-separated originals -> single replacement + HStack(spacing: 12) { + VStack(alignment: .leading, spacing: 4) { + Text("Original:") + .font(.caption) + .foregroundColor(.secondary) + Text("Voicing, Voice ink, Voiceing") + .font(.callout) + } + + Image(systemName: "arrow.right") + .font(.caption) + .foregroundColor(.secondary) + + VStack(alignment: .leading, spacing: 4) { + Text("Replacement:") + .font(.caption) + .foregroundColor(.secondary) + Text("VoiceInk") + .font(.callout) + } + } + .frame(maxWidth: .infinity, alignment: .leading) .padding(12) .background(Color(.textBackgroundColor)) .cornerRadius(8) @@ -307,7 
+324,7 @@ struct AddReplacementSheet: View { .padding(.vertical) } } - .frame(width: 460, height: 480) + .frame(width: 460, height: 520) } private func addReplacement() { From 61ba16ed9265b876f7dfab97af48bdb4bfcd60f3 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 12:17:21 +0545 Subject: [PATCH 20/21] Label correction --- VoiceInk/Views/Dictionary/EditReplacementSheet.swift | 2 +- VoiceInk/Views/Dictionary/WordReplacementView.swift | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift index 0ac4de8..5b3e315 100644 --- a/VoiceInk/Views/Dictionary/EditReplacementSheet.swift +++ b/VoiceInk/Views/Dictionary/EditReplacementSheet.swift @@ -62,7 +62,7 @@ struct EditReplacementSheet: View { } private var descriptionSection: some View { - Text("Update the word or phrase that should be automatically replaced during AI enhancement.") + Text("Update the word or phrase that should be automatically replaced.") .font(.subheadline) .foregroundColor(.secondary) .frame(maxWidth: .infinity, alignment: .leading) diff --git a/VoiceInk/Views/Dictionary/WordReplacementView.swift b/VoiceInk/Views/Dictionary/WordReplacementView.swift index a8a6aa7..2f871a2 100644 --- a/VoiceInk/Views/Dictionary/WordReplacementView.swift +++ b/VoiceInk/Views/Dictionary/WordReplacementView.swift @@ -202,7 +202,7 @@ struct AddReplacementSheet: View { ScrollView { VStack(spacing: 20) { // Description - Text("Define a word or phrase to be automatically replaced during AI enhancement.") + Text("Define a word or phrase to be automatically replaced.") .font(.subheadline) .foregroundColor(.secondary) .frame(maxWidth: .infinity, alignment: .leading) From 0429af667865f748f277cc1436fe91620115780c Mon Sep 17 00:00:00 2001 From: Beingpax Date: Fri, 12 Sep 2025 14:13:31 +0545 Subject: [PATCH 21/21] Use whisper module in VoiceActivityDetector to resolve calling-convention conflict with 
whisper_vad_default_params; no functional changes --- VoiceInk/Services/VoiceActivityDetector.swift | 67 +++---------------- 1 file changed, 11 insertions(+), 56 deletions(-) diff --git a/VoiceInk/Services/VoiceActivityDetector.swift b/VoiceInk/Services/VoiceActivityDetector.swift index 4937c2d..67cd0f3 100644 --- a/VoiceInk/Services/VoiceActivityDetector.swift +++ b/VoiceInk/Services/VoiceActivityDetector.swift @@ -1,6 +1,11 @@ import Foundation import AVFoundation import os.log +#if canImport(whisper) +import whisper +#else +#error("Unable to import whisper module. Please check your project configuration.") +#endif // MARK: - C API Bridge @@ -8,60 +13,6 @@ import os.log fileprivate typealias WhisperVADContext = OpaquePointer fileprivate typealias WhisperVADSegments = OpaquePointer -// Define the C function signatures for Swift, scoped to this file - -@_silgen_name("whisper_vad_default_params") -fileprivate func whisper_vad_default_params() -> whisper_vad_params - -@_silgen_name("whisper_vad_default_context_params") -fileprivate func whisper_vad_default_context_params() -> whisper_vad_context_params - -@_silgen_name("whisper_vad_init_from_file_with_params") -fileprivate func whisper_vad_init_from_file_with_params(_ path_model: UnsafePointer, _ params: whisper_vad_context_params) -> WhisperVADContext? - -@_silgen_name("whisper_vad_detect_speech") -fileprivate func whisper_vad_detect_speech(_ vctx: WhisperVADContext, _ samples: UnsafePointer, _ n_samples: Int32) -> Bool - -@_silgen_name("whisper_vad_n_probs") -fileprivate func whisper_vad_n_probs(_ vctx: WhisperVADContext) -> Int32 - -@_silgen_name("whisper_vad_probs") -fileprivate func whisper_vad_probs(_ vctx: WhisperVADContext) -> UnsafeMutablePointer - -@_silgen_name("whisper_vad_segments_from_probs") -fileprivate func whisper_vad_segments_from_probs(_ vctx: WhisperVADContext, _ params: whisper_vad_params) -> WhisperVADSegments? 
- -@_silgen_name("whisper_vad_segments_n_segments") -fileprivate func whisper_vad_segments_n_segments(_ segments: WhisperVADSegments) -> Int32 - -@_silgen_name("whisper_vad_segments_get_segment_t0") -fileprivate func whisper_vad_segments_get_segment_t0(_ segments: WhisperVADSegments, _ i_segment: Int32) -> Float - -@_silgen_name("whisper_vad_segments_get_segment_t1") -fileprivate func whisper_vad_segments_get_segment_t1(_ segments: WhisperVADSegments, _ i_segment: Int32) -> Float - -@_silgen_name("whisper_vad_free_segments") -fileprivate func whisper_vad_free_segments(_ segments: WhisperVADSegments) - -@_silgen_name("whisper_vad_free") -fileprivate func whisper_vad_free(_ ctx: WhisperVADContext) - -// Structs matching whisper.h, scoped to this file -fileprivate struct whisper_vad_params { - var threshold: Float - var min_speech_duration_ms: Int32 - var min_silence_duration_ms: Int32 - var max_speech_duration_s: Float - var speech_pad_ms: Int32 - var samples_overlap: Float -} - -fileprivate struct whisper_vad_context_params { - var n_threads: Int32 - var use_gpu: Bool - var gpu_device: Int32 -} - // MARK: - VoiceActivityDetector Class @@ -72,8 +23,12 @@ class VoiceActivityDetector { init?(modelPath: String) { var contextParams = whisper_vad_default_context_params() contextParams.n_threads = max(1, min(8, Int32(ProcessInfo.processInfo.processorCount) - 2)) - - guard let context = whisper_vad_init_from_file_with_params(modelPath, contextParams) else { + + let contextOpt: WhisperVADContext? = modelPath.withCString { cPath in + whisper_vad_init_from_file_with_params(cPath, contextParams) + } + + guard let context = contextOpt else { logger.error("Failed to initialize VAD context.") return nil }