Add hybrid streaming transcription for improved accuracy

- Implement real-time streaming preview using Parakeet EOU (160ms chunks)
- Add batch transcription on completion for accurate final result
- Prefer Whisper large-v3-turbo (2.7% WER) over Parakeet (6.05% WER) when available
- Remove audio preprocessing that hurts ASR accuracy (gain control, noise reduction)
- Add streaming audio callback support in Recorder and CoreAudioRecorder
- Raw audio passthrough - SDK handles resampling internally

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
This commit is contained in:
Jake Shore 2026-01-16 07:35:53 -05:00
parent 652859414c
commit de1c1e51aa
11 changed files with 706 additions and 70 deletions

View File

@ -471,21 +471,24 @@
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 169;
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
DEVELOPMENT_TEAM = V6J6A3VWY2;
DEVELOPMENT_TEAM = QP43ZA49TG;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = VoiceInk/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = VoiceInk;
INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.productivity";
INFOPLIST_KEY_LSUIElement = NO;
INFOPLIST_KEY_NSAppleEventsUsageDescription = "VoiceInk needs to interact with your browser to detect the current website for applying website-specific configurations.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "VoiceInk needs access to your microphone to record audio for transcription.";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 14.0;
MARKETING_VERSION = 1.69;
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
PRODUCT_BUNDLE_IDENTIFIER = "--com.jakeshore.VoiceInk-com.jakeshore.VoiceInk";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "DEBUG ENABLE_NATIVE_SPEECH_ANALYZER $(inherited)";
SWIFT_EMIT_LOC_STRINGS = YES;
@ -505,21 +508,24 @@
COMBINE_HIDPI_IMAGES = YES;
CURRENT_PROJECT_VERSION = 169;
DEVELOPMENT_ASSET_PATHS = "\"VoiceInk/Preview Content\"";
DEVELOPMENT_TEAM = V6J6A3VWY2;
DEVELOPMENT_TEAM = QP43ZA49TG;
ENABLE_HARDENED_RUNTIME = YES;
ENABLE_PREVIEWS = YES;
GENERATE_INFOPLIST_FILE = YES;
INFOPLIST_FILE = VoiceInk/Info.plist;
INFOPLIST_KEY_CFBundleDisplayName = VoiceInk;
INFOPLIST_KEY_LSApplicationCategoryType = "public.app-category.productivity";
INFOPLIST_KEY_LSUIElement = NO;
INFOPLIST_KEY_NSAppleEventsUsageDescription = "VoiceInk needs to interact with your browser to detect the current website for applying website-specific configurations.";
INFOPLIST_KEY_NSHumanReadableCopyright = "";
INFOPLIST_KEY_NSMicrophoneUsageDescription = "VoiceInk needs access to your microphone to record audio for transcription.";
LD_RUNPATH_SEARCH_PATHS = (
"$(inherited)",
"@executable_path/../Frameworks",
);
MACOSX_DEPLOYMENT_TARGET = 14.0;
MARKETING_VERSION = 1.69;
PRODUCT_BUNDLE_IDENTIFIER = com.prakashjoshipax.VoiceInk;
PRODUCT_BUNDLE_IDENTIFIER = "--com.jakeshore.VoiceInk-com.jakeshore.VoiceInk";
PRODUCT_NAME = "$(TARGET_NAME)";
SWIFT_ACTIVE_COMPILATION_CONDITIONS = "ENABLE_NATIVE_SPEECH_ANALYZER $(inherited)";
SWIFT_EMIT_LOC_STRINGS = YES;

View File

@ -1,5 +1,5 @@
{
"originHash" : "93572b72309723585f9fa623350a6b09a152df9dec03f14a5b938629e0f677a0",
"originHash" : "144ae35ef0b62c92588dc767eb6b2d443797062688bf1347662bed55d75a7ec2",
"pins" : [
{
"identity" : "axswift",
@ -16,7 +16,7 @@
"location" : "https://github.com/FluidInference/FluidAudio",
"state" : {
"branch" : "main",
"revision" : "ddee663c4a9806d4f139943b0978b0f0a961587b"
"revision" : "11805437821b7e2efc044fc9c5b9b8ce88f6f29f"
}
},
{
@ -52,7 +52,7 @@
"location" : "https://github.com/ejbills/mediaremote-adapter",
"state" : {
"branch" : "master",
"revision" : "3529aa25023082a2ceadebcd2c9c4a9430ee96b9"
"revision" : "78aae86c03adab11a7b352211cc82381737cf854"
}
},
{
@ -69,8 +69,8 @@
"kind" : "remoteSourceControl",
"location" : "https://github.com/sparkle-project/Sparkle",
"state" : {
"revision" : "9a1d2a19d3595fcf8d9c447173f9a1687b3dcadb",
"version" : "2.8.0"
"revision" : "5581748cef2bae787496fe6d61139aebe0a451f6",
"version" : "2.8.1"
}
},
{

View File

@ -48,6 +48,9 @@ final class CoreAudioRecorder {
private var renderBuffer: UnsafeMutablePointer<Float32>?
private var renderBufferSize: UInt32 = 0
// Streaming callback for real-time audio processing (called from audio thread)
var streamingAudioCallback: ((_ samples: UnsafePointer<Float32>, _ frameCount: UInt32, _ sampleRate: Double, _ channelCount: UInt32) -> Void)?
// MARK: - Initialization
init() {}
@ -541,7 +544,6 @@ final class CoreAudioRecorder {
inBusNumber: UInt32,
inNumberFrames: UInt32
) -> OSStatus {
guard let audioUnit = audioUnit, isRecording, let renderBuf = renderBuffer else {
return noErr
}
@ -581,6 +583,11 @@ final class CoreAudioRecorder {
return status
}
// Call streaming callback with raw audio samples (for real-time transcription)
if let callback = streamingAudioCallback {
callback(renderBuf, inNumberFrames, deviceFormat.mSampleRate, channelCount)
}
// Calculate audio meters from input buffer
calculateMeters(from: &bufferList, frameCount: inNumberFrames)

View File

@ -1,12 +1,31 @@
import Foundation
import AppKit
import os.log
class CursorPaster {
private static let logger = Logger(subsystem: "com.jakeshore.VoiceInk", category: "CursorPaster")
// MARK: - Streaming Mode

// While streaming is active we bypass the clipboard save/restore dance;
// rapid back-to-back paste operations would otherwise race each other.
private static var isStreamingMode: Bool = false

/// Toggles streaming mode. While enabled, `pasteAtCursor` will not attempt to
/// save and restore the user's clipboard between consecutive pastes.
static func setStreamingMode(_ enabled: Bool) {
    isStreamingMode = enabled
    logger.notice("📋 Streaming mode \(enabled ? "enabled" : "disabled")")
}
static func pasteAtCursor(_ text: String) {
logger.notice("📋 pasteAtCursor called with \(text.count) chars: '\(text.prefix(50))...'")
logger.notice("📋 AXIsProcessTrusted = \(AXIsProcessTrusted())")
let pasteboard = NSPasteboard.general
// Default to true if not explicitly set by user
let shouldRestoreClipboard = UserDefaults.standard.object(forKey: "restoreClipboardAfterPaste") as? Bool ?? true
// During streaming mode, skip clipboard save/restore to avoid race conditions
// with rapid consecutive paste operations
let userWantsRestore = UserDefaults.standard.object(forKey: "restoreClipboardAfterPaste") as? Bool ?? true
let shouldRestoreClipboard = userWantsRestore && !isStreamingMode
var savedContents: [(NSPasteboard.PasteboardType, Data)] = []
@ -67,7 +86,9 @@ class CursorPaster {
}
private static func pasteUsingCommandV() {
logger.notice("📋 pasteUsingCommandV called")
guard AXIsProcessTrusted() else {
logger.error("❌ pasteUsingCommandV: AXIsProcessTrusted() returned false!")
return
}
@ -81,11 +102,13 @@ class CursorPaster {
cmdDown?.flags = .maskCommand
vDown?.flags = .maskCommand
vUp?.flags = .maskCommand
cmdUp?.flags = .maskCommand // Fix: cmdUp also needs .maskCommand flag
cmdDown?.post(tap: .cghidEventTap)
vDown?.post(tap: .cghidEventTap)
vUp?.post(tap: .cghidEventTap)
cmdUp?.post(tap: .cghidEventTap)
logger.notice("📋 pasteUsingCommandV: Posted Cmd+V events")
}
// Simulate pressing the Return / Enter key
@ -97,4 +120,32 @@ class CursorPaster {
enterDown?.post(tap: .cghidEventTap)
enterUp?.post(tap: .cghidEventTap)
}
/// Removes `count` characters before the cursor by synthesizing backspace key
/// presses. Short pauses are inserted periodically so slower applications
/// don't drop keystrokes under rapid input.
static func deleteCharacters(count: Int) {
    logger.notice("📋 deleteCharacters called with count=\(count)")
    guard AXIsProcessTrusted() else {
        logger.error("❌ deleteCharacters: AXIsProcessTrusted() returned false!")
        return
    }
    guard count > 0 else { return }
    let source = CGEventSource(stateID: .hidSystemState)
    let backspaceKeyCode: CGKeyCode = 0x33 // Backspace key
    for index in 0..<count {
        CGEvent(keyboardEventSource: source, virtualKey: backspaceKeyCode, keyDown: true)?.post(tap: .cghidEventTap)
        CGEvent(keyboardEventSource: source, virtualKey: backspaceKeyCode, keyDown: false)?.post(tap: .cghidEventTap)
        // After every fifth keystroke (except the very last one), pause briefly
        // so the target app has time to drain its event queue.
        if (index + 1) % 5 == 0 && index + 1 < count {
            usleep(1500) // 1.5ms pause every 5 keystrokes
        }
    }
    logger.notice("📋 deleteCharacters: Deleted \(count) characters")
}
}

View File

@ -2,36 +2,9 @@
<!DOCTYPE plist PUBLIC "-//Apple//DTD PLIST 1.0//EN" "http://www.apple.com/DTDs/PropertyList-1.0.dtd">
<plist version="1.0">
<dict>
<key>SUEnableInstallerLauncherService</key>
<true/>
<key>SUFeedURL</key>
<string>https://beingpax.github.io/VoiceInk/appcast.xml</string>
<key>SUPublicEDKey</key>
<string>rLRdZIjK3gHKfqNlAF9nT7FbjwSvwkJ8BVn0v2mD1Mo=</string>
<key>LSUIElement</key>
<false/>
<key>SUEnableAutomaticChecks</key>
<true/>
<key>NSMicrophoneUsageDescription</key>
<string>VoiceInk needs access to your microphone to record audio for transcription.</string>
<key>NSAppleEventsUsageDescription</key>
<string>VoiceInk needs to interact with your browser to detect the current website for applying website-specific configurations.</string>
<key>NSScreenCaptureUsageDescription</key>
<string>VoiceInk needs screen recording access to understand context from your screen for improved transcription accuracy.</string>
<key>CFBundleDocumentTypes</key>
<array>
<dict>
<key>CFBundleTypeName</key>
<string>Audio/Video File</string>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
<key>LSHandlerRank</key>
<string>Alternate</string>
<key>LSItemContentTypes</key>
<array>
<string>public.audio</string>
<string>public.movie</string>
</array>
<key>CFBundleTypeExtensions</key>
<array>
<string>wav</string>
@ -44,7 +17,28 @@
<string>flac</string>
<string>caf</string>
</array>
<key>CFBundleTypeName</key>
<string>Audio/Video File</string>
<key>CFBundleTypeRole</key>
<string>Viewer</string>
<key>LSHandlerRank</key>
<string>Alternate</string>
<key>LSItemContentTypes</key>
<array>
<string>public.audio</string>
<string>public.movie</string>
</array>
</dict>
</array>
<key>NSScreenCaptureUsageDescription</key>
<string>VoiceInk needs screen recording access to understand context from your screen for improved transcription accuracy.</string>
<key>SUEnableAutomaticChecks</key>
<true/>
<key>SUEnableInstallerLauncherService</key>
<true/>
<key>SUFeedURL</key>
<string>https://beingpax.github.io/VoiceInk/appcast.xml</string>
<key>SUPublicEDKey</key>
<string>rLRdZIjK3gHKfqNlAF9nT7FbjwSvwkJ8BVn0v2mD1Mo=</string>
</dict>
</plist>

View File

@ -19,9 +19,11 @@ class LicenseViewModel: ObservableObject {
private let polarService = PolarService()
private let userDefaults = UserDefaults.standard
private let licenseManager = LicenseManager.shared
private var isInitializing = true
init() {
loadLicenseState()
isInitializing = false
}
func startTrial() {
@ -29,7 +31,10 @@ class LicenseViewModel: ObservableObject {
if licenseManager.trialStartDate == nil {
licenseManager.trialStartDate = Date()
licenseState = .trial(daysRemaining: trialPeriodDays)
NotificationCenter.default.post(name: .licenseStatusChanged, object: nil)
// Don't post notification during initialization to prevent recursive loop
if !isInitializing {
NotificationCenter.default.post(name: .licenseStatusChanged, object: nil)
}
}
}

View File

@ -40,7 +40,7 @@ class PlaybackController: ObservableObject {
private func setupMediaControllerCallbacks() {
mediaController.onTrackInfoReceived = { [weak self] trackInfo in
self?.isMediaPlaying = trackInfo.payload.isPlaying ?? false
self?.isMediaPlaying = trackInfo?.payload.isPlaying ?? false
self?.lastKnownTrackInfo = trackInfo
}

View File

@ -19,6 +19,9 @@ class Recorder: NSObject, ObservableObject {
private var audioRestorationTask: Task<Void, Never>?
private var hasDetectedAudioInCurrentSession = false
/// Stored streaming callback - applied when CoreAudioRecorder is created
private var pendingStreamingCallback: ((_ samples: UnsafePointer<Float32>, _ frameCount: UInt32, _ sampleRate: Double, _ channelCount: UInt32) -> Void)?
enum RecorderError: Error {
case couldNotStartRecording
}
@ -127,6 +130,12 @@ class Recorder: NSObject, ObservableObject {
let coreAudioRecorder = CoreAudioRecorder()
recorder = coreAudioRecorder
// Apply any pending streaming callback that was set before recording started
if let callback = pendingStreamingCallback {
coreAudioRecorder.streamingAudioCallback = callback
logger.notice("🎙️ Applied pending streaming callback to recorder")
}
try coreAudioRecorder.startRecording(toOutputFile: url, deviceID: deviceID)
audioRestorationTask?.cancel()
@ -179,6 +188,7 @@ class Recorder: NSObject, ObservableObject {
func stopRecording() {
audioLevelCheckTask?.cancel()
audioMeterUpdateTask?.cancel()
recorder?.streamingAudioCallback = nil // Clear streaming callback
recorder?.stopRecording()
recorder = nil
audioMeter = AudioMeter(averagePower: 0, peakPower: 0)
@ -190,6 +200,15 @@ class Recorder: NSObject, ObservableObject {
deviceManager.isRecordingActive = false
}
/// Registers (or clears, with `nil`) a real-time audio-sample callback for
/// streaming transcription. The callback runs on the audio thread — it must
/// not block. Because the CoreAudioRecorder is created lazily, the callback is
/// remembered here and attached when recording actually starts; a recorder
/// that already exists gets it immediately as well.
func setStreamingAudioCallback(_ callback: ((_ samples: UnsafePointer<Float32>, _ frameCount: UInt32, _ sampleRate: Double, _ channelCount: UInt32) -> Void)?) {
    // Attach to a live recorder right away (if any)…
    recorder?.streamingAudioCallback = callback
    // …and keep it for the next recorder instance.
    pendingStreamingCallback = callback
}
private func handleRecordingError(_ error: Error) async {
logger.error("❌ Recording error occurred: \(error.localizedDescription)")

View File

@ -21,6 +21,17 @@ class ParakeetTranscriptionService: TranscriptionService {
private var activeVersion: AsrModelVersion?
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink.parakeet", category: "ParakeetTranscriptionService")
init() {
logger.notice("🆕 ParakeetTranscriptionService initialized (v4 - raw audio, no preprocessing)")
}
// MARK: - Streaming Properties (using StreamingEouAsrManager for low-latency 160ms chunks)
private var streamingEouManager: StreamingEouAsrManager?
private var streamingTask: Task<Void, Never>?
private var streamingContinuation: AsyncStream<String>.Continuation?
private var streamAudioCallCount = 0
private var lastPartialTranscript: String = ""
private func version(for model: any TranscriptionModel) -> AsrModelVersion {
model.name.lowercased().contains("v2") ? .v2 : .v3
}
@ -121,4 +132,190 @@ class ParakeetTranscriptionService: TranscriptionService {
vadManager = nil
activeVersion = nil
}
// MARK: - Streaming Transcription (Low-Latency EOU Mode)

/// Returns the on-disk location where the 160ms Parakeet EOU streaming models
/// live (under Application Support/FluidAudio).
private func getEouModelsDirectory() -> URL {
    let appSupport = FileManager.default
        .urls(for: .applicationSupportDirectory, in: .userDomainMask)
        .first!
    return appSupport
        .appendingPathComponent("FluidAudio", isDirectory: true)
        .appendingPathComponent("Models/parakeet-eou-streaming/160ms", isDirectory: true)
}
/// Ensures the EOU streaming models exist locally, fetching them on first use.
/// The compiled encoder file is used as the sentinel for a completed download.
private func ensureEouModelsDownloaded() async throws -> URL {
    let modelsDir = getEouModelsDirectory()
    let encoderPath = modelsDir.appendingPathComponent("streaming_encoder.mlmodelc")
    guard !FileManager.default.fileExists(atPath: encoderPath.path) else {
        return modelsDir
    }
    logger.notice("🎙️ Downloading Parakeet EOU 160ms models for streaming preview...")
    // The repo is unpacked two directory levels above the chunk-size folder.
    let baseDir = modelsDir.deletingLastPathComponent().deletingLastPathComponent()
    try await DownloadUtils.downloadRepo(.parakeetEou160, to: baseDir)
    logger.notice("🎙️ EOU 160ms models downloaded successfully")
    return modelsDir
}
/// Starts a streaming transcription session using StreamingEouAsrManager for near-instant results.
/// Uses 160ms chunks for lowest latency (~160ms between updates).
/// Returns an AsyncStream that emits transcription text updates as they arrive.
/// - Parameter model: NOTE(review) — currently unused in this body; the EOU
///   streaming models are fixed to the 160ms Parakeet EOU repo regardless of
///   which Parakeet model was selected. Confirm whether version selection
///   should apply here.
/// - Throws: Errors from model download or `loadModels`.
func startStreaming(model: ParakeetModel) async throws -> AsyncStream<String> {
logger.notice("🎙️ Starting low-latency EOU streaming transcription")
// Reset state
streamAudioCallCount = 0
lastPartialTranscript = ""
// Download EOU models if needed
let modelsDir = try await ensureEouModelsDownloaded()
// Create StreamingEouAsrManager with 160ms chunks for lowest latency preview
// In HYBRID mode: streaming is just for visual feedback, batch provides accuracy
// EOU debounce of 1280ms means end-of-utterance detection after ~1.3s of silence
let manager = StreamingEouAsrManager(chunkSize: .ms160, eouDebounceMs: 1280)
// Publishing the manager here makes streamAudio() start accepting audio.
streamingEouManager = manager
// Load Parakeet EOU models
try await manager.loadModels(modelDir: modelsDir)
logger.notice("🎙️ EOU streaming preview started with 160ms chunks (batch will provide accuracy)")
// Create stream using makeStream for proper continuation management
let (stream, continuation) = AsyncStream<String>.makeStream()
self.streamingContinuation = continuation
// Set up partial callback BEFORE returning the stream (fixes race condition)
await manager.setPartialCallback { [weak self] partialText in
guard let self = self else { return }
// Drop empty and duplicate partials so consumers only see real changes.
let trimmed = partialText.trimmingCharacters(in: .whitespaces)
if !trimmed.isEmpty && trimmed != self.lastPartialTranscript {
self.lastPartialTranscript = trimmed
self.logger.notice("🎙️ Partial update: '\(trimmed.prefix(50))...'")
continuation.yield(trimmed)
}
}
// Note: Removed onTermination callback that called cancelStreaming()
// This was causing a race condition where the manager was nullified
// before finishStreaming() could call manager.finish()
// Cleanup is handled by finishStreaming()'s defer block instead
logger.notice("🎙️ Callback registered, streaming ready")
return stream
}
/// Feeds raw audio samples to the streaming EOU transcription engine.
/// Called from the audio thread - creates AVAudioPCMBuffer and forwards to manager.
/// SDK handles resampling to 16kHz internally. No preprocessing applied (research shows it hurts accuracy).
///
/// - Parameters:
///   - samples: Interleaved Float32 samples from the capture device.
///   - frameCount: Number of frames in `samples`.
///   - sampleRate: Native sample rate of the capture device.
///   - channels: Channel count of the interleaved input.
func streamAudio(samples: UnsafePointer<Float32>, frameCount: UInt32, sampleRate: Double, channels: UInt32) {
    streamAudioCallCount += 1

    // Fix: check for an active session BEFORE allocating — the original built
    // an AVAudioPCMBuffer on the audio thread even when no manager existed,
    // only to discard it.
    guard let manager = streamingEouManager else {
        return
    }

    // Create buffer at original sample rate.
    // SDK's process() method handles resampling to 16kHz internally via AudioConverter.
    guard let audioBuffer = createOriginalFormatBuffer(samples: samples, frameCount: frameCount, sampleRate: sampleRate, channels: channels) else {
        // Only log the first few failures to avoid flooding the log from the audio thread.
        if streamAudioCallCount <= 5 {
            logger.warning("Failed to create audio buffer at chunk #\(self.streamAudioCallCount)")
        }
        return
    }

    // StreamingEouAsrManager.process is an actor method; dispatch so we never
    // block the real-time audio thread. Capture `manager` directly instead of
    // re-reading self?.streamingEouManager at execution time — a concurrent
    // finish/cancel (which nils the property) could otherwise silently drop
    // this chunk before process() ran.
    Task.detached { [weak self, audioBuffer] in
        do {
            _ = try await manager.process(audioBuffer: audioBuffer)
        } catch {
            self?.logger.warning("EOU process error: \(error.localizedDescription)")
        }
    }
}
/// Creates a MONO AVAudioPCMBuffer from interleaved input samples.
/// No preprocessing - research shows gain control and noise reduction HURT ASR accuracy.
/// Stereo/multi-channel input is averaged down to mono; mono input is copied as-is.
///
/// - Returns: A non-interleaved mono Float32 buffer at the original sample
///   rate, or `nil` if the format or buffer could not be created.
private func createOriginalFormatBuffer(samples: UnsafePointer<Float32>, frameCount: UInt32, sampleRate: Double, channels: UInt32) -> AVAudioPCMBuffer? {
    // Mono, non-interleaved Float32 — the simplest format for ASR input.
    guard let format = AVAudioFormat(
        commonFormat: .pcmFormatFloat32,
        sampleRate: sampleRate,
        channels: 1, // Output is MONO
        interleaved: false
    ) else {
        return nil
    }
    guard let buffer = AVAudioPCMBuffer(pcmFormat: format, frameCapacity: frameCount) else {
        return nil
    }
    buffer.frameLength = frameCount
    guard let monoData = buffer.floatChannelData?[0] else {
        return nil
    }
    let channelCount = Int(channels)
    let frames = Int(frameCount)
    if channelCount == 1 {
        // Already mono — bulk copy instead of a per-sample loop; this runs on
        // the audio thread, so cheaper is better. No gain, no processing.
        monoData.update(from: samples, count: frames)
    } else {
        // Interleaved multi-channel (L0 R0 L1 R1 ...) — average channels to
        // mono with a simple unweighted mean; no gain applied.
        let channelWeight = 1.0 / Float(channelCount)
        for frame in 0..<frames {
            var sum: Float = 0
            for channel in 0..<channelCount {
                sum += samples[frame * channelCount + channel]
            }
            monoData[frame] = sum * channelWeight
        }
    }
    return buffer
}
/// Finishes the streaming session and returns the final transcription.
/// Returns the empty string when no session is active.
func finishStreaming() async throws -> String {
    // Tear down all streaming state on every exit path — early return,
    // success, or a throw from manager.finish().
    defer {
        streamingTask?.cancel()
        streamingTask = nil
        streamingContinuation?.finish()
        streamingContinuation = nil
        streamingEouManager = nil
        lastPartialTranscript = ""
    }
    guard let manager = streamingEouManager else { return "" }
    // Flush the manager and collect whatever it has decoded so far.
    let finalText = try await manager.finish()
    logger.notice("🎙️ EOU streaming finished with \(finalText.count) characters")
    return finalText
}
/// Cancels the streaming session without returning results.
/// Safe to call when no session is active.
func cancelStreaming() async {
    streamingTask?.cancel()
    streamingTask = nil
    streamingContinuation?.finish()
    streamingContinuation = nil
    // Fix: clear the partial transcript unconditionally — the original only
    // cleared it when a manager existed, so a stale preview string could leak
    // into the next session. finishStreaming() already clears unconditionally.
    lastPartialTranscript = ""
    if let manager = streamingEouManager {
        await manager.reset()
        streamingEouManager = nil
        logger.notice("🎙️ Cancelled EOU streaming transcription")
    }
}
}

View File

@ -28,6 +28,11 @@ class WhisperState: NSObject, ObservableObject {
@Published var miniRecorderError: String?
@Published var shouldCancelRecording = false
// MARK: - Streaming Transcription Properties
private var streamingUpdateTask: Task<Void, Never>?
private var lastStreamedText: String = ""
private var isStreamingActive: Bool = false
@Published var recorderType: String = UserDefaults.standard.string(forKey: "RecorderType") ?? "mini" {
didSet {
@ -101,6 +106,11 @@ class WhisperState: NSObject, ObservableObject {
@Published var downloadProgress: [String: Double] = [:]
@Published var parakeetDownloadStates: [String: Bool] = [:]
/// Returns true if the current transcription model supports streaming (Parakeet only)
var isStreamingSupported: Bool {
currentTranscriptionModel?.provider == .parakeet
}
init(modelContext: ModelContext, enhancementService: AIEnhancementService? = nil) {
self.modelContext = modelContext
let appSupportDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0]
@ -141,28 +151,41 @@ class WhisperState: NSObject, ObservableObject {
func toggleRecord(powerModeId: UUID? = nil) async {
if recordingState == .recording {
await recorder.stopRecording()
if let recordedFile {
if !shouldCancelRecording {
let audioAsset = AVURLAsset(url: recordedFile)
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
let transcription = Transcription(
text: "",
duration: duration,
audioFileURL: recordedFile.absoluteString,
transcriptionStatus: .pending
)
modelContext.insert(transcription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
await transcribeAudio(on: transcription)
} else {
await MainActor.run {
recordingState = .idle
}
await cleanupModelResources()
// Handle cancellation - clean up streaming if active
if shouldCancelRecording {
if isStreamingActive {
await cancelStreamingTranscription()
}
await MainActor.run {
recordingState = .idle
}
await cleanupModelResources()
return
}
// Handle streaming transcription completion
if isStreamingActive {
await handleStreamingCompletion()
return
}
// Non-streaming (batch) transcription
if let recordedFile {
let audioAsset = AVURLAsset(url: recordedFile)
let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
let transcription = Transcription(
text: "",
duration: duration,
audioFileURL: recordedFile.absoluteString,
transcriptionStatus: .pending
)
modelContext.insert(transcription)
try? modelContext.save()
NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
await transcribeAudio(on: transcription)
} else {
logger.error("❌ No recorded file found after stopping recording")
await MainActor.run {
@ -189,7 +212,16 @@ class WhisperState: NSObject, ObservableObject {
let permanentURL = self.recordingsDirectory.appendingPathComponent(fileName)
self.recordedFile = permanentURL
// IMPORTANT: Set up streaming BEFORE starting recording to avoid losing early audio
// Check if we're using a Parakeet model and set up streaming first
let isParakeetModel = self.currentTranscriptionModel is ParakeetModel
if isParakeetModel {
self.logger.notice("🎙️ Detected Parakeet model, setting up streaming BEFORE recording...")
await self.startStreamingTranscription()
}
try await self.recorder.startRecording(toOutputFile: permanentURL)
self.logger.notice("🎙️ Recording started\(isParakeetModel ? " (streaming already active)" : "")")
await MainActor.run {
self.recordingState = .recording
@ -202,9 +234,19 @@ class WhisperState: NSObject, ObservableObject {
// Load model and capture context in background without blocking
Task.detached { [weak self] in
guard let self = self else { return }
guard let self = self else {
print("⚠️ Self was deallocated in Task.detached!")
return
}
// Debug: Check what model type we have
let modelType = await type(of: self.currentTranscriptionModel)
let modelName = await self.currentTranscriptionModel?.displayName ?? "nil"
print("🔍 DEBUG: Model type = \(modelType), name = \(modelName)")
print("🔍 DEBUG: Is ParakeetModel? \(await self.currentTranscriptionModel is ParakeetModel)")
// Only load model if it's a local model and not already loaded
// Note: Parakeet streaming is now set up BEFORE recording starts (above)
if let model = await self.currentTranscriptionModel, model.provider == .local {
if let localWhisperModel = await self.availableModels.first(where: { $0.name == model.name }),
await self.whisperContext == nil {
@ -214,8 +256,10 @@ class WhisperState: NSObject, ObservableObject {
await self.logger.error("❌ Model loading failed: \(error.localizedDescription)")
}
}
} else if let parakeetModel = await self.currentTranscriptionModel as? ParakeetModel {
try? await self.serviceRegistry.parakeetTranscriptionService.loadModel(for: parakeetModel)
} else if !(await self.currentTranscriptionModel is ParakeetModel) {
// Non-Parakeet, non-local models - just log
let modelDesc = await self.currentTranscriptionModel?.displayName ?? "nil"
await self.logger.notice("🎙️ Model is not local or Parakeet: \(modelDesc)")
}
if let enhancementService = await self.enhancementService {
@ -245,6 +289,319 @@ class WhisperState: NSObject, ObservableObject {
response(true)
}
// MARK: - Streaming Transcription Methods

/// Starts streaming transcription for Parakeet models.
/// Installs the audio-thread callback first so no early audio is lost, then
/// starts the EOU streaming session and a task that forwards partial
/// transcripts to `handleStreamingUpdate`. No-op for non-Parakeet models.
private func startStreamingTranscription() async {
    guard let parakeetModel = currentTranscriptionModel as? ParakeetModel else { return }
    // Capture direct reference to the service to avoid @MainActor isolation issues in audio callback
    let parakeetService = serviceRegistry.parakeetTranscriptionService
    // Set up audio callback BEFORE starting streaming to avoid losing early audio
    // Note: callback runs on audio thread, so we capture parakeetService directly
    // Audio will be silently dropped until manager is created (streamAudio has a guard)
    logger.notice("🎙️ Setting up streaming audio callback")
    recorder.setStreamingAudioCallback { samples, frameCount, sampleRate, channels in
        parakeetService.streamAudio(
            samples: samples,
            frameCount: frameCount,
            sampleRate: sampleRate,
            channels: channels
        )
    }
    do {
        let transcriptStream = try await parakeetService.startStreaming(model: parakeetModel)
        isStreamingActive = true
        lastStreamedText = ""
        // Enable streaming mode in CursorPaster to skip clipboard save/restore
        // This prevents race conditions during rapid paste operations
        CursorPaster.setStreamingMode(true)
        // Start task to handle streaming updates
        logger.notice("🎙️ Starting streaming update task...")
        streamingUpdateTask = Task {
            self.logger.notice("🎙️ Streaming update task running, waiting for transcripts...")
            for await text in transcriptStream {
                self.logger.notice("🎙️ Got transcript from stream: '\(text.prefix(30))...'")
                await self.handleStreamingUpdate(text)
            }
            self.logger.notice("🎙️ Streaming update task ended")
        }
        logger.notice("🎙️ Started streaming transcription - all setup complete")
    } catch {
        logger.error("❌ Failed to start streaming transcription: \(error.localizedDescription)")
        isStreamingActive = false
        // Fix: the audio callback was installed above, before the do-block.
        // The original left it in place on failure, so the audio thread kept
        // feeding a service with no active session. Tear it down here.
        recorder.setStreamingAudioCallback(nil)
    }
}
/// Handles incoming streaming transcription updates by pasting text to active app
/// Optimized to use differential updates when possible to reduce flicker
/// - Parameter newText: The full transcript text emitted by the streaming engine.
private func handleStreamingUpdate(_ newText: String) async {
guard isStreamingActive else { return }
await MainActor.run {
let oldText = self.lastStreamedText
// Optimization: If new text starts with old text, just append the delta
// This is the common case during continuous speech and avoids flicker
if newText.hasPrefix(oldText) && !oldText.isEmpty {
let deltaText = String(newText.dropFirst(oldText.count))
if !deltaText.isEmpty {
self.lastStreamedText = newText
CursorPaster.pasteAtCursor(deltaText)
self.logger.notice("🎙️ Appended delta: '\(deltaText.prefix(30))...'")
}
return
}
// Full replacement needed (model corrected itself or first update)
let charsToDelete = oldText.count
// Step 1: Delete previously streamed text
if charsToDelete > 0 {
CursorPaster.deleteCharacters(count: charsToDelete)
}
// Step 2: Wait for deletions to complete before pasting
// NOTE(review): lastStreamedText is only updated inside the asyncAfter
// closure below, so a second update arriving within deleteWaitTime would
// compute its delta against stale state — confirm updates cannot
// interleave faster than the wait window.
let deleteWaitTime = max(0.02, Double(charsToDelete) * 0.002) // ~2ms per char, min 20ms
DispatchQueue.main.asyncAfter(deadline: .now() + deleteWaitTime) { [weak self] in
guard let self = self, self.isStreamingActive else { return }
self.lastStreamedText = newText
CursorPaster.pasteAtCursor(newText)
self.logger.notice("🎙️ Full replacement: '\(newText.prefix(30))...'")
}
}
}
/// Finishes streaming and returns the final transcription text
/// Also deletes the pasted preview text so the accurate batch result can
/// replace it, and disables CursorPaster's streaming mode.
/// - Returns: The final transcript (or the last streamed text as a fallback),
///   or `nil` when no streaming session was active.
private func finishStreamingTranscription() async -> String? {
guard isStreamingActive else { return nil }
// Stop receiving updates
streamingUpdateTask?.cancel()
streamingUpdateTask = nil
// Clear the audio callback
recorder.setStreamingAudioCallback(nil)
// Get final text
var finalText: String
do {
finalText = try await serviceRegistry.parakeetTranscriptionService.finishStreaming()
// If EOU returns empty but we have streamed text, use that as fallback
if finalText.isEmpty && !self.lastStreamedText.isEmpty {
logger.warning("⚠️ EOU returned empty, using lastStreamedText fallback (\(self.lastStreamedText.count) chars)")
finalText = self.lastStreamedText
}
} catch {
logger.error("❌ Failed to finish streaming: \(error.localizedDescription)")
finalText = self.lastStreamedText // Fall back to last streamed text
}
// Delete the streamed preview text (will be replaced by batch transcription in hybrid mode)
// NOTE(review): a pending asyncAfter replacement scheduled by
// handleStreamingUpdate could still fire after this deletion; the
// `isStreamingActive` guard inside that closure only helps once the flag
// is cleared below — confirm the ordering is safe.
await MainActor.run {
if !self.lastStreamedText.isEmpty {
CursorPaster.deleteCharacters(count: self.lastStreamedText.count)
}
}
self.isStreamingActive = false
self.lastStreamedText = ""
// Disable streaming mode - clipboard operations can resume normally
CursorPaster.setStreamingMode(false)
logger.notice("🎙️ Finished streaming transcription: \(finalText.count) characters")
return finalText
}
/// Tears down an in-flight streaming session and removes any preview text
/// that was already pasted into the active app. No-op when streaming is not
/// active.
private func cancelStreamingTranscription() async {
    guard isStreamingActive else { return }
    // Stop the transcript pump and detach from the audio thread.
    streamingUpdateTask?.cancel()
    streamingUpdateTask = nil
    recorder.setStreamingAudioCallback(nil)
    await serviceRegistry.parakeetTranscriptionService.cancelStreaming()
    // Erase whatever preview text made it on screen.
    await MainActor.run {
        if !lastStreamedText.isEmpty {
            CursorPaster.deleteCharacters(count: lastStreamedText.count)
        }
    }
    isStreamingActive = false
    lastStreamedText = ""
    // Disable streaming mode - clipboard operations can resume normally
    CursorPaster.setStreamingMode(false)
    logger.notice("🎙️ Cancelled streaming transcription")
}
/// Handles completion of streaming transcription using HYBRID approach:
/// 1. Streaming provided a real-time preview (fast, lower accuracy).
/// 2. Batch transcription now runs over the full recording to produce the
///    accurate final result, which replaces the streamed preview at the cursor.
///
/// Side effects: persists a `Transcription` record (pending → completed/failed),
/// posts `.transcriptionCreated` / `.transcriptionCompleted` notifications,
/// plays the stop sound, pastes the final text, and resets `recordingState`.
private func handleStreamingCompletion() async {
    guard let recordedFile = recordedFile else {
        // Nothing was recorded; just reset state.
        await MainActor.run {
            recordingState = .idle
        }
        return
    }
    // Step 1: Clean up streaming and delete the preview text.
    // The streaming result is discarded; the batch pass below supplies the
    // accurate text that gets pasted in its place.
    _ = await finishStreamingTranscription()
    // Play the stop sound. When system mute is enabled, wait briefly first
    // so the sound isn't swallowed while audio output is restored.
    Task {
        let isSystemMuteEnabled = UserDefaults.standard.bool(forKey: "isSystemMuteEnabled")
        if isSystemMuteEnabled {
            try? await Task.sleep(nanoseconds: 200_000_000)
        }
        await MainActor.run {
            SoundManager.shared.playStopSound()
        }
    }
    // Step 2: Switch to transcribing state for batch processing.
    await MainActor.run {
        recordingState = .transcribing
    }
    logger.notice("🎙️ HYBRID: Streaming preview done, now running accurate batch transcription...")
    // Audio duration for the record; falls back to 0 if the asset fails to load.
    let audioAsset = AVURLAsset(url: recordedFile)
    let duration = (try? CMTimeGetSeconds(await audioAsset.load(.duration))) ?? 0.0
    // Create a pending transcription record up front so observers can react.
    let transcription = Transcription(
        text: "",
        duration: duration,
        audioFileURL: recordedFile.absoluteString,
        transcriptionStatus: .pending
    )
    modelContext.insert(transcription)
    try? modelContext.save()
    NotificationCenter.default.post(name: .transcriptionCreated, object: transcription)
    // Step 3: Run BATCH transcription for accurate result.
    // HYBRID MODE: Prefer Whisper for accuracy (2.7% WER) over Parakeet (6.05% WER).
    // Track the model that is actually used so the saved record reflects it —
    // it can differ from `currentTranscriptionModel` when the Whisper-turbo
    // substitution below kicks in.
    var usedModel: (any TranscriptionModel)?
    var text: String
    do {
        guard let model = currentTranscriptionModel else {
            throw WhisperStateError.transcriptionFailed
        }
        var transcriptionModel: any TranscriptionModel = model
        var usedWhisper = false
        if model is ParakeetModel {
            // Parakeet was selected for streaming, but check if Whisper is available
            // for better batch accuracy: look for large-v3-turbo among local models.
            if let turboModel = allAvailableModels.first(where: {
                $0.provider == .local && $0.name.contains("large-v3-turbo")
            }) {
                // Only substitute when the model is actually downloaded.
                let isDownloaded = availableModels.contains(where: { $0.name == turboModel.name })
                if isDownloaded {
                    transcriptionModel = turboModel
                    usedWhisper = true
                    logger.notice("🎙️ HYBRID: Using Whisper turbo for accuracy: \(turboModel.name)")
                }
            }
        }
        usedModel = transcriptionModel
        text = try await serviceRegistry.transcribe(audioURL: recordedFile, model: transcriptionModel)
        logger.notice("🎙️ HYBRID: Batch transcription complete\(usedWhisper ? " (Whisper)" : ""): \(text.prefix(50))...")
    } catch {
        // On failure, persist the error on the record and bail out cleanly.
        logger.error("❌ Batch transcription failed: \(error.localizedDescription)")
        transcription.text = "Transcription Failed: \(error.localizedDescription)"
        transcription.transcriptionStatus = TranscriptionStatus.failed.rawValue
        try? modelContext.save()
        await MainActor.run {
            recordingState = .idle
        }
        await dismissMiniRecorder()
        return
    }
    // Step 4: Apply post-processing pipeline (filter, optional formatting, replacements).
    text = TranscriptionOutputFilter.filter(text)
    let shouldFormatText = UserDefaults.standard.object(forKey: "EnableTextFormatting") as? Bool ?? true
    if shouldFormatText {
        text = WhisperTextFormatter.format(text)
    }
    text = WordReplacementService.shared.applyReplacements(to: text, using: modelContext)
    // Update transcription record.
    // Record the model ACTUALLY used for the batch pass (fixes misreporting the
    // originally selected model when Whisper turbo was substituted above).
    transcription.text = text
    transcription.transcriptionModelName = usedModel?.displayName ?? currentTranscriptionModel?.displayName
    // AI Enhancement (if enabled and configured).
    var enhancedText: String?
    if let enhancementService = enhancementService,
       enhancementService.isEnhancementEnabled,
       enhancementService.isConfigured {
        await MainActor.run {
            recordingState = .enhancing
        }
        do {
            let (enhanced, enhancementDuration, promptName) = try await enhancementService.enhance(text)
            enhancedText = enhanced
            transcription.enhancedText = enhanced
            transcription.enhancementDuration = enhancementDuration
            transcription.promptName = promptName
        } catch {
            // Enhancement failure is non-fatal: fall back to the raw transcription.
            logger.error("❌ Enhancement failed: \(error.localizedDescription)")
        }
    }
    // Mark transcription as complete and notify observers.
    transcription.transcriptionStatus = TranscriptionStatus.completed.rawValue
    try? modelContext.save()
    NotificationCenter.default.post(name: .transcriptionCompleted, object: transcription)
    // Step 5: Paste the accurate final text. The slight delay lets the preview
    // deletion settle before the replacement is inserted at the cursor.
    let finalText = enhancedText ?? text
    await MainActor.run {
        DispatchQueue.main.asyncAfter(deadline: .now() + 0.05) {
            CursorPaster.pasteAtCursor(finalText + " ")
            // Auto-send if Power Mode enabled for the active configuration.
            let powerMode = PowerModeManager.shared
            if let activeConfig = powerMode.currentActiveConfiguration,
               activeConfig.isAutoSendEnabled {
                CursorPaster.pressEnter()
            }
        }
    }
    await MainActor.run {
        recordingState = .idle
    }
    await dismissMiniRecorder()
}
private func transcribeAudio(on transcription: Transcription) async {
guard let urlString = transcription.audioFileURL, let url = URL(string: urlString) else {
logger.error("❌ Invalid audio file URL in transcription object.")

0
default.profraw Normal file
View File