From 5ca848af91a8819168602bb0da61835b3f09ebb4 Mon Sep 17 00:00:00 2001 From: Beingpax Date: Thu, 18 Dec 2025 21:29:35 +0545 Subject: [PATCH] Add TranscriptionServiceRegistry to eliminate duplicate service routing logic - Create centralized registry for managing transcription services - Replace duplicate switch statements across 4 manager classes - Consolidate service initialization into single registry pattern - Add cleanup method to registry for resource management - Ensure fresh service registry on each transcription request --- .../AudioFileTranscriptionManager.swift | 49 ++++--------------- .../AudioFileTranscriptionService.swift | 29 ++--------- VoiceInk/Services/ModelPrewarmService.swift | 40 ++------------- .../TranscriptionServiceRegistry.swift | 46 +++++++++++++++++ .../WhisperState+LocalModelManager.swift | 3 +- VoiceInk/Whisper/WhisperState.swift | 28 +++-------- 6 files changed, 72 insertions(+), 123 deletions(-) create mode 100644 VoiceInk/Services/TranscriptionServiceRegistry.swift diff --git a/VoiceInk/Services/AudioFileTranscriptionManager.swift b/VoiceInk/Services/AudioFileTranscriptionManager.swift index aa17b3d..4ae1ecb 100644 --- a/VoiceInk/Services/AudioFileTranscriptionManager.swift +++ b/VoiceInk/Services/AudioFileTranscriptionManager.swift @@ -16,12 +16,7 @@ class AudioTranscriptionManager: ObservableObject { private var currentTask: Task? private let audioProcessor = AudioProcessor() private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionManager") - - // Transcription services - will be initialized when needed - private var localTranscriptionService: LocalTranscriptionService? - private lazy var cloudTranscriptionService = CloudTranscriptionService() - private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() - private var parakeetTranscriptionService: ParakeetTranscriptionService? + private var serviceRegistry: TranscriptionServiceRegistry? enum ProcessingPhase { case idle @@ -64,52 +59,28 @@ class AudioTranscriptionManager: ObservableObject { guard let currentModel = whisperState.currentTranscriptionModel else { throw TranscriptionError.noModelSelected } - - // Initialize local transcription service if needed - if localTranscriptionService == nil { - localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState) - } - - // Initialize parakeet transcription service if needed - if parakeetTranscriptionService == nil { - parakeetTranscriptionService = ParakeetTranscriptionService() - } - - // Process audio file + + serviceRegistry = TranscriptionServiceRegistry(whisperState: whisperState, modelsDirectory: whisperState.modelsDirectory) + processingPhase = .processingAudio let samples = try await audioProcessor.processAudioToSamples(url) - - // Get audio duration + let audioAsset = AVURLAsset(url: url) let duration = CMTimeGetSeconds(try await audioAsset.load(.duration)) - - // Create permanent copy of the audio file + let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] .appendingPathComponent("com.prakashjoshipax.VoiceInk") .appendingPathComponent("Recordings") - + let fileName = "transcribed_\(UUID().uuidString).wav" let permanentURL = recordingsDirectory.appendingPathComponent(fileName) - + try FileManager.default.createDirectory(at: recordingsDirectory, withIntermediateDirectories: true) try audioProcessor.saveSamplesAsWav(samples: samples, to: permanentURL) - - // Transcribe using appropriate service + processingPhase = .transcribing let transcriptionStart = Date() - var text: String - - switch currentModel.provider { - case .local: - text = try await localTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel) - case .parakeet: - text = try await parakeetTranscriptionService!.transcribe(audioURL: permanentURL, model: currentModel) - case .nativeApple: - text = try await nativeAppleTranscriptionService.transcribe(audioURL: permanentURL, model: currentModel) - default: // Cloud models - text = try await cloudTranscriptionService.transcribe(audioURL: permanentURL, model: currentModel) - } - + var text = try await serviceRegistry!.transcribe(audioURL: permanentURL, model: currentModel) let transcriptionDuration = Date().timeIntervalSince(transcriptionStart) text = TranscriptionOutputFilter.filter(text) text = text.trimmingCharacters(in: .whitespacesAndNewlines) diff --git a/VoiceInk/Services/AudioFileTranscriptionService.swift b/VoiceInk/Services/AudioFileTranscriptionService.swift index 5a3de26..959853b 100644 --- a/VoiceInk/Services/AudioFileTranscriptionService.swift +++ b/VoiceInk/Services/AudioFileTranscriptionService.swift @@ -14,12 +14,7 @@ class AudioTranscriptionService: ObservableObject { private let whisperState: WhisperState private let promptDetectionService = PromptDetectionService() private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "AudioTranscriptionService") - - // Transcription services - private let localTranscriptionService: LocalTranscriptionService - private lazy var cloudTranscriptionService = CloudTranscriptionService() - private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() - private lazy var parakeetTranscriptionService = ParakeetTranscriptionService() + private let serviceRegistry: TranscriptionServiceRegistry enum TranscriptionError: Error { case noAudioFile @@ -32,7 +27,7 @@ class AudioTranscriptionService: ObservableObject { self.modelContext = modelContext self.whisperState = whisperState self.enhancementService = whisperState.enhancementService - self.localTranscriptionService = LocalTranscriptionService(modelsDirectory: whisperState.modelsDirectory, whisperState: whisperState) + self.serviceRegistry = TranscriptionServiceRegistry(whisperState: whisperState, modelsDirectory: whisperState.modelsDirectory) } func retranscribeAudio(from url: URL, using model: any TranscriptionModel) async throws -> Transcription { @@ -45,21 +40,8 @@ class AudioTranscriptionService: ObservableObject { } do { - // Delegate transcription to appropriate service let transcriptionStart = Date() - var text: String - - switch model.provider { - case .local: - text = try await localTranscriptionService.transcribe(audioURL: url, model: model) - case .parakeet: - text = try await parakeetTranscriptionService.transcribe(audioURL: url, model: model) - case .nativeApple: - text = try await nativeAppleTranscriptionService.transcribe(audioURL: url, model: model) - default: // Cloud models - text = try await cloudTranscriptionService.transcribe(audioURL: url, model: model) - } - + var text = try await serviceRegistry.transcribe(audioURL: url, model: model) let transcriptionDuration = Date().timeIntervalSince(transcriptionStart) text = TranscriptionOutputFilter.filter(text) text = text.trimmingCharacters(in: .whitespacesAndNewlines) @@ -75,12 +57,9 @@ class AudioTranscriptionService: ObservableObject { text = WordReplacementService.shared.applyReplacements(to: text) logger.notice("✅ Word replacements applied") - - // Get audio duration + let audioAsset = AVURLAsset(url: url) let duration = CMTimeGetSeconds(try await audioAsset.load(.duration)) - - // Create a permanent copy of the audio file let recordingsDirectory = FileManager.default.urls(for: .applicationSupportDirectory, in: .userDomainMask)[0] .appendingPathComponent("com.prakashjoshipax.VoiceInk") .appendingPathComponent("Recordings") diff --git a/VoiceInk/Services/ModelPrewarmService.swift b/VoiceInk/Services/ModelPrewarmService.swift index c1a9a58..41ef3ad 100644 --- a/VoiceInk/Services/ModelPrewarmService.swift +++ b/VoiceInk/Services/ModelPrewarmService.swift @@ -8,17 +8,11 @@ final class ModelPrewarmService: ObservableObject { private let whisperState: WhisperState private let modelContext: ModelContext private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "ModelPrewarm") - - // Services (initialized lazily) - private var localTranscriptionService: LocalTranscriptionService? - private var parakeetTranscriptionService: ParakeetTranscriptionService? - private let nativeAppleTranscriptionService = NativeAppleTranscriptionService() - private let cloudTranscriptionService = CloudTranscriptionService() - - // Sample audio for prewarming + private lazy var serviceRegistry = TranscriptionServiceRegistry( + whisperState: whisperState, + modelsDirectory: whisperState.modelsDirectory + ) private let prewarmAudioURL = Bundle.main.url(forResource: "esc", withExtension: "wav") - - // User preference key private let prewarmEnabledKey = "PrewarmModelOnWake" init(whisperState: WhisperState, modelContext: ModelContext) { @@ -83,33 +77,9 @@ final class ModelPrewarmService: ObservableObject { let startTime = Date() do { - // Initialize services lazily - if localTranscriptionService == nil { - localTranscriptionService = LocalTranscriptionService( - modelsDirectory: whisperState.modelsDirectory, - whisperState: whisperState - ) - } - if parakeetTranscriptionService == nil { - parakeetTranscriptionService = ParakeetTranscriptionService() - } - - // Run transcription to trigger model loading and ANE compilation - let transcribedText: String - switch currentModel.provider { - case .local: - transcribedText = try await localTranscriptionService!.transcribe(audioURL: audioURL, model: currentModel) - case .parakeet: - transcribedText = try await parakeetTranscriptionService!.transcribe(audioURL: audioURL, model: currentModel) - case .nativeApple: - transcribedText = try await nativeAppleTranscriptionService.transcribe(audioURL: audioURL, model: currentModel) - default: - transcribedText = try await cloudTranscriptionService.transcribe(audioURL: audioURL, model: currentModel) - } - + let transcribedText = try await serviceRegistry.transcribe(audioURL: audioURL, model: currentModel) let duration = Date().timeIntervalSince(startTime) - // Save for telemetry let transcription = Transcription( text: "[PREWARM] \(transcribedText)", duration: 1.0, diff --git a/VoiceInk/Services/TranscriptionServiceRegistry.swift b/VoiceInk/Services/TranscriptionServiceRegistry.swift new file mode 100644 index 0000000..a7703ea --- /dev/null +++ b/VoiceInk/Services/TranscriptionServiceRegistry.swift @@ -0,0 +1,46 @@ +import Foundation +import SwiftUI +import os + +@MainActor +class TranscriptionServiceRegistry { + private let whisperState: WhisperState + private let modelsDirectory: URL + private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "TranscriptionServiceRegistry") + + private(set) lazy var localTranscriptionService = LocalTranscriptionService( + modelsDirectory: modelsDirectory, + whisperState: whisperState + ) + private(set) lazy var cloudTranscriptionService = CloudTranscriptionService() + private(set) lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() + private(set) lazy var parakeetTranscriptionService = ParakeetTranscriptionService() + + init(whisperState: WhisperState, modelsDirectory: URL) { + self.whisperState = whisperState + self.modelsDirectory = modelsDirectory + } + + func service(for provider: ModelProvider) -> TranscriptionService { + switch provider { + case .local: + return localTranscriptionService + case .parakeet: + return parakeetTranscriptionService + case .nativeApple: + return nativeAppleTranscriptionService + default: + return cloudTranscriptionService + } + } + + func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { + let service = service(for: model.provider) + logger.debug("Transcribing with \(model.displayName) using \(String(describing: type(of: service)))") + return try await service.transcribe(audioURL: audioURL, model: model) + } + + func cleanup() { + parakeetTranscriptionService.cleanup() + } +} diff --git a/VoiceInk/Whisper/WhisperState+LocalModelManager.swift b/VoiceInk/Whisper/WhisperState+LocalModelManager.swift index 3fe94e9..a00a1ef 100644 --- a/VoiceInk/Whisper/WhisperState+LocalModelManager.swift +++ b/VoiceInk/Whisper/WhisperState+LocalModelManager.swift @@ -340,8 +340,7 @@ extension WhisperState { await whisperContext?.releaseResources() whisperContext = nil isModelLoaded = false - - parakeetTranscriptionService.cleanup() + serviceRegistry.cleanup() } // MARK: - Helper Methods diff --git a/VoiceInk/Whisper/WhisperState.swift b/VoiceInk/Whisper/WhisperState.swift index 273beac..d8aa71f 100644 --- a/VoiceInk/Whisper/WhisperState.swift +++ b/VoiceInk/Whisper/WhisperState.swift @@ -68,11 +68,7 @@ class WhisperState: NSObject, ObservableObject { let modelContext: ModelContext - // Transcription Services - private var localTranscriptionService: LocalTranscriptionService! - private lazy var cloudTranscriptionService = CloudTranscriptionService() - private lazy var nativeAppleTranscriptionService = NativeAppleTranscriptionService() - internal lazy var parakeetTranscriptionService = ParakeetTranscriptionService() + internal var serviceRegistry: TranscriptionServiceRegistry! private var modelUrl: URL? { let possibleURLs = [ @@ -122,9 +118,9 @@ class WhisperState: NSObject, ObservableObject { if let enhancementService = enhancementService { PowerModeSessionManager.shared.configure(whisperState: self, enhancementService: enhancementService) } - - // Set the whisperState reference after super.init() - self.localTranscriptionService = LocalTranscriptionService(modelsDirectory: self.modelsDirectory, whisperState: self) + + // Initialize the transcription service registry + self.serviceRegistry = TranscriptionServiceRegistry(whisperState: self, modelsDirectory: self.modelsDirectory) setupNotifications() createModelsDirectoryIfNeeded() @@ -216,7 +212,7 @@ class WhisperState: NSObject, ObservableObject { } } } else if let parakeetModel = await self.currentTranscriptionModel as? ParakeetModel { - try? await self.parakeetTranscriptionService.loadModel(for: parakeetModel) + try? await self.serviceRegistry.parakeetTranscriptionService.loadModel(for: parakeetModel) } if let enhancementService = await self.enhancementService { @@ -299,20 +295,8 @@ class WhisperState: NSObject, ObservableObject { throw WhisperStateError.transcriptionFailed } - let transcriptionService: TranscriptionService - switch model.provider { - case .local: - transcriptionService = localTranscriptionService - case .parakeet: - transcriptionService = parakeetTranscriptionService - case .nativeApple: - transcriptionService = nativeAppleTranscriptionService - default: - transcriptionService = cloudTranscriptionService - } - let transcriptionStart = Date() - var text = try await transcriptionService.transcribe(audioURL: url, model: model) + var text = try await serviceRegistry.transcribe(audioURL: url, model: model) logger.notice("📝 Raw transcript: \(text, privacy: .public)") text = TranscriptionOutputFilter.filter(text) logger.notice("📝 Output filter result: \(text, privacy: .public)")