diff --git a/VoiceInk/Models/PredefinedModels.swift b/VoiceInk/Models/PredefinedModels.swift index 7ddf481..96c8ae2 100644 --- a/VoiceInk/Models/PredefinedModels.swift +++ b/VoiceInk/Models/PredefinedModels.swift @@ -223,6 +223,16 @@ import Foundation isMultilingual: true, supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .deepgram) ), + CloudModel( + name: "voxtral-mini-2507", + displayName: "Voxtral Mini (Mistral)", + description: "Mistral's latest SOTA transcription model.", + provider: .mistral, + speed: 0.8, + accuracy: 0.97, + isMultilingual: true, + supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .mistral) + ) ] static let allLanguages = [ diff --git a/VoiceInk/Models/TranscriptionModel.swift b/VoiceInk/Models/TranscriptionModel.swift index dae0816..ac905ed 100644 --- a/VoiceInk/Models/TranscriptionModel.swift +++ b/VoiceInk/Models/TranscriptionModel.swift @@ -6,6 +6,7 @@ enum ModelProvider: String, Codable, Hashable, CaseIterable { case groq = "Groq" case elevenLabs = "ElevenLabs" case deepgram = "Deepgram" + case mistral = "Mistral" case custom = "Custom" case nativeApple = "Native Apple" // Future providers can be added here diff --git a/VoiceInk/Services/AIService.swift b/VoiceInk/Services/AIService.swift index 00b7a8f..1cd4be1 100644 --- a/VoiceInk/Services/AIService.swift +++ b/VoiceInk/Services/AIService.swift @@ -30,7 +30,7 @@ enum AIProvider: String, CaseIterable { case .openRouter: return "https://openrouter.ai/api/v1/chat/completions" case .mistral: - return "https://api.mistral.ai/v1/chat/completions" + return "https://api.mistral.ai/v1/audio/transcriptions" case .elevenLabs: return "https://api.elevenlabs.io/v1/speech-to-text" case .ollama: @@ -106,9 +106,7 @@ enum AIProvider: String, CaseIterable { ] case .mistral: return [ - "mistral-large-latest", - "mistral-small-latest", - "mistral-saba-latest" + "voxtral-mini-2507" ] case .elevenLabs: return ["scribe_v1", "scribe_v1_experimental"] @@ -298,6 
+296,8 @@ class AIService: ObservableObject {
             verifyElevenLabsAPIKey(key, completion: completion)
         case .deepgram:
             verifyDeepgramAPIKey(key, completion: completion)
+        case .mistral:
+            verifyMistralAPIKey(key, completion: completion)
         default:
             verifyOpenAICompatibleAPIKey(key, completion: completion)
         }
@@ -429,6 +429,45 @@ class AIService: ObservableObject {
         }.resume()
     }
 
+    /// Verifies a Mistral API key by sending an authenticated GET to the `/v1/models` endpoint.
+    ///
+    /// - Parameters:
+    ///   - key: API key sent as a bearer token in the `Authorization` header.
+    ///   - completion: Called with `true` only when the server answers with HTTP 200.
+    private func verifyMistralAPIKey(_ key: String, completion: @escaping (Bool) -> Void) {
+        let modelsURL = URL(string: "https://api.mistral.ai/v1/models")!
+        var request = URLRequest(url: modelsURL)
+        request.httpMethod = "GET"
+        request.addValue("Bearer \(key)", forHTTPHeaderField: "Authorization")
+
+        let task = URLSession.shared.dataTask(with: request) { data, response, error in
+            if let error = error {
+                self.logger.error("Mistral API key verification failed: \(error.localizedDescription)")
+                completion(false)
+                return
+            }
+
+            guard let httpResponse = response as? HTTPURLResponse else {
+                self.logger.error("Mistral API key verification failed: Invalid response from server.")
+                completion(false)
+                return
+            }
+
+            guard httpResponse.statusCode != 200 else {
+                completion(true)
+                return
+            }
+
+            // Any other status is a failure; log the response body when one is available.
+            if let data = data, let body = String(data: data, encoding: .utf8) {
+                self.logger.error("Mistral API key verification failed with status code \(httpResponse.statusCode): \(body)")
+            } else {
+                self.logger.error("Mistral API key verification failed with status code \(httpResponse.statusCode) and no response body.")
+            }
+            completion(false)
+        }
+        task.resume()
+    }
+
     private func verifyDeepgramAPIKey(_ key: String, completion: @escaping (Bool) -> Void) {
         let url = URL(string: "https://api.deepgram.com/v1/auth/token")!
var request = URLRequest(url: url) diff --git a/VoiceInk/Services/CloudTranscription/CloudTranscriptionService.swift b/VoiceInk/Services/CloudTranscription/CloudTranscriptionService.swift index bad2b01..9c180b1 100644 --- a/VoiceInk/Services/CloudTranscription/CloudTranscriptionService.swift +++ b/VoiceInk/Services/CloudTranscription/CloudTranscriptionService.swift @@ -38,6 +38,7 @@ class CloudTranscriptionService: TranscriptionService { private lazy var groqService = GroqTranscriptionService() private lazy var elevenLabsService = ElevenLabsTranscriptionService() private lazy var deepgramService = DeepgramTranscriptionService() + private lazy var mistralService = MistralTranscriptionService() private lazy var openAICompatibleService = OpenAICompatibleTranscriptionService() func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String { @@ -50,6 +51,8 @@ class CloudTranscriptionService: TranscriptionService { text = try await elevenLabsService.transcribe(audioURL: audioURL, model: model) case .deepgram: text = try await deepgramService.transcribe(audioURL: audioURL, model: model) + case .mistral: + text = try await mistralService.transcribe(audioURL: audioURL, model: model) case .custom: guard let customModel = model as? 
CustomCloudModel else {
                 throw CloudTranscriptionError.unsupportedProvider
diff --git a/VoiceInk/Services/CloudTranscription/MistralTranscriptionService.swift b/VoiceInk/Services/CloudTranscription/MistralTranscriptionService.swift
new file mode 100644
index 0000000..e2c9086
--- /dev/null
+++ b/VoiceInk/Services/CloudTranscription/MistralTranscriptionService.swift
@@ -0,0 +1,118 @@
+import Foundation
+import os
+
+/// Transcribes audio files with Mistral's hosted transcription endpoint.
+class MistralTranscriptionService {
+    private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "MistralTranscriptionService")
+
+    /// Uploads the audio file at `audioURL` to Mistral and returns the transcribed text.
+    ///
+    /// - Parameters:
+    ///   - audioURL: Location of the audio file to upload.
+    ///   - model: Transcription model whose `name` is sent as the `model` form field.
+    /// - Returns: The `text` field of Mistral's JSON response.
+    /// - Throws: `CloudTranscriptionError.missingAPIKey` when no key is stored under `MistralAPIKey`,
+    ///   `.audioFileNotFound` when the file cannot be read, `.apiRequestFailed` for any non-200
+    ///   response, `.noTranscriptionReturned` when the response cannot be decoded, or the
+    ///   underlying `URLSession` error when the request itself fails.
+    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
+        logger.notice("Sending transcription request to Mistral for model: \(model.name)")
+        let apiKey = UserDefaults.standard.string(forKey: "MistralAPIKey") ?? ""
+        guard !apiKey.isEmpty else {
+            logger.error("Mistral API key is missing.")
+            throw CloudTranscriptionError.missingAPIKey
+        }
+
+        let audioData: Data
+        do {
+            audioData = try Data(contentsOf: audioURL)
+        } catch {
+            // Keep the error type callers already handle, but record why the read failed.
+            logger.error("Failed to read audio file for Mistral: \(error.localizedDescription)")
+            throw CloudTranscriptionError.audioFileNotFound
+        }
+
+        let url = URL(string: "https://api.mistral.ai/v1/audio/transcriptions")!
+        var request = URLRequest(url: url)
+        request.httpMethod = "POST"
+
+        let boundary = "Boundary-\(UUID().uuidString)"
+        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
+        request.setValue("Bearer \(apiKey)", forHTTPHeaderField: "Authorization")
+        request.httpBody = makeMultipartBody(
+            boundary: boundary,
+            modelName: model.name,
+            audioData: audioData,
+            audioURL: audioURL
+        )
+
+        // Only the network call is wrapped here, so the API and decoding failures below are
+        // logged once with their own message instead of also being reported as transport errors.
+        let result: (Data, URLResponse)
+        do {
+            result = try await URLSession.shared.data(for: request)
+        } catch {
+            logger.error("Mistral transcription request threw an error: \(error.localizedDescription)")
+            throw error
+        }
+        let (data, response) = result
+
+        guard let httpResponse = response as? HTTPURLResponse, httpResponse.statusCode == 200 else {
+            let statusCode = (response as? HTTPURLResponse)?.statusCode ?? 500
+            let errorResponse = String(data: data, encoding: .utf8) ?? "No response body"
+            logger.error("Mistral transcription request failed with status code \(statusCode): \(errorResponse)")
+            throw CloudTranscriptionError.apiRequestFailed(statusCode: statusCode, message: errorResponse)
+        }
+
+        do {
+            let transcriptionResponse = try JSONDecoder().decode(MistralTranscriptionResponse.self, from: data)
+            logger.notice("Successfully received transcription from Mistral.")
+            return transcriptionResponse.text
+        } catch {
+            logger.error("Failed to decode Mistral response: \(error.localizedDescription)")
+            throw CloudTranscriptionError.noTranscriptionReturned
+        }
+    }
+
+    /// Builds the `multipart/form-data` body holding the `model` field and the `file` part.
+    private func makeMultipartBody(boundary: String, modelName: String, audioData: Data, audioURL: URL) -> Data {
+        var body = Data()
+
+        // Model field
+        body.append(Data("--\(boundary)\r\n".utf8))
+        body.append(Data("Content-Disposition: form-data; name=\"model\"\r\n\r\n".utf8))
+        body.append(Data(modelName.utf8))
+        body.append(Data("\r\n".utf8))
+
+        // File part
+        body.append(Data("--\(boundary)\r\n".utf8))
+        body.append(Data("Content-Disposition: form-data; name=\"file\"; filename=\"\(audioURL.lastPathComponent)\"\r\n".utf8))
+        body.append(Data("Content-Type: \(mimeType(for: audioURL))\r\n\r\n".utf8))
+        body.append(audioData)
+        body.append(Data("\r\n".utf8))
+
+        body.append(Data("--\(boundary)--\r\n".utf8))
+        return body
+    }
+
+    /// Maps the file extension of `audioURL` to the MIME type declared for the uploaded part.
+    private func mimeType(for audioURL: URL) -> String {
+        switch audioURL.pathExtension.lowercased() {
+        case "wav", "wave":
+            return "audio/wav"
+        case "m4a", "mp4":
+            return "audio/mp4"
+        case "flac":
+            return "audio/flac"
+        case "ogg", "oga":
+            return "audio/ogg"
+        default:
+            // MP3 and unrecognised extensions keep the value this service previously always sent.
+            return "audio/mpeg"
+        }
+    }
+}
+
+struct MistralTranscriptionResponse: Codable {
+    let text: String
+}
diff --git a/VoiceInk/Views/AI Models/CloudModelCardRowView.swift b/VoiceInk/Views/AI Models/CloudModelCardRowView.swift
index fd32638..f01954b 100644
--- a/VoiceInk/Views/AI Models/CloudModelCardRowView.swift
+++ b/VoiceInk/Views/AI Models/CloudModelCardRowView.swift
@@ -34,6 +34,8 @@ struct CloudModelCardView: View {
             return "ElevenLabs"
         case .deepgram:
             return "Deepgram"
+        case .mistral:
+            return "Mistral"
         default:
             return model.provider.rawValue
         }
@@ -266,17 +268,24 @@ struct CloudModelCardView: View {
         isVerifying = true
         verificationStatus = .verifying
 
-        // Set the provider in AIService temporarily for verification
-        let originalProvider = aiService.selectedProvider
-        if model.provider == .groq {
+        switch model.provider {
+ case .groq: aiService.selectedProvider = .groq - } else if model.provider == .elevenLabs { + case .elevenLabs: aiService.selectedProvider = .elevenLabs - } else if model.provider == .deepgram { + case .deepgram: aiService.selectedProvider = .deepgram + case .mistral: + aiService.selectedProvider = .mistral + default: + // This case should ideally not be hit for cloud models in this view + print("Warning: verifyAPIKey called for unsupported provider \(model.provider.rawValue)") + isVerifying = false + verificationStatus = .failure + return } - aiService.verifyAPIKey(apiKey) { [self] isValid in + aiService.saveAPIKey(apiKey) { isValid in DispatchQueue.main.async { self.isVerifying = false if isValid { @@ -294,7 +303,7 @@ struct CloudModelCardView: View { } // Restore original provider - aiService.selectedProvider = originalProvider + // aiService.selectedProvider = originalProvider // This line was removed as per the new_code } } } diff --git a/VoiceInk/Views/AI Models/ModelCardRowView.swift b/VoiceInk/Views/AI Models/ModelCardRowView.swift index ddb3fba..8a3c7c1 100644 --- a/VoiceInk/Views/AI Models/ModelCardRowView.swift +++ b/VoiceInk/Views/AI Models/ModelCardRowView.swift @@ -38,7 +38,7 @@ struct ModelCardRowView: View { setDefaultAction: setDefaultAction ) } - case .groq, .elevenLabs, .deepgram: + case .groq, .elevenLabs, .deepgram, .mistral: if let cloudModel = model as? 
CloudModel { CloudModelCardView( model: cloudModel, diff --git a/VoiceInk/Views/AI Models/ModelManagementView.swift b/VoiceInk/Views/AI Models/ModelManagementView.swift index aac9559..25c4db2 100644 --- a/VoiceInk/Views/AI Models/ModelManagementView.swift +++ b/VoiceInk/Views/AI Models/ModelManagementView.swift @@ -192,7 +192,7 @@ struct ModelManagementView: View { case .local: return whisperState.allAvailableModels.filter { $0.provider == .local || $0.provider == .nativeApple } case .cloud: - let cloudProviders: [ModelProvider] = [.groq, .elevenLabs, .deepgram] + let cloudProviders: [ModelProvider] = [.groq, .elevenLabs, .deepgram, .mistral] return whisperState.allAvailableModels.filter { cloudProviders.contains($0.provider) } case .custom: return whisperState.allAvailableModels.filter { $0.provider == .custom } diff --git a/VoiceInk/Whisper/WhisperState+ModelQueries.swift b/VoiceInk/Whisper/WhisperState+ModelQueries.swift index 64d1ebd..63bf707 100644 --- a/VoiceInk/Whisper/WhisperState+ModelQueries.swift +++ b/VoiceInk/Whisper/WhisperState+ModelQueries.swift @@ -21,6 +21,9 @@ extension WhisperState { case .deepgram: let key = UserDefaults.standard.string(forKey: "DeepgramAPIKey") return key != nil && !key!.isEmpty + case .mistral: + let key = UserDefaults.standard.string(forKey: "MistralAPIKey") + return key != nil && !key!.isEmpty case .custom: // Custom models are always usable since they contain their own API keys return true