feat: Added Deepgram transcription support

This commit is contained in:
Beingpax 2025-06-16 21:26:35 +05:45
parent fde8b168eb
commit 00e1d5e8da
11 changed files with 417 additions and 214 deletions

View File

@ -144,7 +144,16 @@ import Foundation
isMultilingual: true,
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true)
),
CloudModel(
name: "nova-2",
displayName: "Nova (Deepgram)",
description: "Deepgram's Nova model for fast, accurate, and cost-effective transcription.",
provider: .deepgram,
speed: 0.9,
accuracy: 0.95,
isMultilingual: true,
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true)
),
]
static let allLanguages = [

View File

@ -5,6 +5,7 @@ enum ModelProvider: String, Codable, Hashable, CaseIterable {
case local = "Local"
case groq = "Groq"
case elevenLabs = "ElevenLabs"
case deepgram = "Deepgram"
// Future providers can be added here
}

View File

@ -10,6 +10,7 @@ enum AIProvider: String, CaseIterable {
case mistral = "Mistral"
case ollama = "Ollama"
case elevenLabs = "ElevenLabs"
case deepgram = "Deepgram"
case custom = "Custom"
var baseURL: String {
@ -30,6 +31,8 @@ enum AIProvider: String, CaseIterable {
return "https://api.elevenlabs.io/v1/speech-to-text"
case .ollama:
return UserDefaults.standard.string(forKey: "ollamaBaseURL") ?? "http://localhost:11434"
case .deepgram:
return "https://api.deepgram.com/v1/listen"
case .custom:
return UserDefaults.standard.string(forKey: "customProviderBaseURL") ?? ""
}
@ -53,6 +56,8 @@ enum AIProvider: String, CaseIterable {
return "scribe_v1"
case .ollama:
return UserDefaults.standard.string(forKey: "ollamaSelectedModel") ?? "mistral"
case .deepgram:
return "whisper-1"
case .custom:
return UserDefaults.standard.string(forKey: "customProviderModel") ?? ""
}
@ -97,6 +102,8 @@ enum AIProvider: String, CaseIterable {
return ["scribe_v1", "scribe_v1_experimental"]
case .ollama:
return []
case .deepgram:
return ["whisper-1"]
case .custom:
return []
}
@ -266,6 +273,8 @@ class AIService: ObservableObject {
verifyAnthropicAPIKey(key, completion: completion)
case .elevenLabs:
verifyElevenLabsAPIKey(key, completion: completion)
case .deepgram:
verifyDeepgramAPIKey(key, completion: completion)
default:
verifyOpenAICompatibleAPIKey(key, completion: completion)
}
@ -400,7 +409,26 @@ class AIService: ObservableObject {
}.resume()
}
/// Verifies a Deepgram API key by sending an authenticated GET request to
/// Deepgram's `/v1/auth/token` endpoint.
///
/// - Parameters:
///   - key: The API key to check; sent as `Authorization: Token <key>`.
///   - completion: Receives `true` only when the server replies with HTTP 200.
///     Transport errors and non-HTTP responses report `false`. It is invoked
///     from the URLSession data-task completion handler.
private func verifyDeepgramAPIKey(_ key: String, completion: @escaping (Bool) -> Void) {
    var verificationRequest = URLRequest(url: URL(string: "https://api.deepgram.com/v1/auth/token")!)
    verificationRequest.httpMethod = "GET"
    verificationRequest.addValue("Token \(key)", forHTTPHeaderField: "Authorization")

    let task = URLSession.shared.dataTask(with: verificationRequest) { _, response, error in
        if let failure = error {
            self.logger.error("Deepgram API key verification failed: \(failure.localizedDescription)")
            completion(false)
            return
        }
        // A response that is not an HTTPURLResponse yields nil here, which compares unequal to 200.
        let statusCode = (response as? HTTPURLResponse)?.statusCode
        completion(statusCode == 200)
    }
    task.resume()
}
func clearAPIKey() {
guard selectedProvider.requiresAPIKey else { return }

View File

@ -0,0 +1,57 @@
import Foundation
import os
/// Errors thrown by the cloud transcription services, each with a
/// user-presentable description.
enum CloudTranscriptionError: Error, LocalizedError {
    /// The model's provider is not handled by the service.
    case unsupportedProvider
    /// No API key is stored for the provider.
    case missingAPIKey
    /// The stored API key is invalid.
    case invalidAPIKey
    /// The audio file could not be read.
    case audioFileNotFound
    /// The server answered with a non-success status code and a message body.
    case apiRequestFailed(statusCode: Int, message: String)
    /// A transport-level failure, wrapping the underlying error.
    case networkError(Error)
    /// The response was empty or could not be interpreted.
    case noTranscriptionReturned
    /// The request could not be encoded.
    case dataEncodingError

    /// Human-readable explanation of the failure.
    var errorDescription: String? {
        let text: String
        switch self {
        case .apiRequestFailed(statusCode: let code, message: let detail):
            text = "The API request failed with status code \(code): \(detail)"
        case .networkError(let underlying):
            text = "A network error occurred: \(underlying.localizedDescription)"
        case .unsupportedProvider:
            text = "The model provider is not supported by this service."
        case .missingAPIKey:
            text = "API key for this service is missing. Please configure it in the settings."
        case .invalidAPIKey:
            text = "The provided API key is invalid."
        case .audioFileNotFound:
            text = "The audio file to transcribe could not be found."
        case .noTranscriptionReturned:
            text = "The API returned an empty or invalid response."
        case .dataEncodingError:
            text = "Failed to encode the request body."
        }
        return text
    }
}
/// Routes a cloud transcription request to the backend that matches the
/// model's provider.
class CloudTranscriptionService: TranscriptionService {
    private let groqBackend = GroqTranscriptionService()
    private let elevenLabsBackend = ElevenLabsTranscriptionService()
    private let deepgramBackend = DeepgramTranscriptionService()

    /// Transcribes the audio at `audioURL` using the backend for `model.provider`.
    ///
    /// - Throws: `CloudTranscriptionError.unsupportedProvider` for providers
    ///   other than Groq, ElevenLabs, and Deepgram; otherwise whatever the
    ///   selected backend throws.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        let transcript: String
        switch model.provider {
        case .deepgram:
            transcript = try await deepgramBackend.transcribe(audioURL: audioURL, model: model)
        case .elevenLabs:
            transcript = try await elevenLabsBackend.transcribe(audioURL: audioURL, model: model)
        case .groq:
            transcript = try await groqBackend.transcribe(audioURL: audioURL, model: model)
        default:
            throw CloudTranscriptionError.unsupportedProvider
        }
        return transcript
    }
}

View File

@ -0,0 +1,103 @@
import Foundation
import os
/// Transcribes audio files through Deepgram's pre-recorded `/v1/listen` endpoint.
class DeepgramTranscriptionService {
    private let logger = Logger(subsystem: "com.voiceink.transcription", category: "DeepgramService")

    /// Uploads the audio file at `audioURL` to Deepgram and returns the first
    /// alternative of the first channel as the transcript.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file; its bytes are sent as the raw request body.
    ///   - model: The selected transcription model (forwarded to `getAPIConfig`).
    /// - Returns: The non-empty transcript text.
    /// - Throws: `CloudTranscriptionError.missingAPIKey` when no key is stored,
    ///   `.audioFileNotFound` when the file cannot be read,
    ///   `.networkError` for a non-HTTP response,
    ///   `.apiRequestFailed` for a non-2xx status,
    ///   `.noTranscriptionReturned` when the body cannot be decoded or the transcript is empty,
    ///   and any error thrown by `URLSession` itself.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        let config = try getAPIConfig(for: model)

        var request = URLRequest(url: config.url)
        request.httpMethod = "POST"
        request.setValue("Token \(config.apiKey)", forHTTPHeaderField: "Authorization")
        request.setValue("audio/wav", forHTTPHeaderField: "Content-Type")

        guard let audioData = try? Data(contentsOf: audioURL) else {
            throw CloudTranscriptionError.audioFileNotFound
        }

        let (data, response) = try await URLSession.shared.upload(for: request, from: audioData)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
            logger.error("Deepgram API request failed with status \(httpResponse.statusCode): \(errorMessage, privacy: .public)")
            throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
        }

        // Decode separately from the emptiness check so that an empty transcript
        // is not also reported as a decoding failure in the log.
        let decoded: DeepgramResponse
        do {
            decoded = try JSONDecoder().decode(DeepgramResponse.self, from: data)
        } catch {
            logger.error("Failed to decode Deepgram API response: \(error.localizedDescription)")
            throw CloudTranscriptionError.noTranscriptionReturned
        }

        guard let transcript = decoded.results.channels.first?.alternatives.first?.transcript,
              !transcript.isEmpty else {
            logger.error("No transcript found in Deepgram response")
            throw CloudTranscriptionError.noTranscriptionReturned
        }
        return transcript
    }

    /// Builds the request URL (with query parameters) and reads the stored API key.
    ///
    /// The Deepgram model is chosen from the `SelectedLanguage` user default:
    /// `nova-3` for `"en"`, `nova-2` otherwise. The `model` argument is not
    /// consulted for that choice.
    ///
    /// - Throws: `CloudTranscriptionError.missingAPIKey` when `DeepgramAPIKey`
    ///   is absent or empty; `.dataEncodingError` when the URL cannot be formed.
    private func getAPIConfig(for model: any TranscriptionModel) throws -> APIConfig {
        guard let apiKey = UserDefaults.standard.string(forKey: "DeepgramAPIKey"), !apiKey.isEmpty else {
            throw CloudTranscriptionError.missingAPIKey
        }

        let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
        let wantsAutoDetect = selectedLanguage == "auto" || selectedLanguage.isEmpty

        // Choose model based on language
        let modelName = selectedLanguage == "en" ? "nova-3" : "nova-2"

        var queryItems: [URLQueryItem] = [
            URLQueryItem(name: "model", value: modelName),
            URLQueryItem(name: "smart_format", value: "true"),
            URLQueryItem(name: "dictation", value: "true"),
            URLQueryItem(name: "punctuate", value: "true"),
            URLQueryItem(name: "paragraphs", value: "true"),
            URLQueryItem(name: "filler_words", value: "false")
        ]

        if wantsAutoDetect {
            // Without a `language` parameter Deepgram uses its default language
            // (English, per Deepgram's API reference) rather than detecting one,
            // so detection has to be requested explicitly.
            queryItems.append(URLQueryItem(name: "detect_language", value: "true"))
        } else {
            queryItems.append(URLQueryItem(name: "language", value: selectedLanguage))
        }

        var components = URLComponents(string: "https://api.deepgram.com/v1/listen")!
        components.queryItems = queryItems

        guard let apiURL = components.url else {
            throw CloudTranscriptionError.dataEncodingError
        }

        // Record the model actually requested in the URL so the two cannot disagree.
        return APIConfig(url: apiURL, apiKey: apiKey, modelName: modelName)
    }

    /// Resolved request settings for a single transcription call.
    private struct APIConfig {
        let url: URL
        let apiKey: String
        let modelName: String
    }

    /// The subset of Deepgram's JSON response that this service reads.
    private struct DeepgramResponse: Decodable {
        let results: Results

        struct Results: Decodable {
            let channels: [Channel]

            struct Channel: Decodable {
                let alternatives: [Alternative]

                struct Alternative: Decodable {
                    let transcript: String
                    let confidence: Double?
                }
            }
        }
    }
}

View File

@ -0,0 +1,93 @@
import Foundation
/// Transcribes audio files through the ElevenLabs speech-to-text endpoint.
class ElevenLabsTranscriptionService {
    /// Uploads the audio file as a multipart form and returns the `text` field
    /// of the JSON response.
    ///
    /// - Parameters:
    ///   - audioURL: Location of the audio file to upload.
    ///   - model: The selected model; its `name` is sent as `model_id`.
    /// - Throws: `CloudTranscriptionError.missingAPIKey` when no key is stored,
    ///   `.audioFileNotFound` when the file cannot be read,
    ///   `.networkError` for a non-HTTP response,
    ///   `.apiRequestFailed` for a non-2xx status,
    ///   `.noTranscriptionReturned` when the body cannot be decoded,
    ///   and any error thrown by `URLSession` itself.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        let config = try getAPIConfig(for: model)
        let boundary = "Boundary-\(UUID().uuidString)"

        var request = URLRequest(url: config.url)
        request.httpMethod = "POST"
        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
        request.setValue(config.apiKey, forHTTPHeaderField: "xi-api-key")

        let body = try createElevenLabsRequestBody(audioURL: audioURL, modelName: config.modelName, boundary: boundary)
        let (data, response) = try await URLSession.shared.upload(for: request, from: body)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
            throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
        }

        do {
            return try JSONDecoder().decode(TranscriptionResponse.self, from: data).text
        } catch {
            throw CloudTranscriptionError.noTranscriptionReturned
        }
    }

    /// Reads the stored ElevenLabs API key and pairs it with the endpoint URL.
    ///
    /// - Throws: `CloudTranscriptionError.missingAPIKey` when `ElevenLabsAPIKey`
    ///   is absent or empty.
    private func getAPIConfig(for model: any TranscriptionModel) throws -> APIConfig {
        guard let apiKey = UserDefaults.standard.string(forKey: "ElevenLabsAPIKey"), !apiKey.isEmpty else {
            throw CloudTranscriptionError.missingAPIKey
        }

        let apiURL = URL(string: "https://api.elevenlabs.io/v1/speech-to-text")!
        return APIConfig(url: apiURL, apiKey: apiKey, modelName: model.name)
    }

    /// Assembles the multipart/form-data body: the audio file, `model_id`, and,
    /// when a specific language is selected, `language_code`.
    ///
    /// - Throws: `CloudTranscriptionError.audioFileNotFound` when the file cannot be read.
    private func createElevenLabsRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
        guard let audioData = try? Data(contentsOf: audioURL) else {
            throw CloudTranscriptionError.audioFileNotFound
        }

        let crlf = "\r\n"
        var body = Data()

        // `Data(_.utf8)` cannot fail, unlike `data(using:)`, so no force-unwrap is needed.
        func appendText(_ text: String) {
            body.append(Data(text.utf8))
        }

        func appendField(named name: String, value: String) {
            appendText("--\(boundary)\(crlf)")
            appendText("Content-Disposition: form-data; name=\"\(name)\"\(crlf)\(crlf)")
            appendText(value)
            appendText(crlf)
        }

        // File
        appendText("--\(boundary)\(crlf)")
        appendText("Content-Disposition: form-data; name=\"file\"; filename=\"\(audioURL.lastPathComponent)\"\(crlf)")
        appendText("Content-Type: audio/wav\(crlf)\(crlf)")
        body.append(audioData)
        appendText(crlf)

        // Model ID
        appendField(named: "model_id", value: modelName)

        // Language is only sent when the user picked a specific one.
        let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
        if selectedLanguage != "auto", !selectedLanguage.isEmpty {
            appendField(named: "language_code", value: selectedLanguage)
        }

        appendText("--\(boundary)--\(crlf)")
        return body
    }

    /// Resolved request settings for a single transcription call.
    private struct APIConfig {
        let url: URL
        let apiKey: String
        let modelName: String
    }

    /// The only response field this service reads. Groq-specific fields that
    /// had been copied into this struct were dropped, so an unexpected value
    /// type in an unused key cannot fail decoding.
    private struct TranscriptionResponse: Decodable {
        let text: String
    }
}

View File

@ -0,0 +1,114 @@
import Foundation
import os
/// Transcribes audio files through Groq's OpenAI-compatible transcription endpoint.
class GroqTranscriptionService {
    private let logger = Logger(subsystem: "com.voiceink.transcription", category: "GroqService")

    /// Uploads the audio file as a multipart form and returns the `text` field
    /// of the JSON response.
    ///
    /// - Throws: `CloudTranscriptionError.missingAPIKey`, `.audioFileNotFound`,
    ///   `.networkError`, `.apiRequestFailed`, or `.noTranscriptionReturned`,
    ///   plus any error thrown by `URLSession` itself.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        let config = try getAPIConfig(for: model)
        let boundary = "Boundary-\(UUID().uuidString)"

        var request = URLRequest(url: config.url)
        request.httpMethod = "POST"
        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
        request.setValue("Bearer \(config.apiKey)", forHTTPHeaderField: "Authorization")

        let payload = try createOpenAICompatibleRequestBody(audioURL: audioURL, modelName: config.modelName, boundary: boundary)
        let (data, response) = try await URLSession.shared.upload(for: request, from: payload)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
            logger.error("Groq API request failed with status \(httpResponse.statusCode): \(errorMessage, privacy: .public)")
            throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
        }

        do {
            let decoded = try JSONDecoder().decode(TranscriptionResponse.self, from: data)
            return decoded.text
        } catch {
            logger.error("Failed to decode Groq API response: \(error.localizedDescription)")
            throw CloudTranscriptionError.noTranscriptionReturned
        }
    }

    /// Reads the stored Groq API key and pairs it with the endpoint URL.
    ///
    /// - Throws: `CloudTranscriptionError.missingAPIKey` when `GROQAPIKey` is
    ///   absent or empty.
    private func getAPIConfig(for model: any TranscriptionModel) throws -> APIConfig {
        let storedKey = UserDefaults.standard.string(forKey: "GROQAPIKey")
        guard let apiKey = storedKey, !apiKey.isEmpty else {
            throw CloudTranscriptionError.missingAPIKey
        }

        let endpoint = URL(string: "https://api.groq.com/openai/v1/audio/transcriptions")!
        return APIConfig(url: endpoint, apiKey: apiKey, modelName: model.name)
    }

    /// Assembles the multipart/form-data body in this order: file, `model`,
    /// optional `language`, optional `prompt`, `response_format`, `temperature`.
    ///
    /// - Throws: `CloudTranscriptionError.audioFileNotFound` when the file cannot be read.
    private func createOpenAICompatibleRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
        guard let audioData = try? Data(contentsOf: audioURL) else {
            throw CloudTranscriptionError.audioFileNotFound
        }

        let defaults = UserDefaults.standard
        let selectedLanguage = defaults.string(forKey: "SelectedLanguage") ?? "auto"
        let prompt = defaults.string(forKey: "TranscriptionPrompt") ?? ""

        let crlf = "\r\n"
        var body = Data()

        func appendText(_ text: String) {
            body.append(Data(text.utf8))
        }

        func appendField(named name: String, value: String) {
            appendText("--\(boundary)\(crlf)")
            appendText("Content-Disposition: form-data; name=\"\(name)\"\(crlf)\(crlf)")
            appendText(value)
            appendText(crlf)
        }

        // Audio file part
        appendText("--\(boundary)\(crlf)")
        appendText("Content-Disposition: form-data; name=\"file\"; filename=\"\(audioURL.lastPathComponent)\"\(crlf)")
        appendText("Content-Type: audio/wav\(crlf)\(crlf)")
        body.append(audioData)
        appendText(crlf)

        appendField(named: "model", value: modelName)

        // Language is only sent when the user picked a specific one.
        if selectedLanguage != "auto", !selectedLanguage.isEmpty {
            appendField(named: "language", value: selectedLanguage)
        }

        // Include prompt for OpenAI-compatible APIs
        if !prompt.isEmpty {
            appendField(named: "prompt", value: prompt)
        }

        appendField(named: "response_format", value: "json")
        appendField(named: "temperature", value: "0")

        appendText("--\(boundary)--\(crlf)")
        return body
    }

    /// Resolved request settings for a single transcription call.
    private struct APIConfig {
        let url: URL
        let apiKey: String
        let modelName: String
    }

    /// JSON response shape; only `text` is required.
    private struct TranscriptionResponse: Decodable {
        let text: String
        let language: String?
        let duration: Double?
        let x_groq: GroqMetadata?

        struct GroqMetadata: Decodable {
            let id: String?
        }
    }
}

View File

@ -1,211 +0,0 @@
import Foundation
import os
/// Errors thrown by the cloud transcription service, each with a
/// user-presentable description.
enum CloudTranscriptionError: Error, LocalizedError {
    /// The model's provider is not handled by the service.
    case unsupportedProvider
    /// No API key is stored for the provider.
    case missingAPIKey
    /// The stored API key is invalid.
    case invalidAPIKey
    /// The audio file could not be read.
    case audioFileNotFound
    /// The server answered with a non-success status code and a message body.
    case apiRequestFailed(statusCode: Int, message: String)
    /// A transport-level failure, wrapping the underlying error.
    case networkError(Error)
    /// The response was empty or could not be interpreted.
    case noTranscriptionReturned
    /// The request could not be encoded.
    case dataEncodingError

    /// Human-readable explanation of the failure.
    var errorDescription: String? {
        let text: String
        switch self {
        case .apiRequestFailed(statusCode: let code, message: let detail):
            text = "The API request failed with status code \(code): \(detail)"
        case .networkError(let underlying):
            text = "A network error occurred: \(underlying.localizedDescription)"
        case .unsupportedProvider:
            text = "The model provider is not supported by this service."
        case .missingAPIKey:
            text = "API key for this service is missing. Please configure it in the settings."
        case .invalidAPIKey:
            text = "The provided API key is invalid."
        case .audioFileNotFound:
            text = "The audio file to transcribe could not be found."
        case .noTranscriptionReturned:
            text = "The API returned an empty or invalid response."
        case .dataEncodingError:
            text = "Failed to encode the request body."
        }
        return text
    }
}
/// Single-class cloud transcription service handling the Groq and ElevenLabs
/// providers with multipart/form-data uploads.
class CloudTranscriptionService: TranscriptionService {
    /// Resolved request settings for a single transcription call.
    private struct APIConfig {
        let url: URL
        let apiKey: String
        let modelName: String
    }

    /// Uploads the audio file to the provider selected by `model.provider` and
    /// returns the `text` field of the JSON response.
    ///
    /// - Throws: `CloudTranscriptionError.unsupportedProvider`, `.missingAPIKey`,
    ///   `.audioFileNotFound`, `.networkError`, `.apiRequestFailed`, or
    ///   `.noTranscriptionReturned`, plus any error thrown by `URLSession` itself.
    func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
        let config = try getAPIConfig(for: model)
        let boundary = "Boundary-\(UUID().uuidString)"

        // Both providers share the method and content type; only the
        // authentication header and the body layout differ.
        var request = URLRequest(url: config.url)
        request.httpMethod = "POST"
        request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")

        let body: Data
        switch model.provider {
        case .groq:
            request.setValue("Bearer \(config.apiKey)", forHTTPHeaderField: "Authorization")
            body = try createOpenAICompatibleRequestBody(audioURL: audioURL, modelName: config.modelName, boundary: boundary)
        case .elevenLabs:
            request.setValue(config.apiKey, forHTTPHeaderField: "xi-api-key")
            body = try createElevenLabsRequestBody(audioURL: audioURL, modelName: config.modelName, boundary: boundary)
        default:
            throw CloudTranscriptionError.unsupportedProvider
        }

        let (data, response) = try await URLSession.shared.upload(for: request, from: body)

        guard let httpResponse = response as? HTTPURLResponse else {
            throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
        }

        guard (200...299).contains(httpResponse.statusCode) else {
            let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
            throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
        }

        do {
            return try JSONDecoder().decode(TranscriptionResponse.self, from: data).text
        } catch {
            throw CloudTranscriptionError.noTranscriptionReturned
        }
    }

    /// Resolves the endpoint URL and the stored API key for the model's provider.
    ///
    /// - Throws: `CloudTranscriptionError.unsupportedProvider` for providers
    ///   other than Groq and ElevenLabs; `.missingAPIKey` when the
    ///   `<provider>APIKey` user default is absent or empty.
    private func getAPIConfig(for model: any TranscriptionModel) throws -> APIConfig {
        let settings: (keyPrefix: String, endpoint: String)
        switch model.provider {
        case .elevenLabs:
            settings = ("ElevenLabs", "https://api.elevenlabs.io/v1/speech-to-text")
        case .groq:
            settings = ("GROQ", "https://api.groq.com/openai/v1/audio/transcriptions")
        default:
            throw CloudTranscriptionError.unsupportedProvider
        }

        let apiURL = URL(string: settings.endpoint)!
        let storedKey = UserDefaults.standard.string(forKey: "\(settings.keyPrefix)APIKey")
        guard let apiKey = storedKey, !apiKey.isEmpty else {
            throw CloudTranscriptionError.missingAPIKey
        }

        return APIConfig(url: apiURL, apiKey: apiKey, modelName: model.name)
    }

    /// Encodes one text form field, including its leading boundary line.
    private func formField(named name: String, value: String, boundary: String) -> Data {
        Data("--\(boundary)\r\nContent-Disposition: form-data; name=\"\(name)\"\r\n\r\n\(value)\r\n".utf8)
    }

    /// Encodes the audio file part, including its leading boundary line.
    private func filePart(audioURL: URL, audioData: Data, boundary: String) -> Data {
        var part = Data("--\(boundary)\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\(audioURL.lastPathComponent)\"\r\nContent-Type: audio/wav\r\n\r\n".utf8)
        part.append(audioData)
        part.append(Data("\r\n".utf8))
        return part
    }

    /// Builds the ElevenLabs body: file, `model_id`, and, when a specific
    /// language is selected, `language_code`.
    ///
    /// - Throws: `CloudTranscriptionError.audioFileNotFound` when the file cannot be read.
    private func createElevenLabsRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
        guard let audioData = try? Data(contentsOf: audioURL) else {
            throw CloudTranscriptionError.audioFileNotFound
        }

        // File
        var body = filePart(audioURL: audioURL, audioData: audioData, boundary: boundary)

        // Model ID
        body.append(formField(named: "model_id", value: modelName, boundary: boundary))

        let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
        if selectedLanguage != "auto", !selectedLanguage.isEmpty {
            body.append(formField(named: "language_code", value: selectedLanguage, boundary: boundary))
        }

        body.append(Data("--\(boundary)--\r\n".utf8))
        return body
    }

    /// Builds the OpenAI-compatible body in this order: file, `model`,
    /// optional `language`, optional `prompt`, `response_format`, `temperature`.
    ///
    /// - Throws: `CloudTranscriptionError.audioFileNotFound` when the file cannot be read.
    private func createOpenAICompatibleRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
        guard let audioData = try? Data(contentsOf: audioURL) else {
            throw CloudTranscriptionError.audioFileNotFound
        }

        let defaults = UserDefaults.standard
        let selectedLanguage = defaults.string(forKey: "SelectedLanguage") ?? "auto"
        let prompt = defaults.string(forKey: "TranscriptionPrompt") ?? ""

        var body = filePart(audioURL: audioURL, audioData: audioData, boundary: boundary)
        body.append(formField(named: "model", value: modelName, boundary: boundary))

        if selectedLanguage != "auto", !selectedLanguage.isEmpty {
            body.append(formField(named: "language", value: selectedLanguage, boundary: boundary))
        }

        // Include prompt for OpenAI-compatible APIs
        if !prompt.isEmpty {
            body.append(formField(named: "prompt", value: prompt, boundary: boundary))
        }

        body.append(formField(named: "response_format", value: "json", boundary: boundary))
        body.append(formField(named: "temperature", value: "0", boundary: boundary))

        body.append(Data("--\(boundary)--\r\n".utf8))
        return body
    }

    /// JSON response shape shared by both providers; only `text` is required.
    private struct TranscriptionResponse: Decodable {
        let text: String
        let language: String?
        let duration: Double?
        let x_groq: GroqMetadata?

        struct GroqMetadata: Decodable {
            let id: String?
        }
    }
}

View File

@ -387,6 +387,8 @@ struct APIKeyManagementView: View {
URL(string: "https://console.mistral.ai/api-keys")!
case .elevenLabs:
URL(string: "https://elevenlabs.io/speech-synthesis")!
case .deepgram:
URL(string: "https://console.deepgram.com/api-keys")!
case .ollama, .custom:
URL(string: "")! // This case should never be reached
}

View File

@ -29,7 +29,7 @@ struct ModelCardRowView: View {
downloadAction: downloadAction
)
}
case .groq, .elevenLabs:
case .groq, .elevenLabs, .deepgram:
if let cloudModel = model as? CloudModel {
CloudModelCardView(
model: cloudModel,
@ -259,6 +259,8 @@ struct CloudModelCardView: View {
return "GROQ"
case .elevenLabs:
return "ElevenLabs"
case .deepgram:
return "Deepgram"
default:
return model.provider.rawValue
}
@ -497,6 +499,8 @@ struct CloudModelCardView: View {
aiService.selectedProvider = .groq
} else if model.provider == .elevenLabs {
aiService.selectedProvider = .elevenLabs
} else if model.provider == .deepgram {
aiService.selectedProvider = .deepgram
}
aiService.verifyAPIKey(apiKey) { [self] isValid in

View File

@ -12,6 +12,9 @@ extension WhisperState {
case .elevenLabs:
let key = UserDefaults.standard.string(forKey: "ElevenLabsAPIKey")
return key != nil && !key!.isEmpty
case .deepgram:
let key = UserDefaults.standard.string(forKey: "DeepgramAPIKey")
return key != nil && !key!.isEmpty
}
}
}