Add support for the Scribe v2 model
This commit is contained in:
parent
35a08dce7b
commit
308098694d
@ -205,14 +205,24 @@ import Foundation
|
|||||||
),
|
),
|
||||||
CloudModel(
|
CloudModel(
|
||||||
name: "scribe_v1",
|
name: "scribe_v1",
|
||||||
displayName: "Scribe v1 (ElevenLabs)",
|
displayName: "Scribe v1",
|
||||||
description: "ElevenLabs' Scribe model for fast and accurate transcription.",
|
description: "ElevenLabs' Scribe model for fast & accurate transcription.",
|
||||||
provider: .elevenLabs,
|
provider: .elevenLabs,
|
||||||
speed: 0.7,
|
speed: 0.7,
|
||||||
accuracy: 0.98,
|
accuracy: 0.98,
|
||||||
isMultilingual: true,
|
isMultilingual: true,
|
||||||
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .elevenLabs)
|
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .elevenLabs)
|
||||||
),
|
),
|
||||||
|
CloudModel(
|
||||||
|
name: "scribe_v2_realtime",
|
||||||
|
displayName: "Scribe v2",
|
||||||
|
description: "ElevenLabs' Scribe v2 model for the most accurate transcription.",
|
||||||
|
provider: .elevenLabs,
|
||||||
|
speed: 0.75,
|
||||||
|
accuracy: 0.99,
|
||||||
|
isMultilingual: true,
|
||||||
|
supportedLanguages: getLanguageDictionary(isMultilingual: true, provider: .elevenLabs)
|
||||||
|
),
|
||||||
CloudModel(
|
CloudModel(
|
||||||
name: "nova-2",
|
name: "nova-2",
|
||||||
displayName: "Nova (Deepgram)",
|
displayName: "Nova (Deepgram)",
|
||||||
|
|||||||
@ -1,107 +1,86 @@
|
|||||||
import Foundation
|
import Foundation
|
||||||
|
import OSLog
|
||||||
|
|
||||||
class ElevenLabsTranscriptionService {
|
class ElevenLabsTranscriptionService {
|
||||||
|
private let apiURL = URL(string: "https://api.elevenlabs.io/v1/speech-to-text")!
|
||||||
|
private let logger = Logger(subsystem: "com.prakashjoshipax.voiceink", category: "ElevenLabsTranscriptionService")
|
||||||
|
|
||||||
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
func transcribe(audioURL: URL, model: any TranscriptionModel) async throws -> String {
|
||||||
let config = try getAPIConfig(for: model)
|
guard let apiKey = UserDefaults.standard.string(forKey: "ElevenLabsAPIKey"), !apiKey.isEmpty else {
|
||||||
|
throw CloudTranscriptionError.missingAPIKey
|
||||||
|
}
|
||||||
|
|
||||||
let boundary = "Boundary-\(UUID().uuidString)"
|
let boundary = "Boundary-\(UUID().uuidString)"
|
||||||
var request = URLRequest(url: config.url)
|
var request = URLRequest(url: apiURL)
|
||||||
request.httpMethod = "POST"
|
request.httpMethod = "POST"
|
||||||
request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
|
request.setValue("multipart/form-data; boundary=\(boundary)", forHTTPHeaderField: "Content-Type")
|
||||||
request.setValue("application/json", forHTTPHeaderField: "Accept")
|
request.setValue("application/json", forHTTPHeaderField: "Accept")
|
||||||
request.setValue(config.apiKey, forHTTPHeaderField: "xi-api-key")
|
request.setValue(apiKey, forHTTPHeaderField: "xi-api-key")
|
||||||
|
|
||||||
let body = try createElevenLabsRequestBody(audioURL: audioURL, modelName: config.modelName, boundary: boundary)
|
let body = try createRequestBody(audioURL: audioURL, modelName: model.name, boundary: boundary)
|
||||||
|
|
||||||
let (data, response) = try await URLSession.shared.upload(for: request, from: body)
|
let (data, response) = try await URLSession.shared.upload(for: request, from: body)
|
||||||
|
|
||||||
guard let httpResponse = response as? HTTPURLResponse else {
|
guard let httpResponse = response as? HTTPURLResponse else {
|
||||||
throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
|
throw CloudTranscriptionError.networkError(URLError(.badServerResponse))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
logger.notice("ElevenLabs API Response Status: \(httpResponse.statusCode)")
|
||||||
|
if let responseBody = String(data: data, encoding: .utf8) {
|
||||||
|
logger.notice("ElevenLabs API Response Body: \(responseBody)")
|
||||||
|
}
|
||||||
|
|
||||||
if !(200...299).contains(httpResponse.statusCode) {
|
if !(200...299).contains(httpResponse.statusCode) {
|
||||||
let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
|
let errorMessage = String(data: data, encoding: .utf8) ?? "No error message"
|
||||||
throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
|
throw CloudTranscriptionError.apiRequestFailed(statusCode: httpResponse.statusCode, message: errorMessage)
|
||||||
}
|
}
|
||||||
|
|
||||||
do {
|
do {
|
||||||
let transcriptionResponse = try JSONDecoder().decode(TranscriptionResponse.self, from: data)
|
let transcriptionResponse = try JSONDecoder().decode(ElevenLabsTranscriptionResponse.self, from: data)
|
||||||
return transcriptionResponse.text
|
return transcriptionResponse.text
|
||||||
} catch {
|
} catch {
|
||||||
throw CloudTranscriptionError.noTranscriptionReturned
|
throw CloudTranscriptionError.noTranscriptionReturned
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
private func getAPIConfig(for model: any TranscriptionModel) throws -> APIConfig {
|
private func createRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
|
||||||
guard let apiKey = UserDefaults.standard.string(forKey: "ElevenLabsAPIKey"), !apiKey.isEmpty else {
|
|
||||||
throw CloudTranscriptionError.missingAPIKey
|
|
||||||
}
|
|
||||||
|
|
||||||
guard let apiURL = URL(string: "https://api.elevenlabs.io/v1/speech-to-text") else {
|
|
||||||
throw NSError(domain: "ElevenLabsTranscriptionService", code: -1, userInfo: [NSLocalizedDescriptionKey: "Invalid API URL"])
|
|
||||||
}
|
|
||||||
return APIConfig(url: apiURL, apiKey: apiKey, modelName: model.name)
|
|
||||||
}
|
|
||||||
|
|
||||||
private func createElevenLabsRequestBody(audioURL: URL, modelName: String, boundary: String) throws -> Data {
|
|
||||||
var body = Data()
|
var body = Data()
|
||||||
let crlf = "\r\n"
|
|
||||||
|
|
||||||
guard let audioData = try? Data(contentsOf: audioURL) else {
|
body.append(formField: "file", fileName: audioURL.lastPathComponent, fileData: try Data(contentsOf: audioURL), mimeType: "audio/wav", boundary: boundary)
|
||||||
throw CloudTranscriptionError.audioFileNotFound
|
body.append(formField: "model_id", value: modelName, boundary: boundary)
|
||||||
}
|
body.append(formField: "temperature", value: "0.0", boundary: boundary)
|
||||||
|
body.append(formField: "tag_audio_events", value: "false", boundary: boundary)
|
||||||
// File
|
|
||||||
body.append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("Content-Disposition: form-data; name=\"file\"; filename=\"\(audioURL.lastPathComponent)\"\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("Content-Type: audio/wav\(crlf)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append(audioData)
|
|
||||||
body.append(crlf.data(using: .utf8)!)
|
|
||||||
|
|
||||||
// Model ID
|
|
||||||
body.append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("Content-Disposition: form-data; name=\"model_id\"\(crlf)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append(modelName.data(using: .utf8)!)
|
|
||||||
body.append(crlf.data(using: .utf8)!)
|
|
||||||
|
|
||||||
// Disable audio event tagging
|
|
||||||
body.append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("Content-Disposition: form-data; name=\"tag_audio_events\"\(crlf)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("false".data(using: .utf8)!)
|
|
||||||
body.append(crlf.data(using: .utf8)!)
|
|
||||||
|
|
||||||
body.append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("Content-Disposition: form-data; name=\"temperature\"\(crlf)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append("0".data(using: .utf8)!)
|
|
||||||
body.append(crlf.data(using: .utf8)!)
|
|
||||||
|
|
||||||
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
|
let selectedLanguage = UserDefaults.standard.string(forKey: "SelectedLanguage") ?? "auto"
|
||||||
if selectedLanguage != "auto", !selectedLanguage.isEmpty {
|
if selectedLanguage != "auto", !selectedLanguage.isEmpty {
|
||||||
body.append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
body.append(formField: "language_code", value: selectedLanguage, boundary: boundary)
|
||||||
body.append("Content-Disposition: form-data; name=\"language_code\"\(crlf)\(crlf)".data(using: .utf8)!)
|
|
||||||
body.append(selectedLanguage.data(using: .utf8)!)
|
|
||||||
body.append(crlf.data(using: .utf8)!)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
body.append("--\(boundary)--\(crlf)".data(using: .utf8)!)
|
body.append("--\(boundary)--\r\n".data(using: .utf8)!)
|
||||||
|
|
||||||
return body
|
return body
|
||||||
}
|
}
|
||||||
|
|
||||||
private struct APIConfig {
|
private struct ElevenLabsTranscriptionResponse: Decodable {
|
||||||
let url: URL
|
let text: String
|
||||||
let apiKey: String
|
}
|
||||||
let modelName: String
|
}
|
||||||
|
|
||||||
|
private extension Data {
|
||||||
|
mutating func append(formField: String, value: String, boundary: String) {
|
||||||
|
let crlf = "\r\n"
|
||||||
|
append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
||||||
|
append("Content-Disposition: form-data; name=\"\(formField)\"\(crlf)\(crlf)".data(using: .utf8)!)
|
||||||
|
append(value.data(using: .utf8)!)
|
||||||
|
append(crlf.data(using: .utf8)!)
|
||||||
}
|
}
|
||||||
|
|
||||||
private struct TranscriptionResponse: Decodable {
|
mutating func append(formField: String, fileName: String, fileData: Data, mimeType: String, boundary: String) {
|
||||||
let text: String
|
let crlf = "\r\n"
|
||||||
let language: String?
|
append("--\(boundary)\(crlf)".data(using: .utf8)!)
|
||||||
let duration: Double?
|
append("Content-Disposition: form-data; name=\"\(formField)\"; filename=\"\(fileName)\"\(crlf)".data(using: .utf8)!)
|
||||||
let x_groq: GroqMetadata?
|
append("Content-Type: \(mimeType)\(crlf)\(crlf)".data(using: .utf8)!)
|
||||||
|
append(fileData)
|
||||||
struct GroqMetadata: Decodable {
|
append(crlf.data(using: .utf8)!)
|
||||||
let id: String?
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Loading…
x
Reference in New Issue
Block a user