Merge pull request #38 from WingCH/feature/support_cantonese

Added Cantonese support for Whisper large-v3
This commit is contained in:
Prakash Joshi Pax 2025-03-27 21:26:08 +05:45 committed by GitHub
commit e24212518d
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
3 changed files with 205 additions and 153 deletions

View File

@ -5,37 +5,60 @@ struct PredefinedModel: Identifiable, Hashable {
let name: String
let displayName: String
let size: String
let language: String
let supportedLanguages: [String: String]
let description: String
let speed: Double
let accuracy: Double
let ramUsage: Double
let hash: String
/// Download location of the model file: the fixed Hugging Face
/// `ggerganov/whisper.cpp` "resolve/main" base URL followed by `filename`.
var downloadURL: String {
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
}
/// File name of the model binary: the model's `name` with a ".bin" extension appended.
var filename: String {
"\(name).bin"
}
/// Hashes the model by `id` only, matching the `id`-based `==` below so that
/// equal models always produce equal hashes.
func hash(into hasher: inout Hasher) {
hasher.combine(id)
}
/// Two models are equal when their `id`s are equal; no other property is compared.
static func == (lhs: PredefinedModel, rhs: PredefinedModel) -> Bool {
lhs.id == rhs.id
}
/// `true` when `supportedLanguages` holds more than one entry.
/// English-only models are built with the single-entry table `["en": "English"]`,
/// so they report `false` here.
var isMultilingualModel: Bool {
supportedLanguages.count > 1
}
/// Short label describing the model's language coverage:
/// "Multilingual" when `isMultilingualModel` is true, otherwise "English-only".
var language: String {
    if isMultilingualModel {
        return "Multilingual"
    }
    return "English-only"
}
}
struct PredefinedModels {
enum PredefinedModels {
/// Builds the language-code → display-name table for a model.
///
/// - Parameters:
///   - isMultilingual: `false` yields the single-entry English table,
///     regardless of `isLargeV3`.
///   - isLargeV3: When `true` (and the model is multilingual) the full
///     `allLanguages` table is returned, Cantonese included.
/// - Returns: The languages the model should offer. Multilingual models that
///   are not Large v3 get `allLanguages` minus the "yue" (Cantonese) entry.
static func getLanguageDictionary(isMultilingual: Bool, isLargeV3: Bool = false) -> [String: String] {
    // English-only models expose exactly one language.
    guard isMultilingual else {
        return ["en": "English"]
    }

    // Large v3 models support all languages including Cantonese.
    guard !isLargeV3 else {
        return allLanguages
    }

    // Every other multilingual model gets the full table without Cantonese.
    return allLanguages.filter { entry in entry.key != "yue" }
}
static let models: [PredefinedModel] = [
PredefinedModel(
name: "ggml-tiny",
displayName: "Tiny",
size: "75 MiB",
language: "Multilingual",
supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Tiny model, fastest, least accurate, supports multiple languages",
speed: 0.95,
accuracy: 0.6,
@ -46,7 +69,7 @@ struct PredefinedModels {
name: "ggml-tiny.en",
displayName: "Tiny (English)",
size: "75 MiB",
language: "English",
supportedLanguages: getLanguageDictionary(isMultilingual: false),
description: "Tiny model optimized for English, fastest, least accurate",
speed: 0.95,
accuracy: 0.65,
@ -57,8 +80,9 @@ struct PredefinedModels {
name: "ggml-base",
displayName: "Base",
size: "142 MiB",
language: "Multilingual",
description: "Base model, good balance of speed and accuracy, supports multiple languages",
supportedLanguages: getLanguageDictionary(isMultilingual: true),
description:
"Base model, good balance of speed and accuracy, supports multiple languages",
speed: 0.8,
accuracy: 0.75,
ramUsage: 0.5,
@ -68,7 +92,7 @@ struct PredefinedModels {
name: "ggml-base.en",
displayName: "Base (English)",
size: "142 MiB",
language: "English",
supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Base model optimized for English, good balance of speed and accuracy",
speed: 0.8,
accuracy: 0.8,
@ -79,8 +103,9 @@ struct PredefinedModels {
name: "ggml-small",
displayName: "Small",
size: "466 MiB",
language: "Multilingual",
description: "Small model, slower but more accurate than base, supports multiple languages",
supportedLanguages: getLanguageDictionary(isMultilingual: true),
description:
"Small model, slower but more accurate than base, supports multiple languages",
speed: 0.6,
accuracy: 0.85,
ramUsage: 0.7,
@ -90,7 +115,7 @@ struct PredefinedModels {
name: "ggml-small.en",
displayName: "Small (English)",
size: "466 MiB",
language: "English",
supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Small model optimized for English, slower but more accurate than base",
speed: 0.6,
accuracy: 0.9,
@ -101,7 +126,7 @@ struct PredefinedModels {
name: "ggml-medium",
displayName: "Medium",
size: "1.5 GiB",
language: "Multilingual",
supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Medium model, slow but very accurate, supports multiple languages",
speed: 0.4,
accuracy: 0.92,
@ -112,7 +137,7 @@ struct PredefinedModels {
name: "ggml-medium.en",
displayName: "Medium (English)",
size: "1.5 GiB",
language: "English",
supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Medium model optimized for English, slow but very accurate",
speed: 0.4,
accuracy: 0.95,
@ -123,7 +148,7 @@ struct PredefinedModels {
name: "ggml-large-v3",
displayName: "Large v3",
size: "2.9 GiB",
language: "Multilingual",
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Large model v3, very slow but most accurate, supports multiple languages",
speed: 0.2,
accuracy: 0.98,
@ -134,7 +159,7 @@ struct PredefinedModels {
name: "ggml-large-v3-q5_0",
displayName: "Large v3 (Quantized)",
size: "1.1 GiB",
language: "Multilingual",
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Quantized version of Large v3, faster with slightly lower accuracy",
speed: 0.3,
accuracy: 0.97,
@ -145,8 +170,9 @@ struct PredefinedModels {
name: "ggml-large-v3-turbo",
displayName: "Large v3 Turbo",
size: "1.5 GiB",
language: "Multilingual",
description: "Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages",
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description:
"Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages",
speed: 0.5,
accuracy: 0.97,
ramUsage: 1.8,
@ -156,12 +182,117 @@ struct PredefinedModels {
name: "ggml-large-v3-turbo-q5_0",
displayName: "Large v3 Turbo (Quantized)",
size: "547 MiB",
language: "Multilingual",
supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Quantized version of Large v3 Turbo, faster with slightly lower accuracy",
speed: 0.6,
accuracy: 0.96,
ramUsage: 1.0,
hash: "e050f7970618a659205450ad97eb95a18d69c9ee"
)
),
]
/// Master table mapping language codes to their display names.
///
/// Besides the real language codes it contains the pseudo-code "auto"
/// (shown as "Auto-detect"). `getLanguageDictionary` returns this table
/// unchanged for Large v3 models and a copy without "yue" (Cantonese) for
/// other multilingual models.
static let allLanguages = [
"auto": "Auto-detect",
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bengali",
"bo": "Tibetan",
"br": "Breton",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"ga": "Irish",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"ht": "Haitian Creole",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"la": "Latin",
"lb": "Luxembourgish",
"ln": "Lingala",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mg": "Malagasy",
"mi": "Maori",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"mt": "Maltese",
"my": "Myanmar",
"ne": "Nepali",
"nl": "Dutch",
"nn": "Norwegian Nynorsk",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sn": "Shona",
"so": "Somali",
"sq": "Albanian",
"sr": "Serbian",
"su": "Sundanese",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"tg": "Tajik",
"th": "Thai",
"tk": "Turkmen",
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"ug": "Uyghur",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"yi": "Yiddish",
"yo": "Yoruba",
"yue": "Cantonese",
"zh": "Chinese",
]
}

View File

@ -2,8 +2,8 @@ import SwiftUI
// Define a display mode for flexible usage
enum LanguageDisplayMode {
case full // For settings page with descriptions
case menuItem // For menu bar with compact layout
case full // For settings page with descriptions
case menuItem // For menu bar with compact layout
}
struct LanguageSelectionView: View {
@ -11,115 +11,11 @@ struct LanguageSelectionView: View {
@AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
// Add display mode parameter with full as the default
var displayMode: LanguageDisplayMode = .full
let languages = [
"auto": "Auto-detect",
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bengali",
"bo": "Tibetan",
"br": "Breton",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"ga": "Irish",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"ht": "Haitian Creole",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"la": "Latin",
"lb": "Luxembourgish",
"ln": "Lingala",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mg": "Malagasy",
"mi": "Maori",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"mt": "Maltese",
"my": "Myanmar",
"ne": "Nepali",
"nl": "Dutch",
"nn": "Norwegian Nynorsk",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sn": "Shona",
"so": "Somali",
"sq": "Albanian",
"sr": "Serbian",
"su": "Sundanese",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"tg": "Tajik",
"th": "Thai",
"tk": "Turkmen",
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"ug": "Uyghur",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"yi": "Yiddish",
"yo": "Yoruba",
"zh": "Chinese"
]
/// Selects `language` as the transcription language and announces the change.
///
/// - Parameter language: Language code to select (for example "en" or "auto").
private func updateLanguage(_ language: String) {
    // `selectedLanguage` is @AppStorage-backed, so this assignment is all
    // that is needed to update the stored setting.
    selectedLanguage = language

    // Let interested observers know the language changed.
    let center = NotificationCenter.default
    center.post(name: .languageDidChange, object: nil)
}
@ -130,14 +26,26 @@ struct LanguageSelectionView: View {
let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) else {
return false
}
return predefinedModel.language == "Multilingual"
return predefinedModel.isMultilingualModel
}
/// Looks up the language table of the currently selected model.
///
/// The current model is matched by `name` against `PredefinedModels.models`.
/// - Returns: The matching model's `supportedLanguages`, or the English-only
///   table when there is no current model or no predefined model matches.
private func getCurrentModelLanguages() -> [String: String] {
    let matchingModel = whisperState.currentModel.flatMap { active in
        PredefinedModels.models.first(where: { $0.name == active.name })
    }
    // Default to English if no model found
    return matchingModel?.supportedLanguages ?? ["en": "English"]
}
// Get the display name of the current language
private func currentLanguageDisplayName() -> String {
return languages[selectedLanguage] ?? "Unknown"
return getCurrentModelLanguages()[selectedLanguage] ?? "Unknown"
}
var body: some View {
switch displayMode {
case .full:
@ -146,20 +54,26 @@ struct LanguageSelectionView: View {
menuItemView
}
}
// The original full view layout for settings page
private var fullView: some View {
VStack(alignment: .leading, spacing: 16) {
Text("Transcription Language")
.font(.headline)
if let currentModel = whisperState.currentModel,
let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) {
if predefinedModel.language == "Multilingual" {
let predefinedModel = PredefinedModels.models.first(where: {
$0.name == currentModel.name
})
{
if isMultilingualModel() {
VStack(alignment: .leading, spacing: 8) {
Picker("Select Language", selection: $selectedLanguage) {
ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in
ForEach(
predefinedModel.supportedLanguages.sorted(by: {
$0.value < $1.value
}), id: \.key
) { key, value in
Text(value).tag(key)
}
}
@ -167,14 +81,16 @@ struct LanguageSelectionView: View {
.onChange(of: selectedLanguage) { newValue in
updateLanguage(newValue)
}
Text("Current model: \(predefinedModel.displayName)")
.font(.caption)
.foregroundColor(.secondary)
Text("This model supports multiple languages. You can choose auto-detect or select a specific language.")
.font(.caption)
.foregroundColor(.secondary)
Text(
"This model supports multiple languages. You can choose auto-detect or select a specific language."
)
.font(.caption)
.foregroundColor(.secondary)
}
} else {
// For English-only models, force set language to English
@ -182,14 +98,16 @@ struct LanguageSelectionView: View {
Text("Language: English")
.font(.subheadline)
.foregroundColor(.primary)
Text("Current model: \(predefinedModel.displayName)")
.font(.caption)
.foregroundColor(.secondary)
Text("This is an English-optimized model and only supports English transcription.")
.font(.caption)
.foregroundColor(.secondary)
Text(
"This is an English-optimized model and only supports English transcription."
)
.font(.caption)
.foregroundColor(.secondary)
}
.onAppear {
// Ensure English is set when viewing English-only model
@ -207,13 +125,15 @@ struct LanguageSelectionView: View {
.background(Color(NSColor.controlBackgroundColor))
.cornerRadius(10)
}
// New compact view for menu bar
private var menuItemView: some View {
Group {
if isMultilingualModel() {
Menu {
ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in
ForEach(
getCurrentModelLanguages().sorted(by: { $0.value < $1.value }), id: \.key
) { key, value in
Button {
updateLanguage(key)
} label: {

View File

@ -24,6 +24,7 @@ class WhisperPrompt: ObservableObject {
"zh": "你好,最近好吗?见到你很高兴。",
"th": "สวัสดีครับ/ค่ะ, สบายดีไหม? ยินดีที่ได้พบคุณ",
"vi": "Xin chào, bạn khỏe không? Rất vui được gặp bạn.",
"yue": "你好,最近點呀?見到你好開心。",
// European Languages
"es": "¡Hola, ¿cómo estás? Encantado de conocerte.",
@ -115,4 +116,4 @@ class WhisperPrompt: ObservableObject {
updateTranscriptionPrompt()
}
}
}
}