feat: Added Cantonese support in WhisperPrompt.

Improved code readability and organization.
This commit is contained in:
WingCH 2025-03-27 22:27:30 +08:00
parent 9a3fc048b1
commit b35103a37f
No known key found for this signature in database
3 changed files with 205 additions and 153 deletions

View File

@ -5,37 +5,60 @@ struct PredefinedModel: Identifiable, Hashable {
let name: String let name: String
let displayName: String let displayName: String
let size: String let size: String
let language: String let supportedLanguages: [String: String]
let description: String let description: String
let speed: Double let speed: Double
let accuracy: Double let accuracy: Double
let ramUsage: Double let ramUsage: Double
let hash: String let hash: String
var downloadURL: String { var downloadURL: String {
"https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)" "https://huggingface.co/ggerganov/whisper.cpp/resolve/main/\(filename)"
} }
var filename: String { var filename: String {
"\(name).bin" "\(name).bin"
} }
func hash(into hasher: inout Hasher) { func hash(into hasher: inout Hasher) {
hasher.combine(id) hasher.combine(id)
} }
static func == (lhs: PredefinedModel, rhs: PredefinedModel) -> Bool { static func == (lhs: PredefinedModel, rhs: PredefinedModel) -> Bool {
lhs.id == rhs.id lhs.id == rhs.id
} }
/// `true` when this model's `supportedLanguages` table holds more than one entry.
var isMultilingualModel: Bool {
    let languageCount = supportedLanguages.count
    return languageCount > 1
}
/// Short label describing the model's language coverage, derived from
/// `isMultilingualModel`.
var language: String {
    if isMultilingualModel {
        return "Multilingual"
    }
    return "English-only"
}
} }
struct PredefinedModels { enum PredefinedModels {
/// Builds the language-code → display-name table for a model.
///
/// - Parameters:
///   - isMultilingual: When `false`, the result contains English only.
///   - isLargeV3: When `true` (and `isMultilingual` is `true`), the full
///     `allLanguages` table is returned, Cantonese included.
/// - Returns: The language table matching the given flags.
static func getLanguageDictionary(
    isMultilingual: Bool,
    isLargeV3: Bool = false
) -> [String: String] {
    // English-only models ignore `isLargeV3` entirely.
    guard isMultilingual else {
        return ["en": "English"]
    }

    // Large v3 models support all languages including Cantonese.
    guard !isLargeV3 else {
        return allLanguages
    }

    // Every other multilingual model gets the full table minus Cantonese.
    return allLanguages.filter { entry in entry.key != "yue" }
}
static let models: [PredefinedModel] = [ static let models: [PredefinedModel] = [
PredefinedModel( PredefinedModel(
name: "ggml-tiny", name: "ggml-tiny",
displayName: "Tiny", displayName: "Tiny",
size: "75 MiB", size: "75 MiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Tiny model, fastest, least accurate, supports multiple languages", description: "Tiny model, fastest, least accurate, supports multiple languages",
speed: 0.95, speed: 0.95,
accuracy: 0.6, accuracy: 0.6,
@ -46,7 +69,7 @@ struct PredefinedModels {
name: "ggml-tiny.en", name: "ggml-tiny.en",
displayName: "Tiny (English)", displayName: "Tiny (English)",
size: "75 MiB", size: "75 MiB",
language: "English", supportedLanguages: getLanguageDictionary(isMultilingual: false),
description: "Tiny model optimized for English, fastest, least accurate", description: "Tiny model optimized for English, fastest, least accurate",
speed: 0.95, speed: 0.95,
accuracy: 0.65, accuracy: 0.65,
@ -57,8 +80,9 @@ struct PredefinedModels {
name: "ggml-base", name: "ggml-base",
displayName: "Base", displayName: "Base",
size: "142 MiB", size: "142 MiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Base model, good balance of speed and accuracy, supports multiple languages", description:
"Base model, good balance of speed and accuracy, supports multiple languages",
speed: 0.8, speed: 0.8,
accuracy: 0.75, accuracy: 0.75,
ramUsage: 0.5, ramUsage: 0.5,
@ -68,7 +92,7 @@ struct PredefinedModels {
name: "ggml-base.en", name: "ggml-base.en",
displayName: "Base (English)", displayName: "Base (English)",
size: "142 MiB", size: "142 MiB",
language: "English", supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Base model optimized for English, good balance of speed and accuracy", description: "Base model optimized for English, good balance of speed and accuracy",
speed: 0.8, speed: 0.8,
accuracy: 0.8, accuracy: 0.8,
@ -79,8 +103,9 @@ struct PredefinedModels {
name: "ggml-small", name: "ggml-small",
displayName: "Small", displayName: "Small",
size: "466 MiB", size: "466 MiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Small model, slower but more accurate than base, supports multiple languages", description:
"Small model, slower but more accurate than base, supports multiple languages",
speed: 0.6, speed: 0.6,
accuracy: 0.85, accuracy: 0.85,
ramUsage: 0.7, ramUsage: 0.7,
@ -90,7 +115,7 @@ struct PredefinedModels {
name: "ggml-small.en", name: "ggml-small.en",
displayName: "Small (English)", displayName: "Small (English)",
size: "466 MiB", size: "466 MiB",
language: "English", supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Small model optimized for English, slower but more accurate than base", description: "Small model optimized for English, slower but more accurate than base",
speed: 0.6, speed: 0.6,
accuracy: 0.9, accuracy: 0.9,
@ -101,7 +126,7 @@ struct PredefinedModels {
name: "ggml-medium", name: "ggml-medium",
displayName: "Medium", displayName: "Medium",
size: "1.5 GiB", size: "1.5 GiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true),
description: "Medium model, slow but very accurate, supports multiple languages", description: "Medium model, slow but very accurate, supports multiple languages",
speed: 0.4, speed: 0.4,
accuracy: 0.92, accuracy: 0.92,
@ -112,7 +137,7 @@ struct PredefinedModels {
name: "ggml-medium.en", name: "ggml-medium.en",
displayName: "Medium (English)", displayName: "Medium (English)",
size: "1.5 GiB", size: "1.5 GiB",
language: "English", supportedLanguages: PredefinedModels.getLanguageDictionary(isMultilingual: false),
description: "Medium model optimized for English, slow but very accurate", description: "Medium model optimized for English, slow but very accurate",
speed: 0.4, speed: 0.4,
accuracy: 0.95, accuracy: 0.95,
@ -123,7 +148,7 @@ struct PredefinedModels {
name: "ggml-large-v3", name: "ggml-large-v3",
displayName: "Large v3", displayName: "Large v3",
size: "2.9 GiB", size: "2.9 GiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Large model v3, very slow but most accurate, supports multiple languages", description: "Large model v3, very slow but most accurate, supports multiple languages",
speed: 0.2, speed: 0.2,
accuracy: 0.98, accuracy: 0.98,
@ -134,7 +159,7 @@ struct PredefinedModels {
name: "ggml-large-v3-q5_0", name: "ggml-large-v3-q5_0",
displayName: "Large v3 (Quantized)", displayName: "Large v3 (Quantized)",
size: "1.1 GiB", size: "1.1 GiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Quantized version of Large v3, faster with slightly lower accuracy", description: "Quantized version of Large v3, faster with slightly lower accuracy",
speed: 0.3, speed: 0.3,
accuracy: 0.97, accuracy: 0.97,
@ -145,8 +170,9 @@ struct PredefinedModels {
name: "ggml-large-v3-turbo", name: "ggml-large-v3-turbo",
displayName: "Large v3 Turbo", displayName: "Large v3 Turbo",
size: "1.5 GiB", size: "1.5 GiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages", description:
"Large model v3 Turbo, faster than v3 with similar accuracy, supports multiple languages",
speed: 0.5, speed: 0.5,
accuracy: 0.97, accuracy: 0.97,
ramUsage: 1.8, ramUsage: 1.8,
@ -156,12 +182,117 @@ struct PredefinedModels {
name: "ggml-large-v3-turbo-q5_0", name: "ggml-large-v3-turbo-q5_0",
displayName: "Large v3 Turbo (Quantized)", displayName: "Large v3 Turbo (Quantized)",
size: "547 MiB", size: "547 MiB",
language: "Multilingual", supportedLanguages: getLanguageDictionary(isMultilingual: true, isLargeV3: true),
description: "Quantized version of Large v3 Turbo, faster with slightly lower accuracy", description: "Quantized version of Large v3 Turbo, faster with slightly lower accuracy",
speed: 0.6, speed: 0.6,
accuracy: 0.96, accuracy: 0.96,
ramUsage: 1.0, ramUsage: 1.0,
hash: "e050f7970618a659205450ad97eb95a18d69c9ee" hash: "e050f7970618a659205450ad97eb95a18d69c9ee"
) ),
]
static let allLanguages = [
"auto": "Auto-detect",
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bengali",
"bo": "Tibetan",
"br": "Breton",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"ga": "Irish",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"ht": "Haitian Creole",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"la": "Latin",
"lb": "Luxembourgish",
"ln": "Lingala",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mg": "Malagasy",
"mi": "Maori",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"mt": "Maltese",
"my": "Myanmar",
"ne": "Nepali",
"nl": "Dutch",
"nn": "Norwegian Nynorsk",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sn": "Shona",
"so": "Somali",
"sq": "Albanian",
"sr": "Serbian",
"su": "Sundanese",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"tg": "Tajik",
"th": "Thai",
"tk": "Turkmen",
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"ug": "Uyghur",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"yi": "Yiddish",
"yo": "Yoruba",
"yue": "Cantonese",
"zh": "Chinese",
] ]
} }

View File

@ -2,8 +2,8 @@ import SwiftUI
// Define a display mode for flexible usage // Define a display mode for flexible usage
enum LanguageDisplayMode { enum LanguageDisplayMode {
case full // For settings page with descriptions case full // For settings page with descriptions
case menuItem // For menu bar with compact layout case menuItem // For menu bar with compact layout
} }
struct LanguageSelectionView: View { struct LanguageSelectionView: View {
@ -11,115 +11,11 @@ struct LanguageSelectionView: View {
@AppStorage("SelectedLanguage") private var selectedLanguage: String = "en" @AppStorage("SelectedLanguage") private var selectedLanguage: String = "en"
// Add display mode parameter with full as the default // Add display mode parameter with full as the default
var displayMode: LanguageDisplayMode = .full var displayMode: LanguageDisplayMode = .full
let languages = [
"auto": "Auto-detect",
"af": "Afrikaans",
"am": "Amharic",
"ar": "Arabic",
"as": "Assamese",
"az": "Azerbaijani",
"ba": "Bashkir",
"be": "Belarusian",
"bg": "Bulgarian",
"bn": "Bengali",
"bo": "Tibetan",
"br": "Breton",
"bs": "Bosnian",
"ca": "Catalan",
"cs": "Czech",
"cy": "Welsh",
"da": "Danish",
"de": "German",
"el": "Greek",
"en": "English",
"es": "Spanish",
"et": "Estonian",
"eu": "Basque",
"fa": "Persian",
"fi": "Finnish",
"fo": "Faroese",
"fr": "French",
"ga": "Irish",
"gl": "Galician",
"gu": "Gujarati",
"ha": "Hausa",
"he": "Hebrew",
"hi": "Hindi",
"hr": "Croatian",
"ht": "Haitian Creole",
"hu": "Hungarian",
"hy": "Armenian",
"id": "Indonesian",
"is": "Icelandic",
"it": "Italian",
"ja": "Japanese",
"jw": "Javanese",
"ka": "Georgian",
"kk": "Kazakh",
"km": "Khmer",
"kn": "Kannada",
"ko": "Korean",
"la": "Latin",
"lb": "Luxembourgish",
"ln": "Lingala",
"lo": "Lao",
"lt": "Lithuanian",
"lv": "Latvian",
"mg": "Malagasy",
"mi": "Maori",
"mk": "Macedonian",
"ml": "Malayalam",
"mn": "Mongolian",
"mr": "Marathi",
"ms": "Malay",
"mt": "Maltese",
"my": "Myanmar",
"ne": "Nepali",
"nl": "Dutch",
"nn": "Norwegian Nynorsk",
"no": "Norwegian",
"oc": "Occitan",
"pa": "Punjabi",
"pl": "Polish",
"ps": "Pashto",
"pt": "Portuguese",
"ro": "Romanian",
"ru": "Russian",
"sa": "Sanskrit",
"sd": "Sindhi",
"si": "Sinhala",
"sk": "Slovak",
"sl": "Slovenian",
"sn": "Shona",
"so": "Somali",
"sq": "Albanian",
"sr": "Serbian",
"su": "Sundanese",
"sv": "Swedish",
"sw": "Swahili",
"ta": "Tamil",
"te": "Telugu",
"tg": "Tajik",
"th": "Thai",
"tk": "Turkmen",
"tl": "Tagalog",
"tr": "Turkish",
"tt": "Tatar",
"ug": "Uyghur",
"uk": "Ukrainian",
"ur": "Urdu",
"uz": "Uzbek",
"vi": "Vietnamese",
"yi": "Yiddish",
"yo": "Yoruba",
"zh": "Chinese"
]
private func updateLanguage(_ language: String) { private func updateLanguage(_ language: String) {
// Update UI state - the UserDefaults updating is now automatic with @AppStorage // Update UI state - the UserDefaults updating is now automatic with @AppStorage
selectedLanguage = language selectedLanguage = language
// Post notification for language change // Post notification for language change
NotificationCenter.default.post(name: .languageDidChange, object: nil) NotificationCenter.default.post(name: .languageDidChange, object: nil)
} }
@ -130,14 +26,26 @@ struct LanguageSelectionView: View {
let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) else { let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) else {
return false return false
} }
return predefinedModel.language == "Multilingual" return predefinedModel.isMultilingualModel
} }
/// Returns the language table of the currently selected model.
///
/// Matches `whisperState.currentModel` against `PredefinedModels.models` by
/// name; when there is no current model or no predefined entry with that
/// name, falls back to an English-only table.
private func getCurrentModelLanguages() -> [String: String] {
    if let activeModel = whisperState.currentModel {
        // Return on the first predefined model with a matching name.
        for candidate in PredefinedModels.models where candidate.name == activeModel.name {
            return candidate.supportedLanguages
        }
    }
    return ["en": "English"]  // Default to English if no model found
}
// Get the display name of the current language // Get the display name of the current language
private func currentLanguageDisplayName() -> String { private func currentLanguageDisplayName() -> String {
return languages[selectedLanguage] ?? "Unknown" return getCurrentModelLanguages()[selectedLanguage] ?? "Unknown"
} }
var body: some View { var body: some View {
switch displayMode { switch displayMode {
case .full: case .full:
@ -146,20 +54,26 @@ struct LanguageSelectionView: View {
menuItemView menuItemView
} }
} }
// The original full view layout for settings page // The original full view layout for settings page
private var fullView: some View { private var fullView: some View {
VStack(alignment: .leading, spacing: 16) { VStack(alignment: .leading, spacing: 16) {
Text("Transcription Language") Text("Transcription Language")
.font(.headline) .font(.headline)
if let currentModel = whisperState.currentModel, if let currentModel = whisperState.currentModel,
let predefinedModel = PredefinedModels.models.first(where: { $0.name == currentModel.name }) { let predefinedModel = PredefinedModels.models.first(where: {
$0.name == currentModel.name
if predefinedModel.language == "Multilingual" { })
{
if isMultilingualModel() {
VStack(alignment: .leading, spacing: 8) { VStack(alignment: .leading, spacing: 8) {
Picker("Select Language", selection: $selectedLanguage) { Picker("Select Language", selection: $selectedLanguage) {
ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in ForEach(
predefinedModel.supportedLanguages.sorted(by: {
$0.value < $1.value
}), id: \.key
) { key, value in
Text(value).tag(key) Text(value).tag(key)
} }
} }
@ -167,14 +81,16 @@ struct LanguageSelectionView: View {
.onChange(of: selectedLanguage) { newValue in .onChange(of: selectedLanguage) { newValue in
updateLanguage(newValue) updateLanguage(newValue)
} }
Text("Current model: \(predefinedModel.displayName)") Text("Current model: \(predefinedModel.displayName)")
.font(.caption) .font(.caption)
.foregroundColor(.secondary) .foregroundColor(.secondary)
Text("This model supports multiple languages. You can choose auto-detect or select a specific language.") Text(
.font(.caption) "This model supports multiple languages. You can choose auto-detect or select a specific language."
.foregroundColor(.secondary) )
.font(.caption)
.foregroundColor(.secondary)
} }
} else { } else {
// For English-only models, force set language to English // For English-only models, force set language to English
@ -182,14 +98,16 @@ struct LanguageSelectionView: View {
Text("Language: English") Text("Language: English")
.font(.subheadline) .font(.subheadline)
.foregroundColor(.primary) .foregroundColor(.primary)
Text("Current model: \(predefinedModel.displayName)") Text("Current model: \(predefinedModel.displayName)")
.font(.caption) .font(.caption)
.foregroundColor(.secondary) .foregroundColor(.secondary)
Text("This is an English-optimized model and only supports English transcription.") Text(
.font(.caption) "This is an English-optimized model and only supports English transcription."
.foregroundColor(.secondary) )
.font(.caption)
.foregroundColor(.secondary)
} }
.onAppear { .onAppear {
// Ensure English is set when viewing English-only model // Ensure English is set when viewing English-only model
@ -207,13 +125,15 @@ struct LanguageSelectionView: View {
.background(Color(NSColor.controlBackgroundColor)) .background(Color(NSColor.controlBackgroundColor))
.cornerRadius(10) .cornerRadius(10)
} }
// New compact view for menu bar // New compact view for menu bar
private var menuItemView: some View { private var menuItemView: some View {
Group { Group {
if isMultilingualModel() { if isMultilingualModel() {
Menu { Menu {
ForEach(languages.sorted(by: { $0.value < $1.value }), id: \.key) { key, value in ForEach(
getCurrentModelLanguages().sorted(by: { $0.value < $1.value }), id: \.key
) { key, value in
Button { Button {
updateLanguage(key) updateLanguage(key)
} label: { } label: {

View File

@ -24,6 +24,7 @@ class WhisperPrompt: ObservableObject {
"zh": "你好,最近好吗?见到你很高兴。", "zh": "你好,最近好吗?见到你很高兴。",
"th": "สวัสดีครับ/ค่ะ, สบายดีไหม? ยินดีที่ได้พบคุณ", "th": "สวัสดีครับ/ค่ะ, สบายดีไหม? ยินดีที่ได้พบคุณ",
"vi": "Xin chào, bạn khỏe không? Rất vui được gặp bạn.", "vi": "Xin chào, bạn khỏe không? Rất vui được gặp bạn.",
"yue": "你好,最近點呀?見到你好開心。",
// European Languages // European Languages
"es": "¡Hola, ¿cómo estás? Encantado de conocerte.", "es": "¡Hola, ¿cómo estás? Encantado de conocerte.",
@ -115,4 +116,4 @@ class WhisperPrompt: ObservableObject {
updateTranscriptionPrompt() updateTranscriptionPrompt()
} }
} }
} }